From 17c99291918e3541f0693b8e2945e4355334b309 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Tue, 20 Feb 2024 22:14:39 +0100 Subject: [PATCH 01/12] feat: named capture groups & backreferences wip chore: more tests feat: improved refs refactor: merge name and ref refactor: self code review refactor: tweaks refactor: rename reference to ref feat: example with html tags chore: self code review --- jest-setup.ts | 2 + src/__tests__/example-html-tags.ts | 42 +++++++++ src/constructs/__tests__/capture.test.tsx | 106 +++++++++++++++++++++- src/constructs/capture.ts | 54 ++++++++++- src/index.ts | 8 +- test-utils/to-equal-regex.ts | 13 ++- test-utils/to-match-all-named-groups.ts | 36 ++++++++ test-utils/to-match-groups.ts | 6 +- test-utils/to-match-named-groups.ts | 36 ++++++++ website/docs/api/captures.md | 54 +++++++++++ website/docs/api/constructs.md | 50 ---------- website/sidebars.js | 1 + 12 files changed, 348 insertions(+), 60 deletions(-) create mode 100644 src/__tests__/example-html-tags.ts create mode 100644 test-utils/to-match-all-named-groups.ts create mode 100644 test-utils/to-match-named-groups.ts create mode 100644 website/docs/api/captures.md diff --git a/jest-setup.ts b/jest-setup.ts index 3411846..2999085 100644 --- a/jest-setup.ts +++ b/jest-setup.ts @@ -1,4 +1,6 @@ import './test-utils/to-equal-regex'; import './test-utils/to-match-groups'; import './test-utils/to-match-all-groups'; +import './test-utils/to-match-named-groups'; +import './test-utils/to-match-all-named-groups'; import './test-utils/to-match-string'; diff --git a/src/__tests__/example-html-tags.ts b/src/__tests__/example-html-tags.ts new file mode 100644 index 0000000..33b946d --- /dev/null +++ b/src/__tests__/example-html-tags.ts @@ -0,0 +1,42 @@ +import { + any, + buildRegExp, + capture, + charClass, + charRange, + digit, + oneOrMore, + ref, + zeroOrMore, +} from '..'; + +test('example: html tag matching', () => { + const tagName = 
oneOrMore(charClass(charRange('a', 'z'), digit)); + + const tagRef = ref('tag'); + const tagMatcher = buildRegExp( + [ + '<', + capture(tagName, { as: tagRef }), + '>', + capture(zeroOrMore(any, { greedy: false }), { as: 'content' }), + '', + ], + { ignoreCase: true, global: true }, + ); + + expect(tagMatcher).toMatchAllNamedGroups('abc', [{ tag: 'a', content: 'abc' }]); + expect(tagMatcher).toMatchAllNamedGroups('abc', [ + { tag: 'a', content: 'abc' }, + ]); + expect(tagMatcher).toMatchAllNamedGroups('abc1abc2', [ + { tag: 'a', content: 'abc1' }, + { tag: 'b', content: 'abc2' }, + ]); + + expect(tagMatcher).not.toMatchString('abc'); + + expect(tagMatcher).toEqualRegex('<(?[a-z\\d]+)>(?.*?)<\\/\\k>'); +}); diff --git a/src/constructs/__tests__/capture.test.tsx b/src/constructs/__tests__/capture.test.tsx index 422f27f..5b9cfc7 100644 --- a/src/constructs/__tests__/capture.test.tsx +++ b/src/constructs/__tests__/capture.test.tsx @@ -1,4 +1,15 @@ -import { capture, oneOrMore } from '../..'; +import { + any, + anyOf, + buildRegExp, + capture, + digit, + inverted, + oneOrMore, + ref, + word, + wordBoundary, +} from '../..'; test('`capture` pattern', () => { expect(capture('a')).toEqualRegex(/(a)/); @@ -12,3 +23,96 @@ test('`capture` matching', () => { expect(['a', capture('b')]).toMatchGroups('ab', ['ab', 'b']); expect(['a', capture('b'), capture('c')]).toMatchGroups('abc', ['abc', 'b', 'c']); }); + +test('named `capture` pattern', () => { + expect(capture('a', { as: 'xyz' })).toEqualRegex('(?a)'); + expect(capture('abc', { as: 'xyz' })).toEqualRegex('(?abc)'); + expect(capture(oneOrMore('abc'), { as: 'xyz' })).toEqualRegex('(?(?:abc)+)'); + expect(oneOrMore(capture('abc', { as: 'xyz' }))).toEqualRegex('(?abc)+'); +}); + +test('named `capture` matching', () => { + expect(capture('b', { as: 'x1' })).toMatchGroups('ab', ['b', 'b']); + expect(capture('b', { as: 'x1' })).toMatchNamedGroups('ab', { x1: 'b' }); + + expect(['a', capture('b', { as: 'x1' })]).toMatchGroups('ab', 
['ab', 'b']); + expect(['a', capture('b', { as: 'x1' })]).toMatchNamedGroups('ab', { x1: 'b' }); + + expect([capture('a'), capture('b', { as: 'x1' }), capture('c', { as: 'x2' })]).toMatchGroups( + 'abc', + ['abc', 'a', 'b', 'c'], + ); + expect([capture('a'), capture('b', { as: 'x1' }), capture('c', { as: 'x2' })]).toMatchNamedGroups( + 'abc', + { x1: 'b', x2: 'c' }, + ); +}); + +// Should have `ref0` as name. +const firstRef = ref(); + +test('`reference` pattern', () => { + expect([firstRef]).toEqualRegex(/\k/); + expect([ref('xyz')]).toEqualRegex(/\k/); + expect([capture(any, { as: firstRef }), ' ', firstRef]).toEqualRegex('(?.) \\k'); + + const otherRef = ref('r123'); + expect(['xx', capture(any, { as: otherRef }), ' ', otherRef, 'xx']).toEqualRegex( + 'xx(?.) \\kxx', + ); +}); + +test('`reference` matching basic case', () => { + const someRef = ref(); + expect([capture(word, { as: someRef }), someRef]).toMatchString('aa'); + expect([capture(digit, { as: someRef }), someRef]).toMatchString('11'); + + expect([capture(any, { as: someRef }), someRef]).not.toMatchString('ab'); + + expect([capture(digit, { as: someRef }), someRef]).not.toMatchString('1a'); + expect([capture(digit, { as: someRef }), someRef]).not.toMatchString('a1'); +}); + +test('`reference` matching HTML attributes', () => { + const quoteRef = ref('quote'); + const quote = anyOf('"\''); + const htmlAttributeRegex = buildRegExp([ + wordBoundary, + capture(oneOrMore(word), { as: 'name' }), + '=', + capture(quote, { as: quoteRef }), + capture(oneOrMore(inverted(quote)), { as: 'value' }), + quoteRef, + ]); + + expect(htmlAttributeRegex).toMatchNamedGroups('a="b"', { + name: 'a', + quote: '"', + value: 'b', + }); + expect(htmlAttributeRegex).toMatchNamedGroups('aa="bbb"', { + name: 'aa', + quote: '"', + value: 'bbb', + }); + expect(htmlAttributeRegex).toMatchNamedGroups(`aa='bbb'`, { + name: 'aa', + quote: `'`, + value: 'bbb', + }); + expect(htmlAttributeRegex).toMatchNamedGroups('', { + quote: '"', + 
name: 'type', + value: 'number', + }); + expect(htmlAttributeRegex).toMatchNamedGroups(``, { + quote: "'", + name: 'type', + value: 'number', + }); + + expect(htmlAttributeRegex).not.toMatchString(`aa="bbb'`); + expect(htmlAttributeRegex).not.toMatchString(`aa='bbb"`); + expect(htmlAttributeRegex).not.toMatchString(``); +}); diff --git a/src/constructs/capture.ts b/src/constructs/capture.ts index ca2ce40..a1c0683 100644 --- a/src/constructs/capture.ts +++ b/src/constructs/capture.ts @@ -6,19 +6,71 @@ import type { RegexConstruct, RegexElement, RegexSequence } from '../types'; export interface Capture extends RegexConstruct { type: 'capture'; children: RegexElement[]; + options?: CaptureOptions; } -export function capture(sequence: RegexSequence): Capture { +export interface CaptureOptions { + /** + * Either a name to be given to the capturing group or a `Reference` object ({@link ref}) + * that will allow to match the captured text again later. */ + as?: Backreference | string; +} + +export interface Backreference extends RegexConstruct { + type: 'reference'; + name: string; +} + +/** + * Creates a capturing group which allows the matched pattern to be available: + * - in the match results (`String.match`, `String.matchAll`, or `RegExp.exec`) + * - in the regex itself, through backreferences (@see ref) + */ +export function capture(sequence: RegexSequence, options?: CaptureOptions): Capture { return { type: 'capture', children: ensureArray(sequence), + options, encode: encodeCapture, }; } +let counter = 0; + +/** + * Creates a backreference to a capturing group. + * + * Backreferences allows to match the same text that was previously captured by a capturing group. + * + * @param name - Name to be given to the capturing group which receives this `Backreference`. If not provided, a unique name will be generated. + */ +export function ref(name?: string): Backreference { + return { + type: 'reference', + name: name ?? 
`ref${counter++}`, + encode: encodeReference, + }; +} + function encodeCapture(this: Capture): EncodeResult { + const ref = this.options?.as; + if (ref) { + const refName = typeof ref === 'string' ? ref : ref?.name; + return { + precedence: 'atom', + pattern: `(?<${refName}>${encodeSequence(this.children).pattern})`, + }; + } + return { precedence: 'atom', pattern: `(${encodeSequence(this.children).pattern})`, }; } + +function encodeReference(this: Backreference): EncodeResult { + return { + precedence: 'atom', + pattern: `\\k<${this.name}>`, + }; +} diff --git a/src/index.ts b/src/index.ts index e0576b0..14b922f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,7 +1,13 @@ +// Types export type * from './types'; +export type { CaptureOptions } from './constructs/capture'; +export type { QuantifierOptions } from './constructs/quantifiers'; +export type { RepeatOptions } from './constructs/repeat'; +// Builders export { buildPattern, buildRegExp } from './builders'; +// Constructs export { endOfString, nonWordBoundary, @@ -9,7 +15,7 @@ export { startOfString, wordBoundary, } from './constructs/anchors'; -export { capture } from './constructs/capture'; +export { capture, ref } from './constructs/capture'; export { any, anyOf, diff --git a/test-utils/to-equal-regex.ts b/test-utils/to-equal-regex.ts index 81e79df..786bbd4 100644 --- a/test-utils/to-equal-regex.ts +++ b/test-utils/to-equal-regex.ts @@ -4,7 +4,7 @@ import { wrapRegExp } from './utils'; export function toEqualRegex( this: jest.MatcherContext, received: RegExp | RegexSequence, - expected: RegExp, + expected: RegExp | string, ) { received = wrapRegExp(received); @@ -12,10 +12,15 @@ export function toEqualRegex( isNot: this.isNot, }; + const expectedSource = typeof expected === 'string' ? expected : expected.source; + const expectedFlags = typeof expected === 'string' ? 
undefined : expected.flags; + return { - pass: expected.source === received.source && expected.flags === received.flags, + pass: + expectedSource === received.source && + (expectedFlags === undefined || expectedFlags === received.flags), message: () => - this.utils.matcherHint('toHavePattern', undefined, undefined, options) + + this.utils.matcherHint('toEqualRegex', undefined, undefined, options) + '\n\n' + `Expected: ${this.isNot ? 'not ' : ''}${this.utils.printExpected(expected)}\n` + `Received: ${this.utils.printReceived(received)}`, @@ -28,7 +33,7 @@ declare global { namespace jest { // eslint-disable-next-line @typescript-eslint/no-unused-vars interface Matchers { - toEqualRegex(expected: RegExp): R; + toEqualRegex(expected: RegExp | string): R; } } } diff --git a/test-utils/to-match-all-named-groups.ts b/test-utils/to-match-all-named-groups.ts new file mode 100644 index 0000000..d3c3d02 --- /dev/null +++ b/test-utils/to-match-all-named-groups.ts @@ -0,0 +1,36 @@ +import type { RegexSequence } from '../src/types'; +import { wrapRegExp } from './utils'; + +export function toMatchAllNamedGroups( + this: jest.MatcherContext, + received: RegExp | RegexSequence, + inputText: string, + expectedGroups: Array>, +) { + const receivedRegex = wrapRegExp(received); + const matchResult = inputText.matchAll(receivedRegex); + const receivedGroups = matchResult ? [...matchResult].map((r) => r.groups) : null; + const options = { + isNot: this.isNot, + }; + + return { + pass: this.equals(receivedGroups, expectedGroups), + message: () => + this.utils.matcherHint('toMatchGroups', undefined, undefined, options) + + '\n\n' + + `Expected: ${this.isNot ? 
'not ' : ''}${this.utils.printExpected(expectedGroups)}\n` + + `Received: ${this.utils.printReceived(receivedGroups)}`, + }; +} + +expect.extend({ toMatchAllNamedGroups }); + +declare global { + namespace jest { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + interface Matchers { + toMatchAllNamedGroups(inputText: string, expectedGroups: Array>): R; + } + } +} diff --git a/test-utils/to-match-groups.ts b/test-utils/to-match-groups.ts index 32b1612..6feb6f5 100644 --- a/test-utils/to-match-groups.ts +++ b/test-utils/to-match-groups.ts @@ -4,11 +4,11 @@ import { wrapRegExp } from './utils'; export function toMatchGroups( this: jest.MatcherContext, received: RegExp | RegexSequence, - expectedString: string, + inputText: string, expectedGroups: string[], ) { const receivedRegex = wrapRegExp(received); - const matchResult = expectedString.match(receivedRegex); + const matchResult = inputText.match(receivedRegex); const receivedGroups = matchResult ? [...matchResult] : null; const options = { isNot: this.isNot, @@ -30,7 +30,7 @@ declare global { namespace jest { // eslint-disable-next-line @typescript-eslint/no-unused-vars interface Matchers { - toMatchGroups(input: string, expected: string[]): R; + toMatchGroups(inputText: string, expectedGroups: string[]): R; } } } diff --git a/test-utils/to-match-named-groups.ts b/test-utils/to-match-named-groups.ts new file mode 100644 index 0000000..b844a58 --- /dev/null +++ b/test-utils/to-match-named-groups.ts @@ -0,0 +1,36 @@ +import type { RegexSequence } from '../src/types'; +import { wrapRegExp } from './utils'; + +export function toMatchNamedGroups( + this: jest.MatcherContext, + received: RegExp | RegexSequence, + inputText: string, + expectedGroups: Record, +) { + const receivedRegex = wrapRegExp(received); + const matchResult = inputText.match(receivedRegex); + const receivedGroups = matchResult ? 
matchResult.groups : null; + const options = { + isNot: this.isNot, + }; + + return { + pass: this.equals(receivedGroups, expectedGroups), + message: () => + this.utils.matcherHint('toMatchGroups', undefined, undefined, options) + + '\n\n' + + `Expected: ${this.isNot ? 'not ' : ''}${this.utils.printExpected(expectedGroups)}\n` + + `Received: ${this.utils.printReceived(receivedGroups)}`, + }; +} + +expect.extend({ toMatchNamedGroups }); + +declare global { + namespace jest { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + interface Matchers { + toMatchNamedGroups(inputText: string, expectedGroups: Record): R; + } + } +} diff --git a/website/docs/api/captures.md b/website/docs/api/captures.md new file mode 100644 index 0000000..a4a409b --- /dev/null +++ b/website/docs/api/captures.md @@ -0,0 +1,54 @@ +--- +id: captures +title: Captures +--- + +### `capture()` + +```ts +function capture( + sequence: RegexSequence, + options?: { + name?: string; + }, +): Capture; +``` + +Regex syntax: + +- `(...)` for capturing groups +- `(?...)` for named capturing groups + +Captures, also known as capturing groups, extract and store parts of the matched string for later use. + +Capture results are available using array-like [`match()` result object](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/match#using_match). + +#### Named groups + +When using `name` options, the group becomes a [named capturing group](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Named_capturing_group) allowing to refer to it using name instead of index. + +Named capture results are available using [`groups`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/match#using_named_capturing_groups) property on `match()` result. + +:::note + +TS Regex Builder does not have a construct for non-capturing groups. Such groups are implicitly added when required. 
E.g., `zeroOrMore("abc")` is encoded as `(?:abc)+`. + +::: + +### `ref()` + +```ts +function ref( + name?: string; +): Reference; +``` + +Regex syntax: `\k<...>`. + +References, also known as backreferences, allow matching the same text again that was previously matched by a capturing group. + +:::note + +TS Regex Builder doesn't support using ordinal backreferences (`\1`, `\2`, etc) because in complex regex patterns, these references are difficult to accurately use. + +::: diff --git a/website/docs/api/constructs.md b/website/docs/api/constructs.md index b6ab438..6653e7d 100644 --- a/website/docs/api/constructs.md +++ b/website/docs/api/constructs.md @@ -18,53 +18,3 @@ Regex syntax: `a|b|c`. The `choiceOf` (disjunction) construct matches one out of several possible sequences. It functions similarly to a logical OR operator in programming. It can match simple string options as well as complex patterns. Example: `choiceOf("color", "colour")` matches either `color` or `colour` pattern. - -### `capture()` - -```ts -function capture( - sequence: RegexSequence, -): Capture; -``` - -Regex syntax: `(...)`. - -Captures, also known as capturing groups, extract and store parts of the matched string for later use. - -:::note - -TS Regex Builder does not have a construct for non-capturing groups. Such groups are implicitly added when required. E.g., `zeroOrMore("abc")` is encoded as `(?:abc)+`. - -::: - -### `regex()` - -```ts -function regex( - sequence: RegexSequence, -): Regex; -``` - -Regex syntax: the pattern remains unchanged when wrapped by this construct. - -This construct is a no-op operator that groups array of `RegexElements` into a single element for composition purposes. This is particularly useful for defining smaller sequence patterns as separate variables. 
- -Without `regex()`: - -```ts -const exponent = [anyOf('eE'), optional(anyOf('+-')), oneOrMore(digit)]; -const numberWithExponent = buildRegExp([ - oneOrMore(digit), - ...exponent, // Need to spread "exponent" as it's an array. -]); -``` - -With `regex()`: - -```ts -const exponent = regex([anyOf('eE'), optional(anyOf('+-')), oneOrMore(digit)]); -const numberWithExponent = buildRegExp([ - oneOrMore(digit), - exponent, // Easily compose "exponent" sequence as a single element. -]); -``` diff --git a/website/sidebars.js b/website/sidebars.js index 54f8e79..ed97527 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -29,6 +29,7 @@ export default { 'api/types', 'api/builder', 'api/constructs', + 'api/captures', 'api/quantifiers', 'api/character-classes', 'api/assertions', From ceeb5213daee7fa26a4e5f6a1e5cbde1c8b751e9 Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Wed, 13 Mar 2024 19:00:36 +0100 Subject: [PATCH 02/12] chore: tweak option names --- src/__tests__/example-html-tags.ts | 4 +- src/constructs/__tests__/capture.test.tsx | 47 ++++++++++++----------- src/constructs/capture.ts | 30 ++++++++------- website/docs/api/captures.md | 5 ++- 4 files changed, 45 insertions(+), 41 deletions(-) diff --git a/src/__tests__/example-html-tags.ts b/src/__tests__/example-html-tags.ts index 33b946d..8bca19a 100644 --- a/src/__tests__/example-html-tags.ts +++ b/src/__tests__/example-html-tags.ts @@ -17,9 +17,9 @@ test('example: html tag matching', () => { const tagMatcher = buildRegExp( [ '<', - capture(tagName, { as: tagRef }), + capture(tagName, { ref: tagRef }), '>', - capture(zeroOrMore(any, { greedy: false }), { as: 'content' }), + capture(zeroOrMore(any, { greedy: false }), { name: 'content' }), '', diff --git a/src/constructs/__tests__/capture.test.tsx b/src/constructs/__tests__/capture.test.tsx index 5b9cfc7..f01811b 100644 --- a/src/constructs/__tests__/capture.test.tsx +++ b/src/constructs/__tests__/capture.test.tsx @@ -25,27 +25,28 @@ test('`capture` 
matching', () => { }); test('named `capture` pattern', () => { - expect(capture('a', { as: 'xyz' })).toEqualRegex('(?a)'); - expect(capture('abc', { as: 'xyz' })).toEqualRegex('(?abc)'); - expect(capture(oneOrMore('abc'), { as: 'xyz' })).toEqualRegex('(?(?:abc)+)'); - expect(oneOrMore(capture('abc', { as: 'xyz' }))).toEqualRegex('(?abc)+'); + expect(capture('a', { name: 'xyz' })).toEqualRegex('(?a)'); + expect(capture('abc', { name: 'xyz' })).toEqualRegex('(?abc)'); + expect(capture(oneOrMore('abc'), { name: 'xyz' })).toEqualRegex('(?(?:abc)+)'); + expect(oneOrMore(capture('abc', { name: 'xyz' }))).toEqualRegex('(?abc)+'); }); test('named `capture` matching', () => { - expect(capture('b', { as: 'x1' })).toMatchGroups('ab', ['b', 'b']); - expect(capture('b', { as: 'x1' })).toMatchNamedGroups('ab', { x1: 'b' }); + expect(capture('b', { name: 'x1' })).toMatchGroups('ab', ['b', 'b']); + expect(capture('b', { name: 'x1' })).toMatchNamedGroups('ab', { x1: 'b' }); - expect(['a', capture('b', { as: 'x1' })]).toMatchGroups('ab', ['ab', 'b']); - expect(['a', capture('b', { as: 'x1' })]).toMatchNamedGroups('ab', { x1: 'b' }); + expect(['a', capture('b', { name: 'x1' })]).toMatchGroups('ab', ['ab', 'b']); + expect(['a', capture('b', { name: 'x1' })]).toMatchNamedGroups('ab', { x1: 'b' }); - expect([capture('a'), capture('b', { as: 'x1' }), capture('c', { as: 'x2' })]).toMatchGroups( + expect([capture('a'), capture('b', { name: 'x1' }), capture('c', { name: 'x2' })]).toMatchGroups( 'abc', ['abc', 'a', 'b', 'c'], ); - expect([capture('a'), capture('b', { as: 'x1' }), capture('c', { as: 'x2' })]).toMatchNamedGroups( - 'abc', - { x1: 'b', x2: 'c' }, - ); + expect([ + capture('a'), + capture('b', { name: 'x1' }), + capture('c', { name: 'x2' }), + ]).toMatchNamedGroups('abc', { x1: 'b', x2: 'c' }); }); // Should have `ref0` as name. 
@@ -54,23 +55,23 @@ const firstRef = ref(); test('`reference` pattern', () => { expect([firstRef]).toEqualRegex(/\k/); expect([ref('xyz')]).toEqualRegex(/\k/); - expect([capture(any, { as: firstRef }), ' ', firstRef]).toEqualRegex('(?.) \\k'); + expect([capture(any, { ref: firstRef }), ' ', firstRef]).toEqualRegex('(?.) \\k'); const otherRef = ref('r123'); - expect(['xx', capture(any, { as: otherRef }), ' ', otherRef, 'xx']).toEqualRegex( + expect(['xx', capture(any, { ref: otherRef }), ' ', otherRef, 'xx']).toEqualRegex( 'xx(?.) \\kxx', ); }); test('`reference` matching basic case', () => { const someRef = ref(); - expect([capture(word, { as: someRef }), someRef]).toMatchString('aa'); - expect([capture(digit, { as: someRef }), someRef]).toMatchString('11'); + expect([capture(word, { ref: someRef }), someRef]).toMatchString('aa'); + expect([capture(digit, { ref: someRef }), someRef]).toMatchString('11'); - expect([capture(any, { as: someRef }), someRef]).not.toMatchString('ab'); + expect([capture(any, { ref: someRef }), someRef]).not.toMatchString('ab'); - expect([capture(digit, { as: someRef }), someRef]).not.toMatchString('1a'); - expect([capture(digit, { as: someRef }), someRef]).not.toMatchString('a1'); + expect([capture(digit, { ref: someRef }), someRef]).not.toMatchString('1a'); + expect([capture(digit, { ref: someRef }), someRef]).not.toMatchString('a1'); }); test('`reference` matching HTML attributes', () => { @@ -78,10 +79,10 @@ test('`reference` matching HTML attributes', () => { const quote = anyOf('"\''); const htmlAttributeRegex = buildRegExp([ wordBoundary, - capture(oneOrMore(word), { as: 'name' }), + capture(oneOrMore(word), { name: 'name' }), '=', - capture(quote, { as: quoteRef }), - capture(oneOrMore(inverted(quote)), { as: 'value' }), + capture(quote, { ref: quoteRef }), + capture(oneOrMore(inverted(quote)), { name: 'value' }), quoteRef, ]); diff --git a/src/constructs/capture.ts b/src/constructs/capture.ts index a1c0683..1a77bdc 100644 --- 
a/src/constructs/capture.ts +++ b/src/constructs/capture.ts @@ -9,14 +9,16 @@ export interface Capture extends RegexConstruct { options?: CaptureOptions; } -export interface CaptureOptions { - /** - * Either a name to be given to the capturing group or a `Reference` object ({@link ref}) - * that will allow to match the captured text again later. */ - as?: Backreference | string; -} - -export interface Backreference extends RegexConstruct { +export type CaptureOptions = + | { + /** Name to be given to the capturing group. */ + name: string; + } + | { + /** Reference object ({@link ref}) that will allow to match the captured text again later. */ + ref: Reference; + }; +export interface Reference extends RegexConstruct { type: 'reference'; name: string; } @@ -44,7 +46,7 @@ let counter = 0; * * @param name - Name to be given to the capturing group which receives this `Backreference`. If not provided, a unique name will be generated. */ -export function ref(name?: string): Backreference { +export function ref(name?: string): Reference { return { type: 'reference', name: name ?? `ref${counter++}`, @@ -53,12 +55,12 @@ export function ref(name?: string): Backreference { } function encodeCapture(this: Capture): EncodeResult { - const ref = this.options?.as; - if (ref) { - const refName = typeof ref === 'string' ? ref : ref?.name; + // @ts-expect-error + const name = this.options?.ref?.name ?? 
this.options?.name; + if (name) { return { precedence: 'atom', - pattern: `(?<${refName}>${encodeSequence(this.children).pattern})`, + pattern: `(?<${name}>${encodeSequence(this.children).pattern})`, }; } @@ -68,7 +70,7 @@ function encodeCapture(this: Capture): EncodeResult { }; } -function encodeReference(this: Backreference): EncodeResult { +function encodeReference(this: Reference): EncodeResult { return { precedence: 'atom', pattern: `\\k<${this.name}>`, diff --git a/website/docs/api/captures.md b/website/docs/api/captures.md index a4a409b..ad20a89 100644 --- a/website/docs/api/captures.md +++ b/website/docs/api/captures.md @@ -10,14 +10,15 @@ function capture( sequence: RegexSequence, options?: { name?: string; + ref?: string; }, ): Capture; ``` Regex syntax: -- `(...)` for capturing groups -- `(?...)` for named capturing groups +- `(...)` for capturing groups (no `name` option) +- `(?...)` for named capturing groups (`name` or `ref` option) Captures, also known as capturing groups, extract and store parts of the matched string for later use. 
From 8995d131369e69adc6d62a4cb4c180b5d4cbb2f5 Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Wed, 13 Mar 2024 22:07:32 +0100 Subject: [PATCH 03/12] refactor: tweak API --- src/__tests__/example-html-tags.ts | 5 +++-- src/constructs/__tests__/capture.test.tsx | 16 ++++++++-------- src/constructs/capture.ts | 22 ++++++++++------------ website/docs/api/captures.md | 20 +++++++++++++++++--- 4 files changed, 38 insertions(+), 25 deletions(-) diff --git a/src/__tests__/example-html-tags.ts b/src/__tests__/example-html-tags.ts index 8bca19a..67e8749 100644 --- a/src/__tests__/example-html-tags.ts +++ b/src/__tests__/example-html-tags.ts @@ -12,14 +12,15 @@ import { test('example: html tag matching', () => { const tagName = oneOrMore(charClass(charRange('a', 'z'), digit)); + const tagContent = zeroOrMore(any, { greedy: false }); const tagRef = ref('tag'); const tagMatcher = buildRegExp( [ '<', - capture(tagName, { ref: tagRef }), + capture(tagName, { name: tagRef }), '>', - capture(zeroOrMore(any, { greedy: false }), { name: 'content' }), + capture(tagContent, { name: 'content' }), '', diff --git a/src/constructs/__tests__/capture.test.tsx b/src/constructs/__tests__/capture.test.tsx index f01811b..923455a 100644 --- a/src/constructs/__tests__/capture.test.tsx +++ b/src/constructs/__tests__/capture.test.tsx @@ -55,23 +55,23 @@ const firstRef = ref(); test('`reference` pattern', () => { expect([firstRef]).toEqualRegex(/\k/); expect([ref('xyz')]).toEqualRegex(/\k/); - expect([capture(any, { ref: firstRef }), ' ', firstRef]).toEqualRegex('(?.) \\k'); + expect([capture(any, { name: firstRef }), ' ', firstRef]).toEqualRegex('(?.) \\k'); const otherRef = ref('r123'); - expect(['xx', capture(any, { ref: otherRef }), ' ', otherRef, 'xx']).toEqualRegex( + expect(['xx', capture(any, { name: otherRef }), ' ', otherRef, 'xx']).toEqualRegex( 'xx(?.) 
\\kxx', ); }); test('`reference` matching basic case', () => { const someRef = ref(); - expect([capture(word, { ref: someRef }), someRef]).toMatchString('aa'); - expect([capture(digit, { ref: someRef }), someRef]).toMatchString('11'); + expect([capture(word, { name: someRef }), someRef]).toMatchString('aa'); + expect([capture(digit, { name: someRef }), someRef]).toMatchString('11'); - expect([capture(any, { ref: someRef }), someRef]).not.toMatchString('ab'); + expect([capture(any, { name: someRef }), someRef]).not.toMatchString('ab'); - expect([capture(digit, { ref: someRef }), someRef]).not.toMatchString('1a'); - expect([capture(digit, { ref: someRef }), someRef]).not.toMatchString('a1'); + expect([capture(digit, { name: someRef }), someRef]).not.toMatchString('1a'); + expect([capture(digit, { name: someRef }), someRef]).not.toMatchString('a1'); }); test('`reference` matching HTML attributes', () => { @@ -81,7 +81,7 @@ test('`reference` matching HTML attributes', () => { wordBoundary, capture(oneOrMore(word), { name: 'name' }), '=', - capture(quote, { ref: quoteRef }), + capture(quote, { name: quoteRef }), capture(oneOrMore(inverted(quote)), { name: 'value' }), quoteRef, ]); diff --git a/src/constructs/capture.ts b/src/constructs/capture.ts index 1a77bdc..ede2432 100644 --- a/src/constructs/capture.ts +++ b/src/constructs/capture.ts @@ -9,15 +9,13 @@ export interface Capture extends RegexConstruct { options?: CaptureOptions; } -export type CaptureOptions = - | { - /** Name to be given to the capturing group. */ - name: string; - } - | { - /** Reference object ({@link ref}) that will allow to match the captured text again later. */ - ref: Reference; - }; +export type CaptureOptions = { + /** + * Name to be given to the capturing group can either by a string or {@link ref} instance. 
+ */ + name: string | Reference; +}; + export interface Reference extends RegexConstruct { type: 'reference'; name: string; @@ -40,11 +38,11 @@ export function capture(sequence: RegexSequence, options?: CaptureOptions): Capt let counter = 0; /** - * Creates a backreference to a capturing group. + * Creates a reference (a.k.a. backreference) to a capturing group. * * Backreferences allows to match the same text that was previously captured by a capturing group. * - * @param name - Name to be given to the capturing group which receives this `Backreference`. If not provided, a unique name will be generated. + * @param name - Name to be given to the capturing group which receives this reference. If not provided, a unique name will be generated. */ export function ref(name?: string): Reference { return { @@ -56,7 +54,7 @@ export function ref(name?: string): Reference { function encodeCapture(this: Capture): EncodeResult { // @ts-expect-error - const name = this.options?.ref?.name ?? this.options?.name; + const name = this.options?.name?.name ?? this.options?.name; if (name) { return { precedence: 'atom', diff --git a/website/docs/api/captures.md b/website/docs/api/captures.md index ad20a89..cb817bd 100644 --- a/website/docs/api/captures.md +++ b/website/docs/api/captures.md @@ -10,7 +10,6 @@ function capture( sequence: RegexSequence, options?: { name?: string; - ref?: string; }, ): Capture; ``` @@ -18,7 +17,7 @@ function capture( Regex syntax: - `(...)` for capturing groups (no `name` option) -- `(?...)` for named capturing groups (`name` or `ref` option) +- `(?...)` for named capturing groups (`name` option) Captures, also known as capturing groups, extract and store parts of the matched string for later use. @@ -46,7 +45,22 @@ function ref( Regex syntax: `\k<...>`. -References, also known as backreferences, allow matching the same text again that was previously matched by a capturing group. 
+Creates a reference, also known as backreferences, which allows matching the same text again that was previously matched by a capturing group. To form a valid regex, reference need to be attached to named capturing group earlier in the expression. + +If you do not specify the reference name, a auto-generated unique value will be assigned for it. + +Usage with `capture()`: + +```ts +// Define ref with name "some". +const someRef = ref('some'); + +const regex = buildRegExp([ + capture(..., { ref: someRef}), // Here you make a named capture using name from `someRef`. + // ... + someRef, // Here you match the same text as captured in capture using `someRef`. + ]) +``` :::note From cc37124ea53359147c0ead518d2c5dd7b8cf4504 Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Wed, 13 Mar 2024 22:17:52 +0100 Subject: [PATCH 04/12] refactor: tweaks --- src/__tests__/example-html-tags.ts | 4 ++-- src/constructs/__tests__/capture.test.tsx | 12 ++++++------ src/constructs/capture.ts | 6 +++--- src/index.ts | 10 ++-------- website/docs/api/captures.md | 16 +++++++++------- 5 files changed, 22 insertions(+), 26 deletions(-) diff --git a/src/__tests__/example-html-tags.ts b/src/__tests__/example-html-tags.ts index 67e8749..d548a87 100644 --- a/src/__tests__/example-html-tags.ts +++ b/src/__tests__/example-html-tags.ts @@ -6,7 +6,7 @@ import { charRange, digit, oneOrMore, - ref, + reference, zeroOrMore, } from '..'; @@ -14,7 +14,7 @@ test('example: html tag matching', () => { const tagName = oneOrMore(charClass(charRange('a', 'z'), digit)); const tagContent = zeroOrMore(any, { greedy: false }); - const tagRef = ref('tag'); + const tagRef = reference('tag'); const tagMatcher = buildRegExp( [ '<', diff --git a/src/constructs/__tests__/capture.test.tsx b/src/constructs/__tests__/capture.test.tsx index 923455a..68f44c2 100644 --- a/src/constructs/__tests__/capture.test.tsx +++ b/src/constructs/__tests__/capture.test.tsx @@ -6,7 +6,7 @@ import { digit, inverted, oneOrMore, - ref, + 
reference, word, wordBoundary, } from '../..'; @@ -50,21 +50,21 @@ test('named `capture` matching', () => { }); // Should have `ref0` as name. -const firstRef = ref(); +const firstRef = reference(); test('`reference` pattern', () => { expect([firstRef]).toEqualRegex(/\k<ref0>/); - expect([ref('xyz')]).toEqualRegex(/\k<xyz>/); + expect([reference('xyz')]).toEqualRegex(/\k<xyz>/); expect([capture(any, { name: firstRef }), ' ', firstRef]).toEqualRegex('(?<ref0>.) \\k<ref0>'); - const otherRef = ref('r123'); + const otherRef = reference('r123'); expect(['xx', capture(any, { name: otherRef }), ' ', otherRef, 'xx']).toEqualRegex( 'xx(?<r123>.) \\k<r123>xx', ); }); test('`reference` matching basic case', () => { - const someRef = ref(); + const someRef = reference(); expect([capture(word, { name: someRef }), someRef]).toMatchString('aa'); expect([capture(digit, { name: someRef }), someRef]).toMatchString('11'); @@ -75,7 +75,7 @@ test('`reference` matching basic case', () => { }); test('`reference` matching HTML attributes', () => { - const quoteRef = ref('quote'); + const quoteRef = reference('quote'); const quote = anyOf('"\''); const htmlAttributeRegex = buildRegExp([ wordBoundary, diff --git a/src/constructs/capture.ts b/src/constructs/capture.ts index ede2432..b9d628d 100644 --- a/src/constructs/capture.ts +++ b/src/constructs/capture.ts @@ -11,7 +11,7 @@ export interface Capture extends RegexConstruct { export type CaptureOptions = { /** - * Name to be given to the capturing group can either by a string or {@link ref} instance. + * Name to be given to the capturing group can either by a string or {@link reference} instance.
*/ name: string | Reference; }; @@ -24,7 +24,7 @@ export interface Reference extends RegexConstruct { /** * Creates a capturing group which allows the matched pattern to be available: * - in the match results (`String.match`, `String.matchAll`, or `RegExp.exec`) - * - in the regex itself, through backreferences (@see ref) + * - in the regex itself, through {@link reference} */ export function capture(sequence: RegexSequence, options?: CaptureOptions): Capture { return { @@ -44,7 +44,7 @@ let counter = 0; * * @param name - Name to be given to the capturing group which receives this reference. If not provided, a unique name will be generated. */ -export function ref(name?: string): Reference { +export function reference(name?: string): Reference { return { type: 'reference', name: name ?? `ref${counter++}`, diff --git a/src/index.ts b/src/index.ts index 14b922f..79a6d7c 100644 --- a/src/index.ts +++ b/src/index.ts @@ -8,14 +8,8 @@ export type { RepeatOptions } from './constructs/repeat'; export { buildPattern, buildRegExp } from './builders'; // Constructs -export { - endOfString, - nonWordBoundary, - notWordBoundary, - startOfString, - wordBoundary, -} from './constructs/anchors'; -export { capture, ref } from './constructs/capture'; +export { endOfString, notWordBoundary, startOfString, wordBoundary } from './constructs/anchors'; +export { capture, reference } from './constructs/capture'; export { any, anyOf, diff --git a/website/docs/api/captures.md b/website/docs/api/captures.md index cb817bd..79d76a9 100644 --- a/website/docs/api/captures.md +++ b/website/docs/api/captures.md @@ -35,10 +35,10 @@ TS Regex Builder does not have a construct for non-capturing groups. Such groups ::: -### `ref()` +### `reference()` ```ts -function ref( +function reference( name?: string; ): Reference; ``` @@ -52,13 +52,15 @@ If you do not specify the reference name, a auto-generated unique value will be Usage with `capture()`: ```ts -// Define ref with name "some". 
-const someRef = ref('some'); +// Define reference with name "some". +const someRef = reference('some'); const regex = buildRegExp([ - capture(..., { ref: someRef}), // Here you make a named capture using name from `someRef`. - // ... - someRef, // Here you match the same text as captured in capture using `someRef`. + // Create a named capture using name from `someRef`. + capture(..., { name: someRef}), + // ... some other elements ... + // Match the same text as captured in capture using `someRef`. + someRef, ]) ``` From 3d5763169cdfdde43435718293b8667246a4ea26 Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Wed, 27 Mar 2024 12:58:53 +0100 Subject: [PATCH 05/12] chore: fix exports --- src/index.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/index.ts b/src/index.ts index 79a6d7c..bb3cd62 100644 --- a/src/index.ts +++ b/src/index.ts @@ -8,7 +8,13 @@ export type { RepeatOptions } from './constructs/repeat'; export { buildPattern, buildRegExp } from './builders'; // Constructs -export { endOfString, notWordBoundary, startOfString, wordBoundary } from './constructs/anchors'; +export { + endOfString, + nonWordBoundary, + notWordBoundary, + startOfString, + wordBoundary, +} from './constructs/anchors'; export { capture, reference } from './constructs/capture'; export { any, From 4d81a98be2075ebb50a0121837b27a0e4093c740 Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Wed, 27 Mar 2024 13:10:50 +0100 Subject: [PATCH 06/12] refactor: rename `reference` to `ref` --- src/__tests__/example-html-tags.ts | 4 ++-- src/constructs/__tests__/capture.test.tsx | 12 ++++++------ src/constructs/capture.ts | 6 +++--- src/index.ts | 2 +- website/docs/api/captures.md | 10 +++++----- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/__tests__/example-html-tags.ts b/src/__tests__/example-html-tags.ts index d548a87..67e8749 100644 --- a/src/__tests__/example-html-tags.ts +++ b/src/__tests__/example-html-tags.ts @@ -6,7 +6,7 @@ 
import { charRange, digit, oneOrMore, - reference, + ref, zeroOrMore, } from '..'; @@ -14,7 +14,7 @@ test('example: html tag matching', () => { const tagName = oneOrMore(charClass(charRange('a', 'z'), digit)); const tagContent = zeroOrMore(any, { greedy: false }); - const tagRef = reference('tag'); + const tagRef = ref('tag'); const tagMatcher = buildRegExp( [ '<', diff --git a/src/constructs/__tests__/capture.test.tsx b/src/constructs/__tests__/capture.test.tsx index 68f44c2..923455a 100644 --- a/src/constructs/__tests__/capture.test.tsx +++ b/src/constructs/__tests__/capture.test.tsx @@ -6,7 +6,7 @@ import { digit, inverted, oneOrMore, - reference, + ref, word, wordBoundary, } from '../..'; @@ -50,21 +50,21 @@ test('named `capture` matching', () => { }); // Should have `ref0` as name. -const firstRef = reference(); +const firstRef = ref(); test('`reference` pattern', () => { expect([firstRef]).toEqualRegex(/\k<ref0>/); - expect([reference('xyz')]).toEqualRegex(/\k<xyz>/); + expect([ref('xyz')]).toEqualRegex(/\k<xyz>/); expect([capture(any, { name: firstRef }), ' ', firstRef]).toEqualRegex('(?<ref0>.) \\k<ref0>'); - const otherRef = reference('r123'); + const otherRef = ref('r123'); expect(['xx', capture(any, { name: otherRef }), ' ', otherRef, 'xx']).toEqualRegex( 'xx(?<r123>.)
\\k<r123>xx', ); }); test('`reference` matching basic case', () => { - const someRef = reference(); + const someRef = ref(); expect([capture(word, { name: someRef }), someRef]).toMatchString('aa'); expect([capture(digit, { name: someRef }), someRef]).toMatchString('11'); @@ -75,7 +75,7 @@ test('`reference` matching basic case', () => { }); test('`reference` matching HTML attributes', () => { - const quoteRef = reference('quote'); + const quoteRef = ref('quote'); const quote = anyOf('"\''); const htmlAttributeRegex = buildRegExp([ wordBoundary, diff --git a/src/constructs/capture.ts b/src/constructs/capture.ts index b9d628d..37cbe01 100644 --- a/src/constructs/capture.ts +++ b/src/constructs/capture.ts @@ -11,7 +11,7 @@ export interface Capture extends RegexConstruct { export type CaptureOptions = { /** - * Name to be given to the capturing group can either by a string or {@link reference} instance. + * Name to be given to the capturing group can either by a string or {@link ref} instance. */ name: string | Reference; }; @@ -24,7 +24,7 @@ export interface Reference extends RegexConstruct { /** * Creates a capturing group which allows the matched pattern to be available: * - in the match results (`String.match`, `String.matchAll`, or `RegExp.exec`) - * - in the regex itself, through {@link reference} + * - in the regex itself, through {@link ref} */ export function capture(sequence: RegexSequence, options?: CaptureOptions): Capture { return { @@ -44,7 +44,7 @@ let counter = 0; * * @param name - Name to be given to the capturing group which receives this reference. If not provided, a unique name will be generated. */ -export function reference(name?: string): Reference { +export function ref(name?: string): Reference { return { type: 'reference', name: name ??
`ref${counter++}`, diff --git a/src/index.ts b/src/index.ts index bb3cd62..14b922f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -15,7 +15,7 @@ export { startOfString, wordBoundary, } from './constructs/anchors'; -export { capture, reference } from './constructs/capture'; +export { capture, ref } from './constructs/capture'; export { any, anyOf, diff --git a/website/docs/api/captures.md b/website/docs/api/captures.md index 79d76a9..8f9ec21 100644 --- a/website/docs/api/captures.md +++ b/website/docs/api/captures.md @@ -35,10 +35,10 @@ TS Regex Builder does not have a construct for non-capturing groups. Such groups ::: -### `reference()` +### `ref()` ```ts -function reference( +function ref( name?: string; ): Reference; ``` @@ -53,13 +53,13 @@ Usage with `capture()`: ```ts // Define reference with name "some". -const someRef = reference('some'); +const someRef = ref('some'); const regex = buildRegExp([ // Create a named capture using name from `someRef`. - capture(..., { name: someRef}), + capture(..., { name: someRef }), // ... some other elements ... - // Match the same text as captured in capture using `someRef`. + // Match the same text as captured in a `capture` using `someRef`. 
someRef, ]) ``` From f2ad95c5434b42486b022022e86d2686b9a5bfc1 Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Wed, 3 Apr 2024 16:34:56 +0200 Subject: [PATCH 07/12] chore: adapt examples --- src/__tests__/example-html-tags.ts | 5 ++- src/constructs/__tests__/capture.test.tsx | 41 ++++++++++++----------- src/constructs/capture.ts | 13 +++---- website/docs/api/captures.md | 14 +++----- 4 files changed, 34 insertions(+), 39 deletions(-) diff --git a/src/__tests__/example-html-tags.ts b/src/__tests__/example-html-tags.ts index 67e8749..f7d4c92 100644 --- a/src/__tests__/example-html-tags.ts +++ b/src/__tests__/example-html-tags.ts @@ -14,15 +14,14 @@ test('example: html tag matching', () => { const tagName = oneOrMore(charClass(charRange('a', 'z'), digit)); const tagContent = zeroOrMore(any, { greedy: false }); - const tagRef = ref('tag'); const tagMatcher = buildRegExp( [ '<', - capture(tagName, { name: tagRef }), + capture(tagName, { name: 'tag' }), '>', capture(tagContent, { name: 'content' }), '</', - tagRef, + ref('tag'), '>', ], { ignoreCase: true, global: true }, diff --git a/src/constructs/__tests__/capture.test.tsx b/src/constructs/__tests__/capture.test.tsx index 923455a..718317b 100644 --- a/src/constructs/__tests__/capture.test.tsx +++ b/src/constructs/__tests__/capture.test.tsx @@ -4,7 +4,7 @@ import { buildRegExp, capture, digit, - inverted, + negated, oneOrMore, ref, word, @@ -49,41 +49,44 @@ test('named `capture` matching', () => { ]).toMatchNamedGroups('abc', { x1: 'b', x2: 'c' }); }); -// Should have `ref0` as name. -const firstRef = ref(); - test('`reference` pattern', () => { - expect([firstRef]).toEqualRegex(/\k<ref0>/); + expect([ref('ref0')]).toEqualRegex(/\k<ref0>/); expect([ref('xyz')]).toEqualRegex(/\k<xyz>/); - expect([capture(any, { name: firstRef }), ' ', firstRef]).toEqualRegex('(?<ref0>.) \\k<ref0>'); + expect([capture(any, { name: 'ref0' }), ' ', ref('ref0')]).toEqualRegex('(?<ref0>.)
\\k<ref0>'); - const otherRef = ref('r123'); - expect(['xx', capture(any, { name: otherRef }), ' ', otherRef, 'xx']).toEqualRegex( + expect(['xx', capture(any, { name: 'r123' }), ' ', ref('r123'), 'xx']).toEqualRegex( 'xx(?<r123>.) \\k<r123>xx', ); }); test('`reference` matching basic case', () => { - const someRef = ref(); - expect([capture(word, { name: someRef }), someRef]).toMatchString('aa'); - expect([capture(digit, { name: someRef }), someRef]).toMatchString('11'); + expect([capture(word, { name: 'a' }), ref('a')]).toMatchString('aa'); + expect([capture(digit, { name: 'a' }), ref('a')]).toMatchString('11'); + + expect([capture(any, { name: 'a' }), ref('a')]).not.toMatchString('ab'); + expect([capture(digit, { name: 'a' }), ref('a')]).not.toMatchString('1a'); + expect([capture(digit, { name: 'a' }), ref('a')]).not.toMatchString('a1'); +}); - expect([capture(any, { name: someRef }), someRef]).not.toMatchString('ab'); +test('`reference` matching variable case', () => { + const someRef = ref('test'); + expect([capture(word, { name: someRef.name }), someRef]).toMatchString('aa'); + expect([capture(digit, { name: someRef.name }), someRef]).toMatchString('11'); - expect([capture(digit, { name: someRef }), someRef]).not.toMatchString('1a'); - expect([capture(digit, { name: someRef }), someRef]).not.toMatchString('a1'); + expect([capture(any, { name: someRef.name }), someRef]).not.toMatchString('ab'); + expect([capture(digit, { name: someRef.name }), someRef]).not.toMatchString('1a'); + expect([capture(digit, { name: someRef.name }), someRef]).not.toMatchString('a1'); }); test('`reference` matching HTML attributes', () => { - const quoteRef = ref('quote'); - const quote = anyOf('"\''); + const quoteChars = anyOf('"\''); const htmlAttributeRegex = buildRegExp([ wordBoundary, capture(oneOrMore(word), { name: 'name' }), '=', - capture(quote, { name: quoteRef }), - capture(oneOrMore(inverted(quote)), { name: 'value' }), - quoteRef, + capture(quoteChars, { name: 'quote' }), +
capture(oneOrMore(negated(quoteChars)), { name: 'value' }), + ref('quote'), ]); expect(htmlAttributeRegex).toMatchNamedGroups('a="b"', { diff --git a/src/constructs/capture.ts b/src/constructs/capture.ts index 37cbe01..e8bd107 100644 --- a/src/constructs/capture.ts +++ b/src/constructs/capture.ts @@ -11,9 +11,9 @@ export interface Capture extends RegexConstruct { export type CaptureOptions = { /** - * Name to be given to the capturing group can either by a string or {@link ref} instance. + * Name to be given to the capturing group. */ - name: string | Reference; + name: string; }; export interface Reference extends RegexConstruct { @@ -35,8 +35,6 @@ export function capture(sequence: RegexSequence, options?: CaptureOptions): Capt }; } -let counter = 0; - /** * Creates a reference (a.k.a. backreference) to a capturing group. * @@ -44,17 +42,16 @@ let counter = 0; * * @param name - Name to be given to the capturing group which receives this reference. If not provided, a unique name will be generated. */ -export function ref(name?: string): Reference { +export function ref(name: string): Reference { return { type: 'reference', - name: name ?? `ref${counter++}`, + name, encode: encodeReference, }; } function encodeCapture(this: Capture): EncodeResult { - // @ts-expect-error - const name = this.options?.name?.name ?? this.options?.name; + const name = this.options?.name; if (name) { return { precedence: 'atom', diff --git a/website/docs/api/captures.md b/website/docs/api/captures.md index 8f9ec21..2d913cc 100644 --- a/website/docs/api/captures.md +++ b/website/docs/api/captures.md @@ -47,20 +47,16 @@ Regex syntax: `\k<...>`. Creates a reference, also known as backreferences, which allows matching the same text again that was previously matched by a capturing group. To form a valid regex, reference need to be attached to named capturing group earlier in the expression. -If you do not specify the reference name, a auto-generated unique value will be assigned for it. 
- Usage with `capture()`: ```ts -// Define reference with name "some". -const someRef = ref('some'); - const regex = buildRegExp([ - // Create a named capture using name from `someRef`. - capture(..., { name: someRef }), + // Create a named capture using name from `someKey`. + capture(..., { name: 'someKey' }), // ... some other elements ... - // Match the same text as captured in a `capture` using `someRef`. - someRef, + + // Match the same text as matched by `capture` with the same name. + ref('someKey'), ]) ``` From 1aa293f3ed43b752cb8573013d173d114c9cc414 Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Wed, 3 Apr 2024 16:43:33 +0200 Subject: [PATCH 08/12] refactor: self code review --- src/constructs/capture.ts | 2 +- website/docs/api/captures.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/constructs/capture.ts b/src/constructs/capture.ts index e8bd107..7bfa5c3 100644 --- a/src/constructs/capture.ts +++ b/src/constructs/capture.ts @@ -13,7 +13,7 @@ export type CaptureOptions = { /** * Name to be given to the capturing group. */ - name: string; + name?: string; }; export interface Reference extends RegexConstruct { diff --git a/website/docs/api/captures.md b/website/docs/api/captures.md index 2d913cc..6f08ca2 100644 --- a/website/docs/api/captures.md +++ b/website/docs/api/captures.md @@ -39,13 +39,13 @@ TS Regex Builder does not have a construct for non-capturing groups. Such groups ```ts function ref( - name?: string; + name: string; ): Reference; ``` Regex syntax: `\k<...>`. -Creates a reference, also known as backreferences, which allows matching the same text again that was previously matched by a capturing group. To form a valid regex, reference need to be attached to named capturing group earlier in the expression. +Creates a reference, also known as a backreference, which allows matching again the exact text that a capturing group previously matched. 
The reference must use the same name as some capturing group earlier in the expression to form a valid regex. Usage with `capture()`: From 6525391e2330073effb0b881f04ed6a9dade4756 Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Wed, 3 Apr 2024 16:48:25 +0200 Subject: [PATCH 09/12] chore: tweak docs --- src/constructs/capture.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/constructs/capture.ts b/src/constructs/capture.ts index 7bfa5c3..471c463 100644 --- a/src/constructs/capture.ts +++ b/src/constructs/capture.ts @@ -36,11 +36,13 @@ export function capture(sequence: RegexSequence, options?: CaptureOptions): Capt } /** - * Creates a reference (a.k.a. backreference) to a capturing group. + * Creates a reference, also known as backreference, which allows matching + * again the exact text that a capturing group previously matched. * - * Backreferences allows to match the same text that was previously captured by a capturing group. + * In order to form a valid regex, the reference must use the same name as + * a capturing group earlier in the expression. * - * @param name - Name to be given to the capturing group which receives this reference. If not provided, a unique name will be generated. + * @param name - Name of the capturing group to reference. */ export function ref(name: string): Reference { return { From c0c770a6fb29a20532d9c66c2626b44be504d5d5 Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Wed, 3 Apr 2024 16:51:48 +0200 Subject: [PATCH 10/12] chore: restore regex docs --- website/docs/api/constructs.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/website/docs/api/constructs.md b/website/docs/api/constructs.md index 6653e7d..5182ed8 100644 --- a/website/docs/api/constructs.md +++ b/website/docs/api/constructs.md @@ -18,3 +18,33 @@ Regex syntax: `a|b|c`. The `choiceOf` (disjunction) construct matches one out of several possible sequences. 
It functions similarly to a logical OR operator in programming. It can match simple string options as well as complex patterns. Example: `choiceOf("color", "colour")` matches either `color` or `colour` pattern. + +### `regex()` + +```ts +function regex(sequence: RegexSequence): Regex; +``` + +Regex syntax: the pattern remains unchanged when wrapped by this construct. + +This construct is a no-op operator that groups array of `RegexElements` into a single element for composition purposes. This is particularly useful for defining smaller sequence patterns as separate variables. + +Without `regex()`: + +```ts +const exponent = [anyOf('eE'), optional(anyOf('+-')), oneOrMore(digit)]; +const numberWithExponent = buildRegExp([ + oneOrMore(digit), + ...exponent, // Need to spread "exponent" as it's an array. +]); +``` + +With `regex()`: + +```ts +const exponent = regex([anyOf('eE'), optional(anyOf('+-')), oneOrMore(digit)]); +const numberWithExponent = buildRegExp([ + oneOrMore(digit), + exponent, // Easily compose "exponent" sequence as a single element. +]); +``` From ec0497cfc56a667225a144c2c0e97fcfa6ebcc60 Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Wed, 3 Apr 2024 16:53:21 +0200 Subject: [PATCH 11/12] chore: reformat --- website/docs/api/captures.md | 4 +--- website/docs/api/overview.md | 1 - website/docs/api/types.md | 9 ++------- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/website/docs/api/captures.md b/website/docs/api/captures.md index 6f08ca2..6944447 100644 --- a/website/docs/api/captures.md +++ b/website/docs/api/captures.md @@ -38,9 +38,7 @@ TS Regex Builder does not have a construct for non-capturing groups. Such groups ### `ref()` ```ts -function ref( - name: string; -): Reference; +function ref(name: string): Reference; ``` Regex syntax: `\k<...>`. 
diff --git a/website/docs/api/overview.md b/website/docs/api/overview.md index 64bab53..6f5f71f 100644 --- a/website/docs/api/overview.md +++ b/website/docs/api/overview.md @@ -100,5 +100,4 @@ See [Character Classes](./api/character-classes) for more info. | `lookbehind(...)` | `(?<=...)` | Match preceding text without consuming it | | `negativeLookbehind(...)` | `(? Date: Wed, 3 Apr 2024 16:55:18 +0200 Subject: [PATCH 12/12] chore: reformat with prettier --- .github/ISSUE_TEMPLATE/bug_report.md | 4 ++-- .github/ISSUE_TEMPLATE/feature_request.md | 8 ++++---- .watchmanconfig | 2 +- .yarnrc.yml | 4 ++-- CODE_OF_CONDUCT.md | 21 ++++++++++----------- README.md | 1 - lefthook.yml | 4 ++-- 7 files changed, 21 insertions(+), 23 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 6454a84..10597e0 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,10 +1,9 @@ --- name: Bug report about: Create a report to help us improve -title: "[Bug]" +title: '[Bug]' labels: '' assignees: '' - --- **Describe the bug** @@ -12,6 +11,7 @@ A clear and concise description of what the bug is. **To Reproduce** Steps to reproduce the behavior: + 1. 2. 3. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index fc7c007..75e24e0 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,10 +1,9 @@ --- name: Feature request about: Suggest an idea for this project -title: "[Feature]" +title: '[Feature]' labels: enhancement assignees: '' - --- **Is your feature request related to a problem? Please describe.** @@ -17,10 +16,11 @@ A clear and concise description of what you want to happen. A clear and concise description of any alternative solutions or features you've considered. 
**Checklist** + - [ ] Implementation - [ ] Tests -- [ ] API docs -- [ ] README docs (if relevant) +- [ ] API docs +- [ ] README docs (if relevant) - [ ] Example docs & tests (if relevant) **Additional context** diff --git a/.watchmanconfig b/.watchmanconfig index 9e26dfe..0967ef4 100644 --- a/.watchmanconfig +++ b/.watchmanconfig @@ -1 +1 @@ -{} \ No newline at end of file +{} diff --git a/.yarnrc.yml b/.yarnrc.yml index 13215d6..5badb2e 100644 --- a/.yarnrc.yml +++ b/.yarnrc.yml @@ -3,8 +3,8 @@ nmHoistingLimits: workspaces plugins: - path: .yarn/plugins/@yarnpkg/plugin-interactive-tools.cjs - spec: "@yarnpkg/plugin-interactive-tools" + spec: '@yarnpkg/plugin-interactive-tools' - path: .yarn/plugins/@yarnpkg/plugin-workspace-tools.cjs - spec: "@yarnpkg/plugin-workspace-tools" + spec: '@yarnpkg/plugin-workspace-tools' yarnPath: .yarn/releases/yarn-3.6.1.cjs diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 45d257b..8b4fcfd 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,4 +1,3 @@ - # Contributor Covenant Code of Conduct ## Our Pledge @@ -18,23 +17,23 @@ diverse, inclusive, and healthy community. 
Examples of behavior that contributes to a positive environment for our community include: -* Demonstrating empathy and kindness toward other people -* Being respectful of differing opinions, viewpoints, and experiences -* Giving and gracefully accepting constructive feedback -* Accepting responsibility and apologizing to those affected by our mistakes, +- Demonstrating empathy and kindness toward other people +- Being respectful of differing opinions, viewpoints, and experiences +- Giving and gracefully accepting constructive feedback +- Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience -* Focusing on what is best not just for us as individuals, but for the overall +- Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: -* The use of sexualized language or imagery, and sexual attention or advances of +- The use of sexualized language or imagery, and sexual attention or advances of any kind -* Trolling, insulting or derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or email address, +- Trolling, insulting or derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or email address, without their explicit permission -* Other conduct which could reasonably be considered inappropriate in a +- Other conduct which could reasonably be considered inappropriate in a professional setting ## Enforcement Responsibilities diff --git a/README.md b/README.md index 79b1a4e..d3c0fa7 100644 --- a/README.md +++ b/README.md @@ -152,7 +152,6 @@ See [Character Classes API doc](https://callstack.github.io/ts-regex-builder/api | `lookbehind(...)` | `(?<=...)` | Match preceding text without consuming it | | `negativeLookbehind(...)` | `(?