diff --git a/GUIDELINES.md b/GUIDELINES.md index 181a949..8032347 100644 --- a/GUIDELINES.md +++ b/GUIDELINES.md @@ -8,4 +8,4 @@ ## Implementation guidelines -1. When the user passes the text to any regex component, it should be treated as an exact string to match and not as a regex string. We might provide an escape hatch for providing raw regex string through, but the user should use it explicitly. +1. When the user passes the text to any regex construct, it should be treated as an exact string to match and not as a regex string. We might provide an escape hatch for providing raw regex string through, but the user should use it explicitly. diff --git a/README.md b/README.md index d5769fd..8d29ab9 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ const hexDigit = charClass( charRange('0', '9'), ); -const hexColor = buildRegex( +const hexColor = buildRegExp( startOfString, optionally('#'), capture( @@ -47,32 +47,35 @@ yarn add ts-regex-builder ## Basic usage ```js -import { buildRegex, capture, oneOrMore } from 'ts-regex-builder'; +import { buildRegExp, capture, oneOrMore } from 'ts-regex-builder'; // /Hello (\w+)/ -const regex = buildRegex(['Hello ', capture(oneOrMore(word))]); +const regex = buildRegExp(['Hello ', capture(oneOrMore(word))]); ``` ## Regex domain-specific language -TS Regex Builder allows you to build complex regular expressions using domain-specific language of regex components. +TS Regex Builder allows you to build complex regular expressions using domain-specific language. Terminology: -- regex component (e.g., `capture()`, `oneOrMore()`, `word`) - function or object representing a regex construct -- regex element (`RegexElement`) - object returned by regex components -- regex sequence (`RegexSequence`) - single regex element or string (`RegexElement | string`) or array of such elements and strings (`Array`) +- regex construct (`RegexConstruct`) - common name for all regex constructs like character classes, quantifiers, and anchors. 
-Most of the regex components accept a regex sequence. Examples of sequences: +- regex element (`RegexElement`) - fundamental building block of a regular expression, defined as either a regex construct or a string. -- single string: `'Hello World'` (note: all characters will be automatically escaped in the resulting regex) -- single element: `capture('abc')` -- array of elements and strings: `['$', oneOrMore(digit)]` +- regex sequence (`RegexSequence`) - a sequence of regex elements forming a regular expression. For developer convenience it also accepts a single element instead of an array. -Regex components can be composed into a complex tree: +Most of the regex constructs accept a regex sequence as their argument. + +Examples of sequences: +- array of elements: `['USD', oneOrMore(digit)]` +- single construct: `capture('abc')` +- single string: `'Hello'` + +Regex constructs can be composed into a tree: ```ts -const currencyAmount = buildRegex([ +const currencyAmount = buildRegExp([ choiceOf( '$', '€', @@ -87,14 +90,14 @@ const currencyAmount = buildRegex([ ### Regex Builders -| Regex Component | Regex Pattern | Description | -| --------------------------------------- | ------------- | ----------------------------------- | -| `buildRegex(...)` | `/.../` | Create `RegExp` instance | -| `buildRegex(..., { ignoreCase: true })` | `/.../i` | Create `RegExp` instance with flags | +| Builder | Regex Pattern | Description | +| ---------------------------------------- | ------------- | ----------------------------------- | +| `buildRegExp(...)` | `/.../` | Create `RegExp` instance | +| `buildRegExp(..., { ignoreCase: true })` | `/.../i` | Create `RegExp` instance with flags | -### Components +### Regex Constructs -| Regex Component | Regex Pattern | Notes | +| Regex Construct | Regex Pattern | Notes | | ------------------- | ------------- | ------------------------------- | | `capture(...)` | `(...)` | Create a capture group | | `choiceOf(x, y, z)` | `x\|y\|z` | Match one of 
provided sequences | @@ -106,7 +109,7 @@ Notes: ### Quantifiers -| Regex Component | Regex Pattern | Description | +| Regex Construct | Regex Pattern | Description | | -------------------------------- | ------------- | ------------------------------------------------- | | `zeroOrMore(x)` | `x*` | Zero or more occurence of a pattern | | `oneOrMore(x)` | `x+` | One or more occurence of a pattern | @@ -119,7 +122,7 @@ All quantifiers accept sequence of elements ### Character classes -| Regex Component | Regex Pattern | Description | +| Regex Construct | Regex Pattern | Description | | --------------------- | ------------- | ------------------------------------------- | | `any` | `.` | Any character | | `word` | `\w` | Word characters | @@ -140,7 +143,7 @@ Notes: ### Anchors -| Regex Component | Regex Pattern | Description | +| Regex Construct | Regex Pattern | Description | | --------------- | ------------- | ---------------------------------------------------------------- | | `startOfString` | `^` | Match start of the string (or start of a line in multiline mode) | | `endOfString` | `$` | Match end of the string (or end of a line in multiline mode) | diff --git a/docs/API.md b/docs/API.md index 44c573c..677967e 100644 --- a/docs/API.md +++ b/docs/API.md @@ -2,12 +2,13 @@ ## Builder -### `buildRegex()` function +### `buildRegExp()` function ```ts -function buildRegex(sequence: RegexSequence): RegExp; +function buildRegExp(sequence: RegexSequence): RegExp; -function buildRegex( +function buildRegExp( + sequence: RegexSequence, flags: { global?: boolean; ignoreCase?: boolean; @@ -15,13 +16,12 @@ function buildRegex( hasIndices?: boolean; sticky?: boolean; }, - sequence: RegexSequence ): RegExp; ``` -## Components +## Constructs -### `capture()` component +### `capture()` Captures, also known as capturing groups, are used to extract and store parts of the matched string for later use. 
@@ -31,7 +31,7 @@ function capture( ): Capture ``` -### `choiceOf()` component +### `choiceOf()` ```ts function choiceOf( @@ -45,7 +45,7 @@ Example: `choiceOf("color", "colour")` matches either `color` or `colour` patter ## Quantifiers -### `zeroOrMore()` component +### `zeroOrMore()` ```ts function zeroOrMore( @@ -53,7 +53,7 @@ function zeroOrMore( ): ZeroOrMore ``` -### `oneOrMore()` component +### `oneOrMore()` ```ts function oneOrMore( @@ -61,7 +61,7 @@ function oneOrMore( ): OneOrMore ``` -### `optionally()` component +### `optionally()` ```ts function optionally( @@ -69,7 +69,7 @@ function optionally( ): Optionally ``` -### `repeat()` component +### `repeat()` ```ts function repeat( @@ -96,7 +96,7 @@ const whitespace: CharacterClass; * `digit` matches any digit. * `whitespace` matches any whitespace character (spaces, tabs, line breaks). -### `anyOf()` component +### `anyOf()` ```ts function anyOf( @@ -108,7 +108,7 @@ The `anyOf` class matches any character present in the `character` string. Example: `anyOf('aeiou')` will match either `a`, `e`, `i` `o` or `u` characters. -### `characterRange()` component +### `characterRange()` ```ts function characterRange( @@ -124,7 +124,7 @@ Examples: * `characterRange('A', 'Z')` will match all uppercase characters from `a` to `z`. * `characterRange('0', '9')` will match all digit characters from `0` to `9`. -### `characterClass()` component +### `characterClass()` ```ts function characterClass( @@ -132,13 +132,13 @@ function characterClass( ): CharacterClass ``` -The `characterClass` component creates a new character class that includes all passed character classes. +The `characterClass` construct creates a new character class that includes all passed character classes. Example: * `characterClass(characterRange('a', 'f'), digit)` will match all lowercase hex digits (`0` to `9` and `a` to `f`). 
* `characterClass(characterRange('a', 'z'), digit, anyOf("._-"))` will match any digit, lowercase latin lettet from `a` to `z`, and either of `.`, `_`, and `-` characters. -### `inverted()` component +### `inverted()` ```ts function inverted( @@ -146,7 +146,7 @@ function inverted( ): CharacterClass ``` -The `inverted` component creates a new character class that matches any character that is not present in the passed character class. +The `inverted` construct creates a new character class that matches any character that is not present in the passed character class. Examples: * `inverted(digit)` matches any character that is not a digit diff --git a/docs/Examples.md b/docs/Examples.md index 910fd32..ae07788 100644 --- a/docs/Examples.md +++ b/docs/Examples.md @@ -13,7 +13,7 @@ const octet = choiceOf( ); // Match -const regex = buildRegex([ +const regex = buildRegExp([ startOfString, // repeat([octet, '.'], { count: 3 }), octet, diff --git a/package.json b/package.json index 9bf19ea..ebe56e8 100644 --- a/package.json +++ b/package.json @@ -121,7 +121,7 @@ "quoteProps": "consistent", "singleQuote": true, "tabWidth": 2, - "trailingComma": "es5", + "trailingComma": "all", "useTabs": false } ], diff --git a/src/__tests__/builder.test.ts b/src/__tests__/builder.test.ts index 42d8b8e..7377430 100644 --- a/src/__tests__/builder.test.ts +++ b/src/__tests__/builder.test.ts @@ -1,29 +1,29 @@ -import { buildRegex } from '../builders'; +import { buildRegExp } from '../builders'; test('`regexBuilder` flags', () => { - expect(buildRegex('a').flags).toBe(''); - expect(buildRegex('a', {}).flags).toBe(''); + expect(buildRegExp('a').flags).toBe(''); + expect(buildRegExp('a', {}).flags).toBe(''); - expect(buildRegex('a', { global: true }).flags).toBe('g'); - expect(buildRegex('a', { global: false }).flags).toBe(''); + expect(buildRegExp('a', { global: true }).flags).toBe('g'); + expect(buildRegExp('a', { global: false }).flags).toBe(''); - expect(buildRegex('a', { ignoreCase: true 
}).flags).toBe('i'); - expect(buildRegex('a', { ignoreCase: false }).flags).toBe(''); + expect(buildRegExp('a', { ignoreCase: true }).flags).toBe('i'); + expect(buildRegExp('a', { ignoreCase: false }).flags).toBe(''); - expect(buildRegex('a', { multiline: true }).flags).toBe('m'); - expect(buildRegex('a', { multiline: false }).flags).toBe(''); + expect(buildRegExp('a', { multiline: true }).flags).toBe('m'); + expect(buildRegExp('a', { multiline: false }).flags).toBe(''); - expect(buildRegex('a', { hasIndices: true }).flags).toBe('d'); - expect(buildRegex('a', { hasIndices: false }).flags).toBe(''); + expect(buildRegExp('a', { hasIndices: true }).flags).toBe('d'); + expect(buildRegExp('a', { hasIndices: false }).flags).toBe(''); - expect(buildRegex('a', { sticky: true }).flags).toBe('y'); - expect(buildRegex('a', { sticky: false }).flags).toBe(''); + expect(buildRegExp('a', { sticky: true }).flags).toBe('y'); + expect(buildRegExp('a', { sticky: false }).flags).toBe(''); expect( - buildRegex('a', { + buildRegExp('a', { global: true, // ignoreCase: true, multiline: false, - }).flags + }).flags, ).toBe('gi'); }); diff --git a/src/__tests__/examples.test.ts b/src/__tests__/examples.test.ts index c7845fe..090731f 100644 --- a/src/__tests__/examples.test.ts +++ b/src/__tests__/examples.test.ts @@ -1,5 +1,5 @@ import { - buildRegex, + buildRegExp, charRange, choiceOf, digit, @@ -14,10 +14,10 @@ test('example: IPv4 address validator', () => { [charRange('1', '9'), digit], ['1', repeat(digit, { count: 2 })], ['2', charRange('0', '4'), digit], - ['25', charRange('0', '5')] + ['25', charRange('0', '5')], ); - const regex = buildRegex([ + const regex = buildRegExp([ startOfString, // repeat([octet, '.'], { count: 3 }), octet, @@ -38,6 +38,6 @@ test('example: IPv4 address validator', () => { expect(regex).not.toMatchString('255.255.255.256'); expect(regex).toHavePattern( - /^(?:(?:\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.){3}(?:\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])$/ + 
/^(?:(?:\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.){3}(?:\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])$/, ); }); diff --git a/src/builders.ts b/src/builders.ts index e63f36f..b550299 100644 --- a/src/builders.ts +++ b/src/builders.ts @@ -1,23 +1,6 @@ -import type { RegexSequence } from './types'; +import type { RegexFlags, RegexSequence } from './types'; import { encodeSequence } from './encoder/encoder'; -import { asNodeArray } from './utils/nodes'; - -export interface RegexFlags { - /** Global search. */ - global?: boolean; - - /** Case-insensitive search. */ - ignoreCase?: boolean; - - /** Allows ^ and $ to match newline characters. */ - multiline?: boolean; - - /** Generate indices for substring matches. */ - hasIndices?: boolean; - - /** Perform a "sticky" search that matches starting at the current position in the target string. */ - sticky?: boolean; -} +import { ensureArray } from './utils/elements'; /** * Generate RegExp object from elements with optional flags. @@ -26,8 +9,8 @@ export interface RegexFlags { * @param flags RegExp flags object * @returns RegExp object */ -export function buildRegex(sequence: RegexSequence, flags?: RegexFlags): RegExp { - const pattern = encodeSequence(asNodeArray(sequence)).pattern; +export function buildRegExp(sequence: RegexSequence, flags?: RegexFlags): RegExp { + const pattern = encodeSequence(ensureArray(sequence)).pattern; const flagsString = encodeFlags(flags ?? 
{}); return new RegExp(pattern, flagsString); } @@ -38,7 +21,7 @@ export function buildRegex(sequence: RegexSequence, flags?: RegexFlags): RegExp * @returns regex pattern string */ export function buildPattern(sequence: RegexSequence): string { - return encodeSequence(asNodeArray(sequence)).pattern; + return encodeSequence(ensureArray(sequence)).pattern; } function encodeFlags(flags: RegexFlags): string { diff --git a/src/components/__tests__/anchors.test.tsx b/src/constructs/__tests__/anchors.test.tsx similarity index 100% rename from src/components/__tests__/anchors.test.tsx rename to src/constructs/__tests__/anchors.test.tsx diff --git a/src/components/__tests__/capture.test.tsx b/src/constructs/__tests__/capture.test.tsx similarity index 100% rename from src/components/__tests__/capture.test.tsx rename to src/constructs/__tests__/capture.test.tsx diff --git a/src/components/__tests__/character-class.test.ts b/src/constructs/__tests__/character-class.test.ts similarity index 94% rename from src/components/__tests__/character-class.test.ts rename to src/constructs/__tests__/character-class.test.ts index 8673b3b..7a60c4d 100644 --- a/src/components/__tests__/character-class.test.ts +++ b/src/constructs/__tests__/character-class.test.ts @@ -9,7 +9,7 @@ import { whitespace, word, } from '../character-class'; -import { buildRegex } from '../../builders'; +import { buildRegExp } from '../../builders'; test('`any` character class', () => { expect(any).toHavePattern(/./); @@ -44,7 +44,7 @@ test('`charClass` base cases', () => { test('`charClass` throws on inverted arguments', () => { expect(() => charClass(inverted(whitespace))).toThrowErrorMatchingInlineSnapshot( - `"\`charClass\` should receive only non-inverted character classes"` + `"\`charClass\` should receive only non-inverted character classes"`, ); }); @@ -56,13 +56,13 @@ test('`charRange` base cases', () => { test('`charRange` throws on incorrect arguments', () => { expect(() => charRange('z', 
'a')).toThrowErrorMatchingInlineSnapshot( - `"\`start\` should be before or equal to \`end\`"` + `"\`start\` should be before or equal to \`end\`"`, ); expect(() => charRange('aa', 'z')).toThrowErrorMatchingInlineSnapshot( - `"\`charRange\` should receive only single character \`start\` string"` + `"\`charRange\` should receive only single character \`start\` string"`, ); expect(() => charRange('a', 'zz')).toThrowErrorMatchingInlineSnapshot( - `"\`charRange\` should receive only single character \`end\` string"` + `"\`charRange\` should receive only single character \`end\` string"`, ); }); @@ -96,7 +96,7 @@ test('`anyOf` edge case caret and hyphen', () => { test('`anyOf` throws on empty text', () => { expect(() => anyOf('')).toThrowErrorMatchingInlineSnapshot( - `"\`anyOf\` should received at least one character"` + `"\`anyOf\` should received at least one character"`, ); }); @@ -117,16 +117,16 @@ test('`inverted` character class execution', () => { test('`encodeCharacterClass` throws on empty text', () => { expect(() => - buildRegex( + buildRegExp( // @ts-expect-error inverted({ type: 'characterClass', chars: [], ranges: [], isInverted: false, - }) - ) + }), + ), ).toThrowErrorMatchingInlineSnapshot( - `"Character class should contain at least one character or character range"` + `"Character class should contain at least one character or character range"`, ); }); diff --git a/src/components/__tests__/choice-of.test.ts b/src/constructs/__tests__/choice-of.test.ts similarity index 94% rename from src/components/__tests__/choice-of.test.ts rename to src/constructs/__tests__/choice-of.test.ts index 6533663..4d2d09c 100644 --- a/src/components/__tests__/choice-of.test.ts +++ b/src/constructs/__tests__/choice-of.test.ts @@ -30,12 +30,12 @@ test('`choiceOf` with sequence options', () => { test('`choiceOf` using nested regex', () => { expect(choiceOf(oneOrMore('a'), zeroOrMore('b'))).toHavePattern(/a+|b*/); expect(choiceOf(repeat('a', { min: 1, max: 3 }), repeat('bx', { 
count: 5 }))).toHavePattern( - /a{1,3}|(?:bx){5}/ + /a{1,3}|(?:bx){5}/, ); }); test('`choiceOf` throws on empty options', () => { expect(() => choiceOf()).toThrowErrorMatchingInlineSnapshot( - `"\`choiceOf\` should receive at least one alternative"` + `"\`choiceOf\` should receive at least one alternative"`, ); }); diff --git a/src/components/__tests__/quantifiers.test.tsx b/src/constructs/__tests__/quantifiers.test.tsx similarity index 100% rename from src/components/__tests__/quantifiers.test.tsx rename to src/constructs/__tests__/quantifiers.test.tsx diff --git a/src/components/__tests__/repeat.test.tsx b/src/constructs/__tests__/repeat.test.tsx similarity index 94% rename from src/components/__tests__/repeat.test.tsx rename to src/constructs/__tests__/repeat.test.tsx index 33d738a..f419647 100644 --- a/src/components/__tests__/repeat.test.tsx +++ b/src/constructs/__tests__/repeat.test.tsx @@ -19,6 +19,6 @@ test('`repeat` optimizes grouping for atoms', () => { test('`repeat` throws on no children', () => { expect(() => repeat([], { count: 1 })).toThrowErrorMatchingInlineSnapshot( - `"\`repeat\` should receive at least one element"` + `"\`repeat\` should receive at least one element"`, ); }); diff --git a/src/components/anchors.ts b/src/constructs/anchors.ts similarity index 60% rename from src/components/anchors.ts rename to src/constructs/anchors.ts index 46a9fd5..6b7d822 100644 --- a/src/components/anchors.ts +++ b/src/constructs/anchors.ts @@ -1,7 +1,7 @@ -import type { EncodeOutput } from '../encoder/types'; -import type { RegexEncodable } from '../types'; +import type { EncodeResult } from '../encoder/types'; +import type { RegexConstruct } from '../types'; -export interface Anchor extends RegexEncodable { +export interface Anchor extends RegexConstruct { type: 'anchor'; symbol: string; } @@ -18,7 +18,7 @@ export const endOfString: Anchor = { encode: encodeAnchor, }; -function encodeAnchor(this: Anchor): EncodeOutput { +function encodeAnchor(this: Anchor): 
EncodeResult { return { precedence: 'sequence', pattern: this.symbol, diff --git a/src/components/capture.ts b/src/constructs/capture.ts similarity index 52% rename from src/components/capture.ts rename to src/constructs/capture.ts index 33c995c..ca2ce40 100644 --- a/src/components/capture.ts +++ b/src/constructs/capture.ts @@ -1,9 +1,9 @@ import { encodeSequence } from '../encoder/encoder'; -import type { EncodeOutput } from '../encoder/types'; -import { asNodeArray } from '../utils/nodes'; -import type { RegexElement, RegexEncodable, RegexSequence } from '../types'; +import type { EncodeResult } from '../encoder/types'; +import { ensureArray } from '../utils/elements'; +import type { RegexConstruct, RegexElement, RegexSequence } from '../types'; -export interface Capture extends RegexEncodable { +export interface Capture extends RegexConstruct { type: 'capture'; children: RegexElement[]; } @@ -11,12 +11,12 @@ export interface Capture extends RegexEncodable { export function capture(sequence: RegexSequence): Capture { return { type: 'capture', - children: asNodeArray(sequence), + children: ensureArray(sequence), encode: encodeCapture, }; } -function encodeCapture(this: Capture): EncodeOutput { +function encodeCapture(this: Capture): EncodeResult { return { precedence: 'atom', pattern: `(${encodeSequence(this.children).pattern})`, diff --git a/src/components/character-class.ts b/src/constructs/character-class.ts similarity index 93% rename from src/components/character-class.ts rename to src/constructs/character-class.ts index 9d8af29..2e213e3 100644 --- a/src/components/character-class.ts +++ b/src/constructs/character-class.ts @@ -1,11 +1,12 @@ -import type { EncodeOutput } from '../encoder/types'; +import type { EncodeResult } from '../encoder/types'; +import type { RegexConstruct } from '../types'; -export interface CharacterClass { +export interface CharacterClass extends RegexConstruct { type: 'characterClass'; chars: string[]; ranges: CharacterRange[]; 
isInverted: boolean; - encode: () => EncodeOutput; + encode: () => EncodeResult; } /** @@ -112,7 +113,7 @@ export function inverted(element: CharacterClass): CharacterClass { }; } -function encodeCharacterClass(this: CharacterClass): EncodeOutput { +function encodeCharacterClass(this: CharacterClass): EncodeResult { if (this.chars.length === 0 && this.ranges.length === 0) { throw new Error('Character class should contain at least one character or character range'); } diff --git a/src/components/choice-of.ts b/src/constructs/choice-of.ts similarity index 63% rename from src/components/choice-of.ts rename to src/constructs/choice-of.ts index 0a2178a..d2bd3ac 100644 --- a/src/components/choice-of.ts +++ b/src/constructs/choice-of.ts @@ -1,9 +1,9 @@ import { encodeSequence } from '../encoder/encoder'; -import type { EncodeOutput } from '../encoder/types'; -import { asNodeArray } from '../utils/nodes'; -import type { RegexElement, RegexEncodable, RegexSequence } from '../types'; +import type { EncodeResult } from '../encoder/types'; +import { ensureArray } from '../utils/elements'; +import type { RegexConstruct, RegexElement, RegexSequence } from '../types'; -export interface ChoiceOf extends RegexEncodable { +export interface ChoiceOf extends RegexConstruct { type: 'choiceOf'; alternatives: RegexElement[][]; } @@ -15,19 +15,19 @@ export function choiceOf(...alternatives: RegexSequence[]): ChoiceOf { return { type: 'choiceOf', - alternatives: alternatives.map((c) => asNodeArray(c)), + alternatives: alternatives.map((c) => ensureArray(c)), encode: encodeChoiceOf, }; } -function encodeChoiceOf(this: ChoiceOf): EncodeOutput { +function encodeChoiceOf(this: ChoiceOf): EncodeResult { const encodedAlternatives = this.alternatives.map((c) => encodeSequence(c)); if (encodedAlternatives.length === 1) { return encodedAlternatives[0]!; } return { - precedence: 'alternation', + precedence: 'disjunction', pattern: encodedAlternatives.map((n) => n.pattern).join('|'), }; } diff --git 
a/src/components/quantifiers.ts b/src/constructs/quantifiers.ts similarity index 60% rename from src/components/quantifiers.ts rename to src/constructs/quantifiers.ts index 58560f3..c96a71d 100644 --- a/src/components/quantifiers.ts +++ b/src/constructs/quantifiers.ts @@ -1,19 +1,19 @@ import { encodeAtom } from '../encoder/encoder'; -import type { EncodeOutput } from '../encoder/types'; -import { asNodeArray } from '../utils/nodes'; -import type { RegexElement, RegexEncodable, RegexSequence } from '../types'; +import type { EncodeResult } from '../encoder/types'; +import { ensureArray } from '../utils/elements'; +import type { RegexConstruct, RegexElement, RegexSequence } from '../types'; -export interface OneOrMore extends RegexEncodable { +export interface OneOrMore extends RegexConstruct { type: 'oneOrMore'; children: RegexElement[]; } -export interface Optionally extends RegexEncodable { +export interface Optionally extends RegexConstruct { type: 'optionally'; children: RegexElement[]; } -export interface ZeroOrMore extends RegexEncodable { +export interface ZeroOrMore extends RegexConstruct { type: 'zeroOrMore'; children: RegexElement[]; } @@ -21,7 +21,7 @@ export interface ZeroOrMore extends RegexEncodable { export function oneOrMore(sequence: RegexSequence): OneOrMore { return { type: 'oneOrMore', - children: asNodeArray(sequence), + children: ensureArray(sequence), encode: encodeOneOrMore, }; } @@ -29,7 +29,7 @@ export function oneOrMore(sequence: RegexSequence): OneOrMore { export function optionally(sequence: RegexSequence): Optionally { return { type: 'optionally', - children: asNodeArray(sequence), + children: ensureArray(sequence), encode: encodeOptionally, }; } @@ -37,26 +37,26 @@ export function optionally(sequence: RegexSequence): Optionally { export function zeroOrMore(sequence: RegexSequence): ZeroOrMore { return { type: 'zeroOrMore', - children: asNodeArray(sequence), + children: ensureArray(sequence), encode: encodeZeroOrMore, }; } -function 
encodeOneOrMore(this: OneOrMore): EncodeOutput { +function encodeOneOrMore(this: OneOrMore): EncodeResult { return { precedence: 'sequence', pattern: `${encodeAtom(this.children).pattern}+`, }; } -function encodeOptionally(this: Optionally): EncodeOutput { +function encodeOptionally(this: Optionally): EncodeResult { return { precedence: 'sequence', pattern: `${encodeAtom(this.children).pattern}?`, }; } -function encodeZeroOrMore(this: ZeroOrMore): EncodeOutput { +function encodeZeroOrMore(this: ZeroOrMore): EncodeResult { return { precedence: 'sequence', pattern: `${encodeAtom(this.children).pattern}*`, diff --git a/src/components/repeat.ts b/src/constructs/repeat.ts similarity index 72% rename from src/components/repeat.ts rename to src/constructs/repeat.ts index 36dcd73..faf7156 100644 --- a/src/components/repeat.ts +++ b/src/constructs/repeat.ts @@ -1,9 +1,9 @@ import { encodeAtom } from '../encoder/encoder'; -import type { EncodeOutput } from '../encoder/types'; -import { asNodeArray } from '../utils/nodes'; -import type { RegexElement, RegexEncodable, RegexSequence } from '../types'; +import type { EncodeResult } from '../encoder/types'; +import { ensureArray } from '../utils/elements'; +import type { RegexConstruct, RegexElement, RegexSequence } from '../types'; -export interface Repeat extends RegexEncodable { +export interface Repeat extends RegexConstruct { type: 'repeat'; options: RepeatOptions; children: RegexElement[]; @@ -12,7 +12,7 @@ export interface Repeat extends RegexEncodable { export type RepeatOptions = { count: number } | { min: number; max?: number }; export function repeat(sequence: RegexSequence, options: RepeatOptions): Repeat { - const children = asNodeArray(sequence); + const children = ensureArray(sequence); if (children.length === 0) { throw new Error('`repeat` should receive at least one element'); @@ -26,7 +26,7 @@ export function repeat(sequence: RegexSequence, options: RepeatOptions): Repeat }; } -function encodeRepeat(this: 
Repeat): EncodeOutput { +function encodeRepeat(this: Repeat): EncodeResult { const atomicNodes = encodeAtom(this.children); if ('count' in this.options) { diff --git a/src/encoder/__tests__/encoder.test.tsx b/src/encoder/__tests__/encoder.test.tsx index cd730aa..d52dc3f 100644 --- a/src/encoder/__tests__/encoder.test.tsx +++ b/src/encoder/__tests__/encoder.test.tsx @@ -1,6 +1,6 @@ -import { buildPattern, buildRegex } from '../../builders'; -import { oneOrMore, optionally, zeroOrMore } from '../../components/quantifiers'; -import { repeat } from '../../components/repeat'; +import { buildPattern, buildRegExp } from '../../builders'; +import { oneOrMore, optionally, zeroOrMore } from '../../constructs/quantifiers'; +import { repeat } from '../../constructs/repeat'; test('basic quantifies', () => { expect('a').toHavePattern(/a/); @@ -43,15 +43,15 @@ test('`buildPattern` escapes special characters', () => { expect([oneOrMore('.*'), zeroOrMore('[]{}')]).toHavePattern(/(?:\.\*)+(?:\[\]\{\})*/); }); -test('`buildRegex` throws error on unknown element', () => { +test('`buildRegExp` throws error on unknown element', () => { expect(() => // @ts-expect-error intentionally passing incorrect object - buildRegex({ type: 'unknown' }) + buildRegExp({ type: 'unknown' }), ).toThrowErrorMatchingInlineSnapshot(`"\`encodeNode\`: unknown element type unknown"`); }); test('`buildPattern` throws on empty text', () => { expect(() => buildPattern('')).toThrowErrorMatchingInlineSnapshot( - `"\`encodeText\`: received text should not be empty"` + `"\`encodeText\`: received text should not be empty"`, ); }); diff --git a/src/encoder/encoder.ts b/src/encoder/encoder.ts index 68a6d66..2121ef6 100644 --- a/src/encoder/encoder.ts +++ b/src/encoder/encoder.ts @@ -1,29 +1,29 @@ import type { RegexElement } from '../types'; import { escapeText } from '../utils/text'; -import type { EncodeOutput } from './types'; +import type { EncodeResult } from './types'; -export function encodeSequence(nodes: 
RegexElement[]): EncodeOutput { - const encodedNodes = nodes.map((n) => encodeNode(n)); +export function encodeSequence(elements: RegexElement[]): EncodeResult { + const encodedNodes = elements.map((n) => encodeNode(n)); return concatSequence(encodedNodes); } -export function encodeAtom(nodes: RegexElement[]): EncodeOutput { - return asAtom(encodeSequence(nodes)); +export function encodeAtom(elements: RegexElement[]): EncodeResult { + return wrapAtom(encodeSequence(elements)); } -function encodeNode(node: RegexElement): EncodeOutput { - if (typeof node === 'string') { - return encodeText(node); +function encodeNode(element: RegexElement): EncodeResult { + if (typeof element === 'string') { + return encodeText(element); } - if (typeof node.encode !== 'function') { - throw new Error(`\`encodeNode\`: unknown element type ${node.type}`); + if (typeof element.encode !== 'function') { + throw new Error(`\`encodeNode\`: unknown element type ${element.type}`); } - return node.encode(); + return element.encode(); } -function encodeText(text: string): EncodeOutput { +function encodeText(text: string): EncodeResult { if (text.length === 0) { throw new Error('`encodeText`: received text should not be empty'); } @@ -42,18 +42,20 @@ function encodeText(text: string): EncodeOutput { }; } -function concatSequence(encoded: EncodeOutput[]): EncodeOutput { +function concatSequence(encoded: EncodeResult[]): EncodeResult { if (encoded.length === 1) { return encoded[0]!; } return { precedence: 'sequence', - pattern: encoded.map((n) => (n.precedence === 'alternation' ? asAtom(n) : n).pattern).join(''), + pattern: encoded + .map((n) => (n.precedence === 'disjunction' ? 
wrapAtom(n) : n).pattern) + .join(''), }; } -function asAtom(encoded: EncodeOutput): EncodeOutput { +function wrapAtom(encoded: EncodeResult): EncodeResult { if (encoded.precedence === 'atom') { return encoded; } diff --git a/src/encoder/types.ts b/src/encoder/types.ts index ad779c3..97a3807 100644 --- a/src/encoder/types.ts +++ b/src/encoder/types.ts @@ -1,9 +1,9 @@ /** * Encoded regex pattern with information about its type (atom, sequence) */ -export interface EncodeOutput { +export interface EncodeResult { precedence: EncodePrecedence; pattern: string; } -export type EncodePrecedence = 'atom' | 'sequence' | 'alternation'; +export type EncodePrecedence = 'atom' | 'sequence' | 'disjunction'; diff --git a/src/index.ts b/src/index.ts index ee5b82f..9bfb3d5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,9 +1,9 @@ export type * from './types'; -export { buildPattern, buildRegex } from './builders'; +export { buildPattern, buildRegExp } from './builders'; -export { startOfString, endOfString } from './components/anchors'; -export { capture } from './components/capture'; +export { startOfString, endOfString } from './constructs/anchors'; +export { capture } from './constructs/capture'; export { any, digit, @@ -13,7 +13,7 @@ export { charRange, charClass, inverted, -} from './components/character-class'; -export { choiceOf } from './components/choice-of'; -export { oneOrMore, optionally, zeroOrMore } from './components/quantifiers'; -export { repeat } from './components/repeat'; +} from './constructs/character-class'; +export { choiceOf } from './constructs/choice-of'; +export { oneOrMore, optionally, zeroOrMore } from './constructs/quantifiers'; +export { repeat } from './constructs/repeat'; diff --git a/src/types.ts b/src/types.ts index 8430c0f..6d57ad5 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,19 +1,40 @@ -import type { EncodeOutput } from './encoder/types'; +import type { EncodeResult } from './encoder/types'; + +export type ArrayOrSingle<T> = T[] | T; /** 
- * Sequence of `RegexElements` that can be encoded into a regular expression. + * Sequence of regex elements forming a regular expression. + * + * For developer convenience it also accepts a single element instead of array. */ export type RegexSequence = RegexElement[] | RegexElement; /** - * Represents a result of calling a regex component (`RegexEncodable`) or a string to be matched literally. + * Fundamental building block of a regular expression, defined as either a regex construct or a string. */ -export type RegexElement = RegexEncodable | string; +export type RegexElement = RegexConstruct | string; /** - * Represents result of calling a regex componen. + * Common interface for all regex constructs like character classes, quantifiers, and anchors. */ -export interface RegexEncodable { +export interface RegexConstruct { type: string; - encode(): EncodeOutput; + encode(): EncodeResult; +} + +export interface RegexFlags { + /** Global search. */ + global?: boolean; + + /** Case-insensitive search. */ + ignoreCase?: boolean; + + /** Allows ^ and $ to match newline characters. */ + multiline?: boolean; + + /** Generate indices for substring matches. */ + hasIndices?: boolean; + + /** Perform a "sticky" search that matches starting at the current position in the target string. */ + sticky?: boolean; } diff --git a/src/utils/nodes.ts b/src/utils/elements.ts similarity index 66% rename from src/utils/nodes.ts rename to src/utils/elements.ts index 9576d9b..c9eb283 100644 --- a/src/utils/nodes.ts +++ b/src/utils/elements.ts @@ -1,5 +1,5 @@ import type { RegexElement, RegexSequence } from '../types'; -export function asNodeArray(sequence: RegexSequence): RegexElement[] { +export function ensureArray(sequence: RegexSequence): RegexElement[] { return Array.isArray(sequence) ? 
sequence : [sequence]; } diff --git a/test-utils/to-have-pattern.ts b/test-utils/to-have-pattern.ts index 9521848..ce27116 100644 --- a/test-utils/to-have-pattern.ts +++ b/test-utils/to-have-pattern.ts @@ -1,12 +1,12 @@ import type { RegexSequence } from '../src/types'; -import { asRegExp } from './utils'; +import { wrapRegExp } from './utils'; export function toHavePattern( this: jest.MatcherContext, received: RegExp | RegexSequence, - expected: RegExp + expected: RegExp, ) { - const receivedPattern = asRegExp(received).source; + const receivedPattern = wrapRegExp(received).source; const expectedPattern = expected.source; const options = { diff --git a/test-utils/to-match-groups.ts b/test-utils/to-match-groups.ts index 45494da..32b1612 100644 --- a/test-utils/to-match-groups.ts +++ b/test-utils/to-match-groups.ts @@ -1,13 +1,13 @@ import type { RegexSequence } from '../src/types'; -import { asRegExp } from './utils'; +import { wrapRegExp } from './utils'; export function toMatchGroups( this: jest.MatcherContext, received: RegExp | RegexSequence, expectedString: string, - expectedGroups: string[] + expectedGroups: string[], ) { - const receivedRegex = asRegExp(received); + const receivedRegex = wrapRegExp(received); const matchResult = expectedString.match(receivedRegex); const receivedGroups = matchResult ? 
[...matchResult] : null; const options = { diff --git a/test-utils/to-match-string.ts b/test-utils/to-match-string.ts index d0086df..018f5ea 100644 --- a/test-utils/to-match-string.ts +++ b/test-utils/to-match-string.ts @@ -1,12 +1,12 @@ import type { RegexSequence } from '../src/types'; -import { asRegExp } from './utils'; +import { wrapRegExp } from './utils'; export function toMatchString( this: jest.MatcherContext, received: RegExp | RegexSequence, - expected: string + expected: string, ) { - const receivedRegex = asRegExp(received); + const receivedRegex = wrapRegExp(received); const matchResult = expected.match(receivedRegex); const options = { isNot: this.isNot, diff --git a/test-utils/utils.ts b/test-utils/utils.ts index 2aaef01..323da9f 100644 --- a/test-utils/utils.ts +++ b/test-utils/utils.ts @@ -1,11 +1,11 @@ -import { buildRegex } from '../src/builders'; -import type { RegexElement, RegexEncodable } from '../src/types'; +import { buildRegExp } from '../src/builders'; +import type { RegexConstruct, RegexElement, RegexSequence } from '../src/types'; export function isRegexElement(node: unknown): node is RegexElement { - return typeof node === 'string' || isRegexEncodable(node); + return typeof node === 'string' || isRegexConstruct(node); } -export function isRegexEncodable(element: unknown): element is RegexEncodable { +export function isRegexConstruct(element: unknown): element is RegexConstruct { return ( typeof element === 'object' && element !== null && @@ -14,10 +14,10 @@ export function isRegexEncodable(element: unknown): element is RegexEncodable { ); } -export function asRegExp(regex: RegExp | RegexElement | RegexElement[]) { +export function wrapRegExp(regex: RegExp | RegexSequence) { if (regex instanceof RegExp) { return regex; } - return buildRegex(regex); + return buildRegExp(regex); }