callstack · mdjastrzebski · Feb 28, 2024 · Feb 15, 2024 · Feb 28, 2024 · Feb 28, 2024
diff --git a/README.md b/README.md
@@ -21,11 +21,7 @@ This library allows users to create regular expressions in a structured way, mak
 const hexColor = /^#?([a-fA-F0-9]{6}|[a-fA-F0-9]{3})$/;
 
 // TS Regex Builder DSL
-const hexDigit = charClass(
-  charRange('a', 'f'),
-  charRange('A', 'F'),
-  charRange('0', '9'),
-);
+const hexDigit = charClass(charRange('a', 'f'), charRange('A', 'F'), charRange('0', '9'));
 
 const hexColor = buildRegExp([
   startOfString,
@@ -66,13 +62,15 @@ const regex = buildRegExp(['Hello ', capture(oneOrMore(word))]);
 TS Regex Builder allows you to build complex regular expressions using domain-specific language.
 
 Terminology:
+
 - regex construct (`RegexConstruct`) - common name for all regex constructs like character classes, quantifiers, and anchors.
 - regex element (`RegexElement`) - a fundamental building block of a regular expression, defined as either a regex construct, a string, or `RegExp` literal (`/.../`).
 - regex sequence (`RegexSequence`) - a sequence of regex elements forming a regular expression. For developer convenience, it also accepts a single element instead of an array.
 
 Most of the regex constructs accept a regex sequence as their argument.
 
 Examples of sequences:
+
 - single element (construct): `capture('Hello')`
 - single element (string): `'Hello'`
 - single element (`RegExp` literal): `/Hello/`
@@ -152,6 +150,7 @@ See [Character Classes API doc](./docs/API.md##character-classes) for more info.
 | --------------- | ------------ | ------------------------------------------------------------------------ |
 | `startOfString` | `^`          | Match the start of the string (or the start of a line in multiline mode) |
 | `endOfString`   | `$`          | Match the end of the string (or the end of a line in multiline mode)     |
+| `wordBoundary`  | `\b`         | Match the start or end of a word without consuming characters            |
 
 See [Anchors API doc](./docs/API.md#anchors) for more info.
 
@@ -182,7 +181,6 @@ TS Regex Builder is inspired by [Swift Regex Builder API](https://developer.appl
 - [Swift Regex Builder API docs](https://developer.apple.com/documentation/regexbuilder)
 - [Swift Evolution 351: Regex Builder DSL](https://github.com/apple/swift-evolution/blob/main/proposals/0351-regex-builder.md)
 
-
 ---
 
 Made with [create-react-native-library](https://github.com/callstack/react-native-builder-bob)
diff --git a/docs/API.md b/docs/API.md
@@ -14,8 +14,7 @@ Fundamental building blocks of a regular expression, defined as either a regex c
 
 The common type for all regex constructs like character classes, quantifiers, and anchors. You should not need to use this type directly, it is returned by all regex construct functions.
 
-Note: the shape of the `RegexConstruct` is considered private and may change in a breaking way without a major release. We will focus on maintaining the compatibility of regexes built with 
-
+Note: the shape of the `RegexConstruct` is considered private and may change in a breaking way without a major release. We will focus on maintaining the compatibility of regexes built with this library.
 
 ## Builder
 
@@ -133,14 +132,15 @@ Quantifiers in regex define the number of occurrences to match for a pattern.
 function zeroOrMore(
   sequence: RegexSequence,
   options?: {
-    greedy?: boolean, // default=true
-  }
-): ZeroOrMore
+    greedy?: boolean; // default=true
+  },
+): ZeroOrMore;
 ```
 
 Regex syntax:
-* `x*` for default greedy behavior (match as many characters as possible)
-* `x*?` for non-greedy behavior (match as few characters as possible)
+
+- `x*` for default greedy behavior (match as many characters as possible)
+- `x*?` for non-greedy behavior (match as few characters as possible)
 
 The `zeroOrMore` quantifier matches zero or more occurrences of a given pattern, allowing a flexible number of repetitions of that element.
 
@@ -150,14 +150,15 @@ The `zeroOrMore` quantifier matches zero or more occurrences of a given pattern,
 function oneOrMore(
   sequence: RegexSequence,
   options?: {
-    greedy?: boolean, // default=true
-  }
-): OneOrMore
+    greedy?: boolean; // default=true
+  },
+): OneOrMore;
 ```
 
 Regex syntax:
-* `x+` for default greedy behavior (match as many characters as possible)
-* `x+?` for non-greedy behavior (match as few characters as possible)
+
+- `x+` for default greedy behavior (match as many characters as possible)
+- `x+?` for non-greedy behavior (match as few characters as possible)
 
 The `oneOrMore` quantifier matches one or more occurrences of a given pattern, allowing a flexible number of repetitions of that element.
 
@@ -167,14 +168,15 @@ The `oneOrMore` quantifier matches one or more occurrences of a given pattern, a
 function optional(
   sequence: RegexSequence,
   options?: {
-    greedy?: boolean, // default=true
-  }
-): Optionally
+    greedy?: boolean; // default=true
+  },
+): Optionally;
 ```
 
 Regex syntax:
-* `x?` for default greedy behavior (match as many characters as possible)
-* `x??` for non-greedy behavior (match as few characters as possible)
+
+- `x?` for default greedy behavior (match as many characters as possible)
+- `x??` for non-greedy behavior (match as few characters as possible)
 
 The `optional` quantifier matches zero or one occurrence of a given pattern, making it optional.
 
@@ -183,17 +185,20 @@ The `optional` quantifier matches zero or one occurrence of a given pattern, mak
 ```ts
 function repeat(
   sequence: RegexSequence,
-  options: number | { 
-    min: number;
-    max?: number; 
-    greedy?: boolean;  // default=true
-  },
-): Repeat
+  options:
+    | number
+    | {
+        min: number;
+        max?: number;
+        greedy?: boolean; // default=true
+      },
+): Repeat;
 ```
 
 Regex syntax:
-* `x{n}`, `x{min,}`, `x{min, max}` for default greedy behavior (match as many characters as possible)
-* `x{min,}?`, `x{min, max}?` for non-greedy behavior (match as few characters as possible)
+
+- `x{n}`, `x{min,}`, `x{min, max}` for default greedy behavior (match as many characters as possible)
+- `x{min,}?`, `x{min, max}?` for non-greedy behavior (match as few characters as possible)
 
 The `repeat` quantifier in regex matches either exactly `count` times or between `min` and `max` times. If only `min` is provided, it matches at least `min` times.
 
@@ -301,3 +306,15 @@ const endOfString: Anchor;
 
 - `startOfString` anchor matches the start of a string (or line, if multiline mode is enabled). Regex syntax: `^`.
 - `endOfString` anchor matches the end of a string (or line, if multiline mode is enabled). Regex syntax: `$`.
+
+### Word boundary
+
+```ts
+const wordBoundary: Anchor;
+const notWordBoundary: Anchor;
+```
+
+- `wordBoundary` matches the positions between a word character and a non-word character (or the start/end of the string), effectively indicating the start or end of a word, without consuming any characters. Regex syntax: `\b`.
+- `notWordBoundary` matches every position that is not a word boundary, e.g. between two word characters or between two non-word characters, indicating that it is not at the start or end of a word. Regex syntax: `\B`.
+
+Note: word characters are ASCII letters, digits, and underscore (`_`). Other characters like `#`, `$`, etc., are not considered word characters.
diff --git a/docs/Examples.md b/docs/Examples.md
@@ -40,7 +40,7 @@ const regex = buildRegExp(
   { ignoreCase: true },
 );
 
-const isValid = regex.test("#ffffff");
+const isValid = regex.test('#ffffff');
 ```
 
 Encoded regex: `/^#?(?:[a-f\d]{6}|[a-f\d]{3})$/i`.
@@ -70,7 +70,7 @@ const regex = buildRegExp([
   endOfString,
 ]);
 
-const isValid = regex.test("https://hello.github.com");
+const isValid = regex.test('https://hello.github.com');
 ```
 
 Encoded regex: `/^(?:(?:http|https):\/\/)?(?:(?:[a-z\d]|[a-z\d][a-z\d-]*[a-z\d])\.)+[a-z][a-z\d]+$/`.
@@ -100,7 +100,7 @@ const regex = buildRegExp(
   { ignoreCase: true },
 );
 
-const isValid = regex.test("user@example.com");
+const isValid = regex.test('user@example.com');
 ```
 
 Encoded regex: `/^[a-z\d._%+-]+@[a-z\d.-]+\.[a-z]{2,}$/i`.
@@ -126,7 +126,7 @@ const regex = buildRegExp([
   endOfString,
 ]);
 
-const isValid = regex.test("1.0e+27");
+const isValid = regex.test('1.0e+27');
 ```
 
 Encoded regex: `/^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?$/`.
@@ -190,6 +190,7 @@ See tests: [example-regexp.ts](../src/__tests__/example-regexp.ts).
 ## Simple password validation
 
 This regex corresponds to following password policy:
+
 - at least one uppercase letter
 - at least one lowercase letter
 - at least one digit
@@ -205,16 +206,16 @@ const atLeastEightChars = /.{8,}/;
 
 // Match
 const validPassword = buildRegExp([
-    startOfString,
-    atLeastOneUppercase,
-    atLeastOneLowercase,
-    atLeastOneDigit,
-    atLeastOneSpecialChar,
-    atLeastEightChars,
-    endOfString
+  startOfString,
+  atLeastOneUppercase,
+  atLeastOneLowercase,
+  atLeastOneDigit,
+  atLeastOneSpecialChar,
+  atLeastEightChars,
+  endOfString,
 ]);
 
-const isValid = regex.test("Aa$123456");
+const isValid = validPassword.test('Aa$123456');
 ```
 
 Encoded regex: `/^(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[^A-Za-z0-9\s])(?:.{8,})$/`.
@@ -243,9 +244,57 @@ const currencyRegex = buildRegExp([
   endOfString,
 ]);
 
-const isValid = regex.test("£1,000");
+const isValid = currencyRegex.test('£1,000');
 ```
 
 Encoded regex: `/(?<=[$€£¥R₿])\s?\d{1,3}(?:,?\d{3})*(?:\.\d{2})?$/`.
 
 See tests: [example-currency.ts](../src/__tests__/example-currency.ts).
+
+## Finding specific whole words
+
+Matches any of the given words, ignoring cases where the word is part of a bigger word.
+
+```ts
+const wordsToFind = ['word', 'date'];
+
+const regex = buildRegExp([
+  wordBoundary, // match whole words only
+  choiceOf(...wordsToFind),
+  wordBoundary,
+]);
+
+expect(regex).toMatchString('word');
+expect(regex).toMatchString('date');
+
+expect(regex).not.toMatchString('sword');
+expect(regex).not.toMatchString('update');
+```
+
+Encoded regex: `/\b(?:word|date)\b/`.
+
+See tests: [example-find-words.ts](../src/__tests__/example-find-words.ts).
+
+## Finding specific suffixes
+
+Matches words ending with one of the given suffixes, ignoring cases where the suffix stands alone as a whole word.
+
+```ts
+const suffixesToFind = ['acy', 'ism'];
+
+const regex = buildRegExp([
+  notWordBoundary, // match suffixes only
+  choiceOf(...suffixesToFind),
+  wordBoundary,
+]);
+
+expect(regex).toMatchString('privacy ');
+expect(regex).toMatchString('democracy');
+
+expect(regex).not.toMatchString('acy');
+expect(regex).not.toMatchString('ism');
+```
+
+Encoded regex: `/\B(?:acy|ism)\b/`.
+
+See tests: [example-find-suffixes.ts](../src/__tests__/example-find-suffixes.ts).
diff --git a/src/__tests__/example-find-suffixes.ts b/src/__tests__/example-find-suffixes.ts
@@ -0,0 +1,24 @@
+import { buildRegExp, choiceOf, notWordBoundary, wordBoundary } from '..';
+
+test('example: find words with suffix', () => {
+  const suffixesToFind = ['acy', 'ism'];
+
+  const regex = buildRegExp([
+    notWordBoundary, // match suffixes only
+    choiceOf(...suffixesToFind),
+    wordBoundary,
+  ]);
+
+  expect(regex).toMatchString('democracy');
+  expect(regex).toMatchString('Bureaucracy');
+  expect(regex).toMatchString('abc privacy ');
+  expect(regex).toMatchString('abc dynamism');
+  expect(regex).toMatchString('realism abc');
+  expect(regex).toMatchString('abc modernism abc');
+
+  expect(regex).not.toMatchString('abc acy');
+  expect(regex).not.toMatchString('ism abc');
+  expect(regex).not.toMatchString('dynamisms');
+
+  expect(regex).toEqualRegex(/\B(?:acy|ism)\b/);
+});
diff --git a/src/__tests__/example-find-words.ts b/src/__tests__/example-find-words.ts
@@ -0,0 +1,23 @@
+import { buildRegExp, choiceOf, wordBoundary } from '..';
+
+test('example: find specific words', () => {
+  const wordsToFind = ['word', 'date'];
+
+  const regex = buildRegExp([
+    wordBoundary, // match whole words only
+    choiceOf(...wordsToFind),
+    wordBoundary,
+  ]);
+
+  expect(regex).toMatchString('word');
+  expect(regex).toMatchString('some date');
+  expect(regex).toMatchString('date and word');
+
+  expect(regex).not.toMatchString('sword');
+  expect(regex).not.toMatchString('keywords');
+  expect(regex).not.toMatchString('words');
+  expect(regex).not.toMatchString('update');
+  expect(regex).not.toMatchString('dates');
+
+  expect(regex).toEqualRegex(/\b(?:word|date)\b/);
+});