js-regex is a fluent regex builder for JavaScript. Its aim is to make the writing and maintenance of complicated regexes less taxing and error-prone.
js-regex has a mix of features that make it especially appealing, when compared to writing raw regexes or using other builder libraries, for building complicated regexes:
- Macros
- Macros are basically named sequences
- That can be registered for a particular builder instance, or across all js-regex objects
- That are added onto the current regex as a single term by using
.macro(registeredName)
- Named Capture Groups
- When using exec and similar functions, you don't get an array of the matches
- Instead, you get an object with the
match
property (representing the entire match of the regex) - Along with a property for each named capture group you gave to
.capture(...)
- Minimal Generated Expressions
- Some regex builder libraries have a habit of wrapping almost everything you add in a non-capture group (`(?:<stuff here>)`)
- The above works, and is easy to make correct
- But js-regex has the goal of not doing so whenever actually possible
- Named Backreferences
- Ignore the pumping lemma with your non-regular language expressions
- Backreferences, in brief, allow you to refer to a previous captured group, and say that that text has to repeat itself exactly
Let's suppose that you've been asked to figure out why the following regex isn't working:
(SH|RE|MF)-((?:197[1-9]|19[89]\d|[2-9]\d{3})-(?:0[1-9]|1[012])-(?:0[1-9]|[12]\d|3[01]))-((?!0{5})\d{5})
If you're experienced with regexes, it's certainly possible to gain an understanding of it, but it takes longer than it should.
This is one example regex that has been built with this library; see below for this example translated into a js-regex equivalent, or simply read on to go through most of the API before jumping into the complex examples.
In addition to the usage documented below, with a matching test suite here, there's a fair number of other test cases here.
At the time of writing, js-regex has more test code than executable code, and this is likely to remain the case:
wc -l regex.js
851 regex.js
wc -l test/cases/*
160 test/cases/alt_syntax.js
30 test/cases/any.js
163 test/cases/capture.js
30 test/cases/flags.js
18 test/cases/literals.js
68 test/cases/macros.js
180 test/cases/or.js
427 test/cases/readme_cases.js
299 test/cases/repeat.js
46 test/cases/sequence.js
20 test/cases/states.js
9 test/cases/test.js
1450 total
regex()
.literals('abc')
.peek(); // Will return 'abc'
regex()
.literals('abc')
.call(function (curNode) {
console.log(this === curNode); // Will print true
console.log(curNode.peek()); // Will print 'abc'
})
.literals('def')
.call(function (curNode) {
console.log(curNode.peek()); // Will print 'abcdef'
});
regex()
.f.digit()
.f.whitespace()
.peek(); // Will return '\d\s'
regex()
.literals('aaa')
.capture()
.peek(); // Will return '(aaa)'
regex()
.literals('aaa')
.repeat()
.peek(); // Will return '(?:aaa)*'
regex()
.literals('aaa')
.call(function (curNode) {
console.log(curNode.peek()); // Will print 'aaa'
})
.repeat(1, 3)
.peek(); // Will return '(?:aaa){1,3}'
regex()
.sequence()
.literals('aaa')
.f.digit()
.literals('bbb')
.endSequence()
.repeat()
.peek(); // Will return '(?:aaa\dbbb)*'
regex().sequence('aaa', regex.flags.digit(), 'bbb')
.repeat()
.peek(); // Will return '(?:aaa\dbbb)*'
regex()
.any('abcdefg')
.peek(); // Will return '[abcdefg]'
regex()
.any()
.literals('abc')
.f.digit()
.endAny()
.peek(); // Will return '[abc\d]'
regex()
.none()
.literals('abc')
.f.whitespace()
.endNone()
.peek(); // Will return '[^abc\s]'
regex()
.either()
.literals('abc')
.literals('def')
.endEither()
.peek(); // Will return 'abc|def'
regex()
.either('abc', regex.any('def'))
.peek(); // Will return 'abc|[def]'
regex.create(); // Alternate form of regex()
regex
.addMacro('any-quote') // Adding a global macro for single or double quote
.any('\'"')
.endMacro()
.create()
.macro('any-quote')
.f.dot()
.repeat()
.macro('any-quote')
.peek(); // Will return '['"].*['"]'
regex
.addMacro('quote')
.any('\'"')
.endMacro()
.create()
.addMacro('quote') // Local macros override global ones
.literal('"') // Here, restricting to double quote only
.endMacro()
.macro('quote')
.f.dot()
.repeat()
.macro('quote')
.peek(); // Will return '".*"'
regex()
.literals('aaa')
.followedBy('bbb')
.peek(); // Will return 'aaa(?=bbb)'
regex()
.literals('ccc')
.notFollowedBy('ddd')
.peek(); // Will return 'ccc(?!ddd)'
regex()
.flags.anything()
.repeat()
.capture('preamble')
.either('cool!', 'awesome!')
.capture('exclamation')
.call(function (rb) {
// Would print '(.*)(cool!|awesome!)'
console.log(rb.peek());
// Would print 'this is '
console.log(rb.exec('this is cool! isn\'t it?').preamble);
// Would print 'cool!'
console.log(rb.exec('this is cool! isn\'t it?').exclamation);
// Would print 'this is also '
console.log(rb.exec('this is also awesome!').preamble);
// Would print 'awesome!'
console.log(rb.exec('this is also awesome!').exclamation);
});
You know how JS regular expressions are more powerful than regular languages? You can reference previous capture terms. js-regex supports this:
regex()
.flags.anything()
.repeat(1)
.capture('anything')
.literal('-')
.reference('anything')
.call(function (rb) {
// Would print '(.+)-\1'
console.log(rb.peek());
// Would print 'whatever'
console.log(rb.exec('whatever-whatever').anything);
// Would print false
console.log(rb.test('whatever-whatev'));
});
How quickly can you figure out what this is supposed to represent?
regex()
.addMacro('0-255')
.either()
.sequence()
.literals('25')
.anyFrom('0', '5')
.endSequence()
.sequence()
.literal('2')
.anyFrom('0', '4')
.anyFrom('0', '9')
.endSequence()
.sequence()
.any('01').optional()
.anyFrom('0', '9')
.anyFrom('0', '9').optional()
.endSequence()
.endEither()
.endMacro()
.macro('0-255').capture()
.literal('.')
.macro('0-255').capture()
.literal('.')
.macro('0-255').capture()
.literal('.')
.macro('0-255').capture()
.peek();
(Hint: it's described here, in the fourth section on the page.)
(Also note: this example uses the 'verbose' usage form, always closing portions with endXXX(); the Readme tests cover the same using an alternate form)
So our 'business logic' regex looks like this:
(SH|RE|MF)-((?:197[1-9]|19[89]\d|[2-9]\d{3})-(?:0[1-9]|1[012])-(?:0[1-9]|[12]\d|3[01]))-((?!0{5})\d{5})
Written in human terms, that would be: one of three department codes, a dash, a YYYY-MM-DD date (on or after Jan 1, 1971), a dash, then a 5-digit number other than 00000.
In converting this regex to use js-regex, we make use of macros to define the department code, the date, and the trailing number. Note that most of this example is spent setting up the date regex - if your situation called for many dates being used in the application, the cost of setting up this most complicated portion of the regex would only need to be paid once, after which it would be usable in other circumstances with no code changes, and far greater readability.
Anyway, let's take a look:
regex
// Setting up our macros...
.addMacro('dept-prefix', regex.either('SH', 'RE', 'MF'))
.addMacro('date',
regex.either(
regex.sequence(
'197',
regex.anyFrom('1', '9')),
regex.sequence(
'19',
regex.any('89'),
regex.flags.digit()),
regex.sequence(
regex.anyFrom('2', '9'),
regex.flags.digit().repeat(3, 3))),
'-',
regex.either(
regex.sequence(
'0',
regex.anyFrom('1', '9')),
regex.sequence(
'1',
regex.any('012'))),
'-',
regex.either(
regex.sequence(
'0',
regex.anyFrom('1', '9')),
regex.sequence(
regex.any('12'),
regex.flags.digit()),
regex.sequence(
'3',
regex.any('01'))))
.addMacro('issuenum',
regex.notFollowedBy()
.literal('0')
.repeat(5, 5),
regex.flags.digit()
.repeat(5, 5))
// Macros are set up, let's create our actual regex now:
.create()
.macro('dept-prefix').capture()
.literal('-')
.macro('date').capture()
.literal('-')
.macro('issuenum').capture()
.peek(); // Returns the string shown above this code example
Perhaps this library piques your interest. If so, cool! Let me know! Just make sure that nothing on the issues page scares you before jumping in and actually using it.
test() is still kinda pointless.
regex()
.literal('a')
.test('a'); // Will return true
Needs more tests.
regex()
.literals('abc')
.replace('abc', function () {
return 'def';
}); // Will return 'def'