Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow unicode flag if all RegExps use it #123

Merged
merged 1 commit into from
Feb 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions moo.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
if (obj.global) throw new Error('RegExp /g flag is implied')
if (obj.sticky) throw new Error('RegExp /y flag is implied')
if (obj.multiline) throw new Error('RegExp /m flag is implied')
if (obj.unicode) throw new Error('RegExp /u flag is not allowed')
return obj.source

} else {
Expand Down Expand Up @@ -154,6 +153,7 @@
var errorRule = null
var fast = Object.create(null)
var fastAllowed = true
var unicodeFlag = null
var groups = []
var parts = []

Expand Down Expand Up @@ -210,6 +210,20 @@

groups.push(options)

// Check unicode flag is used everywhere or nowhere
for (var j = 0; j < match.length; j++) {
var obj = match[j]
if (!isRegExp(obj)) {
continue
}

if (unicodeFlag === null) {
unicodeFlag = obj.unicode
} else if (unicodeFlag !== obj.unicode) {
throw new Error("If one rule is /u then all must be")
}
}

// convert to RegExp
var pat = reUnion(match.map(regexpOrLiteral))

Expand Down Expand Up @@ -241,8 +255,9 @@
var fallbackRule = errorRule && errorRule.fallback
var flags = hasSticky && !fallbackRule ? 'ym' : 'gm'
var suffix = hasSticky || fallbackRule ? '' : '|'
var combined = new RegExp(reUnion(parts) + suffix, flags)

if (unicodeFlag === true) flags += "u"
var combined = new RegExp(reUnion(parts) + suffix, flags)
return {regexp: combined, groups: groups, fast: fast, error: errorRule || defaultErrorRule}
}

Expand Down
35 changes: 30 additions & 5 deletions test/test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops!

const fs = require('fs')
const vm = require('vm')

Expand Down Expand Up @@ -29,17 +28,14 @@ describe('compiler', () => {
expect(lex4.next()).toMatchObject({type: 'err', text: 'nope!'})
})

test("warns for /g, /y, /i, /m, /u", () => {
test("warns for /g, /y, /i, /m", () => {
expect(() => compile({ word: /foo/ })).not.toThrow()
expect(() => compile({ word: /foo/g })).toThrow('implied')
expect(() => compile({ word: /foo/i })).toThrow('not allowed')
expect(() => compile({ word: /foo/y })).toThrow('implied')
expect(() => compile({ word: /foo/m })).toThrow('implied')
expect(() => compile({ word: /foo/u })).toThrow('not allowed')
})

// TODO warns if no lineBreaks: true

test('warns about missing states', () => {
const rules = [
{match: '=', next: 'missing'},
Expand Down Expand Up @@ -1186,3 +1182,32 @@ describe('include', () => {
])
})
})


// The /u flag is accepted only when every RegExp rule handed to compile()
// carries it; string-literal rules are not part of that check.
describe("unicode flag", () => {

  test("allows all rules to be /u", () => {
    // Every RegExp rule has /u (the plain string rule is ignored): compiles.
    expect(() => compile({ a: /foo/u, b: /bar/u, c: "quxx" })).not.toThrow()
    // Mixing /u and non-/u RegExps is rejected regardless of rule order.
    expect(() => compile({ a: /foo/u, b: /bar/, c: "quxx" })).toThrow("If one rule is /u then all must be")
    expect(() => compile({ a: /foo/, b: /bar/u, c: "quxx" })).toThrow("If one rule is /u then all must be")
  })

  test("supports unicode", () => {
    // An astral character inside a class is one code point only under /u.
    const lexer = compile({
      a: /[𝌆]/u,
    })
    lexer.reset("𝌆")
    expect(lexer.next()).toMatchObject({value: "𝌆"})
    // Feed just the high surrogate half. charAt(0) yields a one-unit *string*;
    // charCodeAt(0) would hand reset() a number, so the lexer would throw for
    // an unrelated reason and the surrogate case would never be exercised.
    lexer.reset("𝌆".charAt(0))
    expect(() => lexer.next()).toThrow()

    // \u{...} code point escapes are only interpreted under /u.
    const lexer2 = compile({
      a: /\u{1D356}/u,
    })
    lexer2.reset("𝍖")
    expect(lexer2.next()).toMatchObject({value: "𝍖"})
    // The literal text backslash-u-{1D356} must not match the escaped rule.
    lexer2.reset("\\u{1D356}")
    expect(() => lexer2.next()).toThrow()
  })

})