From 6d11aa96e0dabb57218d4f0b3bdb3c83d749f85b Mon Sep 17 00:00:00 2001 From: JJ Kasper Date: Thu, 29 Apr 2021 19:48:39 -0500 Subject: [PATCH] Remove un-used lib files (#24625) --- .eslintignore | 3 +- packages/next/lib/regexr/expression-lexer.js | 951 ------------------ packages/next/lib/regexr/profile/core.js | 420 -------- packages/next/lib/regexr/profile/index.js | 15 - .../next/lib/regexr/profile/javascript.js | 160 --- 5 files changed, 1 insertion(+), 1548 deletions(-) delete mode 100644 packages/next/lib/regexr/expression-lexer.js delete mode 100644 packages/next/lib/regexr/profile/core.js delete mode 100644 packages/next/lib/regexr/profile/index.js delete mode 100644 packages/next/lib/regexr/profile/javascript.js diff --git a/.eslintignore b/.eslintignore index 48f68189799e1..37e0a229afb6d 100644 --- a/.eslintignore +++ b/.eslintignore @@ -16,5 +16,4 @@ packages/next-codemod/**/*.js packages/next-codemod/**/*.d.ts packages/next-env/**/*.d.ts test/integration/async-modules/** -test-timings.json -packages/next/lib/regexr/**/* \ No newline at end of file +test-timings.json \ No newline at end of file diff --git a/packages/next/lib/regexr/expression-lexer.js b/packages/next/lib/regexr/expression-lexer.js deleted file mode 100644 index 2beccd460473a..0000000000000 --- a/packages/next/lib/regexr/expression-lexer.js +++ /dev/null @@ -1,951 +0,0 @@ -/* -RegExr: Learn, Build, & Test RegEx -Copyright (C) 2017 gskinner.com, inc. -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - -const profile = require('./profile') - -const Utils = { - copy: Object.assign, -} - -const SUPPORT_MAP_PROPS = { - // 1 = reverse, 0 - normal - flags: 1, - // escape is handled separately - // escCharCodes is handled separately - escCharTypes: 1, - charTypes: 1, - // unquantifiables not included - // unicodeScripts not included - // unicodeCategories not included - // posixCharClasses not included - // modes not included - tokens: 0, - substTokens: 0, - // config not included - // docs not included -} - -class ExpressionLexer { - constructor() { - this._profile = profile - this.string = this.token = this.errors = this.captureGroups = this.namedGroups = null - this._buildSupportMap(this._profile) - } - - parse(str) { - if (!this._profile) { - return null - } - if (str === this.string) { - return this.token - } - - this.token = null - this._modes = {} - this.string = str - this.errors = [] - let capgroups = (this.captureGroups = []) - let namedgroups = (this.namedGroups = {}) - let brgroups = (this.branchResetGroups = []) - let groups = [], - refs = [], - i = 0, - l = str.length - let o, - c, - token, - charset = null - // previous is the previous token, prv is the previous "active" token (!ignore) - let prev = null, - prv = null - let profile = this._profile, - unquantifiable = profile.unquantifiable - let charTypes = profile.charTypes - let closeIndex = str.lastIndexOf('/') - - for (let i = closeIndex + 1; i < l; i++) { - this._modes[str[i]] = true - } - - while (i < l) { - c = str[i] - - token = { i: i, l: 1, prev: prev, prv: prv, modes: this._modes } - if (prev) { - prev.next = token - } else { - this.token = token - } - - if (i === 0 || i >= closeIndex) { - this.parseFlag(str, token) - } else if (c === '(' && !charset) { - this.parseParen(str, token) - if (token.close === null) { - token.depth = groups.length - groups.push(token) - } - if (token.capture) { - this.addCaptureGroup(token, groups) - } - } else if (c === ')' && !charset) { - token.type = 'groupclose' - if (groups.length) { - o = token.open = groups.pop() - o.close = token - if (o.type === 'branchreset') { - brgroups.pop() - } - } else { - token.error = { id: 'groupclose' } - } - } else if (c === '[') { - charset = this.parseSquareBracket(str, token, charset) - } else if (c === ']' && charset) { - token.type = 'setclose' - token.open = charset - charset.close = token - charset = null - } else if ( - c === '+' && - prv && - prv.clss === 'quant' && - profile.tokens.possessive - ) { - token.type = 'possessive' - token.related = [prv] - } else if ((c === '+' || c === '*') && !charset) { - token.type = charTypes[c] - token.clss = 'quant' - token.min = c === '+' ? 1 : 0 - token.max = -1 - } else if ( - c === '{' && - !charset && - str.substr(i).search(/^{\d+,?\d*}/) !== -1 - ) { - this.parseQuant(str, token) - } else if (c === '\\') { - this.parseBackSlash(str, token, charset, closeIndex) - } else if (c === '?' && !charset) { - if (!prv || prv.clss !== 'quant') { - token.type = charTypes[c] - token.clss = 'quant' - token.min = 0 - token.max = 1 - } else { - token.type = 'lazy' - token.related = [prv] - } - } else if ( - c === '-' && - charset && - prv.code !== undefined && - prv.prv && - prv.prv.type !== 'range' - ) { - // this may be the start of a range, but we'll need to validate after the next token. - token.type = 'range' - } else { - this.parseChar(str, token, charset) - if (!charset && this._modes.x && /\s/.test(c)) { - token.ignore = true - token.type = 'ignorews' - } - } - - // post process token: - // quantifier: - if (token.clss === 'quant') { - if ( - !prv || - prv.close !== undefined || - unquantifiable[prv.type] || - (prv.open && unquantifiable[prv.open.type]) - ) { - token.error = { id: 'quanttarg' } - } else { - token.related = [prv.open || prv] - } - } - - // reference: - if (token.group === true) { - refs.push(token) - } - - // conditional: - let curGroup = groups.length ? groups[groups.length - 1] : null - if ( - curGroup && - (curGroup.type === 'conditional' || - curGroup.type === 'conditionalgroup') && - token.type === 'alt' - ) { - if (!curGroup.alt) { - curGroup.alt = token - } else { - token.error = { id: 'extraelse' } - } - token.related = [curGroup] - token.type = 'conditionalelse' - token.clss = 'special' - } else if (curGroup && curGroup.type === 'branchreset') { - // reset group - curGroup.curGroupNum = curGroup.inGroupNum - } - - // range: - if (prv && prv.type === 'range' && prv.l === 1) { - this.validateRange(str, token) - } - - // js warnings: - // TODO: this isn't ideal, but I'm hesitant to write a more robust solution for a couple of edge cases. - if (profile.id === 'js') { - this.addJSWarnings(token) - } - - // general: - if (token.open && !token.clss) { - token.clss = token.open.clss - } - if (token.error) { - this.addError(token) - } - i += token.l - prev = token - if (!token.ignore) { - prv = token - } - } - - // post processing: - while (groups.length) { - this.addError(groups.pop(), { id: 'groupopen' }) - } - this.matchRefs(refs, capgroups, namedgroups) - if (charset) { - this.addError(charset, { id: 'setopen' }) - } - - return this.token - } - - _buildSupportMap(profile) { - if (profile._supportMap) { - return - } - let map = (profile._supportMap = {}), - props = SUPPORT_MAP_PROPS, - n - for (n in props) { - this._addToSupportMap(map, profile[n], !!props[n]) - } - let o = profile.escCharCodes, - esc = profile.escChars - for (n in o) { - map['esc_' + o[n]] = true - } - for (n in esc) { - map['esc_' + esc[n]] = true - } - } - - _addToSupportMap(map, o, rev) { - if (rev) { - for (let n in o) { - map[o[n]] = true - } - } else { - for (let n in o) { - map[n] = o[n] - } - } - } - - addError(token, error = token.error) { - token.error = error - this.errors.push(token) - } - - addJSWarnings(token) { - if (token.error) { - return - } - if ( - token.type === 'neglookbehind' || - token.type === 'poslookbehind' || - token.type === 'sticky' || - token.type === 'unicode' || - token.type == 'dotall' || - token.type === 'unicodecat' || - token.type === 'unicodescript' || - token.type === 'namedgroup' - ) { - token.error = { id: 'jsfuture', warning: true } - } - } - - addCaptureGroup(token, groups) { - // it would be nice to make branch reset groups actually highlight all of the groups that share the same number - // that would require switching to arrays of groups for each group num - requires rearchitecture throughout the app. - let capgroups = this.captureGroups, - brgroups = this.branchResetGroups, - namedgroups = this.namedGroups - let curGroup = groups.length ? groups[groups.length - 1] : null - if (brgroups.length) { - let brgroup = brgroups[brgroups.length - 1] - token.num = ++brgroup.curGroupNum - } else { - token.num = capgroups.length + 1 - } - if (!capgroups[token.num - 1]) { - capgroups.push(token) - } - if (token.name && !token.error) { - if (/\d/.test(token.name[0])) { - token.error = { id: 'badname' } - } else if (namedgroups[token.name]) { - token.error = { id: 'dupname' } - token.related = [namedgroups[token.name]] - } else { - namedgroups[token.name] = token - } - } - } - - getRef(token, str) { - token.clss = 'ref' - token.group = true - token.relIndex = this.captureGroups.length - token.name = str - } - - matchRefs(refs, indexes, names) { - while (refs.length) { - let token = refs.pop(), - name = token.name, - group = names[name] - - if (!group && !isNaN(name)) { - let sign = name[0], - index = - parseInt(name) + (sign === '+' || sign === '-' ? token.relIndex : 0) - if (sign === '-') { - index++ - } - group = indexes[index - 1] - } - if (group) { - token.group = group - token.related = [group] - token.dir = - token.i < group.i - ? 1 - : !group.close || token.i < group.close.i - ? 0 - : -1 - } else { - delete token.group - delete token.relIndex - this.refToOctal(token) - if (token.error) { - this.errors.push(token.error) - } - } - } - } - - refToOctal(token) { - // PCRE: \# unmatched, \0 \00 \## = octal - // JS: \# \0 \00 \## = octal - // PCRE matches \8 \9 to "8" "9" - // JS: without the u flag \8 \9 match "8" "9" in IE, FF & Chrome, and "\8" "\9" in Safari. We support the former. - // JS: with the u flag, Chrome & FF throw an esc error, Safari does not. - - // TODO: handle \0 for PCRE? Would need more testing. - // TODO: this doesn't handle two digit refs with 8/9 in them. Ex. \18 - not even sure what this is interpreted as. - let name = token.name, - profile = this._profile - if (token.type !== 'numref') { - // not a simple \4 style reference, so can't decompose into an octal. - token.error = { id: 'unmatchedref' } - } else if ( - /^[0-7]{2}$/.test(name) || - (profile.config.reftooctalalways && /^[0-7]$/.test(name)) - ) { - // octal - let next = token.next, - char = String.fromCharCode(next.code) - if ( - next.type === 'char' && - char >= '0' && - char <= '7' && - parseInt(name + char, 8) <= 255 - ) { - name += char - this.mergeNext(token) - } - token.code = parseInt(name, 8) - token.clss = 'esc' - token.type = 'escoctal' - delete token.name - } else if (name === '8' || name === '9') { - this.parseEscChar(token, name) - delete token.name - } else { - token.error = { id: 'unmatchedref' } - } - } - - mergeNext(token) { - let next = token.next - token.next = next.next - token.next.prev = token - token.l++ - } - - parseFlag(str, token) { - // note that this doesn't deal with misformed patterns or incorrect flags. - let i = token.i, - c = str[i] - if (str[i] === '/') { - token.type = i === 0 ? 'open' : 'close' - if (i !== 0) { - token.related = [this.token] - this.token.related = [token] - } - } else { - token.type = this._profile.flags[c] - } - //token.clear = true; - } - - parseChar(str, token, charset) { - let c = str[token.i] - token.type = (!charset && this._profile.charTypes[c]) || 'char' - if (!charset && c === '/') { - token.error = { id: 'fwdslash' } - } - if (token.type === 'char') { - token.code = c.charCodeAt(0) - } else if (ExpressionLexer.ANCHOR_TYPES[token.type]) { - token.clss = 'anchor' - } else if (token.type === 'dot') { - token.clss = 'charclass' - } - return token - } - - parseSquareBracket(str, token, charset) { - let match - if ( - this._profile.tokens.posixcharclass && - (match = str.substr(token.i).match(/^\[(:|\.)([^\]]*?)\1]/)) - ) { - // posixcharclass: [:alpha:] - // posixcollseq: [.ch.] - // currently neither flavor supports posixcollseq, but PCRE does flag as an error: - // TODO: the expression above currently does not catch [.\].] - token.l = match[0].length - token.value = match[2] - token.clss = 'charclass' - if (match[1] === ':') { - token.type = 'posixcharclass' - if (!this._profile.posixCharClasses[match[2]]) { - token.error = { id: 'posixcharclassbad' } - } else if (!charset) { - token.error = { id: 'posixcharclassnoset' } - } - } else { - token.type = 'posixcollseq' - // TODO: can this be generalized? Right now, no, because we assign ids that aren't in the profile. - token.error = { id: 'notsupported' } - } - } else if (!charset) { - // set [a-z] [aeiou] - // setnot [^a-z] - token.type = token.clss = 'set' - if (str[token.i + 1] === '^') { - token.l++ - token.type += 'not' - } - charset = token - } else { - // [[] (square bracket inside a set) - this.parseChar(str, token, charset) - } - return charset - } - - parseParen(str, token) { - /* - core: - . group: - . lookahead: ?= ?! - . noncap: ?: - PCRE: - . lookbehind: ?<= ? ?'name' ? - . namedref: ?P=name Also: \g'name' \k'name' etc - . comment: ?# - . atomic: ?> - . recursion: ?0 ?R Also: \g<0> - . define: ?(DEFINE) - . subroutine: ?1 ?-1 ?&name ?P>name - conditionalgroup: ?(1)a|b ?(-1)a|b ?(name)a|b - conditional: ?(?=if)then|else - mode: ?c-i - branchreset: ?| - */ - - token.clss = token.type = 'group' - if (str[token.i + 1] !== '?') { - token.close = null // indicates that it needs a close token. - token.capture = true - return token - } - - let sub = str.substr(token.i + 2), - match, - s = sub[0] - - if (s === ':') { - // (?:foo) - token.type = 'noncapgroup' - token.close = null - token.l = 3 - } else if (s === '>') { - // (?>foo) - token.type = 'atomic' - token.close = null - token.l = 3 - } else if (s === '|') { - // (?|(a)|(b)) - token.type = 'branchreset' - token.close = null - token.l = 3 - token.inGroupNum = token.curGroupNum = this.captureGroups.length - this.branchResetGroups.push(token) - } else if (s === '#' && (match = sub.match(/[^)]*\)/))) { - // (?#foo) - token.clss = token.type = 'comment' - token.ignore = true - token.l = 2 + match[0].length - } else if (/^(R|0)\)/.test(sub)) { - // (?R) (?0) - token.clss = 'ref' - token.type = 'recursion' - token.l = 4 - } else if ((match = sub.match(/^P=(\w+)\)/i))) { - // (?P=name) - token.type = 'namedref' - this.getRef(token, match[1]) - token.l = match[0].length + 2 - } else if (/^\(DEFINE\)/.test(sub)) { - // (?(DEFINE)foo) - token.type = 'define' - token.close = null - token.l = 10 - } else if ((match = sub.match(/^/)) || - (this._profile.config.namedgroupalt && - ((match = sub.match(/^'(\w+)'/)) || (match = sub.match(/^P<(\w+)>/)))) - ) { - // (?foo) (?'name'foo) (?Pfoo) - token.type = 'namedgroup' - token.close = null - token.name = match[1] - token.capture = true - token.l = match[0].length + 2 - } else if ( - (match = sub.match(/^([-+]?\d\d?)\)/)) || - (match = sub.match(/^(?:&|P>)(\w+)\)/)) - ) { - // (?1) (?-1) (?&name) (?P>name) - token.type = (isNaN(match[1]) ? 'named' : 'num') + 'subroutine' - this.getRef(token, match[1]) - token.l = match[0].length + 2 - } else if ( - (match = sub.match(/^\(([-+]?\d\d?)\)/)) || - (match = sub.match(/^\((\w+)\)/)) - ) { - // (?(1)a|b) (?(-1)a|b) (?(name)a|b) - this.getRef(token, match[1]) - token.clss = 'special' - token.type = 'conditionalgroup' - token.close = null - token.l = match[0].length + 2 - } else if (/^\(\?255). In theory it should allow 4? - if (isNaN(val) || val > 255 || /[^\da-f]/i.test(match[1])) { - token.error = { id: 'esccharbad' } - } else { - token.code = val - } - } else if ((match = sub.match(/^x([\da-fA-F]{0,2})/))) { - // hex ascii: \xFF - token.type = 'eschexadecimal' - token.l += match[0].length - token.code = parseInt(match[1] || 0, 16) - } else if ((match = sub.match(/^c([a-zA-Z])?/))) { - // control char: \cA \cz - // also handles: \c - // not supported in JS strings - token.type = 'esccontrolchar' - if (match[1]) { - token.code = match[1].toUpperCase().charCodeAt(0) - 64 // A=65 - token.l += 2 - } else if (profile.config.ctrlcodeerr) { - token.l++ - token.error = { id: 'esccharbad' } - } else { - return this.parseChar(str, token, charset) // this builds the "/" token - } - } else if ((match = sub.match(/^[0-7]{1,3}/))) { - // octal ascii: \011 - token.type = 'escoctal' - sub = match[0] - if (parseInt(sub, 8) > 255) { - sub = sub.substr(0, 2) - } - token.l += sub.length - token.code = parseInt(sub, 8) - } else if (profile.tokens.escoctalo && (match = sub.match(/^o\{(.*?)}/i))) { - // \o{377} - token.type = 'escoctal' - token.l += match[0].length - val = parseInt(match[1], 8) - if (isNaN(val) || val > 255 || /[^0-7]/.test(match[1])) { - token.error = { id: 'esccharbad' } - } else { - token.code = val - } - } else { - // single char - if ((token.type = profile.escCharTypes[c])) { - token.l++ - token.clss = ExpressionLexer.ANCHOR_TYPES[token.type] - ? 'anchor' - : 'charclass' - return token - } - - token.code = profile.escCharCodes[c] - if (token.code === undefined || token.code === false) { - // unrecognized. - return this.parseEscChar(token, c) - } - - // update SubstLexer if this changes: - token.l++ - token.type = 'esc_' + token.code - } - token.clss = 'esc' - return token - } - - parseEscChar(token, c) { - // unrecognized escchar: \u \a \8, etc - // JS: allowed except if u flag set, Safari still allows \8 \9 - // PCRE: allows \8 \9 but not others // TODO: support? - let profile = this._profile - token.l = 2 - if ( - (!profile.badEscChars[c] && profile.tokens.escchar && !this._modes.u) || - profile.escChars[c] - ) { - token.type = 'escchar' - token.code = c.charCodeAt(0) - token.clss = 'esc' - } else { - token.error = { id: 'esccharbad' } - } - } - - parseRef(token, sub) { - // namedref: \k \k'name' \k{name} \g{name} - // namedsubroutine: \g \g'name' - // numref: \g1 \g+2 \g{2} - // numsubroutine: \g<-1> \g'1' - // recursion: \g<0> \g'0' - let c = sub[0], - s = '', - match - if ((match = sub.match(/^[gk](?:'\w*'|<\w*>|{\w*})/))) { - s = match[0].substr(2, match[0].length - 3) - if (c === 'k' && !isNaN(s)) { - s = '' - } // TODO: specific error for numeric \k? - } else if ( - (match = sub.match(/^g(?:({[-+]?\d+}|<[-+]?\d+>|'[-+]?\d+')|([-+]?\d+))/)) - ) { - s = - match[2] !== undefined - ? match[2] - : match[1].substr(1, match[1].length - 2) - } - let isRef = c === 'k' || !(sub[1] === "'" || sub[1] === '<') - if (!isRef && s == 0) { - token.type = 'recursion' - token.clss = 'ref' - } else { - // namedref, extnumref, namedsubroutine, numsubroutine - token.type = - (isNaN(s) ? 'named' : (isRef ? 'ext' : '') + 'num') + - (isRef ? 'ref' : 'subroutine') - this.getRef(token, s) - } - token.l += match ? match[0].length : 1 - } - - parseUnicode(token, sub) { - // unicodescript: \p{Cherokee} - // unicodecat: \p{Ll} \pL - // not: \P{Ll} \p{^Lu} - let match = sub.match(/p\{\^?([^}]*)}/i), - val = match && match[1], - not = sub[0] === 'P' - if (!match && (match = sub.match(/[pP]([LMZSNPC])/))) { - val = match[1] - } else { - not = not !== (sub[2] === '^') - } - token.l += match ? match[0].length : 1 - token.type = 'unicodecat' - if (this._profile.unicodeScripts[val]) { - token.type = 'unicodescript' - } else if (!this._profile.unicodeCategories[val]) { - val = null - } - if (not) { - token.type = 'not' + token.type - } - if ((!this._profile.config.unicodenegated && sub[2] === '^') || !val) { - token.error = { id: 'unicodebad' } - } - token.value = val - token.clss = 'charclass' - return token - } - - parseMode(token, sub) { - // (?i-x) - // supported modes in PCRE: i-caseinsens, x-freespacing, s-dotall, m-multiline, U-switchlazy, [J-samename] - let match = sub.match(/^[-a-z]+\)/i) - if (!match) { - return - } - let supModes = this._profile.modes - let modes = Utils.copy({}, this._modes), - bad = false, - not = false, - s = match[0], - c - token.on = token.off = '' - - for (let i = 0, l = s.length - 1; i < l; i++) { - c = s[i] - if (c === '-') { - not = true - continue - } - if (!supModes[c]) { - bad = true - break - } - modes[c] = !not - - token.on = token.on.replace(c, '') - if (not) { - token.off = token.off.replace(c, '') - token.off += c - } else { - token.on += c - } - } - - token.clss = 'special' - token.type = 'mode' - token.l = match[0].length + 2 - - if (bad) { - token.error = { id: 'modebad' } - token.errmode = c - } else { - this._modes = modes - } - return token - } - - parseQuant(str, token) { - // quantifier: {0,3} {3} {1,} - token.type = token.clss = 'quant' - let i = token.i - let end = str.indexOf('}', i + 1) - token.l += end - i - let arr = str.substring(i + 1, end).split(',') - token.min = parseInt(arr[0]) - token.max = - arr[1] === undefined ? token.min : arr[1] === '' ? -1 : parseInt(arr[1]) - if (token.max !== -1 && token.min > token.max) { - token.error = { id: 'quantrev' } - } - return token - } - - validateRange(str, end) { - // char range: [a-z] [\11-\n] - let next = end, - token = end.prv, - prv = token.prv - if (prv.code === undefined || next.code === undefined) { - // not a range, rewrite as a char: - this.parseChar(str, token) - } else { - token.clss = 'set' - if (prv.code > next.code) { - // this gets added here because parse has already moved to the next token: - this.errors.push((token.error = { id: 'rangerev' })) - } - // preserve as separate tokens, but treat as one in the UI: - next.proxy = prv.proxy = token - token.set = [prv, token, next] - } - } -} - -ExpressionLexer.ANCHOR_TYPES = { - bof: true, - eof: true, - bos: true, - eos: true, - abseos: true, - wordboundary: true, - notwordboundary: true, - prevmatchend: true, -} - -module.exports = ExpressionLexer diff --git a/packages/next/lib/regexr/profile/core.js b/packages/next/lib/regexr/profile/core.js deleted file mode 100644 index cd2ada92addff..0000000000000 --- a/packages/next/lib/regexr/profile/core.js +++ /dev/null @@ -1,420 +0,0 @@ -/* -RegExr: Learn, Build, & Test RegEx -Copyright (C) 2017 gskinner.com, inc. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - -/* -The core profile essentially defines every feature we support, and is then pared down by other profiles. All values should be y (true). - -It also acts in part as pseudo documentation for all of the "type" values. - */ -let y = true, - n = false - -let core = { - id: 'core', - - flags: { - g: 'global', // note that this is not a real flag in some flavors, but a different method call - i: 'caseinsensitive', - m: 'multiline', - s: 'dotall', - u: 'unicode', - y: 'sticky', - x: 'extended', - U: 'ungreedy', - }, - - // reserved characters that need to be escaped: - escChars: '+*?^$\\.[]{}()|/'.split('').reduce((o, c) => { - o[c] = y - return o - }, {}), - - // escape chars that are specifically not supported by the flavor: - badEscChars: n, - - escCharCodes: { - '0': 0, // null - a: 7, // bell - t: 9, // tab - n: 10, // lf - v: 11, // vertical tab - f: 12, // form feed - r: 13, // cr - e: 27, // escape - }, - - escCharTypes: { - A: 'bos', - b: 'wordboundary', - B: 'notwordboundary', - d: 'digit', - D: 'notdigit', - G: 'prevmatchend', - h: 'hwhitespace', - H: 'nothwhitespace', - K: 'keepout', - N: 'notlinebreak', - R: 'linebreak', - s: 'whitespace', - S: 'notwhitespace', - v: 'vwhitespace', - V: 'notvwhitespace', - w: 'word', - W: 'notword', - X: 'unicodegrapheme', - Z: 'eos', - z: 'abseos', - }, - - charTypes: { - '.': 'dot', - '|': 'alt', - $: 'eof', - '^': 'bof', - '?': 'opt', // also: "lazy" - '+': 'plus', // also: "possessive" - '*': 'star', - }, - - unquantifiable: { - // all group/set open tokens are unquantifiable by default (ie. tokens with a .close value) - quant: y, - plus: y, - star: y, - opt: y, - lazy: y, - possessive: y, - eof: y, - bof: y, - eos: y, - abseos: y, - alt: y, - open: y, - mode: y, - comment: y, // TODO: this should actually be ignored by quantifiers. - condition: y, - }, - - unicodeScripts: { - // from: http://www.pcre.org/original/doc/html/pcrepattern.html - Arabic: y, - Armenian: y, - Avestan: y, - Balinese: y, - Bamum: y, - Bassa_Vah: y, - Batak: y, - Bengali: y, - Bopomofo: y, - Brahmi: y, - Braille: y, - Buginese: y, - Buhid: y, - Canadian_Aboriginal: y, - Carian: y, - Caucasian_Albanian: y, - Chakma: y, - Cham: y, - Cherokee: y, - Common: y, - Coptic: y, - Cuneiform: y, - Cypriot: y, - Cyrillic: y, - Deseret: y, - Devanagari: y, - Duployan: y, - Egyptian_Hieroglyphs: y, - Elbasan: y, - Ethiopic: y, - Georgian: y, - Glagolitic: y, - Gothic: y, - Grantha: y, - Greek: y, - Gujarati: y, - Gurmukhi: y, - Han: y, - Hangul: y, - Hanunoo: y, - Hebrew: y, - Hiragana: y, - Imperial_Aramaic: y, - Inherited: y, - Inscriptional_Pahlavi: y, - Inscriptional_Parthian: y, - Javanese: y, - Kaithi: y, - Kannada: y, - Katakana: y, - Kayah_Li: y, - Kharoshthi: y, - Khmer: y, - Khojki: y, - Khudawadi: y, - Lao: y, - Latin: y, - Lepcha: y, - Limbu: y, - Linear_A: y, - Linear_B: y, - Lisu: y, - Lycian: y, - Lydian: y, - Mahajani: y, - Malayalam: y, - Mandaic: y, - Manichaean: y, - Meetei_Mayek: y, - Mende_Kikakui: y, - Meroitic_Cursive: y, - Meroitic_Hieroglyphs: y, - Miao: y, - Modi: y, - Mongolian: y, - Mro: y, - Myanmar: y, - Nabataean: y, - New_Tai_Lue: y, - Nko: y, - Ogham: y, - Ol_Chiki: y, - Old_Italic: y, - Old_North_Arabian: y, - Old_Permic: y, - Old_Persian: y, - Old_South_Arabian: y, - Old_Turkic: y, - Oriya: y, - Osmanya: y, - Pahawh_Hmong: y, - Palmyrene: y, - Pau_Cin_Hau: y, - Phags_Pa: y, - Phoenician: y, - Psalter_Pahlavi: y, - Rejang: y, - Runic: y, - Samaritan: y, - Saurashtra: y, - Sharada: y, - Shavian: y, - Siddham: y, - Sinhala: y, - Sora_Sompeng: y, - Sundanese: y, - Syloti_Nagri: y, - Syriac: y, - Tagalog: y, - Tagbanwa: y, - Tai_Le: y, - Tai_Tham: y, - Tai_Viet: y, - Takri: y, - Tamil: y, - Telugu: y, - Thaana: y, - Thai: y, - Tibetan: y, - Tifinagh: y, - Tirhuta: y, - Ugaritic: y, - Vai: y, - Warang_Citi: y, - Yi: y, - }, - - unicodeCategories: { - // from: http://www.pcre.org/original/doc/html/pcrepattern.html - C: y, // Other - Cc: y, // Control - Cf: y, // Format - Cn: y, // Unassigned - Co: y, // Private use - Cs: y, // Surrogate - L: y, // Letter - 'L&': y, // Any letter - Ll: y, // Lower case letter - Lm: y, // Modifier letter - Lo: y, // Other letter - Lt: y, // Title case letter - Lu: y, // Upper case letter - M: y, // Mark - Mc: y, // Spacing mark - Me: y, // Enclosing mark - Mn: y, // Non-spacing mark - N: y, // Number - Nd: y, // Decimal number - Nl: y, // Letter number - No: y, // Other number - P: y, // Punctuation - Pc: y, // Connector punctuation - Pd: y, // Dash punctuation - Pe: y, // Close punctuation - Pf: y, // Final punctuation - Pi: y, // Initial punctuation - Po: y, // Other punctuation - Ps: y, // Open punctuation - S: y, // Symbol - Sc: y, // Currency symbol - Sk: y, // Modifier symbol - Sm: y, // Mathematical symbol - So: y, // Other symbol - Z: y, // Separator - Zl: y, // Line separator - Zp: y, // Paragraph separator - Zs: y, // Space separator - }, - - posixCharClasses: { - // from: http://www.pcre.org/original/doc/html/pcrepattern.html - alnum: y, // letters and digits - alpha: y, // letters - ascii: y, // character codes 0 - 127 - blank: y, // space or tab only - cntrl: y, // control characters - digit: y, // decimal digits (same as \d) - graph: y, // printing characters, excluding space - lower: y, // lower case letters - print: y, // printing characters, including space - punct: y, // printing characters, excluding letters and digits and space - space: y, // white space (the same as \s from PCRE 8.34) - upper: y, // upper case letters - word: y, // "word" characters (same as \w) - xdigit: y, // hexadecimal digits - }, - - modes: { - i: 'caseinsensitive', - s: 'dotall', - m: 'multiline', - x: 'freespacing', - J: 'samename', - U: 'switchlazy', - }, - - tokens: { - // note that not all of these are actively used in the lexer, but are included for completeness. - open: y, // opening / - close: y, // closing / - char: y, // abc - - // classes: - // also in escCharTypes and charTypes - set: y, // [a-z] - setnot: y, // [^a-z] - setclose: y, // ] - range: y, // [a-z] - unicodecat: y, // \p{Ll} \P{^Ll} \pL - notunicodecat: y, // \P{Ll} \p{^Ll} \PL - unicodescript: y, // \p{Cherokee} \P{^Cherokee} - notunicodescript: y, // \P{Cherokee} \p{^Cherokee} - posixcharclass: y, // [[:alpha:]] - // not in supported flavors: "posixcollseq": y, // [[.foo.]] // this is recognized by the lexer, currently returns "notsupported" error - // not in supported flavors: "unicodeblock": y, // \p{InThai} \p{IsThai} and NOT \P - // not in supported flavors: "subtract": y, // [base-[subtract]] - // not in supported flavors: "intersect": y, // [base&&[intersect]] - - // esc: - // also in escCharCodes and escCharTypes - escoctal: y, // \11 - escunicodeu: y, // \uFFFF - escunicodeub: y, // \u{00A9} - escunicodexb: y, // \x{00A9} - escsequence: y, // \Q...\E - eschexadecimal: y, // \xFF - esccontrolchar: y, // \cA - escoctalo: y, // \o{377} // resolved to escoctal in lexer, no docs required - escchar: y, // \m (unrecognized escapes) // no reference documentation required - - // group: - group: y, // (foo) - groupclose: y, // ) - noncapgroup: y, // (?:foo) - namedgroup: y, // (?Pfoo) (?foo) (?'name'foo) - atomic: y, // (?>foo|bar) - define: y, // (?(DEFINE)foo) - branchreset: y, // (?|(a)|(b)) - - // lookaround: - poslookbehind: y, // (?<=foo) - neglookbehind: y, // (? \k'name' \k{name} (?P=name) \g{name} - numref: y, // \1 - extnumref: y, // \g{-1} \g{+1} \g{1} \g1 \g-1 - recursion: y, // (?R) (?0) \g<0> \g'0' - numsubroutine: y, // \g<1> \g'-1' (?1) (?-1) - namedsubroutine: y, // \g \g'name' (?&name) (?P>name) - - // quantifiers: - // also in specialChars - quant: y, // {1,2} - possessive: y, // ++ - lazy: y, // ? - - // special: - conditional: y, // (?(?=if)then|else) - condition: y, // (?=if) any lookaround - conditionalelse: y, // | - conditionalgroup: y, // (?(1)a|b) (?(-1)a|b) (?(name)a|b) - mode: y, // (?i-x) see modes above - comment: y, // (?#comment) - - // meta: - matchanyset: y, // [\s\S] - }, - - substTokens: { - // named references aren't supported in JS or PCRE / PHP - subst_$esc: y, // $$ - 'subst_$&match': y, // $& - subst_$before: y, // $` - subst_$after: y, // $' - subst_$group: y, // $1 $99 // resolved to subst_group in lexer, no docs required - subst_$bgroup: y, // ${1} ${99} // resolved to subst_group in lexer, no docs required - subst_bsgroup: y, // \1 \99 // resolved to subst_group in lexer, no docs required - subst_group: y, // $1 \1 \{1} // combined in docs, not used by lexer - subst_0match: y, // $0 \0 \{0} - - // this isn't a feature of the engine, but of RegExr: - subst_esc: y, // \n \r \u1234 - }, - - config: { - forwardref: y, // \1(a) - nestedref: y, // (\1a|b)+ - ctrlcodeerr: y, // does \c error? (vs decompose) - reftooctalalways: y, // does a single digit reference \1 become an octal? (vs remain an unmatched ref) - substdecomposeref: y, // will a subst reference decompose? (ex. \3 becomes "\" & "3" if < 3 groups) - looseesc: y, // should unrecognized escape sequences match the character (ex. \u could match "u") // disabled when `u` flag is set - unicodenegated: y, // \p{^etc}" - namedgroupalt: y, // if false, only support (?foo) - }, - - docs: { - // for example: - //possessive: {desc: "+This will be appended to the existing entry." }, - //namedgroup: {tip: "This will overwrite the existing entry." } - }, -} - -module.exports = core diff --git a/packages/next/lib/regexr/profile/index.js b/packages/next/lib/regexr/profile/index.js deleted file mode 100644 index 70614719269bb..0000000000000 --- a/packages/next/lib/regexr/profile/index.js +++ /dev/null @@ -1,15 +0,0 @@ -function merge(p1, p2) { - // merges p1 into p2, essentially just a simple deep copy without array support. - for (let n in p1) { - if (p2[n] === false) { - continue - } else if (typeof p1[n] === 'object') { - p2[n] = merge(p1[n], p2[n] || {}) - } else if (p2[n] === undefined) { - p2[n] = p1[n] - } - } - return p2 -} - -module.exports = merge(require('./core'), require('./javascript')) diff --git a/packages/next/lib/regexr/profile/javascript.js b/packages/next/lib/regexr/profile/javascript.js deleted file mode 100644 index 89fd01490cd35..0000000000000 --- a/packages/next/lib/regexr/profile/javascript.js +++ /dev/null @@ -1,160 +0,0 @@ -/* -RegExr: Learn, Build, & Test RegEx -Copyright (C) 2017 gskinner.com, inc. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - -/* -The javascript profile disables a large number of features. - -Note that JS warnings are currently added in addJSWarnings in the ExpresssionLexer. -*/ - -let y = true, - n = false -function test(expr, flag) { - try { - return new RegExp(expr, flag) && undefined - } catch (e) { - return n - } -} -function testFlag(flag) { - return test('.', flag) -} -let unicodeFlag = testFlag('u') -let stickyFlag = testFlag('y') -let dotallFlag = testFlag('s') -let lookbehind = test('(?<=A)') -let namedgroup = test('(?B)') -let unicodecat = test('\\p{Ll}', 'u') // disabled when `u` flag is not set - -let javascript = { - id: 'js', - label: 'JavaScript', - browser: true, - - flags: { - s: dotallFlag, // warning - x: n, - u: unicodeFlag, // warning - y: stickyFlag, // warning - U: n, - }, - - escCharCodes: { - a: n, // bell - e: n, // escape - }, - - escCharTypes: { - A: n, // bos - G: n, // prevmatchend - h: n, // hwhitespace - H: n, // nothwhitespace - K: n, // keepout - N: n, // notlinebreak - R: n, // newline - v: n, // vwhitespace - V: n, // notvwhitespace - X: n, // unicodegrapheme - Z: n, // eos - z: n, // abseos - }, - - unicodeScripts: unicodecat, - - unicodeCategories: unicodecat, - - posixCharClasses: n, - - modes: n, - - tokens: { - // classes: - // also in escCharSpecials and specialChars - unicodecat: unicodecat, // \p{Ll} \P{^Ll} \pL - notunicodecat: unicodecat, // \P{Ll} \p{^Ll} \PL - unicodescript: unicodecat, // \p{Cherokee} \P{^Cherokee} - notunicodescript: unicodecat, // \P{Cherokee} \p{^Cherokee} - posixcharclass: n, // [[:alpha:]] - - // esc: - // also in escCharCodes and escCharSpecials - escunicodeub: unicodeFlag, // \u{00A9} - escunicodexb: n, // \x{00A9} - escsequence: n, // \Q...\E - escoctalo: n, // \o{377} - - // group: - namedgroup: namedgroup, // (?Pfoo) (?foo) (?'name'foo) - atomic: n, // (?>foo|bar) - define: n, // (?(DEFINE)foo) - branchreset: n, // (?|(a)|(b)) - - // lookaround: - poslookbehind: lookbehind, // (?<=foo) // warning - neglookbehind: lookbehind, // (? \k'name' \k{name} (?P=name) \g{name} - extnumref: n, // \g{-1} \g{+1} \g{1} \g1 \g-1 - recursion: n, // (?R) (?0) \g<0> \g'0' - numsubroutine: n, // \g<1> \g'-1' (?1) (?-1) - namedsubroutine: n, // \g \g'name' (?&name) (?P>name) - - // quantifiers: - // also in specialChars - possessive: n, - - // special: - conditional: n, // (?(?=if)then|else) - conditionalif: n, // (?=if) any lookaround - conditionalelse: n, // | - conditionalgroup: n, // (?(1)a|b) (?(-1)a|b) (?(name)a|b) - mode: n, // (?i-x) see modes above - comment: n, // (?#comment) - }, - - config: { - forwardref: n, // \1(a) - nestedref: n, // (\1a|b)+ - ctrlcodeerr: n, // does \c error, or decompose? - unicodenegated: n, // \p{^etc} - namedgroupalt: n, // if false, only support (?foo) - }, - - substTokens: { - subst_0match: n, // $0 \0 \{0} - subst_$bgroup: n, // ${1} ${99} - subst_bsgroup: n, // \1 \99 - }, - - docs: { - subst_group: { ext: '' }, // remove other syntaxes. - namedgroup: { ext: '' }, // remove other syntaxes. - unicodecat: { - ext: - '

Requires the u flag.

' + - "

For a list of values, see this MDN page.

", - }, - // notunicodecat, unicodescript, notunicodescript are copied from unicodecat below. - }, -} - -javascript.docs.notunicodecat = javascript.docs.unicodescript = javascript.docs.notunicodescript = - javascript.docs.unicodecat - -module.exports = javascript