From 91d119f2d0931e45ac8d3307078ef5f599d82c31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Thu, 24 Oct 2024 20:59:00 -0400 Subject: [PATCH] =?UTF-8?q?fix:=20only=20=C2=B1lookahead=20is=20quantifia?= =?UTF-8?q?ble=20in=20annexB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- parser.js | 80 ++++++++++++++++++++------------- test/test-data-unicode-set.json | 4 +- test/test-data-unicode.json | 28 +++++++++++- test/test-data.json | 24 ++++++++++ 4 files changed, 102 insertions(+), 34 deletions(-) diff --git a/parser.js b/parser.js index e5e51cc..be99844 100644 --- a/parser.js +++ b/parser.js @@ -567,51 +567,71 @@ function parseTerm() { // Term :: // Anchor - // Anchor Quantifier (see https://github.com/jviereck/regjsparser/issues/130) // Atom // Atom Quantifier + // Term (Annex B):: + // [~UnicodeMode] QuantifiableAssertion Quantifier (see https://github.com/jviereck/regjsparser/issues/130) + // [~UnicodeMode] ExtendedAtom Quantifier + + // QuantifiableAssertion:: + // (?= Disjunction[~UnicodeMode, ~UnicodeSetsMode, ?NamedCaptureGroups] ) + // (?! Disjunction[~UnicodeMode, ~UnicodeSetsMode, ?NamedCaptureGroups] ) + if (pos >= str.length || current('|') || current(')')) { return null; /* Means: The term is empty */ } - var anchorOrAtom = parseAnchor(); + var anchor = parseAnchor(); - // If there is no Anchor, try to parse an atom. - if (!anchorOrAtom) { - var atom = parseAtomAndExtendedAtom(); - var quantifier; - if (!atom) { - // Check if a quantifier is following. A quantifier without an atom - // is an error. - var pos_backup = pos - quantifier = parseQuantifier() || false; - if (quantifier) { - pos = pos_backup - bail('Expected atom'); + if (anchor) { + var pos_backup = pos; + quantifier = parseQuantifier() || false; + if (quantifier) { + // Annex B + if (!isUnicodeMode && anchor.type === "group") { + quantifier.body = flattenBody(anchor); + // The quantifier contains the anchor. 
Therefore, the beginning of the + // quantifier range is given by the beginning of the anchor. + updateRawStart(quantifier, anchor.range[0]); + return quantifier; } + pos = pos_backup; + bail("Expected atom"); + } + return anchor; + } - // If no unicode flag, then try to parse ExtendedAtom -> ExtendedPatternCharacter. - // ExtendedPatternCharacter - var res; - if (!isUnicodeMode && (res = matchReg(/^\{/))) { - atom = createCharacter(res); - } else { - bail('Expected atom'); - } + // If there is no Anchor, try to parse an atom. + var atom = parseAtomAndExtendedAtom(); + var quantifier; + if (!atom) { + // Check if a quantifier is following. A quantifier without an atom + // is an error. + pos_backup = pos; + quantifier = parseQuantifier() || false; + if (quantifier) { + pos = pos_backup; + bail("Expected atom"); + } + + // If no unicode flag, then try to parse ExtendedAtom -> ExtendedPatternCharacter. + // ExtendedPatternCharacter + var res; + if (!isUnicodeMode && (res = matchReg(/^\{/))) { + atom = createCharacter(res); + } else { + bail("Expected atom"); } - anchorOrAtom = atom; } quantifier = parseQuantifier() || false; if (quantifier) { - var type = anchorOrAtom.type, behavior = anchorOrAtom.behavior; + var type = atom.type, behavior = atom.behavior; if ( type === "group" && (behavior === "negativeLookbehind" || - behavior === "lookbehind" || - (isUnicodeMode && - (behavior === "negativeLookahead" || behavior === "lookahead"))) + behavior === "lookbehind") ) { bail( "Invalid quantifier", @@ -620,13 +640,13 @@ quantifier.range[1] ); } - quantifier.body = flattenBody(anchorOrAtom); + quantifier.body = flattenBody(atom); // The quantifier contains the atom. Therefore, the beginning of the // quantifier range is given by the beginning of the atom. 
- updateRawStart(quantifier, anchorOrAtom.range[0]); + updateRawStart(quantifier, atom.range[0]); return quantifier; } - return anchorOrAtom; + return atom; } function parseGroup(matchA, typeA, matchB, typeB) { diff --git a/test/test-data-unicode-set.json b/test/test-data-unicode-set.json index 3e8413d..dcf933f 100644 --- a/test/test-data-unicode-set.json +++ b/test/test-data-unicode-set.json @@ -1499,13 +1499,13 @@ ".(?=.){2,3}": { "type": "error", "name": "SyntaxError", - "message": "Invalid quantifier at position 6\n .(?=.){2,3}\n ^", + "message": "Expected atom at position 6\n .(?=.){2,3}\n ^", "input": ".(?=.){2,3}" }, ".(?!.){2,3}": { "type": "error", "name": "SyntaxError", - "message": "Invalid quantifier at position 6\n .(?!.){2,3}\n ^", + "message": "Expected atom at position 6\n .(?!.){2,3}\n ^", "input": ".(?!.){2,3}" }, "[\\__]": { diff --git a/test/test-data-unicode.json b/test/test-data-unicode.json index 09edf87..0eaac11 100644 --- a/test/test-data-unicode.json +++ b/test/test-data-unicode.json @@ -1232,13 +1232,13 @@ ".(?=.){2,3}": { "type": "error", "name": "SyntaxError", - "message": "Invalid quantifier at position 6\n .(?=.){2,3}\n ^", + "message": "Expected atom at position 6\n .(?=.){2,3}\n ^", "input": ".(?=.){2,3}" }, ".(?!.){2,3}": { "type": "error", "name": "SyntaxError", - "message": "Invalid quantifier at position 6\n .(?!.){2,3}\n ^", + "message": "Expected atom at position 6\n .(?!.){2,3}\n ^", "input": ".(?!.){2,3}" }, "[&&]": { @@ -1848,5 +1848,29 @@ 4 ], "raw": "[~~]" + }, + "^*": { + "type": "error", + "name": "SyntaxError", + "message": "Expected atom at position 1\n ^*\n ^", + "input": "^*" + }, + "$+": { + "type": "error", + "name": "SyntaxError", + "message": "Expected atom at position 1\n $+\n ^", + "input": "$+" + }, + "\\b?": { + "type": "error", + "name": "SyntaxError", + "message": "Expected atom at position 2\n \\b?\n ^", + "input": "\\b?" 
+ }, + "\\B{1}": { + "type": "error", + "name": "SyntaxError", + "message": "Expected atom at position 2\n \\B{1}\n ^", + "input": "\\B{1}" } } diff --git a/test/test-data.json b/test/test-data.json index 41d951b..1151c5e 100644 --- a/test/test-data.json +++ b/test/test-data.json @@ -38626,5 +38626,29 @@ 11 ], "raw": ".(?!.){2,3}" + }, + "^*": { + "type": "error", + "name": "SyntaxError", + "message": "Expected atom at position 1\n ^*\n ^", + "input": "^*" + }, + "$+": { + "type": "error", + "name": "SyntaxError", + "message": "Expected atom at position 1\n $+\n ^", + "input": "$+" + }, + "\\b?": { + "type": "error", + "name": "SyntaxError", + "message": "Expected atom at position 2\n \\b?\n ^", + "input": "\\b?" + }, + "\\B{1}": { + "type": "error", + "name": "SyntaxError", + "message": "Expected atom at position 2\n \\B{1}\n ^", + "input": "\\B{1}" } }