Skip to content

Commit d216e60

Browse files
Tests: Improved dection of empty patterns (#3058)
1 parent a1b67ce commit d216e60

6 files changed

+49
-60
lines changed

components/prism-asciidoc.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
pattern: /^\|={3,}(?:(?:\r?\n|\r(?!\n)).*)*?(?:\r?\n|\r)\|={3,}$/m,
3636
inside: {
3737
'specifiers': {
38-
pattern: /(?!\|)(?:(?:(?:\d+(?:\.\d+)?|\.\d+)[+*])?(?:[<^>](?:\.[<^>])?|\.[<^>])?[a-z]*)(?=\|)/,
38+
pattern: /(?:(?:(?:\d+(?:\.\d+)?|\.\d+)[+*](?:[<^>](?:\.[<^>])?|\.[<^>])?|[<^>](?:\.[<^>])?|\.[<^>])[a-z]*|[a-z]+)(?=\|)/,
3939
alias: 'attr-value'
4040
},
4141
'punctuation': {

components/prism-asciidoc.min.js

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

components/prism-promql.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
lookbehind: true,
4848
inside: {
4949
'label-key': {
50-
pattern: /\b[^,]*\b/,
50+
pattern: /\b[^,]+\b/,
5151
alias: 'attr-name',
5252
},
5353
'punctuation': /[(),]/

components/prism-promql.min.js

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

+1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
"npm-run-all": "^4.1.5",
6060
"pump": "^3.0.0",
6161
"refa": "^0.9.1",
62+
"regexp-ast-analysis": "^0.2.4",
6263
"regexpp": "^3.2.0",
6364
"scslre": "^0.1.6",
6465
"simple-git": "^1.107.0",

tests/pattern-tests.js

+44-56
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ const { transform, combineTransformers, getIntersectionWordSets, JS, Words, NFA,
1212
const scslre = require('scslre');
1313
const path = require('path');
1414
const { argv } = require('yargs');
15+
const RAA = require('regexp-ast-analysis');
1516

1617
/**
1718
* A map from language id to a list of code snippets in that language.
@@ -130,6 +131,7 @@ function testPatterns(Prism, mainLanguage) {
130131
* @property {string} name
131132
* @property {any} parent
132133
* @property {boolean} lookbehind Whether the first capturing group of the pattern is a Prism lookbehind group.
134+
* @property {CapturingGroup | undefined} lookbehindGroup
133135
* @property {{ key: string, value: any }[]} path
134136
* @property {(message: string) => void} reportError
135137
*/
@@ -163,14 +165,17 @@ function testPatterns(Prism, mainLanguage) {
163165
}
164166

165167
const parent = path.length > 1 ? path[path.length - 2].value : undefined;
168+
const lookbehind = key === 'pattern' && parent && !!parent.lookbehind;
169+
const lookbehindGroup = lookbehind ? getFirstCapturingGroup(ast.pattern) : undefined;
166170
callback({
167171
pattern: value,
168172
ast,
169173
tokenPath,
170174
name: key,
171175
parent,
172176
path,
173-
lookbehind: key === 'pattern' && parent && !!parent.lookbehind,
177+
lookbehind,
178+
lookbehindGroup,
174179
reportError: message => errors.push(message)
175180
});
176181
} catch (error) {
@@ -231,9 +236,10 @@ function testPatterns(Prism, mainLanguage) {
231236

232237

233238
it('- should not match the empty string', function () {
234-
forEachPattern(({ pattern, tokenPath }) => {
239+
forEachPattern(({ ast, pattern, tokenPath }) => {
235240
// test for empty string
236-
assert.notMatch('', pattern, `${tokenPath}: ${pattern} should not match the empty string.\n\n`
241+
const empty = RAA.isPotentiallyZeroLength(ast.pattern.alternatives);
242+
assert.isFalse(empty, `${tokenPath}: ${pattern} should not match the empty string.\n\n`
237243
+ `Patterns that do match the empty string can potentially cause infinitely many empty tokens. `
238244
+ `Make sure that all patterns always consume at least one character.`);
239245
});
@@ -256,47 +262,37 @@ function testPatterns(Prism, mainLanguage) {
256262
});
257263

258264
it('- should not have lookbehind groups that can be preceded by other some characters', function () {
259-
forEachPattern(({ ast, tokenPath, lookbehind }) => {
260-
if (!lookbehind) {
261-
return;
265+
forEachPattern(({ tokenPath, lookbehindGroup }) => {
266+
if (lookbehindGroup && !isFirstMatch(lookbehindGroup)) {
267+
assert.fail(`${tokenPath}: The lookbehind group ${lookbehindGroup.raw} might be preceded by some characters.\n\n`
268+
+ `Prism assumes that the lookbehind group, if captured, is the first thing matched by the regex. `
269+
+ `If characters might precede the lookbehind group (e.g. /a?(b)c/), then Prism cannot correctly apply the lookbehind correctly in all cases.\n`
270+
+ `To fix this, either remove the preceding characters or include them in the lookbehind group.`);
262271
}
263-
forEachCapturingGroup(ast.pattern, ({ group, number }) => {
264-
if (number === 1 && !isFirstMatch(group)) {
265-
assert.fail(`${tokenPath}: The lookbehind group ${group.raw} might be preceded by some characters.\n\n`
266-
+ `Prism assumes that the lookbehind group, if captured, is the first thing matched by the regex. `
267-
+ `If characters might precede the lookbehind group (e.g. /a?(b)c/), then Prism cannot correctly apply the lookbehind correctly in all cases.\n`
268-
+ `To fix this, either remove the preceding characters or include them in the lookbehind group.`);
269-
}
270-
});
271272
});
272273
});
273274

274275
it('- should not have lookbehind groups that only have zero-width alternatives', function () {
275-
forEachPattern(({ ast, tokenPath, lookbehind, reportError }) => {
276-
if (!lookbehind) {
277-
return;
276+
forEachPattern(({ tokenPath, lookbehindGroup, reportError }) => {
277+
if (lookbehindGroup && RAA.isZeroLength(lookbehindGroup)) {
278+
const groupContent = lookbehindGroup.raw.substr(1, lookbehindGroup.raw.length - 2);
279+
const replacement = lookbehindGroup.alternatives.length === 1 ? groupContent : `(?:${groupContent})`;
280+
reportError(`${tokenPath}: The lookbehind group ${lookbehindGroup.raw} does not consume characters.\n\n`
281+
+ `Therefor it is not necessary to use a lookbehind group.\n`
282+
+ `To fix this, replace the lookbehind group with ${replacement} and remove the 'lookbehind' property.`);
278283
}
279-
forEachCapturingGroup(ast.pattern, ({ group, number }) => {
280-
if (number === 1 && isAlwaysZeroWidth(group)) {
281-
const groupContent = group.raw.substr(1, group.raw.length - 2);
282-
const replacement = group.alternatives.length === 1 ? groupContent : `(?:${groupContent})`;
283-
reportError(`${tokenPath}: The lookbehind group ${group.raw} does not consume characters.\n\n`
284-
+ `Therefor it is not necessary to use a lookbehind group.\n`
285-
+ `To fix this, replace the lookbehind group with ${replacement} and remove the 'lookbehind' property.`);
286-
}
287-
});
288284
});
289285
});
290286

291287
it('- should not have unused capturing groups', function () {
292-
forEachPattern(({ ast, tokenPath, lookbehind, reportError }) => {
288+
forEachPattern(({ ast, tokenPath, lookbehindGroup, reportError }) => {
293289
forEachCapturingGroup(ast.pattern, ({ group, number }) => {
294-
const isLookbehindGroup = lookbehind && number === 1;
290+
const isLookbehindGroup = group === lookbehindGroup;
295291
if (group.references.length === 0 && !isLookbehindGroup) {
296292
const fixes = [];
297293
fixes.push(`Make this group a non-capturing group ('(?:...)' instead of '(...)'). (It's usually this option.)`);
298294
fixes.push(`Reference this group with a backreference (use '\\${number}' for this).`);
299-
if (number === 1 && !lookbehind) {
295+
if (number === 1 && !lookbehindGroup) {
300296
if (isFirstMatch(group)) {
301297
fixes.push(`Add a 'lookbehind: true' declaration.`);
302298
} else {
@@ -392,28 +388,26 @@ function testPatterns(Prism, mainLanguage) {
392388

393389

394390
/**
395-
* Returns whether the given element will always have zero width meaning that it doesn't consume characters.
391+
* Returns the first capturing group in the given pattern.
396392
*
397-
* @param {Element} element
398-
* @returns {boolean}
393+
* @param {Pattern} pattern
394+
* @returns {CapturingGroup | undefined}
399395
*/
400-
function isAlwaysZeroWidth(element) {
401-
switch (element.type) {
402-
case 'Assertion':
403-
// assertions == ^, $, \b, lookarounds
404-
return true;
405-
case 'Quantifier':
406-
return element.max === 0 || isAlwaysZeroWidth(element.element);
407-
case 'CapturingGroup':
408-
case 'Group':
409-
// every element in every alternative has to be of zero length
410-
return element.alternatives.every(alt => alt.elements.every(isAlwaysZeroWidth));
411-
case 'Backreference':
412-
// on if the group referred to is of zero length
413-
return isAlwaysZeroWidth(element.resolved);
414-
default:
415-
return false; // what's left are characters
396+
function getFirstCapturingGroup(pattern) {
397+
let cap = undefined;
398+
399+
try {
400+
visitRegExpAST(pattern, {
401+
onCapturingGroupEnter(node) {
402+
cap = node;
403+
throw new Error('stop');
404+
}
405+
});
406+
} catch (error) {
407+
// ignore errors
416408
}
409+
410+
return cap;
417411
}
418412

419413
/**
@@ -427,7 +421,7 @@ function isFirstMatch(element) {
427421
switch (parent.type) {
428422
case 'Alternative': {
429423
// all elements before this element have to of zero length
430-
if (!parent.elements.slice(0, parent.elements.indexOf(element)).every(isAlwaysZeroWidth)) {
424+
if (!parent.elements.slice(0, parent.elements.indexOf(element)).every(RAA.isZeroLength)) {
431425
return false;
432426
}
433427
const grandParent = parent.parent;
@@ -457,13 +451,7 @@ function isFirstMatch(element) {
457451
* @returns {boolean}
458452
*/
459453
function underAStar(node) {
460-
if (node.type === 'Quantifier' && node.max > 10) {
461-
return true;
462-
} else if (node.parent) {
463-
return underAStar(node.parent);
464-
} else {
465-
return false;
466-
}
454+
return RAA.getEffectiveMaximumRepetition(node) > 10;
467455
}
468456

469457
/**

0 commit comments

Comments
 (0)