@@ -12,6 +12,7 @@ const { transform, combineTransformers, getIntersectionWordSets, JS, Words, NFA,
12
12
const scslre = require ( 'scslre' ) ;
13
13
const path = require ( 'path' ) ;
14
14
const { argv } = require ( 'yargs' ) ;
15
+ const RAA = require ( 'regexp-ast-analysis' ) ;
15
16
16
17
/**
17
18
* A map from language id to a list of code snippets in that language.
@@ -130,6 +131,7 @@ function testPatterns(Prism, mainLanguage) {
130
131
* @property {string } name
131
132
* @property {any } parent
132
133
* @property {boolean } lookbehind Whether the first capturing group of the pattern is a Prism lookbehind group.
134
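+ * @property {CapturingGroup | undefined } lookbehindGroup The first capturing group of the pattern if `lookbehind` is true and such a group exists; otherwise `undefined`.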
133
135
* @property {{ key: string, value: any }[] } path
134
136
* @property {(message: string) => void } reportError
135
137
*/
@@ -163,14 +165,17 @@ function testPatterns(Prism, mainLanguage) {
163
165
}
164
166
165
167
const parent = path . length > 1 ? path [ path . length - 2 ] . value : undefined ;
168
+ const lookbehind = key === 'pattern' && parent && ! ! parent . lookbehind ;
169
+ const lookbehindGroup = lookbehind ? getFirstCapturingGroup ( ast . pattern ) : undefined ;
166
170
callback ( {
167
171
pattern : value ,
168
172
ast,
169
173
tokenPath,
170
174
name : key ,
171
175
parent,
172
176
path,
173
- lookbehind : key === 'pattern' && parent && ! ! parent . lookbehind ,
177
+ lookbehind,
178
+ lookbehindGroup,
174
179
reportError : message => errors . push ( message )
175
180
} ) ;
176
181
} catch ( error ) {
@@ -231,9 +236,10 @@ function testPatterns(Prism, mainLanguage) {
231
236
232
237
233
238
it ( '- should not match the empty string' , function ( ) {
234
- forEachPattern ( ( { pattern, tokenPath } ) => {
239
+ forEachPattern ( ( { ast , pattern, tokenPath } ) => {
235
240
// test for empty string
236
- assert . notMatch ( '' , pattern , `${ tokenPath } : ${ pattern } should not match the empty string.\n\n`
241
+ const empty = RAA . isPotentiallyZeroLength ( ast . pattern . alternatives ) ;
242
+ assert . isFalse ( empty , `${ tokenPath } : ${ pattern } should not match the empty string.\n\n`
237
243
+ `Patterns that do match the empty string can potentially cause infinitely many empty tokens. `
238
244
+ `Make sure that all patterns always consume at least one character.` ) ;
239
245
} ) ;
@@ -256,47 +262,37 @@ function testPatterns(Prism, mainLanguage) {
256
262
} ) ;
257
263
258
264
it ( '- should not have lookbehind groups that can be preceded by other some characters' , function ( ) {
259
- forEachPattern ( ( { ast, tokenPath, lookbehind } ) => {
260
- if ( ! lookbehind ) {
261
- return ;
265
+ forEachPattern ( ( { tokenPath, lookbehindGroup } ) => {
266
+ if ( lookbehindGroup && ! isFirstMatch ( lookbehindGroup ) ) {
267
+ assert . fail ( `${ tokenPath } : The lookbehind group ${ lookbehindGroup . raw } might be preceded by some characters.\n\n`
268
+ + `Prism assumes that the lookbehind group, if captured, is the first thing matched by the regex. `
269
+ + `If characters might precede the lookbehind group (e.g. /a?(b)c/), then Prism cannot correctly apply the lookbehind correctly in all cases.\n`
270
+ + `To fix this, either remove the preceding characters or include them in the lookbehind group.` ) ;
262
271
}
263
- forEachCapturingGroup ( ast . pattern , ( { group, number } ) => {
264
- if ( number === 1 && ! isFirstMatch ( group ) ) {
265
- assert . fail ( `${ tokenPath } : The lookbehind group ${ group . raw } might be preceded by some characters.\n\n`
266
- + `Prism assumes that the lookbehind group, if captured, is the first thing matched by the regex. `
267
- + `If characters might precede the lookbehind group (e.g. /a?(b)c/), then Prism cannot correctly apply the lookbehind correctly in all cases.\n`
268
- + `To fix this, either remove the preceding characters or include them in the lookbehind group.` ) ;
269
- }
270
- } ) ;
271
272
} ) ;
272
273
} ) ;
273
274
274
275
it ( '- should not have lookbehind groups that only have zero-width alternatives' , function ( ) {
275
- forEachPattern ( ( { ast, tokenPath, lookbehind, reportError } ) => {
276
- if ( ! lookbehind ) {
277
- return ;
276
+ forEachPattern ( ( { tokenPath, lookbehindGroup, reportError } ) => {
277
+ if ( lookbehindGroup && RAA . isZeroLength ( lookbehindGroup ) ) {
278
+ const groupContent = lookbehindGroup . raw . substr ( 1 , lookbehindGroup . raw . length - 2 ) ;
279
+ const replacement = lookbehindGroup . alternatives . length === 1 ? groupContent : `(?:${ groupContent } )` ;
280
+ reportError ( `${ tokenPath } : The lookbehind group ${ lookbehindGroup . raw } does not consume characters.\n\n`
281
+ + `Therefor it is not necessary to use a lookbehind group.\n`
282
+ + `To fix this, replace the lookbehind group with ${ replacement } and remove the 'lookbehind' property.` ) ;
278
283
}
279
- forEachCapturingGroup ( ast . pattern , ( { group, number } ) => {
280
- if ( number === 1 && isAlwaysZeroWidth ( group ) ) {
281
- const groupContent = group . raw . substr ( 1 , group . raw . length - 2 ) ;
282
- const replacement = group . alternatives . length === 1 ? groupContent : `(?:${ groupContent } )` ;
283
- reportError ( `${ tokenPath } : The lookbehind group ${ group . raw } does not consume characters.\n\n`
284
- + `Therefor it is not necessary to use a lookbehind group.\n`
285
- + `To fix this, replace the lookbehind group with ${ replacement } and remove the 'lookbehind' property.` ) ;
286
- }
287
- } ) ;
288
284
} ) ;
289
285
} ) ;
290
286
291
287
it ( '- should not have unused capturing groups' , function ( ) {
292
- forEachPattern ( ( { ast, tokenPath, lookbehind , reportError } ) => {
288
+ forEachPattern ( ( { ast, tokenPath, lookbehindGroup , reportError } ) => {
293
289
forEachCapturingGroup ( ast . pattern , ( { group, number } ) => {
294
- const isLookbehindGroup = lookbehind && number === 1 ;
290
+ const isLookbehindGroup = group === lookbehindGroup ;
295
291
if ( group . references . length === 0 && ! isLookbehindGroup ) {
296
292
const fixes = [ ] ;
297
293
fixes . push ( `Make this group a non-capturing group ('(?:...)' instead of '(...)'). (It's usually this option.)` ) ;
298
294
fixes . push ( `Reference this group with a backreference (use '\\${ number } ' for this).` ) ;
299
- if ( number === 1 && ! lookbehind ) {
295
+ if ( number === 1 && ! lookbehindGroup ) {
300
296
if ( isFirstMatch ( group ) ) {
301
297
fixes . push ( `Add a 'lookbehind: true' declaration.` ) ;
302
298
} else {
@@ -392,28 +388,26 @@ function testPatterns(Prism, mainLanguage) {
392
388
393
389
394
390
/**
 * Returns the first capturing group in the given pattern.
 *
 * "First" means the first capturing group the AST visitor enters
 * (presumably the group numbered 1 - confirm against the visitor's traversal order).
 *
 * @param {Pattern} pattern
 * @returns {CapturingGroup | undefined} The group, or `undefined` if the visitor never entered a capturing group.
 */
function getFirstCapturingGroup(pattern) {
	// Unique sentinel used to abort the traversal as soon as a group is found.
	// Comparing by identity lets every other error propagate instead of being swallowed.
	const stop = new Error('stop');
	let cap = undefined;

	try {
		visitRegExpAST(pattern, {
			onCapturingGroupEnter(node) {
				cap = node;
				throw stop;
			}
		});
	} catch (error) {
		if (error !== stop) {
			// a genuine failure during traversal must not be mistaken for "no group found"
			throw error;
		}
	}

	return cap;
}
418
412
419
413
/**
@@ -427,7 +421,7 @@ function isFirstMatch(element) {
427
421
switch ( parent . type ) {
428
422
case 'Alternative' : {
429
423
// all elements before this element have to be of zero length
430
- if ( ! parent . elements . slice ( 0 , parent . elements . indexOf ( element ) ) . every ( isAlwaysZeroWidth ) ) {
424
+ if ( ! parent . elements . slice ( 0 , parent . elements . indexOf ( element ) ) . every ( RAA . isZeroLength ) ) {
431
425
return false ;
432
426
}
433
427
const grandParent = parent . parent ;
@@ -457,13 +451,7 @@ function isFirstMatch(element) {
457
451
* @returns {boolean }
458
452
*/
459
453
function underAStar(node) {
	// Delegate to regexp-ast-analysis; anything that may repeat more than 10 times counts as "under a star".
	const maxRepetition = RAA.getEffectiveMaximumRepetition(node);
	return maxRepetition > 10;
}
468
456
469
457
/**
0 commit comments