diff --git a/python/ql/lib/change-notes/2022-02-25-regex-group-characters.md b/python/ql/lib/change-notes/2022-02-25-regex-group-characters.md new file mode 100644 index 000000000000..615fe0023133 --- /dev/null +++ b/python/ql/lib/change-notes/2022-02-25-regex-group-characters.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The regular expression parser now groups sequences of normal characters. This reduces the number of instances of `RegExpNormalChar`. diff --git a/python/ql/lib/semmle/python/RegexTreeView.qll b/python/ql/lib/semmle/python/RegexTreeView.qll index 808bb265b697..95d983f5e88f 100644 --- a/python/ql/lib/semmle/python/RegexTreeView.qll +++ b/python/ql/lib/semmle/python/RegexTreeView.qll @@ -39,7 +39,12 @@ newtype TRegExpParent = /** A special character */ TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or /** A normal character */ - TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or + TRegExpNormalChar(Regex re, int start, int end) { + re.normalCharacterSequence(start, end) + or + re.escapedCharacter(start, end) and + not re.specialCharacter(start, end, _) + } or /** A back reference */ TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) } diff --git a/python/ql/lib/semmle/python/regex.qll b/python/ql/lib/semmle/python/regex.qll index 001b3bdc635c..fb56fa0ab118 100644 --- a/python/ql/lib/semmle/python/regex.qll +++ b/python/ql/lib/semmle/python/regex.qll @@ -427,6 +427,7 @@ abstract class RegexString extends Expr { } predicate normalCharacter(int start, int end) { + end = start + 1 and this.character(start, end) and not this.specialCharacter(start, end, _) } @@ -446,6 +447,49 @@ abstract class RegexString extends Expr { ) } + /** + * Holds if the range [start:end) consists of only 'normal' characters. + */ + predicate normalCharacterSequence(int start, int end) { + // a normal character inside a character set is interpreted on its own + this.normalCharacter(start, end) and + this.inCharSet(start) + or + // a maximal run of normal characters is considered as one constant + exists(int s, int e | + e = max(int i | this.normalCharacterRun(s, i)) and + not this.inCharSet(s) + | + // 'abc' can be considered one constant, but + // 'abc+' has to be broken up into 'ab' and 'c+', + // as the qualifier only applies to 'c'. + if this.qualifier(e, _, _, _) + then + end = e and start = e - 1 + or + end = e - 1 and start = s and start < end + else ( + end = e and + start = s + ) + ) + } + + private predicate normalCharacterRun(int start, int end) { + ( + this.normalCharacterRun(start, end - 1) + or + start = end - 1 and not this.normalCharacter(start - 1, start) + ) and + this.normalCharacter(end - 1, end) + } + + private predicate characterItem(int start, int end) { + this.normalCharacterSequence(start, end) or + this.escapedCharacter(start, end) or + this.specialCharacter(start, end, _) + } + /** Whether the text in the range start,end is a group */ predicate group(int start, int end) { this.groupContents(start, end, _, _) @@ -717,7 +761,7 @@ abstract class RegexString extends Expr { string getBackrefName(int start, int end) { this.named_backreference(start, end, result) } private predicate baseItem(int start, int end) { - this.character(start, end) and + this.characterItem(start, end) and not exists(int x, int y | this.charSet(x, y) and x <= start and y >= end) or this.group(start, end) @@ -837,14 +881,14 @@ abstract class RegexString extends Expr { } private predicate item_start(int start) { - this.character(start, _) or + this.characterItem(start, _) or this.isGroupStart(start) or this.charSet(start, _) or this.backreference(start, _) } private predicate item_end(int end) { - this.character(_, end) + this.characterItem(_, end) or exists(int endm1 | this.isGroupEnd(endm1) and end = endm1 + 1) or @@ -953,7 +997,7 @@ abstract class RegexString extends Expr { */ predicate firstItem(int start, int end) { ( - this.character(start, end) + this.characterItem(start, end) or this.qualifiedItem(start, end, _, _) or @@ -968,7 +1012,7 @@ abstract class RegexString extends Expr { */ predicate lastItem(int start, int end) { ( - this.character(start, end) + this.characterItem(start, end) or this.qualifiedItem(start, end, _, _) or diff --git a/python/ql/test/library-tests/regex/FirstLast.expected b/python/ql/test/library-tests/regex/FirstLast.expected index 5c393547a53c..e388e0d1fdf7 100644 --- a/python/ql/test/library-tests/regex/FirstLast.expected +++ b/python/ql/test/library-tests/regex/FirstLast.expected @@ -1,6 +1,6 @@ -| 012345678 | first | 0 | 1 | -| 012345678 | last | 8 | 9 | -| (?!not-this)^[A-Z_]+$ | first | 3 | 4 | +| 012345678 | first | 0 | 9 | +| 012345678 | last | 0 | 9 | +| (?!not-this)^[A-Z_]+$ | first | 3 | 11 | | (?!not-this)^[A-Z_]+$ | first | 12 | 13 | | (?!not-this)^[A-Z_]+$ | first | 13 | 19 | | (?!not-this)^[A-Z_]+$ | first | 13 | 20 | @@ -27,9 +27,9 @@ | (?m)^(?!$) | last | 4 | 5 | | (?m)^(?!$) | last | 8 | 9 | | (\\033\|~{) | first | 1 | 5 | -| (\\033\|~{) | first | 6 | 7 | +| (\\033\|~{) | first | 6 | 8 | | (\\033\|~{) | last | 1 | 5 | -| (\\033\|~{) | last | 7 | 8 | +| (\\033\|~{) | last | 6 | 8 | | [\ufffd-\ufffd] | first | 0 | 5 | | [\ufffd-\ufffd] | last | 0 | 5 | | [\ufffd-\ufffd][\ufffd-\ufffd] | first | 0 | 5 | @@ -52,8 +52,8 @@ | \\A[+-]?\\d+ | last | 7 | 9 | | \\A[+-]?\\d+ | last | 7 | 10 | | \\Afoo\\Z | first | 0 | 2 | -| \\Afoo\\Z | first | 2 | 3 | -| \\Afoo\\Z | last | 4 | 5 | +| \\Afoo\\Z | first | 2 | 5 | +| \\Afoo\\Z | last | 2 | 5 | | \\Afoo\\Z | last | 5 | 7 | | \\[(?P[^[]*)\\]\\((?P[^)]*) | first | 0 | 2 | | \\[(?P[^[]*)\\]\\((?P[^)]*) | last | 28 | 32 | @@ -86,24 +86,24 @@ | ^[A-Z_]+$(?= end) or this.group(start, end) @@ -746,7 +790,7 @@ class RegExp extends AST::RegExpLiteral { } private predicate itemStart(int start) { - this.character(start, _) or + this.characterItem(start, _) or this.isGroupStart(start) or this.charSet(start, _) or this.backreference(start, _) or @@ -754,7 +798,7 @@ class RegExp extends AST::RegExpLiteral { } private predicate itemEnd(int end) { - this.character(_, end) + this.characterItem(_, end) or exists(int endm1 | this.isGroupEnd(endm1) and end = endm1 + 1) or @@ -865,7 +909,7 @@ class RegExp extends AST::RegExpLiteral { */ predicate firstItem(int start, int end) { ( - this.character(start, end) + this.characterItem(start, end) or this.qualifiedItem(start, end, _, _) or @@ -880,7 +924,7 @@ class RegExp extends AST::RegExpLiteral { */ predicate lastItem(int start, int end) { ( - this.character(start, end) + this.characterItem(start, end) or this.qualifiedItem(start, end, _, _) or diff --git a/ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll b/ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll index 9c8e39e56cea..7c2df79abef4 100644 --- a/ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll +++ b/ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll @@ -228,7 +228,12 @@ newtype TRegExpParent = TRegExpCharacterRange(RegExp re, int start, int end) { re.charRange(_, start, _, _, end) } or TRegExpGroup(RegExp re, int start, int end) { re.group(start, end) } or TRegExpSpecialChar(RegExp re, int start, int end) { re.specialCharacter(start, end, _) } or - TRegExpNormalChar(RegExp re, int start, int end) { re.normalCharacter(start, end) } or + TRegExpNormalChar(RegExp re, int start, int end) { + re.normalCharacterSequence(start, end) + or + re.escapedCharacter(start, end) and + not re.specialCharacter(start, end, _) + } or TRegExpBackRef(RegExp re, int start, int end) { re.backreference(start, end) } or TRegExpNamedCharacterProperty(RegExp re, int start, int end) { re.namedCharacterProperty(start, end, _) diff --git a/ruby/ql/test/library-tests/ast/Ast.expected b/ruby/ql/test/library-tests/ast/Ast.expected index a3e090324ba4..422af8141d5a 100644 --- a/ruby/ql/test/library-tests/ast/Ast.expected +++ b/ruby/ql/test/library-tests/ast/Ast.expected @@ -973,10 +973,8 @@ control/cases.rb: # 92| getParsed: [RegExpSequence] .*abc[0-9] # 92| 0: [RegExpStar] .* # 92| 0: [RegExpDot] . -# 92| 1: [RegExpConstant, RegExpNormalChar] a -# 92| 2: [RegExpConstant, RegExpNormalChar] b -# 92| 3: [RegExpConstant, RegExpNormalChar] c -# 92| 4: [RegExpCharacterClass] [0-9] +# 92| 1: [RegExpConstant, RegExpNormalChar] abc +# 92| 2: [RegExpCharacterClass] [0-9] # 92| 0: [RegExpCharacterRange] 0-9 # 92| 0: [RegExpConstant, RegExpNormalChar] 0 # 92| 1: [RegExpConstant, RegExpNormalChar] 9 @@ -1823,47 +1821,25 @@ literals/literals.rb: # 133| getAnOperand/getArgument/getRightOperand: [IntegerLiteral] 4 # 136| getStmt: [RegExpLiteral] // # 137| getStmt: [RegExpLiteral] /foo/ -# 137| getParsed: [RegExpSequence] foo -# 137| 0: [RegExpConstant, RegExpNormalChar] f -# 137| 1: [RegExpConstant, RegExpNormalChar] o -# 137| 2: [RegExpConstant, RegExpNormalChar] o +# 137| getParsed: [RegExpConstant, RegExpNormalChar] foo # 137| getComponent: [RegExpTextComponent] foo # 138| getStmt: [RegExpLiteral] /foo/ -# 138| getParsed: [RegExpSequence] foo -# 138| 0: [RegExpConstant, RegExpNormalChar] f -# 138| 1: [RegExpConstant, RegExpNormalChar] o -# 138| 2: [RegExpConstant, RegExpNormalChar] o +# 138| getParsed: [RegExpConstant, RegExpNormalChar] foo # 138| getComponent: [RegExpTextComponent] foo # 139| getStmt: [RegExpLiteral] /foo+\sbar\S/ # 139| getParsed: [RegExpSequence] foo+\sbar\S -# 139| 0: [RegExpConstant, RegExpNormalChar] f -# 139| 1: [RegExpConstant, RegExpNormalChar] o -# 139| 2: [RegExpPlus] o+ +# 139| 0: [RegExpConstant, RegExpNormalChar] fo +# 139| 1: [RegExpPlus] o+ # 139| 0: [RegExpConstant, RegExpNormalChar] o -# 139| 3: [RegExpCharacterClassEscape] \s -# 139| 4: [RegExpConstant, RegExpNormalChar] b -# 139| 5: [RegExpConstant, RegExpNormalChar] a -# 139| 6: [RegExpConstant, RegExpNormalChar] r -# 139| 7: [RegExpCharacterClassEscape] \S +# 139| 2: [RegExpCharacterClassEscape] \s +# 139| 3: [RegExpConstant, RegExpNormalChar] bar +# 139| 4: [RegExpCharacterClassEscape] \S # 139| getComponent: [RegExpTextComponent] foo+ # 139| getComponent: [RegExpEscapeSequenceComponent] \s # 139| getComponent: [RegExpTextComponent] bar # 139| getComponent: [RegExpEscapeSequenceComponent] \S # 140| getStmt: [RegExpLiteral] /foo#{...}bar#{...}#{...}/ -# 140| getParsed: [RegExpSequence] foo2barbarbar -# 140| 0: [RegExpConstant, RegExpNormalChar] f -# 140| 1: [RegExpConstant, RegExpNormalChar] o -# 140| 2: [RegExpConstant, RegExpNormalChar] o -# 140| 3: [RegExpConstant, RegExpNormalChar] 2 -# 140| 4: [RegExpConstant, RegExpNormalChar] b -# 140| 5: [RegExpConstant, RegExpNormalChar] a -# 140| 6: [RegExpConstant, RegExpNormalChar] r -# 140| 7: [RegExpConstant, RegExpNormalChar] b -# 140| 8: [RegExpConstant, RegExpNormalChar] a -# 140| 9: [RegExpConstant, RegExpNormalChar] r -# 140| 10: [RegExpConstant, RegExpNormalChar] b -# 140| 11: [RegExpConstant, RegExpNormalChar] a -# 140| 12: [RegExpConstant, RegExpNormalChar] r +# 140| getParsed: [RegExpConstant, RegExpNormalChar] foo2barbarbar # 140| getComponent: [RegExpTextComponent] foo # 140| getComponent: [RegExpInterpolationComponent] #{...} # 140| getStmt: [AddExpr] ... + ... @@ -1878,47 +1854,25 @@ literals/literals.rb: # 141| getComponent: [RegExpTextComponent] foo # 142| getStmt: [RegExpLiteral] // # 143| getStmt: [RegExpLiteral] /foo/ -# 143| getParsed: [RegExpSequence] foo -# 143| 0: [RegExpConstant, RegExpNormalChar] f -# 143| 1: [RegExpConstant, RegExpNormalChar] o -# 143| 2: [RegExpConstant, RegExpNormalChar] o +# 143| getParsed: [RegExpConstant, RegExpNormalChar] foo # 143| getComponent: [RegExpTextComponent] foo # 144| getStmt: [RegExpLiteral] /foo/ -# 144| getParsed: [RegExpSequence] foo -# 144| 0: [RegExpConstant, RegExpNormalChar] f -# 144| 1: [RegExpConstant, RegExpNormalChar] o -# 144| 2: [RegExpConstant, RegExpNormalChar] o +# 144| getParsed: [RegExpConstant, RegExpNormalChar] foo # 144| getComponent: [RegExpTextComponent] foo # 145| getStmt: [RegExpLiteral] /foo+\sbar\S/ # 145| getParsed: [RegExpSequence] foo+\sbar\S -# 145| 0: [RegExpConstant, RegExpNormalChar] f -# 145| 1: [RegExpConstant, RegExpNormalChar] o -# 145| 2: [RegExpPlus] o+ +# 145| 0: [RegExpConstant, RegExpNormalChar] fo +# 145| 1: [RegExpPlus] o+ # 145| 0: [RegExpConstant, RegExpNormalChar] o -# 145| 3: [RegExpCharacterClassEscape] \s -# 145| 4: [RegExpConstant, RegExpNormalChar] b -# 145| 5: [RegExpConstant, RegExpNormalChar] a -# 145| 6: [RegExpConstant, RegExpNormalChar] r -# 145| 7: [RegExpCharacterClassEscape] \S +# 145| 2: [RegExpCharacterClassEscape] \s +# 145| 3: [RegExpConstant, RegExpNormalChar] bar +# 145| 4: [RegExpCharacterClassEscape] \S # 145| getComponent: [RegExpTextComponent] foo+ # 145| getComponent: [RegExpEscapeSequenceComponent] \s # 145| getComponent: [RegExpTextComponent] bar # 145| getComponent: [RegExpEscapeSequenceComponent] \S # 146| getStmt: [RegExpLiteral] /foo#{...}bar#{...}#{...}/ -# 146| getParsed: [RegExpSequence] foo2barbarbar -# 146| 0: [RegExpConstant, RegExpNormalChar] f -# 146| 1: [RegExpConstant, RegExpNormalChar] o -# 146| 2: [RegExpConstant, RegExpNormalChar] o -# 146| 3: [RegExpConstant, RegExpNormalChar] 2 -# 146| 4: [RegExpConstant, RegExpNormalChar] b -# 146| 5: [RegExpConstant, RegExpNormalChar] a -# 146| 6: [RegExpConstant, RegExpNormalChar] r -# 146| 7: [RegExpConstant, RegExpNormalChar] b -# 146| 8: [RegExpConstant, RegExpNormalChar] a -# 146| 9: [RegExpConstant, RegExpNormalChar] r -# 146| 10: [RegExpConstant, RegExpNormalChar] b -# 146| 11: [RegExpConstant, RegExpNormalChar] a -# 146| 12: [RegExpConstant, RegExpNormalChar] r +# 146| getParsed: [RegExpConstant, RegExpNormalChar] foo2barbarbar # 146| getComponent: [RegExpTextComponent] foo # 146| getComponent: [RegExpInterpolationComponent] #{...} # 146| getStmt: [AddExpr] ... + ... @@ -2469,10 +2423,8 @@ operations/operations.rb: # 65| getAnOperand/getLeftOperand/getReceiver: [LocalVariableAccess] name # 65| getAnOperand/getArgument/getRightOperand: [RegExpLiteral] /foo.*/ # 65| getParsed: [RegExpSequence] foo.* -# 65| 0: [RegExpConstant, RegExpNormalChar] f -# 65| 1: [RegExpConstant, RegExpNormalChar] o -# 65| 2: [RegExpConstant, RegExpNormalChar] o -# 65| 3: [RegExpStar] .* +# 65| 0: [RegExpConstant, RegExpNormalChar] foo +# 65| 1: [RegExpStar] .* # 65| 0: [RegExpDot] . # 65| getComponent: [RegExpTextComponent] foo.* # 66| getStmt: [NoRegExpMatchExpr] ... !~ ... @@ -2481,9 +2433,7 @@ operations/operations.rb: # 66| getParsed: [RegExpSequence] .*bar # 66| 0: [RegExpStar] .* # 66| 0: [RegExpDot] . -# 66| 1: [RegExpConstant, RegExpNormalChar] b -# 66| 2: [RegExpConstant, RegExpNormalChar] a -# 66| 3: [RegExpConstant, RegExpNormalChar] r +# 66| 1: [RegExpConstant, RegExpNormalChar] bar # 66| getComponent: [RegExpTextComponent] .*bar # 69| getStmt: [AssignAddExpr] ... += ... # 69| getAnOperand/getLeftOperand: [LocalVariableAccess] x diff --git a/ruby/ql/test/library-tests/regexp/parse.expected b/ruby/ql/test/library-tests/regexp/parse.expected index c42b90d1ab81..3241ce25388f 100644 --- a/ruby/ql/test/library-tests/regexp/parse.expected +++ b/ruby/ql/test/library-tests/regexp/parse.expected @@ -1,14 +1,5 @@ regexp.rb: -# 5| [RegExpConstant, RegExpNormalChar] a - -# 5| [RegExpSequence] abc -#-----| 0 -> [RegExpConstant, RegExpNormalChar] a -#-----| 1 -> [RegExpConstant, RegExpNormalChar] b -#-----| 2 -> [RegExpConstant, RegExpNormalChar] c - -# 5| [RegExpConstant, RegExpNormalChar] b - -# 5| [RegExpConstant, RegExpNormalChar] c +# 5| [RegExpConstant, RegExpNormalChar] abc # 8| [RegExpConstant, RegExpNormalChar] a @@ -38,70 +29,36 @@ regexp.rb: # 9| [RegExpRange] a{4,8} #-----| 0 -> [RegExpConstant, RegExpNormalChar] a -# 9| [RegExpNormalChar] 4 - -# 9| [RegExpNormalChar] , - -# 9| [RegExpNormalChar] 8 - -# 9| [RegExpNormalChar] } +# 9| [RegExpNormalChar] 4,8} # 10| [RegExpConstant, RegExpNormalChar] a # 10| [RegExpRange] a{,8} #-----| 0 -> [RegExpConstant, RegExpNormalChar] a -# 10| [RegExpNormalChar] , - -# 10| [RegExpNormalChar] 8 - -# 10| [RegExpNormalChar] } +# 10| [RegExpNormalChar] ,8} # 11| [RegExpConstant, RegExpNormalChar] a # 11| [InfiniteRepetitionQuantifier, RegExpRange] a{3,} #-----| 0 -> [RegExpConstant, RegExpNormalChar] a -# 11| [RegExpNormalChar] 3 - -# 11| [RegExpNormalChar] , - -# 11| [RegExpNormalChar] } +# 11| [RegExpNormalChar] 3,} # 12| [RegExpConstant, RegExpNormalChar] a # 12| [RegExpRange] a{7} #-----| 0 -> [RegExpConstant, RegExpNormalChar] a -# 12| [RegExpNormalChar] 7 +# 12| [RegExpNormalChar] 7} -# 12| [RegExpNormalChar] } - -# 15| [RegExpConstant, RegExpNormalChar] f - -# 15| [RegExpSequence] foo -#-----| 0 -> [RegExpConstant, RegExpNormalChar] f -#-----| 1 -> [RegExpConstant, RegExpNormalChar] o -#-----| 2 -> [RegExpConstant, RegExpNormalChar] o +# 15| [RegExpConstant, RegExpNormalChar] foo # 15| [RegExpAlt] foo|bar -#-----| 0 -> [RegExpSequence] foo -#-----| 1 -> [RegExpSequence] bar - -# 15| [RegExpConstant, RegExpNormalChar] o +#-----| 0 -> [RegExpConstant, RegExpNormalChar] foo +#-----| 1 -> [RegExpConstant, RegExpNormalChar] bar -# 15| [RegExpConstant, RegExpNormalChar] o - -# 15| [RegExpConstant, RegExpNormalChar] b - -# 15| [RegExpSequence] bar -#-----| 0 -> [RegExpConstant, RegExpNormalChar] b -#-----| 1 -> [RegExpConstant, RegExpNormalChar] a -#-----| 2 -> [RegExpConstant, RegExpNormalChar] r - -# 15| [RegExpConstant, RegExpNormalChar] a - -# 15| [RegExpConstant, RegExpNormalChar] r +# 15| [RegExpConstant, RegExpNormalChar] bar # 18| [RegExpCharacterClass] [abc] #-----| 0 -> [RegExpConstant, RegExpNormalChar] a @@ -229,10 +186,7 @@ regexp.rb: # 29| [RegExpSequence] [[a-f]A-F] #-----| 0 -> [RegExpCharacterClass] [[a-f] -#-----| 1 -> [RegExpConstant, RegExpNormalChar] A -#-----| 2 -> [RegExpConstant, RegExpNormalChar] - -#-----| 3 -> [RegExpConstant, RegExpNormalChar] F -#-----| 4 -> [RegExpConstant, RegExpNormalChar] ] +#-----| 1 -> [RegExpConstant, RegExpNormalChar] A-F] # 29| [RegExpConstant, RegExpNormalChar] [ @@ -244,13 +198,7 @@ regexp.rb: # 29| [RegExpConstant, RegExpNormalChar] f -# 29| [RegExpConstant, RegExpNormalChar] A - -# 29| [RegExpConstant, RegExpNormalChar] - - -# 29| [RegExpConstant, RegExpNormalChar] F - -# 29| [RegExpConstant, RegExpNormalChar] ] +# 29| [RegExpConstant, RegExpNormalChar] A-F] # 32| [RegExpDot] . @@ -312,69 +260,41 @@ regexp.rb: # 41| [RegExpSequence] \Gabc #-----| 0 -> [RegExpSpecialChar] \G -#-----| 1 -> [RegExpConstant, RegExpNormalChar] a -#-----| 2 -> [RegExpConstant, RegExpNormalChar] b -#-----| 3 -> [RegExpConstant, RegExpNormalChar] c - -# 41| [RegExpConstant, RegExpNormalChar] a +#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc -# 41| [RegExpConstant, RegExpNormalChar] b - -# 41| [RegExpConstant, RegExpNormalChar] c +# 41| [RegExpConstant, RegExpNormalChar] abc # 42| [RegExpSpecialChar] \b # 42| [RegExpSequence] \b!a\B #-----| 0 -> [RegExpSpecialChar] \b -#-----| 1 -> [RegExpConstant, RegExpNormalChar] ! -#-----| 2 -> [RegExpConstant, RegExpNormalChar] a -#-----| 3 -> [RegExpSpecialChar] \B - -# 42| [RegExpConstant, RegExpNormalChar] ! +#-----| 1 -> [RegExpConstant, RegExpNormalChar] !a +#-----| 2 -> [RegExpSpecialChar] \B -# 42| [RegExpConstant, RegExpNormalChar] a +# 42| [RegExpConstant, RegExpNormalChar] !a # 42| [RegExpSpecialChar] \B # 45| [RegExpGroup] (foo) -#-----| 0 -> [RegExpSequence] foo +#-----| 0 -> [RegExpConstant, RegExpNormalChar] foo # 45| [RegExpStar] (foo)* #-----| 0 -> [RegExpGroup] (foo) # 45| [RegExpSequence] (foo)*bar #-----| 0 -> [RegExpStar] (foo)* -#-----| 1 -> [RegExpConstant, RegExpNormalChar] b -#-----| 2 -> [RegExpConstant, RegExpNormalChar] a -#-----| 3 -> [RegExpConstant, RegExpNormalChar] r - -# 45| [RegExpConstant, RegExpNormalChar] f - -# 45| [RegExpSequence] foo -#-----| 0 -> [RegExpConstant, RegExpNormalChar] f -#-----| 1 -> [RegExpConstant, RegExpNormalChar] o -#-----| 2 -> [RegExpConstant, RegExpNormalChar] o - -# 45| [RegExpConstant, RegExpNormalChar] o +#-----| 1 -> [RegExpConstant, RegExpNormalChar] bar -# 45| [RegExpConstant, RegExpNormalChar] o +# 45| [RegExpConstant, RegExpNormalChar] foo -# 45| [RegExpConstant, RegExpNormalChar] b +# 45| [RegExpConstant, RegExpNormalChar] bar -# 45| [RegExpConstant, RegExpNormalChar] a - -# 45| [RegExpConstant, RegExpNormalChar] r - -# 46| [RegExpConstant, RegExpNormalChar] f +# 46| [RegExpConstant, RegExpNormalChar] fo # 46| [RegExpSequence] fo(o|b)ar -#-----| 0 -> [RegExpConstant, RegExpNormalChar] f -#-----| 1 -> [RegExpConstant, RegExpNormalChar] o -#-----| 2 -> [RegExpGroup] (o|b) -#-----| 3 -> [RegExpConstant, RegExpNormalChar] a -#-----| 4 -> [RegExpConstant, RegExpNormalChar] r - -# 46| [RegExpConstant, RegExpNormalChar] o +#-----| 0 -> [RegExpConstant, RegExpNormalChar] fo +#-----| 1 -> [RegExpGroup] (o|b) +#-----| 2 -> [RegExpConstant, RegExpNormalChar] ar # 46| [RegExpGroup] (o|b) #-----| 0 -> [RegExpAlt] o|b @@ -387,9 +307,7 @@ regexp.rb: # 46| [RegExpConstant, RegExpNormalChar] b -# 46| [RegExpConstant, RegExpNormalChar] a - -# 46| [RegExpConstant, RegExpNormalChar] r +# 46| [RegExpConstant, RegExpNormalChar] ar # 47| [RegExpGroup] (a|b|cd) #-----| 0 -> [RegExpAlt] a|b|cd @@ -403,17 +321,11 @@ regexp.rb: # 47| [RegExpAlt] a|b|cd #-----| 0 -> [RegExpConstant, RegExpNormalChar] a #-----| 1 -> [RegExpConstant, RegExpNormalChar] b -#-----| 2 -> [RegExpSequence] cd +#-----| 2 -> [RegExpConstant, RegExpNormalChar] cd # 47| [RegExpConstant, RegExpNormalChar] b -# 47| [RegExpConstant, RegExpNormalChar] c - -# 47| [RegExpSequence] cd -#-----| 0 -> [RegExpConstant, RegExpNormalChar] c -#-----| 1 -> [RegExpConstant, RegExpNormalChar] d - -# 47| [RegExpConstant, RegExpNormalChar] d +# 47| [RegExpConstant, RegExpNormalChar] cd # 47| [RegExpConstant, RegExpNormalChar] e @@ -511,13 +423,7 @@ regexp.rb: # 61| [RegExpRange] \p{^Alnum}{2,3} #-----| 0 -> [RegExpNamedCharacterProperty] \p{^Alnum} -# 61| [RegExpNormalChar] 2 - -# 61| [RegExpNormalChar] , - -# 61| [RegExpNormalChar] 3 - -# 61| [RegExpNormalChar] } +# 61| [RegExpNormalChar] 2,3} # 62| [RegExpCharacterClass] [a-f\p{Digit}] #-----| 0 -> [RegExpCharacterRange] a-f @@ -583,13 +489,4 @@ regexp.rb: # 74| [RegExpNamedCharacterProperty] [:digit:] -# 78| [RegExpConstant, RegExpNormalChar] a - -# 78| [RegExpSequence] abc -#-----| 0 -> [RegExpConstant, RegExpNormalChar] a -#-----| 1 -> [RegExpConstant, RegExpNormalChar] b -#-----| 2 -> [RegExpConstant, RegExpNormalChar] c - -# 78| [RegExpConstant, RegExpNormalChar] b - -# 78| [RegExpConstant, RegExpNormalChar] c +# 78| [RegExpConstant, RegExpNormalChar] abc diff --git a/ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/ReDoS.expected b/ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/ReDoS.expected index 213f0e11189a..572ac08887d3 100644 --- a/ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/ReDoS.expected +++ b/ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/ReDoS.expected @@ -54,7 +54,7 @@ | tst.rb:218:11:218:15 | [^X]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'W'. | | tst.rb:221:16:221:16 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'W' and containing many repetitions of 'bW'. | | tst.rb:227:16:227:16 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'W' and containing many repetitions of 'bW'. | -| tst.rb:239:13:239:13 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'a' and containing many repetitions of 'ba'. | +| tst.rb:239:12:239:13 | ab | This part of the regular expression may cause exponential backtracking on strings starting with 'a' and containing many repetitions of 'ab'. | | tst.rb:245:11:245:17 | [\\n\\s]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. | | tst.rb:254:11:254:13 | \\w* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. | | tst.rb:254:23:254:25 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |