github · erik-krogh · Mar 14, 2023
@@ -47,12 +47,7 @@ private newtype TRegExpParent =
   /** A special character */
   TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
   /** A normal character */
-  TRegExpNormalChar(Regex re, int start, int end) {
-    re.normalCharacterSequence(start, end)
-    or
-    re.escapedCharacter(start, end) and
-    not re.specialCharacter(start, end, _)
-  } or
+  TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or
   /** A back reference */
   TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
 

@@ -467,7 +467,6 @@ abstract class RegexString extends Expr {
    * Holds if a normal character is found between `start` and `end`.
    */
   predicate normalCharacter(int start, int end) {
-    end = start + 1 and
     this.character(start, end) and
     not this.specialCharacter(start, end, _)
   }
@@ -490,49 +489,6 @@ abstract class RegexString extends Expr {
     )
   }
 
-  /**
-   * Holds if the range [start:end) consists of only 'normal' characters.
-   */
-  predicate normalCharacterSequence(int start, int end) {
-    // a normal character inside a character set is interpreted on its own
-    this.normalCharacter(start, end) and
-    this.inCharSet(start)
-    or
-    // a maximal run of normal characters is considered as one constant
-    exists(int s, int e |
-      e = max(int i | this.normalCharacterRun(s, i)) and
-      not this.inCharSet(s)
-    |
-      // 'abc' can be considered one constant, but
-      // 'abc+' has to be broken up into 'ab' and 'c+',
-      // as the qualifier only applies to 'c'.
-      if this.qualifier(e, _, _, _)
-      then
-        end = e and start = e - 1
-        or
-        end = e - 1 and start = s and start < end
-      else (
-        end = e and
-        start = s
-      )
-    )
-  }
-
-  private predicate normalCharacterRun(int start, int end) {
-    (
-      this.normalCharacterRun(start, end - 1)
-      or
-      start = end - 1 and not this.normalCharacter(start - 1, start)
-    ) and
-    this.normalCharacter(end - 1, end)
-  }
-
-  private predicate characterItem(int start, int end) {
-    this.normalCharacterSequence(start, end) or
-    this.escapedCharacter(start, end) or
-    this.specialCharacter(start, end, _)
-  }
-
   /** Whether the text in the range `start,end` is a group */
   predicate group(int start, int end) {
     this.groupContents(start, end, _, _)
@@ -819,7 +775,7 @@ abstract class RegexString extends Expr {
   string getBackrefName(int start, int end) { this.named_backreference(start, end, result) }
 
   private predicate baseItem(int start, int end) {
-    this.characterItem(start, end) and
+    this.character(start, end) and
     not exists(int x, int y | this.charSet(x, y) and x <= start and y >= end)
     or
     this.group(start, end)
@@ -939,14 +895,14 @@ abstract class RegexString extends Expr {
   }
 
   private predicate item_start(int start) {
-    this.characterItem(start, _) or
+    this.character(start, _) or
     this.isGroupStart(start) or
     this.charSet(start, _) or
     this.backreference(start, _)
   }
 
   private predicate item_end(int end) {
-    this.characterItem(_, end)
+    this.character(_, end)
     or
     exists(int endm1 | this.isGroupEnd(endm1) and end = endm1 + 1)
     or
@@ -1053,7 +1009,7 @@ abstract class RegexString extends Expr {
    */
   predicate firstItem(int start, int end) {
     (
-      this.characterItem(start, end)
+      this.character(start, end)
       or
       this.qualifiedItem(start, end, _, _)
       or
@@ -1068,7 +1024,7 @@ abstract class RegexString extends Expr {
    */
   predicate lastItem(int start, int end) {
     (
-      this.characterItem(start, end)
+      this.character(start, end)
       or
       this.qualifiedItem(start, end, _, _)
       or

@@ -1,6 +1,10 @@
 | KnownCVEs.py:15:22:15:24 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
 | KnownCVEs.py:30:24:31:25 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. |
 | KnownCVEs.py:35:18:35:81 | ([-/:,#%.'"\\s!\\w]\|\\w-\\w\|'[\\s\\w]+'\\s*\|"[\\s\\w]+"\|\\([\\d,%\\.\\s]+\\))* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '"\\t"'. |
+| KnownCVEs.py:88:97:91:106 | [^\\s()<>]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!'. |
+| KnownCVEs.py:88:111:91:120 | [^\\s()<>]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!'. |
+| KnownCVEs.py:88:150:91:159 | [^\\s()<>]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!'. |
+| myReTest.py:4:1385:87:1450 | (?:(?:xn--[-]{0,2})\|[a-z\\u00a1-\\uffff\\U00010000-\\U0010ffff0-9]-?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '00.'. |
 | redos.py:6:28:6:42 | (?:__\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '__'. |
 | redos.py:6:52:6:68 | (?:\\*\\*\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings starting with '*' and containing many repetitions of '**'. |
 | redos.py:21:34:21:53 | (?:[^"\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings starting with '\\t"' and containing many repetitions of '\\\\\\\\'. |
@@ -58,7 +62,7 @@
 | redos.py:220:25:220:29 | [^X]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'W'. |
 | redos.py:223:30:223:30 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'W' and containing many repetitions of 'bW'. |
 | redos.py:229:30:229:30 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'W' and containing many repetitions of 'bW'. |
-| redos.py:241:26:241:27 | ab | This part of the regular expression may cause exponential backtracking on strings starting with 'a' and containing many repetitions of 'ab'. |
+| redos.py:241:27:241:27 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'a' and containing many repetitions of 'ba'. |
 | redos.py:247:25:247:31 | [\\n\\s]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
 | redos.py:256:25:256:27 | \\w* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
 | redos.py:256:37:256:39 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |