Skip to content

PY: revert group sequences of chars #12514

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions python/ql/lib/semmle/python/RegexTreeView.qll
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,7 @@ private newtype TRegExpParent =
/** A special character */
TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
/** A normal character */
TRegExpNormalChar(Regex re, int start, int end) {
re.normalCharacterSequence(start, end)
or
re.escapedCharacter(start, end) and
not re.specialCharacter(start, end, _)
} or
TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or
/** A back reference */
TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }

Expand Down
54 changes: 5 additions & 49 deletions python/ql/lib/semmle/python/regex.qll
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,6 @@ abstract class RegexString extends Expr {
* Holds if a normal character is found between `start` and `end`.
*/
predicate normalCharacter(int start, int end) {
end = start + 1 and
this.character(start, end) and
not this.specialCharacter(start, end, _)
}
Expand All @@ -490,49 +489,6 @@ abstract class RegexString extends Expr {
)
}

/**
* Holds if the range [start:end) consists of only 'normal' characters.
*/
predicate normalCharacterSequence(int start, int end) {
// a normal character inside a character set is interpreted on its own
this.normalCharacter(start, end) and
this.inCharSet(start)
or
// a maximal run of normal characters is considered as one constant
exists(int s, int e |
e = max(int i | this.normalCharacterRun(s, i)) and
not this.inCharSet(s)
|
// 'abc' can be considered one constant, but
// 'abc+' has to be broken up into 'ab' and 'c+',
// as the qualifier only applies to 'c'.
if this.qualifier(e, _, _, _)
then
end = e and start = e - 1
or
end = e - 1 and start = s and start < end
else (
end = e and
start = s
)
)
}

private predicate normalCharacterRun(int start, int end) {
(
this.normalCharacterRun(start, end - 1)
or
start = end - 1 and not this.normalCharacter(start - 1, start)
) and
this.normalCharacter(end - 1, end)
}

private predicate characterItem(int start, int end) {
this.normalCharacterSequence(start, end) or
this.escapedCharacter(start, end) or
this.specialCharacter(start, end, _)
}

/** Whether the text in the range `start,end` is a group */
predicate group(int start, int end) {
this.groupContents(start, end, _, _)
Expand Down Expand Up @@ -819,7 +775,7 @@ abstract class RegexString extends Expr {
string getBackrefName(int start, int end) { this.named_backreference(start, end, result) }

private predicate baseItem(int start, int end) {
this.characterItem(start, end) and
this.character(start, end) and
not exists(int x, int y | this.charSet(x, y) and x <= start and y >= end)
or
this.group(start, end)
Expand Down Expand Up @@ -939,14 +895,14 @@ abstract class RegexString extends Expr {
}

private predicate item_start(int start) {
this.characterItem(start, _) or
this.character(start, _) or
this.isGroupStart(start) or
this.charSet(start, _) or
this.backreference(start, _)
}

private predicate item_end(int end) {
this.characterItem(_, end)
this.character(_, end)
or
exists(int endm1 | this.isGroupEnd(endm1) and end = endm1 + 1)
or
Expand Down Expand Up @@ -1053,7 +1009,7 @@ abstract class RegexString extends Expr {
*/
predicate firstItem(int start, int end) {
(
this.characterItem(start, end)
this.character(start, end)
or
this.qualifiedItem(start, end, _, _)
or
Expand All @@ -1068,7 +1024,7 @@ abstract class RegexString extends Expr {
*/
predicate lastItem(int start, int end) {
(
this.characterItem(start, end)
this.character(start, end)
or
this.qualifiedItem(start, end, _, _)
or
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
| KnownCVEs.py:15:22:15:24 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| KnownCVEs.py:30:24:31:25 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. |
| KnownCVEs.py:35:18:35:81 | ([-/:,#%.'"\\s!\\w]\|\\w-\\w\|'[\\s\\w]+'\\s*\|"[\\s\\w]+"\|\\([\\d,%\\.\\s]+\\))* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '"\\t"'. |
| KnownCVEs.py:88:97:91:106 | [^\\s()<>]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!'. |
| KnownCVEs.py:88:111:91:120 | [^\\s()<>]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!'. |
| KnownCVEs.py:88:150:91:159 | [^\\s()<>]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!'. |
| myReTest.py:4:1385:87:1450 | (?:(?:xn--[-]{0,2})\|[a-z\\u00a1-\\uffff\\U00010000-\\U0010ffff0-9]-?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '00.'. |
| redos.py:6:28:6:42 | (?:__\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '__'. |
| redos.py:6:52:6:68 | (?:\\*\\*\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings starting with '*' and containing many repetitions of '**'. |
| redos.py:21:34:21:53 | (?:[^"\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings starting with '\\t"' and containing many repetitions of '\\\\\\\\'. |
Expand Down Expand Up @@ -58,7 +62,7 @@
| redos.py:220:25:220:29 | [^X]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'W'. |
| redos.py:223:30:223:30 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'W' and containing many repetitions of 'bW'. |
| redos.py:229:30:229:30 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'W' and containing many repetitions of 'bW'. |
| redos.py:241:26:241:27 | ab | This part of the regular expression may cause exponential backtracking on strings starting with 'a' and containing many repetitions of 'ab'. |
| redos.py:241:27:241:27 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'a' and containing many repetitions of 'ba'. |
| redos.py:247:25:247:31 | [\\n\\s]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
| redos.py:256:25:256:27 | \\w* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
| redos.py:256:37:256:39 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
Expand Down