Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-40480 "fnmatch" exponential execution time #19908

Merged
merged 5 commits into from
May 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 53 additions & 7 deletions Lib/fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,19 @@ def translate(pat):
There is no way to quote meta-characters.
"""

STAR = object()
res = []
add = res.append
i, n = 0, len(pat)
res = ''
while i < n:
c = pat[i]
i = i+1
if c == '*':
res = res + '.*'
# compress consecutive `*` into one
if (not res) or res[-1] is not STAR:
add(STAR)
elif c == '?':
res = res + '.'
add('.')
elif c == '[':
j = i
if j < n and pat[j] == '!':
Expand All @@ -95,7 +99,7 @@ def translate(pat):
while j < n and pat[j] != ']':
j = j+1
if j >= n:
res = res + '\\['
add('\\[')
else:
stuff = pat[i:j]
if '--' not in stuff:
Expand All @@ -122,7 +126,49 @@ def translate(pat):
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
res = '%s[%s]' % (res, stuff)
add(f'[{stuff}]')
else:
res = res + re.escape(c)
return r'(?s:%s)\Z' % res
add(re.escape(c))
assert i == n

# Deal with STARs.
inp = res
res = []
add = res.append
i, n = 0, len(inp)
# Fixed pieces at the start?
while i < n and inp[i] is not STAR:
add(inp[i])
i += 1
# Now deal with STAR fixed STAR fixed ...
# For an interior `STAR fixed` pairing, we want to do a minimal
# .*? match followed by `fixed`, with no possibility of backtracking.
# We can't spell that directly, but can trick it into working by matching
# .*?fixed
# in a lookahead assertion, save the matched part in a group, then
# consume that group via a backreference. If the overall match fails,
# the lookahead assertion won't try alternatives. So the translation is:
# (?=(?P<name>.*?fixed))(?P=name)
# Group names are created as needed: g1, g2, g3, ...
groupnum = 0
while i < n:
assert inp[i] is STAR
i += 1
if i == n:
add(".*")
break
assert inp[i] is not STAR
fixed = []
while i < n and inp[i] is not STAR:
fixed.append(inp[i])
i += 1
fixed = "".join(fixed)
if i == n:
add(".*")
add(fixed)
else:
groupnum += 1
add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
assert i == n
res = "".join(res)
return fr'(?s:{res})\Z'
17 changes: 17 additions & 0 deletions Lib/test/test_fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ def test_fnmatch(self):
check('\nfoo', 'foo*', False)
check('\n', '*')

def test_slow_fnmatch(self):
    # Regression test for bpo-40480: a pattern containing many `*`
    # wildcards is checked against a matching name and a near-miss name.
    # NOTE(review): check_match is defined outside this view; the trailing
    # False presumably means "expect no match" -- confirm against the helper.
    check = self.check_match
    # 50 'a' characters satisfy ten `*a` groups.
    check('a' * 50, '*a*a*a*a*a*a*a*a*a*a')
    # The next "takes forever" if the regexp translation is
    # straightforward. See bpo-40480.
    # The trailing 'b' means the pattern's final literal 'a' cannot match
    # at the end of the name.
    check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False)

def test_mix_bytes_str(self):
self.assertRaises(TypeError, fnmatch, 'test', b'*')
self.assertRaises(TypeError, fnmatch, b'test', '*')
Expand Down Expand Up @@ -107,6 +114,16 @@ def test_translate(self):
self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
# from the docs
self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z')
# squash consecutive stars
self.assertEqual(translate('*********'), r'(?s:.*)\Z')
self.assertEqual(translate('A*********'), r'(?s:A.*)\Z')
self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
# fancy translation to prevent exponential-time match failure
self.assertEqual(translate('**a*a****a'),
r'(?s:(?=(?P<g1>.*?a))(?P=g1)(?=(?P<g2>.*?a))(?P=g2).*a)\Z')


class FilterTestCase(unittest.TestCase):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
``fnmatch.fnmatch()`` could take exponential time in the presence of multiple ``*`` pattern characters. This was repaired by generating more elaborate regular expressions to avoid futile backtracking.