Skip to content

Commit ab4b759

Browse files
pythongh-89973: Fix re.error in the fnmatch module.
Character ranges whose upper bound is less than the lower bound are now interpreted as empty ranges, for compatibility with other glob pattern implementations. Previously such patterns raised re.error.
1 parent e5d8dbd commit ab4b759

File tree

3 files changed

+91
-6
lines changed

3 files changed

+91
-6
lines changed

Lib/fnmatch.py

+17-6
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def translate(pat):
102102
add('\\[')
103103
else:
104104
stuff = pat[i:j]
105-
if '--' not in stuff:
105+
if '-' not in stuff:
106106
stuff = stuff.replace('\\', r'\\')
107107
else:
108108
chunks = []
@@ -115,18 +115,29 @@ def translate(pat):
115115
i = k+1
116116
k = k+3
117117
chunks.append(pat[i:j])
118+
if not chunks[-1]:
119+
del chunks[-1]
120+
chunks[-1] += '-'
121+
for k in range(len(chunks)-1, 0, -1):
122+
if chunks[k-1][-1] > chunks[k][0]:
123+
chunks[k-1:k+1] = [chunks[k-1][:-1] + chunks[k][1:]]
118124
# Escape backslashes and hyphens for set difference (--).
119125
# Hyphens that create ranges shouldn't be escaped.
120126
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
121127
for s in chunks)
122128
# Escape set operations (&&, ~~ and ||).
123129
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
124130
i = j+1
125-
if stuff[0] == '!':
126-
stuff = '^' + stuff[1:]
127-
elif stuff[0] in ('^', '['):
128-
stuff = '\\' + stuff
129-
add(f'[{stuff}]')
131+
if not stuff:
132+
add(f'(?!)') # never match
133+
elif stuff == '!':
134+
add(f'.') # match any character
135+
else:
136+
if stuff[0] == '!':
137+
stuff = '^' + stuff[1:]
138+
elif stuff[0] in ('^', '['):
139+
stuff = '\\' + stuff
140+
add(f'[{stuff}]')
130141
else:
131142
add(re.escape(c))
132143
assert i == n

Lib/test/test_fnmatch.py

+71
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import unittest
44
import os
5+
import string
56
import warnings
67

78
from fnmatch import fnmatch, fnmatchcase, translate, filter
@@ -91,6 +92,76 @@ def test_sep(self):
9192
check('usr/bin', 'usr\\bin', normsep)
9293
check('usr\\bin', 'usr\\bin')
9394

95+
def test_char_set(self):
96+
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
97+
check = self.check_match
98+
tescases = string.ascii_lowercase + string.digits + string.punctuation
99+
for c in tescases:
100+
check(c, '[az]', c in 'az')
101+
check(c, '[!az]', c not in 'az')
102+
# Case insensitive.
103+
for c in tescases:
104+
check(c, '[AZ]', (c in 'az') and ignorecase)
105+
check(c, '[!AZ]', (c not in 'az') or not ignorecase)
106+
for c in string.ascii_uppercase:
107+
check(c, '[az]', (c in 'AZ') and ignorecase)
108+
check(c, '[!az]', (c not in 'AZ') or not ignorecase)
109+
# Repeated same character.
110+
for c in tescases:
111+
check(c, '[aa]', c == 'a')
112+
# Special cases.
113+
for c in tescases:
114+
check(c, '[^az]', c in '^az')
115+
check(c, '[[az]', c in '[az')
116+
check(c, r'[\]', c == '\\')
117+
check(c, r'[\az]', c in r'\az')
118+
check(c, r'[!]]', c != ']')
119+
check('[', '[')
120+
check('[]', '[]')
121+
check('[!', '[!')
122+
check('[!]', '[!]')
123+
124+
def test_range(self):
125+
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
126+
check = self.check_match
127+
tescases = string.ascii_lowercase + string.digits + string.punctuation
128+
for c in tescases:
129+
check(c, '[b-d]', c in 'bcd')
130+
check(c, '[!b-d]', c not in 'bcd')
131+
check(c, '[b-dx-z]', c in 'bcdxyz')
132+
check(c, '[!b-dx-z]', c not in 'bcdxyz')
133+
# Case insensitive.
134+
for c in tescases:
135+
check(c, '[B-D]', (c in 'bcd') and ignorecase)
136+
check(c, '[!B-D]', (c not in 'bcd') or not ignorecase)
137+
for c in string.ascii_uppercase:
138+
check(c, '[b-d]', (c in 'BCD') and ignorecase)
139+
check(c, '[!b-d]', (c not in 'BCD') or not ignorecase)
140+
# Upper bound == lower bound.
141+
for c in tescases:
142+
check(c, '[b-b]', c == 'b')
143+
# Special cases.
144+
for c in tescases:
145+
check(c, '[!-#]', c not in '-#')
146+
check(c, '[!--/]', c not in '-./')
147+
check(c, '[^-`]', c in '^_`')
148+
check(c, '[[-^]', c in r'[\]^')
149+
check(c, r'[\-^]', c in r'\]^')
150+
check(c, '[b-]', c in '-b')
151+
check(c, '[!b-]', c not in '-b')
152+
check(c, '[-b]', c in '-b')
153+
check(c, '[!-b]', c not in '-b')
154+
check(c, '[-]', c in '-')
155+
check(c, '[!-]', c not in '-')
156+
# Upper bound is less than lower bound: error in RE.
157+
for c in tescases:
158+
check(c, '[d-b]', False)
159+
check(c, '[!d-b]', True)
160+
check(c, '[d-bx-z]', c in 'xyz')
161+
check(c, '[!d-bx-z]', c not in 'xyz')
162+
check(c, '[d-b^-`]', c in '^_`')
163+
check(c, '[d-b[-^]', c in '[\\]^')
164+
94165
def test_warnings(self):
95166
with warnings.catch_warnings():
96167
warnings.simplefilter('error', Warning)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix :exc:`re.error` raised in :mod:`fnmatch` if the pattern contains a
2+
character range with upper bound lower than lower bound (e.g. ``[c-a]``).
3+
Now such ranges are interpreted as empty ranges.

0 commit comments

Comments
 (0)