Skip to content

Commit 0902c3d

Browse files
authored Jun 5, 2022
gh-89973: Fix re.error in the fnmatch module. (GH-93072)
Character ranges whose upper bound is less than the lower bound (e.g. [c-a]) are now interpreted as empty ranges, for compatibility with other glob pattern implementations. Previously such patterns raised re.error.
1 parent 6f8367d commit 0902c3d

File tree

3 files changed

+140
-7
lines changed

3 files changed

+140
-7
lines changed
 

‎Lib/fnmatch.py

+23-7
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def translate(pat):
102102
add('\\[')
103103
else:
104104
stuff = pat[i:j]
105-
if '--' not in stuff:
105+
if '-' not in stuff:
106106
stuff = stuff.replace('\\', r'\\')
107107
else:
108108
chunks = []
@@ -114,19 +114,35 @@ def translate(pat):
114114
chunks.append(pat[i:k])
115115
i = k+1
116116
k = k+3
117-
chunks.append(pat[i:j])
117+
chunk = pat[i:j]
118+
if chunk:
119+
chunks.append(chunk)
120+
else:
121+
chunks[-1] += '-'
122+
# Remove empty ranges -- invalid in RE.
123+
for k in range(len(chunks)-1, 0, -1):
124+
if chunks[k-1][-1] > chunks[k][0]:
125+
chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
126+
del chunks[k]
118127
# Escape backslashes and hyphens for set difference (--).
119128
# Hyphens that create ranges shouldn't be escaped.
120129
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
121130
for s in chunks)
122131
# Escape set operations (&&, ~~ and ||).
123132
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
124133
i = j+1
125-
if stuff[0] == '!':
126-
stuff = '^' + stuff[1:]
127-
elif stuff[0] in ('^', '['):
128-
stuff = '\\' + stuff
129-
add(f'[{stuff}]')
134+
if not stuff:
135+
# Empty range: never match.
136+
add('(?!)')
137+
elif stuff == '!':
138+
# Negated empty range: match any character.
139+
add('.')
140+
else:
141+
if stuff[0] == '!':
142+
stuff = '^' + stuff[1:]
143+
elif stuff[0] in ('^', '['):
144+
stuff = '\\' + stuff
145+
add(f'[{stuff}]')
130146
else:
131147
add(re.escape(c))
132148
assert i == n

‎Lib/test/test_fnmatch.py

+114
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import unittest
44
import os
5+
import string
56
import warnings
67

78
from fnmatch import fnmatch, fnmatchcase, translate, filter
@@ -91,6 +92,119 @@ def test_sep(self):
9192
check('usr/bin', 'usr\\bin', normsep)
9293
check('usr\\bin', 'usr\\bin')
9394

95+
def test_char_set(self):
96+
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
97+
check = self.check_match
98+
tescases = string.ascii_lowercase + string.digits + string.punctuation
99+
for c in tescases:
100+
check(c, '[az]', c in 'az')
101+
check(c, '[!az]', c not in 'az')
102+
# Case insensitive.
103+
for c in tescases:
104+
check(c, '[AZ]', (c in 'az') and ignorecase)
105+
check(c, '[!AZ]', (c not in 'az') or not ignorecase)
106+
for c in string.ascii_uppercase:
107+
check(c, '[az]', (c in 'AZ') and ignorecase)
108+
check(c, '[!az]', (c not in 'AZ') or not ignorecase)
109+
# Repeated same character.
110+
for c in tescases:
111+
check(c, '[aa]', c == 'a')
112+
# Special cases.
113+
for c in tescases:
114+
check(c, '[^az]', c in '^az')
115+
check(c, '[[az]', c in '[az')
116+
check(c, r'[!]]', c != ']')
117+
check('[', '[')
118+
check('[]', '[]')
119+
check('[!', '[!')
120+
check('[!]', '[!]')
121+
122+
def test_range(self):
123+
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
124+
normsep = os.path.normcase('\\') == os.path.normcase('/')
125+
check = self.check_match
126+
tescases = string.ascii_lowercase + string.digits + string.punctuation
127+
for c in tescases:
128+
check(c, '[b-d]', c in 'bcd')
129+
check(c, '[!b-d]', c not in 'bcd')
130+
check(c, '[b-dx-z]', c in 'bcdxyz')
131+
check(c, '[!b-dx-z]', c not in 'bcdxyz')
132+
# Case insensitive.
133+
for c in tescases:
134+
check(c, '[B-D]', (c in 'bcd') and ignorecase)
135+
check(c, '[!B-D]', (c not in 'bcd') or not ignorecase)
136+
for c in string.ascii_uppercase:
137+
check(c, '[b-d]', (c in 'BCD') and ignorecase)
138+
check(c, '[!b-d]', (c not in 'BCD') or not ignorecase)
139+
# Upper bound == lower bound.
140+
for c in tescases:
141+
check(c, '[b-b]', c == 'b')
142+
# Special cases.
143+
for c in tescases:
144+
check(c, '[!-#]', c not in '-#')
145+
check(c, '[!--.]', c not in '-.')
146+
check(c, '[^-`]', c in '^_`')
147+
if not (normsep and c == '/'):
148+
check(c, '[[-^]', c in r'[\]^')
149+
check(c, r'[\-^]', c in r'\]^')
150+
check(c, '[b-]', c in '-b')
151+
check(c, '[!b-]', c not in '-b')
152+
check(c, '[-b]', c in '-b')
153+
check(c, '[!-b]', c not in '-b')
154+
check(c, '[-]', c in '-')
155+
check(c, '[!-]', c not in '-')
156+
# Upper bound is less than lower bound: error in RE.
157+
for c in tescases:
158+
check(c, '[d-b]', False)
159+
check(c, '[!d-b]', True)
160+
check(c, '[d-bx-z]', c in 'xyz')
161+
check(c, '[!d-bx-z]', c not in 'xyz')
162+
check(c, '[d-b^-`]', c in '^_`')
163+
if not (normsep and c == '/'):
164+
check(c, '[d-b[-^]', c in r'[\]^')
165+
166+
def test_sep_in_char_set(self):
167+
normsep = os.path.normcase('\\') == os.path.normcase('/')
168+
check = self.check_match
169+
check('/', r'[/]')
170+
check('\\', r'[\]')
171+
check('/', r'[\]', normsep)
172+
check('\\', r'[/]', normsep)
173+
check('[/]', r'[/]', False)
174+
check(r'[\\]', r'[/]', False)
175+
check('\\', r'[\t]')
176+
check('/', r'[\t]', normsep)
177+
check('t', r'[\t]')
178+
check('\t', r'[\t]', False)
179+
180+
def test_sep_in_range(self):
181+
normsep = os.path.normcase('\\') == os.path.normcase('/')
182+
check = self.check_match
183+
check('a/b', 'a[.-0]b', not normsep)
184+
check('a\\b', 'a[.-0]b', False)
185+
check('a\\b', 'a[Z-^]b', not normsep)
186+
check('a/b', 'a[Z-^]b', False)
187+
188+
check('a/b', 'a[/-0]b', not normsep)
189+
check(r'a\b', 'a[/-0]b', False)
190+
check('a[/-0]b', 'a[/-0]b', False)
191+
check(r'a[\-0]b', 'a[/-0]b', False)
192+
193+
check('a/b', 'a[.-/]b')
194+
check(r'a\b', 'a[.-/]b', normsep)
195+
check('a[.-/]b', 'a[.-/]b', False)
196+
check(r'a[.-\]b', 'a[.-/]b', False)
197+
198+
check(r'a\b', r'a[\-^]b')
199+
check('a/b', r'a[\-^]b', normsep)
200+
check(r'a[\-^]b', r'a[\-^]b', False)
201+
check('a[/-^]b', r'a[\-^]b', False)
202+
203+
check(r'a\b', r'a[Z-\]b', not normsep)
204+
check('a/b', r'a[Z-\]b', False)
205+
check(r'a[Z-\]b', r'a[Z-\]b', False)
206+
check('a[Z-/]b', r'a[Z-\]b', False)
207+
94208
def test_warnings(self):
95209
with warnings.catch_warnings():
96210
warnings.simplefilter('error', Warning)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix :exc:`re.error` raised in :mod:`fnmatch` if the pattern contains a
2+
character range whose upper bound is lower than its lower bound (e.g. ``[c-a]``).
3+
Now such ranges are interpreted as empty ranges.

0 commit comments

Comments
 (0)
Please sign in to comment.