Skip to content

Commit b0d836e

Browse files
committed Mar 15, 2023
pythonGH-81079: Add case_sensitive argument to pathlib.Path.glob()
This argument allows case-sensitive matching to be enabled on Windows, and case-insensitive matching to be enabled on Posix.
1 parent 61479d4 commit b0d836e

File tree

4 files changed

+76
-52
lines changed

4 files changed

+76
-52
lines changed
 

‎Doc/library/pathlib.rst

+18-2
Original file line numberDiff line numberDiff line change
@@ -852,7 +852,7 @@ call fails (for example because the path doesn't exist).
852852
.. versionadded:: 3.5
853853

854854

855-
.. method:: Path.glob(pattern)
855+
.. method:: Path.glob(pattern, *, case_sensitive=None)
856856

857857
Glob the given relative *pattern* in the directory represented by this path,
858858
yielding all matching files (of any kind)::
@@ -873,6 +873,11 @@ call fails (for example because the path doesn't exist).
873873
PosixPath('setup.py'),
874874
PosixPath('test_pathlib.py')]
875875

876+
By default, this method matches paths using platform-specific casing rules:
877+
case-sensitive on POSIX, and case-insensitive on Windows. The
878+
*case_sensitive* keyword-only argument can be set to true or false to
879+
override this behaviour.
880+
876881
.. note::
877882
Using the "``**``" pattern in large directory trees may consume
878883
an inordinate amount of time.
@@ -883,6 +888,9 @@ call fails (for example because the path doesn't exist).
883888
Return only directories if *pattern* ends with a pathname component
884889
separator (:data:`~os.sep` or :data:`~os.altsep`).
885890

891+
.. versionchanged:: 3.12
892+
The *case_sensitive* argument was added.
893+
886894
.. method:: Path.group()
887895

888896
Return the name of the group owning the file. :exc:`KeyError` is raised
@@ -1268,7 +1276,7 @@ call fails (for example because the path doesn't exist).
12681276
.. versionadded:: 3.6
12691277
The *strict* argument (pre-3.6 behavior is strict).
12701278

1271-
.. method:: Path.rglob(pattern)
1279+
.. method:: Path.rglob(pattern, *, case_sensitive=None)
12721280

12731281
Glob the given relative *pattern* recursively. This is like calling
12741282
:meth:`Path.glob` with "``**/``" added in front of the *pattern*, where
@@ -1281,12 +1289,20 @@ call fails (for example because the path doesn't exist).
12811289
PosixPath('setup.py'),
12821290
PosixPath('test_pathlib.py')]
12831291

1292+
By default, this method matches paths using platform-specific casing rules:
1293+
case-sensitive on POSIX, and case-insensitive on Windows. The
1294+
*case_sensitive* keyword-only argument can be set to true or false to
1295+
override this behaviour.
1296+
12841297
.. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob
12851298

12861299
.. versionchanged:: 3.11
12871300
Return only directories if *pattern* ends with a pathname component
12881301
separator (:data:`~os.sep` or :data:`~os.altsep`).
12891302

1303+
.. versionchanged:: 3.12
1304+
The *case_sensitive* argument was added.
1305+
12901306
.. method:: Path.rmdir()
12911307

12921308
Remove this directory. The directory must be empty.

‎Lib/pathlib.py

+42-48
Original file line numberDiff line numberDiff line change
@@ -54,18 +54,16 @@ def _ignore_error(exception):
5454
return (getattr(exception, 'errno', None) in _IGNORED_ERRNOS or
5555
getattr(exception, 'winerror', None) in _IGNORED_WINERRORS)
5656

57-
58-
def _is_wildcard_pattern(pat):
59-
# Whether this pattern needs actual matching using fnmatch, or can
60-
# be looked up directly as a file.
61-
return "*" in pat or "?" in pat or "[" in pat
62-
6357
#
6458
# Globbing helpers
6559
#
6660

61+
def _is_case_sensitive(flavour):
62+
return flavour.normcase('Aa') == 'Aa'
63+
64+
6765
@functools.lru_cache()
68-
def _make_selector(pattern_parts, flavour):
66+
def _make_selector(pattern_parts, case_sensitive):
6967
pat = pattern_parts[0]
7068
child_parts = pattern_parts[1:]
7169
if not pat:
@@ -74,21 +72,21 @@ def _make_selector(pattern_parts, flavour):
7472
cls = _RecursiveWildcardSelector
7573
elif '**' in pat:
7674
raise ValueError("Invalid pattern: '**' can only be an entire path component")
77-
elif _is_wildcard_pattern(pat):
78-
cls = _WildcardSelector
75+
elif pat == '..':
76+
cls = _ParentSelector
7977
else:
80-
cls = _PreciseSelector
81-
return cls(pat, child_parts, flavour)
78+
cls = _WildcardSelector
79+
return cls(pat, child_parts, case_sensitive)
8280

8381

8482
class _Selector:
8583
"""A selector matches a specific glob pattern part against the children
8684
of a given path."""
8785

88-
def __init__(self, child_parts, flavour):
86+
def __init__(self, child_parts, case_sensitive):
8987
self.child_parts = child_parts
9088
if child_parts:
91-
self.successor = _make_selector(child_parts, flavour)
89+
self.successor = _make_selector(child_parts, case_sensitive)
9290
self.dironly = True
9391
else:
9492
self.successor = _TerminatingSelector()
@@ -98,44 +96,36 @@ def select_from(self, parent_path):
9896
"""Iterate over all child paths of `parent_path` matched by this
9997
selector. This can contain parent_path itself."""
10098
path_cls = type(parent_path)
101-
is_dir = path_cls.is_dir
102-
exists = path_cls.exists
10399
scandir = path_cls._scandir
104-
normcase = path_cls._flavour.normcase
105-
if not is_dir(parent_path):
100+
if not parent_path.is_dir():
106101
return iter([])
107-
return self._select_from(parent_path, is_dir, exists, scandir, normcase)
102+
return self._select_from(parent_path, scandir)
108103

109104

110105
class _TerminatingSelector:
111106

112-
def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
107+
def _select_from(self, parent_path, scandir):
113108
yield parent_path
114109

115110

116-
class _PreciseSelector(_Selector):
111+
class _ParentSelector(_Selector):
117112

118-
def __init__(self, name, child_parts, flavour):
119-
self.name = name
120-
_Selector.__init__(self, child_parts, flavour)
113+
def __init__(self, name, child_parts, case_sensitive):
114+
_Selector.__init__(self, child_parts, case_sensitive)
121115

122-
def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
123-
try:
124-
path = parent_path._make_child_relpath(self.name)
125-
if (is_dir if self.dironly else exists)(path):
126-
for p in self.successor._select_from(path, is_dir, exists, scandir, normcase):
127-
yield p
128-
except PermissionError:
129-
return
116+
def _select_from(self, parent_path, scandir):
117+
path = parent_path._make_child_relpath('..')
118+
return self.successor._select_from(path, scandir)
130119

131120

132121
class _WildcardSelector(_Selector):
133122

134-
def __init__(self, pat, child_parts, flavour):
135-
self.match = re.compile(fnmatch.translate(flavour.normcase(pat))).fullmatch
136-
_Selector.__init__(self, child_parts, flavour)
123+
def __init__(self, pat, child_parts, case_sensitive):
124+
flags = re.NOFLAG if case_sensitive else re.IGNORECASE
125+
self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch
126+
_Selector.__init__(self, child_parts, case_sensitive)
137127

138-
def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
128+
def _select_from(self, parent_path, scandir):
139129
try:
140130
# We must close the scandir() object before proceeding to
141131
# avoid exhausting file descriptors when globbing deep trees.
@@ -154,20 +144,20 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
154144
raise
155145
continue
156146
name = entry.name
157-
if self.match(normcase(name)):
147+
if self.match(name):
158148
path = parent_path._make_child_relpath(name)
159-
for p in self.successor._select_from(path, is_dir, exists, scandir, normcase):
149+
for p in self.successor._select_from(path, scandir):
160150
yield p
161151
except PermissionError:
162152
return
163153

164154

165155
class _RecursiveWildcardSelector(_Selector):
166156

167-
def __init__(self, pat, child_parts, flavour):
168-
_Selector.__init__(self, child_parts, flavour)
157+
def __init__(self, pat, child_parts, case_sensitive):
158+
_Selector.__init__(self, child_parts, case_sensitive)
169159

170-
def _iterate_directories(self, parent_path, is_dir, scandir):
160+
def _iterate_directories(self, parent_path, scandir):
171161
yield parent_path
172162
try:
173163
# We must close the scandir() object before proceeding to
@@ -183,18 +173,18 @@ def _iterate_directories(self, parent_path, is_dir, scandir):
183173
raise
184174
if entry_is_dir and not entry.is_symlink():
185175
path = parent_path._make_child_relpath(entry.name)
186-
for p in self._iterate_directories(path, is_dir, scandir):
176+
for p in self._iterate_directories(path, scandir):
187177
yield p
188178
except PermissionError:
189179
return
190180

191-
def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
181+
def _select_from(self, parent_path, scandir):
192182
try:
193183
yielded = set()
194184
try:
195185
successor_select = self.successor._select_from
196-
for starting_point in self._iterate_directories(parent_path, is_dir, scandir):
197-
for p in successor_select(starting_point, is_dir, exists, scandir, normcase):
186+
for starting_point in self._iterate_directories(parent_path, scandir):
187+
for p in successor_select(starting_point, scandir):
198188
if p not in yielded:
199189
yield p
200190
yielded.add(p)
@@ -763,7 +753,7 @@ def _scandir(self):
763753
# includes scandir(), which is used to implement glob().
764754
return os.scandir(self)
765755

766-
def glob(self, pattern):
756+
def glob(self, pattern, *, case_sensitive=None):
767757
"""Iterate over this subtree and yield all existing files (of any
768758
kind, including directories) matching the given relative pattern.
769759
"""
@@ -775,11 +765,13 @@ def glob(self, pattern):
775765
raise NotImplementedError("Non-relative patterns are unsupported")
776766
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
777767
pattern_parts.append('')
778-
selector = _make_selector(tuple(pattern_parts), self._flavour)
768+
if case_sensitive is None:
769+
case_sensitive = _is_case_sensitive(self._flavour)
770+
selector = _make_selector(tuple(pattern_parts), case_sensitive)
779771
for p in selector.select_from(self):
780772
yield p
781773

782-
def rglob(self, pattern):
774+
def rglob(self, pattern, *, case_sensitive=None):
783775
"""Recursively yield all existing files (of any kind, including
784776
directories) matching the given relative pattern, anywhere in
785777
this subtree.
@@ -790,7 +782,9 @@ def rglob(self, pattern):
790782
raise NotImplementedError("Non-relative patterns are unsupported")
791783
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
792784
pattern_parts.append('')
793-
selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour)
785+
if case_sensitive is None:
786+
case_sensitive = _is_case_sensitive(self._flavour)
787+
selector = _make_selector(("**",) + tuple(pattern_parts), case_sensitive)
794788
for p in selector.select_from(self):
795789
yield p
796790

‎Lib/test/test_pathlib.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -1777,6 +1777,18 @@ def _check(glob, expected):
17771777
else:
17781778
_check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE", "linkB"])
17791779

1780+
def test_glob_case_sensitive(self):
1781+
P = self.cls
1782+
def _check(path, pattern, case_sensitive, expected):
1783+
actual = {str(q) for q in path.glob(pattern, case_sensitive=case_sensitive)}
1784+
expected = {str(P(BASE, q)) for q in expected}
1785+
self.assertEqual(actual, expected)
1786+
path = P(BASE)
1787+
_check(path, "DIRB/FILE*", True, [])
1788+
_check(path, "DIRB/FILE*", False, ["dirB/fileB"])
1789+
_check(path, "dirb/file*", True, [])
1790+
_check(path, "dirb/file*", False, ["dirB/fileB"])
1791+
17801792
def test_rglob_common(self):
17811793
def _check(glob, expected):
17821794
self.assertEqual(set(glob), { P(BASE, q) for q in expected })
@@ -3053,15 +3065,15 @@ def test_glob(self):
30533065
self.assertEqual(set(p.glob("FILEa")), { P(BASE, "fileA") })
30543066
self.assertEqual(set(p.glob("*a\\")), { P(BASE, "dirA") })
30553067
self.assertEqual(set(p.glob("F*a")), { P(BASE, "fileA") })
3056-
self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\FILEa"})
3068+
self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"})
30573069
self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"})
30583070

30593071
def test_rglob(self):
30603072
P = self.cls
30613073
p = P(BASE, "dirC")
30623074
self.assertEqual(set(p.rglob("FILEd")), { P(BASE, "dirC/dirD/fileD") })
30633075
self.assertEqual(set(p.rglob("*\\")), { P(BASE, "dirC/dirD") })
3064-
self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\FILEd"})
3076+
self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"})
30653077

30663078
def test_expanduser(self):
30673079
P = self.cls
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add *case_sensitive* keyword-only argument to :meth:`pathlib.Path.glob` and
2+
:meth:`~pathlib.Path.rglob`.

0 commit comments

Comments
 (0)
Please sign in to comment.