Skip to content

Commit

Permalink
GH-81079: Add case_sensitive argument to pathlib.Path.glob()
Browse files Browse the repository at this point in the history
This argument allows case-sensitive matching to be enabled on Windows, and
case-insensitive matching to be enabled on POSIX.
  • Loading branch information
barneygale committed Mar 15, 2023
1 parent 61479d4 commit b0d836e
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 52 deletions.
20 changes: 18 additions & 2 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.5


.. method:: Path.glob(pattern)
.. method:: Path.glob(pattern, *, case_sensitive=None)

Glob the given relative *pattern* in the directory represented by this path,
yielding all matching files (of any kind)::
Expand All @@ -873,6 +873,11 @@ call fails (for example because the path doesn't exist).
PosixPath('setup.py'),
PosixPath('test_pathlib.py')]

By default, this method matches paths using platform-specific casing rules:
case-sensitive on POSIX, and case-insensitive on Windows. The
*case_sensitive* keyword-only argument can be set to true or false to
override this behaviour.

.. note::
Using the "``**``" pattern in large directory trees may consume
an inordinate amount of time.
Expand All @@ -883,6 +888,9 @@ call fails (for example because the path doesn't exist).
Return only directories if *pattern* ends with a pathname component
separator (:data:`~os.sep` or :data:`~os.altsep`).

.. versionadded:: 3.12
The *case_sensitive* argument.

.. method:: Path.group()

Return the name of the group owning the file. :exc:`KeyError` is raised
Expand Down Expand Up @@ -1268,7 +1276,7 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.6
The *strict* argument (pre-3.6 behavior is strict).

.. method:: Path.rglob(pattern)
.. method:: Path.rglob(pattern, *, case_sensitive=None)

Glob the given relative *pattern* recursively. This is like calling
:func:`Path.glob` with "``**/``" added in front of the *pattern*, where
Expand All @@ -1281,12 +1289,20 @@ call fails (for example because the path doesn't exist).
PosixPath('setup.py'),
PosixPath('test_pathlib.py')]

By default, this method matches paths using platform-specific casing rules:
case-sensitive on POSIX, and case-insensitive on Windows. The
*case_sensitive* keyword-only argument can be set to true or false to
override this behaviour.

.. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob

.. versionchanged:: 3.11
Return only directories if *pattern* ends with a pathname component
separator (:data:`~os.sep` or :data:`~os.altsep`).

.. versionadded:: 3.12
The *case_sensitive* argument.

.. method:: Path.rmdir()

Remove this directory. The directory must be empty.
Expand Down
90 changes: 42 additions & 48 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,16 @@ def _ignore_error(exception):
return (getattr(exception, 'errno', None) in _IGNORED_ERRNOS or
getattr(exception, 'winerror', None) in _IGNORED_WINERRORS)


def _is_wildcard_pattern(pat):
# Whether this pattern needs actual matching using fnmatch, or can
# be looked up directly as a file.
return "*" in pat or "?" in pat or "[" in pat

#
# Globbing helpers
#

def _is_case_sensitive(flavour):
return flavour.normcase('Aa') == 'Aa'


@functools.lru_cache()
def _make_selector(pattern_parts, flavour):
def _make_selector(pattern_parts, case_sensitive):
pat = pattern_parts[0]
child_parts = pattern_parts[1:]
if not pat:
Expand All @@ -74,21 +72,21 @@ def _make_selector(pattern_parts, flavour):
cls = _RecursiveWildcardSelector
elif '**' in pat:
raise ValueError("Invalid pattern: '**' can only be an entire path component")
elif _is_wildcard_pattern(pat):
cls = _WildcardSelector
elif pat == '..':
cls = _ParentSelector
else:
cls = _PreciseSelector
return cls(pat, child_parts, flavour)
cls = _WildcardSelector
return cls(pat, child_parts, case_sensitive)


class _Selector:
"""A selector matches a specific glob pattern part against the children
of a given path."""

def __init__(self, child_parts, flavour):
def __init__(self, child_parts, case_sensitive):
self.child_parts = child_parts
if child_parts:
self.successor = _make_selector(child_parts, flavour)
self.successor = _make_selector(child_parts, case_sensitive)
self.dironly = True
else:
self.successor = _TerminatingSelector()
Expand All @@ -98,44 +96,36 @@ def select_from(self, parent_path):
"""Iterate over all child paths of `parent_path` matched by this
selector. This can contain parent_path itself."""
path_cls = type(parent_path)
is_dir = path_cls.is_dir
exists = path_cls.exists
scandir = path_cls._scandir
normcase = path_cls._flavour.normcase
if not is_dir(parent_path):
if not parent_path.is_dir():
return iter([])
return self._select_from(parent_path, is_dir, exists, scandir, normcase)
return self._select_from(parent_path, scandir)


class _TerminatingSelector:

def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, scandir):
yield parent_path


class _PreciseSelector(_Selector):
class _ParentSelector(_Selector):

def __init__(self, name, child_parts, flavour):
self.name = name
_Selector.__init__(self, child_parts, flavour)
def __init__(self, name, child_parts, case_sensitive):
_Selector.__init__(self, child_parts, case_sensitive)

def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
try:
path = parent_path._make_child_relpath(self.name)
if (is_dir if self.dironly else exists)(path):
for p in self.successor._select_from(path, is_dir, exists, scandir, normcase):
yield p
except PermissionError:
return
def _select_from(self, parent_path, scandir):
path = parent_path._make_child_relpath('..')
return self.successor._select_from(path, scandir)


class _WildcardSelector(_Selector):

def __init__(self, pat, child_parts, flavour):
self.match = re.compile(fnmatch.translate(flavour.normcase(pat))).fullmatch
_Selector.__init__(self, child_parts, flavour)
def __init__(self, pat, child_parts, case_sensitive):
flags = re.NOFLAG if case_sensitive else re.IGNORECASE
self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch
_Selector.__init__(self, child_parts, case_sensitive)

def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, scandir):
try:
# We must close the scandir() object before proceeding to
# avoid exhausting file descriptors when globbing deep trees.
Expand All @@ -154,20 +144,20 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
raise
continue
name = entry.name
if self.match(normcase(name)):
if self.match(name):
path = parent_path._make_child_relpath(name)
for p in self.successor._select_from(path, is_dir, exists, scandir, normcase):
for p in self.successor._select_from(path, scandir):
yield p
except PermissionError:
return


class _RecursiveWildcardSelector(_Selector):

def __init__(self, pat, child_parts, flavour):
_Selector.__init__(self, child_parts, flavour)
def __init__(self, pat, child_parts, case_sensitive):
_Selector.__init__(self, child_parts, case_sensitive)

def _iterate_directories(self, parent_path, is_dir, scandir):
def _iterate_directories(self, parent_path, scandir):
yield parent_path
try:
# We must close the scandir() object before proceeding to
Expand All @@ -183,18 +173,18 @@ def _iterate_directories(self, parent_path, is_dir, scandir):
raise
if entry_is_dir and not entry.is_symlink():
path = parent_path._make_child_relpath(entry.name)
for p in self._iterate_directories(path, is_dir, scandir):
for p in self._iterate_directories(path, scandir):
yield p
except PermissionError:
return

def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, scandir):
try:
yielded = set()
try:
successor_select = self.successor._select_from
for starting_point in self._iterate_directories(parent_path, is_dir, scandir):
for p in successor_select(starting_point, is_dir, exists, scandir, normcase):
for starting_point in self._iterate_directories(parent_path, scandir):
for p in successor_select(starting_point, scandir):
if p not in yielded:
yield p
yielded.add(p)
Expand Down Expand Up @@ -763,7 +753,7 @@ def _scandir(self):
# includes scandir(), which is used to implement glob().
return os.scandir(self)

def glob(self, pattern):
def glob(self, pattern, *, case_sensitive=None):
"""Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern.
"""
Expand All @@ -775,11 +765,13 @@ def glob(self, pattern):
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('')
selector = _make_selector(tuple(pattern_parts), self._flavour)
if case_sensitive is None:
case_sensitive = _is_case_sensitive(self._flavour)
selector = _make_selector(tuple(pattern_parts), case_sensitive)
for p in selector.select_from(self):
yield p

def rglob(self, pattern):
def rglob(self, pattern, *, case_sensitive=None):
"""Recursively yield all existing files (of any kind, including
directories) matching the given relative pattern, anywhere in
this subtree.
Expand All @@ -790,7 +782,9 @@ def rglob(self, pattern):
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('')
selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour)
if case_sensitive is None:
case_sensitive = _is_case_sensitive(self._flavour)
selector = _make_selector(("**",) + tuple(pattern_parts), case_sensitive)
for p in selector.select_from(self):
yield p

Expand Down
16 changes: 14 additions & 2 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1777,6 +1777,18 @@ def _check(glob, expected):
else:
_check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE", "linkB"])

def test_glob_case_sensitive(self):
    """Check that glob() honours an explicit *case_sensitive* argument.

    The patterns differ from the on-disk names only by case, so they
    must match nothing when case_sensitive=True and must match
    ``dirB/fileB`` when case_sensitive=False.
    """
    P = self.cls

    def _check(path, pattern, case_sensitive, expected):
        # Compare as strings so the on-disk casing of each result is
        # checked, not just path equality.
        actual = {str(q) for q in path.glob(pattern, case_sensitive=case_sensitive)}
        expected = {str(P(BASE, q)) for q in expected}
        self.assertEqual(actual, expected)

    path = P(BASE)
    _check(path, "DIRB/FILE*", True, [])
    _check(path, "DIRB/FILE*", False, ["dirB/fileB"])
    _check(path, "dirb/file*", True, [])
    _check(path, "dirb/file*", False, ["dirB/fileB"])

def test_rglob_common(self):
def _check(glob, expected):
self.assertEqual(set(glob), { P(BASE, q) for q in expected })
Expand Down Expand Up @@ -3053,15 +3065,15 @@ def test_glob(self):
self.assertEqual(set(p.glob("FILEa")), { P(BASE, "fileA") })
self.assertEqual(set(p.glob("*a\\")), { P(BASE, "dirA") })
self.assertEqual(set(p.glob("F*a")), { P(BASE, "fileA") })
self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\FILEa"})
self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"})
self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"})

def test_rglob(self):
P = self.cls
p = P(BASE, "dirC")
self.assertEqual(set(p.rglob("FILEd")), { P(BASE, "dirC/dirD/fileD") })
self.assertEqual(set(p.rglob("*\\")), { P(BASE, "dirC/dirD") })
self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\FILEd"})
self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"})

def test_expanduser(self):
P = self.cls
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add *case_sensitive* keyword-only argument to :meth:`pathlib.Path.glob` and
:meth:`~pathlib.Path.rglob`.

0 comments on commit b0d836e

Please sign in to comment.