From b0d836e7f40b983abf64b7dd3d50d5de1d27ab29 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 15 Mar 2023 00:39:14 +0000 Subject: [PATCH 1/8] GH-81079: Add case_sensitive argument to pathlib.Path.glob() This argument allows case-sensitive matching to be enabled on Windows, and case-insensitive matching to be enabled on Posix. --- Doc/library/pathlib.rst | 20 ++++- Lib/pathlib.py | 90 +++++++++---------- Lib/test/test_pathlib.py | 16 +++- ...3-03-15-00-37-43.gh-issue-81079.heTAod.rst | 2 + 4 files changed, 76 insertions(+), 52 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 8e91936680fab8..ac793c416e4822 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -852,7 +852,7 @@ call fails (for example because the path doesn't exist). .. versionadded:: 3.5 -.. method:: Path.glob(pattern) +.. method:: Path.glob(pattern, *, case_sensitive=None) Glob the given relative *pattern* in the directory represented by this path, yielding all matching files (of any kind):: @@ -873,6 +873,11 @@ call fails (for example because the path doesn't exist). PosixPath('setup.py'), PosixPath('test_pathlib.py')] + By default, this method matches paths using platform-specific casing rules: + case-sensitive on POSIX, and case-insensitive on Windows. The + *case_sensitive* keyword-only argument can be set to true or false to + override this behaviour. + .. note:: Using the "``**``" pattern in large directory trees may consume an inordinate amount of time. @@ -883,6 +888,9 @@ call fails (for example because the path doesn't exist). Return only directories if *pattern* ends with a pathname components separator (:data:`~os.sep` or :data:`~os.altsep`). + .. versionadded:: 3.12 + The *case_sensitive* argument. + .. method:: Path.group() Return the name of the group owning the file. :exc:`KeyError` is raised @@ -1268,7 +1276,7 @@ call fails (for example because the path doesn't exist). .. versionadded:: 3.6 The *strict* argument (pre-3.6 behavior is strict). -.. method:: Path.rglob(pattern) +.. method:: Path.rglob(pattern, *, case_sensitive=None) Glob the given relative *pattern* recursively. This is like calling :func:`Path.glob` with "``**/``" added in front of the *pattern*, where @@ -1281,12 +1289,20 @@ call fails (for example because the path doesn't exist). PosixPath('setup.py'), PosixPath('test_pathlib.py')] + By default, this method matches paths using platform-specific casing rules: + case-sensitive on POSIX, and case-insensitive on Windows. The + *case_sensitive* keyword-only argument can be set to true or false to + override this behaviour. + .. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob .. versionchanged:: 3.11 Return only directories if *pattern* ends with a pathname components separator (:data:`~os.sep` or :data:`~os.altsep`). + .. versionadded:: 3.12 + The *case_sensitive* argument. + .. method:: Path.rmdir() Remove this directory. The directory must be empty. diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 55c44f12e5a2fb..23f9290861ae57 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -54,18 +54,16 @@ def _ignore_error(exception): return (getattr(exception, 'errno', None) in _IGNORED_ERRNOS or getattr(exception, 'winerror', None) in _IGNORED_WINERRORS) - -def _is_wildcard_pattern(pat): - # Whether this pattern needs actual matching using fnmatch, or can - # be looked up directly as a file. - return "*" in pat or "?" in pat or "[" in pat - # # Globbing helpers # +def _is_case_sensitive(flavour): + return flavour.normcase('Aa') == 'Aa' + + @functools.lru_cache() -def _make_selector(pattern_parts, flavour): +def _make_selector(pattern_parts, case_sensitive): pat = pattern_parts[0] child_parts = pattern_parts[1:] if not pat: @@ -74,21 +72,21 @@ def _make_selector(pattern_parts, flavour): cls = _RecursiveWildcardSelector elif '**' in pat: raise ValueError("Invalid pattern: '**' can only be an entire path component") - elif _is_wildcard_pattern(pat): - cls = _WildcardSelector + elif pat == '..': + cls = _ParentSelector else: - cls = _PreciseSelector - return cls(pat, child_parts, flavour) + cls = _WildcardSelector + return cls(pat, child_parts, case_sensitive) class _Selector: """A selector matches a specific glob pattern part against the children of a given path.""" - def __init__(self, child_parts, flavour): + def __init__(self, child_parts, case_sensitive): self.child_parts = child_parts if child_parts: - self.successor = _make_selector(child_parts, flavour) + self.successor = _make_selector(child_parts, case_sensitive) self.dironly = True else: self.successor = _TerminatingSelector() @@ -98,44 +96,36 @@ def select_from(self, parent_path): """Iterate over all child paths of `parent_path` matched by this selector. This can contain parent_path itself.""" path_cls = type(parent_path) - is_dir = path_cls.is_dir - exists = path_cls.exists scandir = path_cls._scandir - normcase = path_cls._flavour.normcase - if not is_dir(parent_path): + if not parent_path.is_dir(): return iter([]) - return self._select_from(parent_path, is_dir, exists, scandir, normcase) + return self._select_from(parent_path, scandir) class _TerminatingSelector: - def _select_from(self, parent_path, is_dir, exists, scandir, normcase): + def _select_from(self, parent_path, scandir): yield parent_path -class _PreciseSelector(_Selector): +class _ParentSelector(_Selector): - def __init__(self, name, child_parts, flavour): - self.name = name - _Selector.__init__(self, child_parts, flavour) + def __init__(self, name, child_parts, case_sensitive): + _Selector.__init__(self, child_parts, case_sensitive) - def _select_from(self, parent_path, is_dir, exists, scandir, normcase): - try: - path = parent_path._make_child_relpath(self.name) - if (is_dir if self.dironly else exists)(path): - for p in self.successor._select_from(path, is_dir, exists, scandir, normcase): - yield p - except PermissionError: - return + def _select_from(self, parent_path, scandir): + path = parent_path._make_child_relpath('..') + return self.successor._select_from(path, scandir) class _WildcardSelector(_Selector): - def __init__(self, pat, child_parts, flavour): - self.match = re.compile(fnmatch.translate(flavour.normcase(pat))).fullmatch - _Selector.__init__(self, child_parts, flavour) + def __init__(self, pat, child_parts, case_sensitive): + flags = re.NOFLAG if case_sensitive else re.IGNORECASE + self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch + _Selector.__init__(self, child_parts, case_sensitive) - def _select_from(self, parent_path, is_dir, exists, scandir, normcase): + def _select_from(self, parent_path, scandir): try: # We must close the scandir() object before proceeding to # avoid exhausting file descriptors when globbing deep trees. @@ -154,9 +144,9 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase): raise continue name = entry.name - if self.match(normcase(name)): + if self.match(name): path = parent_path._make_child_relpath(name) - for p in self.successor._select_from(path, is_dir, exists, scandir, normcase): + for p in self.successor._select_from(path, scandir): yield p except PermissionError: return @@ -164,10 +154,10 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase): class _RecursiveWildcardSelector(_Selector): - def __init__(self, pat, child_parts, flavour): - _Selector.__init__(self, child_parts, flavour) + def __init__(self, pat, child_parts, case_sensitive): + _Selector.__init__(self, child_parts, case_sensitive) - def _iterate_directories(self, parent_path, is_dir, scandir): + def _iterate_directories(self, parent_path, scandir): yield parent_path try: # We must close the scandir() object before proceeding to @@ -183,18 +173,18 @@ def _iterate_directories(self, parent_path, is_dir, scandir): raise if entry_is_dir and not entry.is_symlink(): path = parent_path._make_child_relpath(entry.name) - for p in self._iterate_directories(path, is_dir, scandir): + for p in self._iterate_directories(path, scandir): yield p except PermissionError: return - def _select_from(self, parent_path, is_dir, exists, scandir, normcase): + def _select_from(self, parent_path, scandir): try: yielded = set() try: successor_select = self.successor._select_from - for starting_point in self._iterate_directories(parent_path, is_dir, scandir): - for p in successor_select(starting_point, is_dir, exists, scandir, normcase): + for starting_point in self._iterate_directories(parent_path, scandir): + for p in successor_select(starting_point, scandir): if p not in yielded: yield p yielded.add(p) @@ -763,7 +753,7 @@ def _scandir(self): # includes scandir(), which is used to implement glob(). return os.scandir(self) - def glob(self, pattern): + def glob(self, pattern, *, case_sensitive=None): """Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. """ @@ -775,11 +765,13 @@ def glob(self, pattern): raise NotImplementedError("Non-relative patterns are unsupported") if pattern[-1] in (self._flavour.sep, self._flavour.altsep): pattern_parts.append('') - selector = _make_selector(tuple(pattern_parts), self._flavour) + if case_sensitive is None: + case_sensitive = _is_case_sensitive(self._flavour) + selector = _make_selector(tuple(pattern_parts), case_sensitive) for p in selector.select_from(self): yield p - def rglob(self, pattern): + def rglob(self, pattern, *, case_sensitive=None): """Recursively yield all existing files (of any kind, including directories) matching the given relative pattern, anywhere in this subtree. @@ -790,7 +782,9 @@ def rglob(self, pattern): raise NotImplementedError("Non-relative patterns are unsupported") if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep): pattern_parts.append('') - selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour) + if case_sensitive is None: + case_sensitive = _is_case_sensitive(self._flavour) + selector = _make_selector(("**",) + tuple(pattern_parts), case_sensitive) for p in selector.select_from(self): yield p diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index f05dead5886743..cd43d3854dad15 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1777,6 +1777,18 @@ def _check(glob, expected): else: _check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE", "linkB"]) + def test_glob_case_sensitive(self): + P = self.cls + def _check(path, pattern, case_sensitive, expected): + actual = {str(q) for q in path.glob(pattern, case_sensitive=case_sensitive)} + expected = {str(P(BASE, q)) for q in expected} + self.assertEqual(actual, expected) + path = P(BASE) + _check(path, "DIRB/FILE*", True, []) + _check(path, "DIRB/FILE*", False, ["dirB/fileB"]) + _check(path, "dirb/file*", True, []) + _check(path, "dirb/file*", False, ["dirB/fileB"]) + def test_rglob_common(self): def _check(glob, expected): self.assertEqual(set(glob), { P(BASE, q) for q in expected }) @@ -3053,7 +3065,7 @@ def test_glob(self): self.assertEqual(set(p.glob("FILEa")), { P(BASE, "fileA") }) self.assertEqual(set(p.glob("*a\\")), { P(BASE, "dirA") }) self.assertEqual(set(p.glob("F*a")), { P(BASE, "fileA") }) - self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\FILEa"}) + self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"}) self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"}) def test_rglob(self): @@ -3061,7 +3073,7 @@ def test_rglob(self): p = P(BASE, "dirC") self.assertEqual(set(p.rglob("FILEd")), { P(BASE, "dirC/dirD/fileD") }) self.assertEqual(set(p.rglob("*\\")), { P(BASE, "dirC/dirD") }) - self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\FILEd"}) + self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"}) def test_expanduser(self): P = self.cls diff --git a/Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst b/Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst new file mode 100644 index 00000000000000..ef5690533985d5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst @@ -0,0 +1,2 @@ +Add *case_sensitive* keyword-only argument to :meth:`pathlib.Path.glob` and +:meth:`~pathlib.Path.rglob`. From dc82494fb4b70551da06de47527cf17ccb495b1b Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 2 May 2023 20:07:19 +0100 Subject: [PATCH 2/8] Describe behaviour when case_sensitive is None. --- Doc/library/pathlib.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index ac793c416e4822..779e8741b16528 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -873,10 +873,10 @@ call fails (for example because the path doesn't exist). PosixPath('setup.py'), PosixPath('test_pathlib.py')] - By default, this method matches paths using platform-specific casing rules: - case-sensitive on POSIX, and case-insensitive on Windows. The - *case_sensitive* keyword-only argument can be set to true or false to - override this behaviour. + By default, or when the *case_sensitive* keyword-only argument is set to + ``None``, this method matches paths using platform-specific casing rules: + case-sensitive on POSIX, and case-insensitive on Windows. Set + *case_sensitive* to ``True`` or ``False`` to override this behaviour. .. note:: Using the "``**``" pattern in large directory trees may consume @@ -1289,10 +1289,10 @@ call fails (for example because the path doesn't exist). PosixPath('setup.py'), PosixPath('test_pathlib.py')] - By default, this method matches paths using platform-specific casing rules: - case-sensitive on POSIX, and case-insensitive on Windows. The - *case_sensitive* keyword-only argument can be set to true or false to - override this behaviour. + By default, or when the *case_sensitive* keyword-only argument is set to + ``None``, this method matches paths using platform-specific casing rules: + case-sensitive on POSIX, and case-insensitive on Windows. Set + *case_sensitive* to ``True`` or ``False`` to override this behaviour. .. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob From d097f8c3b8e228ab75816a95b2d2daf395f09ce5 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 3 May 2023 00:52:11 +0100 Subject: [PATCH 3/8] Restore `_PreciseSelector` --- Lib/pathlib.py | 70 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index f999f8263a0b85..2c567a2587257b 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -67,7 +67,7 @@ def _is_case_sensitive(flavour): # @functools.lru_cache() -def _make_selector(pattern_parts, case_sensitive): +def _make_selector(pattern_parts, flavour, case_sensitive): pat = pattern_parts[0] child_parts = pattern_parts[1:] if not pat: @@ -78,19 +78,21 @@ def _make_selector(pattern_parts, case_sensitive): cls = _ParentSelector elif '**' in pat: raise ValueError("Invalid pattern: '**' can only be an entire path component") - else: + elif _is_wildcard_pattern(pat) or case_sensitive != _is_case_sensitive(flavour): cls = _WildcardSelector - return cls(pat, child_parts, case_sensitive) + else: + cls = _PreciseSelector + return cls(pat, child_parts, flavour, case_sensitive) class _Selector: """A selector matches a specific glob pattern part against the children of a given path.""" - def __init__(self, child_parts, case_sensitive): + def __init__(self, child_parts, flavour, case_sensitive): self.child_parts = child_parts if child_parts: - self.successor = _make_selector(child_parts, case_sensitive) + self.successor = _make_selector(child_parts, flavour, case_sensitive) self.dironly = True else: self.successor = _TerminatingSelector() @@ -100,37 +102,55 @@ def select_from(self, parent_path): """Iterate over all child paths of `parent_path` matched by this selector. This can contain parent_path itself.""" path_cls = type(parent_path) + is_dir = path_cls.is_dir + exists = path_cls.exists scandir = path_cls._scandir - if not parent_path.is_dir(): + if not is_dir(parent_path): return iter([]) - return self._select_from(parent_path, scandir) + return self._select_from(parent_path, is_dir, exists, scandir) class _TerminatingSelector: - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, is_dir, exists, scandir): yield parent_path class _ParentSelector(_Selector): - def __init__(self, name, child_parts, case_sensitive): - _Selector.__init__(self, child_parts, case_sensitive) + def __init__(self, name, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, is_dir, exists, scandir): path = parent_path._make_child_relpath('..') - for p in self.successor._select_from(path, scandir): + for p in self.successor._select_from(path, is_dir, exists, scandir): yield p +class _PreciseSelector(_Selector): + + def __init__(self, name, child_parts, flavour, case_sensitive): + self.name = name + _Selector.__init__(self, child_parts, flavour, case_sensitive) + + def _select_from(self, parent_path, is_dir, exists, scandir): + try: + path = parent_path._make_child_relpath(self.name) + if (is_dir if self.dironly else exists)(path): + for p in self.successor._select_from(path, is_dir, exists, scandir): + yield p + except PermissionError: + return + + class _WildcardSelector(_Selector): - def __init__(self, pat, child_parts, case_sensitive): + def __init__(self, pat, child_parts, flavour, case_sensitive): flags = re.NOFLAG if case_sensitive else re.IGNORECASE self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch - _Selector.__init__(self, child_parts, case_sensitive) + _Selector.__init__(self, child_parts, flavour, case_sensitive) - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, is_dir, exists, scandir): try: # We must close the scandir() object before proceeding to # avoid exhausting file descriptors when globbing deep trees. @@ -151,7 +171,7 @@ def _select_from(self, parent_path, scandir): name = entry.name if self.match(name): path = parent_path._make_child_relpath(name) - for p in self.successor._select_from(path, scandir): + for p in self.successor._select_from(path, is_dir, exists, scandir): yield p except PermissionError: return @@ -159,10 +179,10 @@ def _select_from(self, parent_path, scandir): class _RecursiveWildcardSelector(_Selector): - def __init__(self, pat, child_parts, case_sensitive): - _Selector.__init__(self, child_parts, case_sensitive) + def __init__(self, pat, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) - def _iterate_directories(self, parent_path, scandir): + def _iterate_directories(self, parent_path, is_dir, scandir): yield parent_path try: # We must close the scandir() object before proceeding to @@ -178,18 +198,18 @@ def _iterate_directories(self, parent_path, scandir): raise if entry_is_dir and not entry.is_symlink(): path = parent_path._make_child_relpath(entry.name) - for p in self._iterate_directories(path, scandir): + for p in self._iterate_directories(path, is_dir, scandir): yield p except PermissionError: return - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, is_dir, exists, scandir): try: yielded = set() try: successor_select = self.successor._select_from - for starting_point in self._iterate_directories(parent_path, scandir): - for p in successor_select(starting_point, scandir): + for starting_point in self._iterate_directories(parent_path, is_dir, scandir): + for p in successor_select(starting_point, is_dir, exists, scandir): if p not in yielded: yield p yielded.add(p) @@ -839,7 +859,7 @@ def glob(self, pattern, *, case_sensitive=None): pattern_parts.append('') if case_sensitive is None: case_sensitive = _is_case_sensitive(self._flavour) - selector = _make_selector(tuple(pattern_parts), case_sensitive) + selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive) for p in selector.select_from(self): yield p @@ -856,7 +876,7 @@ def rglob(self, pattern, *, case_sensitive=None): pattern_parts.append('') if case_sensitive is None: case_sensitive = _is_case_sensitive(self._flavour) - selector = _make_selector(("**",) + tuple(pattern_parts), case_sensitive) + selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive) for p in selector.select_from(self): yield p From 33b435d56bfad4a649729b2aacdcf2c2f3710b31 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 3 May 2023 01:46:39 +0100 Subject: [PATCH 4/8] Revert "Restore `_PreciseSelector`" This reverts commit d097f8c3b8e228ab75816a95b2d2daf395f09ce5. --- Lib/pathlib.py | 70 ++++++++++++++++++-------------------------------- 1 file changed, 25 insertions(+), 45 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 2c567a2587257b..f999f8263a0b85 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -67,7 +67,7 @@ def _is_case_sensitive(flavour): # @functools.lru_cache() -def _make_selector(pattern_parts, flavour, case_sensitive): +def _make_selector(pattern_parts, case_sensitive): pat = pattern_parts[0] child_parts = pattern_parts[1:] if not pat: @@ -78,21 +78,19 @@ def _make_selector(pattern_parts, flavour, case_sensitive): cls = _ParentSelector elif '**' in pat: raise ValueError("Invalid pattern: '**' can only be an entire path component") - elif _is_wildcard_pattern(pat) or case_sensitive != _is_case_sensitive(flavour): - cls = _WildcardSelector else: - cls = _PreciseSelector - return cls(pat, child_parts, flavour, case_sensitive) + cls = _WildcardSelector + return cls(pat, child_parts, case_sensitive) class _Selector: """A selector matches a specific glob pattern part against the children of a given path.""" - def __init__(self, child_parts, flavour, case_sensitive): + def __init__(self, child_parts, case_sensitive): self.child_parts = child_parts if child_parts: - self.successor = _make_selector(child_parts, flavour, case_sensitive) + self.successor = _make_selector(child_parts, case_sensitive) self.dironly = True else: self.successor = _TerminatingSelector() @@ -102,55 +100,37 @@ def select_from(self, parent_path): """Iterate over all child paths of `parent_path` matched by this selector. This can contain parent_path itself.""" path_cls = type(parent_path) - is_dir = path_cls.is_dir - exists = path_cls.exists scandir = path_cls._scandir - if not is_dir(parent_path): + if not parent_path.is_dir(): return iter([]) - return self._select_from(parent_path, is_dir, exists, scandir) + return self._select_from(parent_path, scandir) class _TerminatingSelector: - def _select_from(self, parent_path, is_dir, exists, scandir): + def _select_from(self, parent_path, scandir): yield parent_path class _ParentSelector(_Selector): - def __init__(self, name, child_parts, flavour, case_sensitive): - _Selector.__init__(self, child_parts, flavour, case_sensitive) + def __init__(self, name, child_parts, case_sensitive): + _Selector.__init__(self, child_parts, case_sensitive) - def _select_from(self, parent_path, is_dir, exists, scandir): + def _select_from(self, parent_path, scandir): path = parent_path._make_child_relpath('..') - for p in self.successor._select_from(path, is_dir, exists, scandir): + for p in self.successor._select_from(path, scandir): yield p -class _PreciseSelector(_Selector): - - def __init__(self, name, child_parts, flavour, case_sensitive): - self.name = name - _Selector.__init__(self, child_parts, flavour, case_sensitive) - - def _select_from(self, parent_path, is_dir, exists, scandir): - try: - path = parent_path._make_child_relpath(self.name) - if (is_dir if self.dironly else exists)(path): - for p in self.successor._select_from(path, is_dir, exists, scandir): - yield p - except PermissionError: - return - - class _WildcardSelector(_Selector): - def __init__(self, pat, child_parts, flavour, case_sensitive): + def __init__(self, pat, child_parts, case_sensitive): flags = re.NOFLAG if case_sensitive else re.IGNORECASE self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch - _Selector.__init__(self, child_parts, flavour, case_sensitive) + _Selector.__init__(self, child_parts, case_sensitive) - def _select_from(self, parent_path, is_dir, exists, scandir): + def _select_from(self, parent_path, scandir): try: # We must close the scandir() object before proceeding to # avoid exhausting file descriptors when globbing deep trees. @@ -171,7 +151,7 @@ def _select_from(self, parent_path, is_dir, exists, scandir): name = entry.name if self.match(name): path = parent_path._make_child_relpath(name) - for p in self.successor._select_from(path, is_dir, exists, scandir): + for p in self.successor._select_from(path, scandir): yield p except PermissionError: return @@ -179,10 +159,10 @@ def _select_from(self, parent_path, is_dir, exists, scandir): class _RecursiveWildcardSelector(_Selector): - def __init__(self, pat, child_parts, flavour, case_sensitive): - _Selector.__init__(self, child_parts, flavour, case_sensitive) + def __init__(self, pat, child_parts, case_sensitive): + _Selector.__init__(self, child_parts, case_sensitive) - def _iterate_directories(self, parent_path, is_dir, scandir): + def _iterate_directories(self, parent_path, scandir): yield parent_path try: # We must close the scandir() object before proceeding to @@ -198,18 +178,18 @@ def _iterate_directories(self, parent_path, is_dir, scandir): raise if entry_is_dir and not entry.is_symlink(): path = parent_path._make_child_relpath(entry.name) - for p in self._iterate_directories(path, is_dir, scandir): + for p in self._iterate_directories(path, scandir): yield p except PermissionError: return - def _select_from(self, parent_path, is_dir, exists, scandir): + def _select_from(self, parent_path, scandir): try: yielded = set() try: successor_select = self.successor._select_from - for starting_point in self._iterate_directories(parent_path, is_dir, scandir): - for p in successor_select(starting_point, is_dir, exists, scandir): + for starting_point in self._iterate_directories(parent_path, scandir): + for p in successor_select(starting_point, scandir): if p not in yielded: yield p yielded.add(p) @@ -859,7 +839,7 @@ def glob(self, pattern, *, case_sensitive=None): pattern_parts.append('') if case_sensitive is None: case_sensitive = _is_case_sensitive(self._flavour) - selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive) + selector = _make_selector(tuple(pattern_parts), case_sensitive) for p in selector.select_from(self): yield p @@ -876,7 +856,7 @@ def rglob(self, pattern, *, case_sensitive=None): pattern_parts.append('') if case_sensitive is None: case_sensitive = _is_case_sensitive(self._flavour) - selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive) + selector = _make_selector(("**",) + tuple(pattern_parts), case_sensitive) for p in selector.select_from(self): yield p From 203e734027bef0cc60ab4569e7a61d2907878ce4 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 3 May 2023 01:48:21 +0100 Subject: [PATCH 5/8] Remove unused `_is_wildcard_pattern()` function --- Lib/pathlib.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index f999f8263a0b85..da732dfe7ae554 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -54,11 +54,6 @@ def _ignore_error(exception): getattr(exception, 'winerror', None) in _IGNORED_WINERRORS) -def _is_wildcard_pattern(pat): - # Whether this pattern needs actual matching using fnmatch, or can - # be looked up directly as a file. - return "*" in pat or "?" in pat or "[" in pat - def _is_case_sensitive(flavour): return flavour.normcase('Aa') == 'Aa' From 2146b2b80e374a1ce911388a71fad41b9dca2feb Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Wed, 3 May 2023 18:56:14 +0100 Subject: [PATCH 6/8] Update Doc/library/pathlib.rst Co-authored-by: Steve Dower --- Doc/library/pathlib.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 6d84ef214b685f..3ffe57b437c6c5 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -878,8 +878,8 @@ call fails (for example because the path doesn't exist). By default, or when the *case_sensitive* keyword-only argument is set to ``None``, this method matches paths using platform-specific casing rules: - case-sensitive on POSIX, and case-insensitive on Windows. Set - *case_sensitive* to ``True`` or ``False`` to override this behaviour. + typically, case-sensitive on POSIX, and case-insensitive on Windows. + Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. .. note:: Using the "``**``" pattern in large directory trees may consume From b2c4b47b5e746c2254e28580ffdb5866dabee4d7 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 4 May 2023 10:22:10 +0100 Subject: [PATCH 7/8] Pass flavour to selector initialiser --- Lib/pathlib.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index c990daef28c42f..f32e1e2d822834 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -62,7 +62,7 @@ def _is_case_sensitive(flavour): # @functools.lru_cache() -def _make_selector(pattern_parts, case_sensitive): +def _make_selector(pattern_parts, flavour, case_sensitive): pat = pattern_parts[0] child_parts = pattern_parts[1:] if not pat: @@ -75,17 +75,17 @@ def _make_selector(pattern_parts, case_sensitive): raise ValueError("Invalid pattern: '**' can only be an entire path component") else: cls = _WildcardSelector - return cls(pat, child_parts, case_sensitive) + return cls(pat, child_parts, flavour, case_sensitive) class _Selector: """A selector matches a specific glob pattern part against the children of a given path.""" - def __init__(self, child_parts, case_sensitive): + def __init__(self, child_parts, flavour, case_sensitive): self.child_parts = child_parts if child_parts: - self.successor = _make_selector(child_parts, case_sensitive) + self.successor = _make_selector(child_parts, flavour, case_sensitive) self.dironly = True else: self.successor = _TerminatingSelector() @@ -109,8 +109,8 @@ def _select_from(self, parent_path, scandir): class _ParentSelector(_Selector): - def __init__(self, name, child_parts, case_sensitive): - _Selector.__init__(self, child_parts, case_sensitive) + def __init__(self, name, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) def _select_from(self, parent_path, scandir): path = parent_path._make_child_relpath('..') @@ -120,10 +120,13 @@ def _select_from(self, parent_path, scandir): class _WildcardSelector(_Selector): - def __init__(self, pat, child_parts, case_sensitive): + def __init__(self, pat, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) + if case_sensitive is None: + # TODO: evaluate case-sensitivity of each directory in _select_from() + case_sensitive = _is_case_sensitive(flavour) flags = re.NOFLAG if case_sensitive else re.IGNORECASE self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch - _Selector.__init__(self, child_parts, case_sensitive) def _select_from(self, parent_path, scandir): try: @@ -154,8 +157,8 @@ def _select_from(self, parent_path, scandir): class _RecursiveWildcardSelector(_Selector): - def __init__(self, pat, child_parts, case_sensitive): - _Selector.__init__(self, child_parts, case_sensitive) + def __init__(self, pat, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) def _iterate_directories(self, parent_path, scandir): yield parent_path @@ -832,9 +835,7 @@ def glob(self, pattern, *, case_sensitive=None): raise NotImplementedError("Non-relative patterns are unsupported") if pattern[-1] in (self._flavour.sep, self._flavour.altsep): pattern_parts.append('') - if case_sensitive is None: - case_sensitive = _is_case_sensitive(self._flavour) - selector = _make_selector(tuple(pattern_parts), case_sensitive) + selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive) for p in selector.select_from(self): yield p @@ -849,9 +850,7 @@ def rglob(self, pattern, *, case_sensitive=None): raise NotImplementedError("Non-relative patterns are unsupported") if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep): pattern_parts.append('') - if case_sensitive is None: - case_sensitive = _is_case_sensitive(self._flavour) - selector = _make_selector(("**",) + tuple(pattern_parts), case_sensitive) + selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive) for p in selector.select_from(self): yield p From 0d697adf74980a14a3ffa2f0370b15e60e228e09 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 4 May 2023 17:00:47 +0100 Subject: [PATCH 8/8] Improve consistency between glob() and rglob() docs --- Doc/library/pathlib.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 3ffe57b437c6c5..14118127835bbe 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1294,8 +1294,8 @@ call fails (for example because the path doesn't exist). By default, or when the *case_sensitive* keyword-only argument is set to ``None``, this method matches paths using platform-specific casing rules: - case-sensitive on POSIX, and case-insensitive on Windows. Set - *case_sensitive* to ``True`` or ``False`` to override this behaviour. + typically, case-sensitive on POSIX, and case-insensitive on Windows. + Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. .. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob