diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 627f2df9263dec..ee3330f44f47d0 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -885,7 +885,7 @@ call fails (for example because the path doesn't exist). .. versionadded:: 3.5 -.. method:: Path.glob(pattern, *, case_sensitive=None) +.. method:: Path.glob(pattern, *, case_sensitive=None, follow_symlinks=None) Glob the given relative *pattern* in the directory represented by this path, yielding all matching files (of any kind):: @@ -911,6 +911,11 @@ call fails (for example because the path doesn't exist). typically, case-sensitive on POSIX, and case-insensitive on Windows. Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. + By default, or when the *follow_symlinks* keyword-only argument is set to + ``None``, this method follows symlinks except when expanding "``**``" + wildcards. Set *follow_symlinks* to ``True`` to always follow symlinks, or + ``False`` to treat all symlinks as files. + .. note:: Using the "``**``" pattern in large directory trees may consume an inordinate amount of time. @@ -924,6 +929,9 @@ call fails (for example because the path doesn't exist). .. versionadded:: 3.12 The *case_sensitive* argument. + .. versionadded:: 3.13 + The *follow_symlinks* argument. + .. method:: Path.group() Return the name of the group owning the file. :exc:`KeyError` is raised @@ -1309,7 +1317,7 @@ call fails (for example because the path doesn't exist). .. versionadded:: 3.6 The *strict* argument (pre-3.6 behavior is strict). -.. method:: Path.rglob(pattern, *, case_sensitive=None) +.. method:: Path.rglob(pattern, *, case_sensitive=None, follow_symlinks=None) Glob the given relative *pattern* recursively. This is like calling :func:`Path.glob` with "``**/``" added in front of the *pattern*, where @@ -1327,6 +1335,11 @@ call fails (for example because the path doesn't exist). typically, case-sensitive on POSIX, and case-insensitive on Windows. 
Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. + By default, or when the *follow_symlinks* keyword-only argument is set to + ``None``, this method follows symlinks except when expanding "``**``" + wildcards. Set *follow_symlinks* to ``True`` to always follow symlinks, or + ``False`` to treat all symlinks as files. + .. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob .. versionchanged:: 3.11 @@ -1336,6 +1349,9 @@ call fails (for example because the path doesn't exist). .. versionadded:: 3.12 The *case_sensitive* argument. + .. versionadded:: 3.13 + The *follow_symlinks* argument. + .. method:: Path.rmdir() Remove this directory. The directory must be empty. diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index e0c3c2a3592ec7..a13dbf864a86e7 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -87,6 +87,12 @@ New Modules Improved Modules ================ +pathlib +------- + +* Add *follow_symlinks* keyword-only argument to :meth:`pathlib.Path.glob` and + :meth:`~pathlib.Path.rglob`. + (Contributed by Barney Gale in :gh:`77609`.) Optimizations ============= diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 3d68c161603d08..2d60dc83aeed10 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -105,19 +105,19 @@ def __init__(self, child_parts, flavour, case_sensitive): self.successor = _TerminatingSelector() self.dironly = False - def select_from(self, parent_path): + def select_from(self, parent_path, follow_symlinks): """Iterate over all child paths of `parent_path` matched by this selector. 
This can contain parent_path itself.""" path_cls = type(parent_path) scandir = path_cls._scandir if not parent_path.is_dir(): return iter([]) - return self._select_from(parent_path, scandir) + return self._select_from(parent_path, scandir, follow_symlinks) class _TerminatingSelector: - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, scandir, follow_symlinks): yield parent_path @@ -126,9 +126,9 @@ class _ParentSelector(_Selector): def __init__(self, name, child_parts, flavour, case_sensitive): _Selector.__init__(self, child_parts, flavour, case_sensitive) - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, scandir, follow_symlinks): path = parent_path._make_child_relpath('..') - for p in self.successor._select_from(path, scandir): + for p in self.successor._select_from(path, scandir, follow_symlinks): yield p @@ -141,7 +141,8 @@ def __init__(self, pat, child_parts, flavour, case_sensitive): case_sensitive = _is_case_sensitive(flavour) self.match = _compile_pattern(pat, case_sensitive) - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, scandir, follow_symlinks): + follow_dirlinks = True if follow_symlinks is None else follow_symlinks try: # We must close the scandir() object before proceeding to # avoid exhausting file descriptors when globbing deep trees. 
@@ -153,14 +154,14 @@ def _select_from(self, parent_path, scandir): for entry in entries: if self.dironly: try: - if not entry.is_dir(): + if not entry.is_dir(follow_symlinks=follow_dirlinks): continue except OSError: continue name = entry.name if self.match(name): path = parent_path._make_child_relpath(name) - for p in self.successor._select_from(path, scandir): + for p in self.successor._select_from(path, scandir, follow_symlinks): yield p @@ -169,16 +170,17 @@ class _RecursiveWildcardSelector(_Selector): def __init__(self, pat, child_parts, flavour, case_sensitive): _Selector.__init__(self, child_parts, flavour, case_sensitive) - def _iterate_directories(self, parent_path): + def _iterate_directories(self, parent_path, follow_symlinks): yield parent_path - for dirpath, dirnames, _ in parent_path.walk(): + for dirpath, dirnames, _ in parent_path.walk(follow_symlinks=follow_symlinks): for dirname in dirnames: yield dirpath._make_child_relpath(dirname) - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, scandir, follow_symlinks): + follow_dirlinks = False if follow_symlinks is None else follow_symlinks successor_select = self.successor._select_from - for starting_point in self._iterate_directories(parent_path): - for p in successor_select(starting_point, scandir): + for starting_point in self._iterate_directories(parent_path, follow_dirlinks): + for p in successor_select(starting_point, scandir, follow_symlinks): yield p @@ -189,10 +191,10 @@ class _DoubleRecursiveWildcardSelector(_RecursiveWildcardSelector): multiple non-adjacent '**' segments. 
""" - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, scandir, follow_symlinks): yielded = set() try: - for p in super()._select_from(parent_path, scandir): + for p in super()._select_from(parent_path, scandir, follow_symlinks): if p not in yielded: yield p yielded.add(p) @@ -992,7 +994,7 @@ def _make_child_relpath(self, name): path._tail_cached = tail + [name] return path - def glob(self, pattern, *, case_sensitive=None): + def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): """Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. """ @@ -1005,10 +1007,10 @@ def glob(self, pattern, *, case_sensitive=None): if pattern[-1] in (self._flavour.sep, self._flavour.altsep): pattern_parts.append('') selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive) - for p in selector.select_from(self): + for p in selector.select_from(self, follow_symlinks): yield p - def rglob(self, pattern, *, case_sensitive=None): + def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None): """Recursively yield all existing files (of any kind, including directories) matching the given relative pattern, anywhere in this subtree. 
@@ -1020,7 +1022,7 @@ def rglob(self, pattern, *, case_sensitive=None): if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep): pattern_parts.append('') selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive) - for p in selector.select_from(self): + for p in selector.select_from(self, follow_symlinks): yield p def walk(self, top_down=True, on_error=None, follow_symlinks=False): diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index ab2c2b232a0411..e27d9dbb6e55dc 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1856,6 +1856,35 @@ def _check(path, pattern, case_sensitive, expected): _check(path, "dirb/file*", True, []) _check(path, "dirb/file*", False, ["dirB/fileB"]) + @os_helper.skip_unless_symlink + def test_glob_follow_symlinks_common(self): + def _check(path, glob, expected): + actual = {path for path in path.glob(glob, follow_symlinks=True) + if "linkD" not in path.parent.parts} # exclude symlink loop. 
+ self.assertEqual(actual, { P(BASE, q) for q in expected }) + P = self.cls + p = P(BASE) + _check(p, "fileB", []) + _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"]) + _check(p, "*A", ["dirA", "fileA", "linkA"]) + _check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"]) + _check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"]) + _check(p, "*/", ["dirA", "dirB", "dirC", "dirE", "linkB"]) + + @os_helper.skip_unless_symlink + def test_glob_no_follow_symlinks_common(self): + def _check(path, glob, expected): + actual = {path for path in path.glob(glob, follow_symlinks=False)} + self.assertEqual(actual, { P(BASE, q) for q in expected }) + P = self.cls + p = P(BASE) + _check(p, "fileB", []) + _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"]) + _check(p, "*A", ["dirA", "fileA", "linkA"]) + _check(p, "*B/*", ["dirB/fileB", "dirB/linkD"]) + _check(p, "*/fileB", ["dirB/fileB"]) + _check(p, "*/", ["dirA", "dirB", "dirC", "dirE"]) + def test_rglob_common(self): def _check(glob, expected): self.assertEqual(sorted(glob), sorted(P(BASE, q) for q in expected)) @@ -1899,6 +1928,60 @@ def _check(glob, expected): _check(p.rglob("*.txt"), ["dirC/novel.txt"]) _check(p.rglob("*.*"), ["dirC/novel.txt"]) + @os_helper.skip_unless_symlink + def test_rglob_follow_symlinks_common(self): + def _check(path, glob, expected): + actual = {path for path in path.rglob(glob, follow_symlinks=True) + if 'linkD' not in path.parent.parts} # exclude symlink loop. 
+ self.assertEqual(actual, { P(BASE, q) for q in expected }) + P = self.cls + p = P(BASE) + _check(p, "fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"]) + _check(p, "*/fileA", []) + _check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"]) + _check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB", + "dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"]) + _check(p, "*/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", + "dirC", "dirC/dirD", "dirE", "linkB", "linkB/linkD"]) + _check(p, "", ["", "dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", + "dirC", "dirE", "dirC/dirD", "linkB", "linkB/linkD"]) + + p = P(BASE, "dirC") + _check(p, "*", ["dirC/fileC", "dirC/novel.txt", + "dirC/dirD", "dirC/dirD/fileD"]) + _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) + _check(p, "*/*", ["dirC/dirD/fileD"]) + _check(p, "*/", ["dirC/dirD"]) + _check(p, "", ["dirC", "dirC/dirD"]) + # gh-91616, a re module regression + _check(p, "*.txt", ["dirC/novel.txt"]) + _check(p, "*.*", ["dirC/novel.txt"]) + + @os_helper.skip_unless_symlink + def test_rglob_no_follow_symlinks_common(self): + def _check(path, glob, expected): + actual = {path for path in path.rglob(glob, follow_symlinks=False)} + self.assertEqual(actual, { P(BASE, q) for q in expected }) + P = self.cls + p = P(BASE) + _check(p, "fileB", ["dirB/fileB"]) + _check(p, "*/fileA", []) + _check(p, "*/fileB", ["dirB/fileB"]) + _check(p, "file*", ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD", ]) + _check(p, "*/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"]) + _check(p, "", ["", "dirA", "dirB", "dirC", "dirE", "dirC/dirD"]) + + p = P(BASE, "dirC") + _check(p, "*", ["dirC/fileC", "dirC/novel.txt", + "dirC/dirD", "dirC/dirD/fileD"]) + _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) + _check(p, "*/*", ["dirC/dirD/fileD"]) + _check(p, "*/", ["dirC/dirD"]) + _check(p, "", ["dirC", "dirC/dirD"]) + # gh-91616, a re module regression + _check(p, "*.txt", 
["dirC/novel.txt"]) + _check(p, "*.*", ["dirC/novel.txt"]) + @os_helper.skip_unless_symlink def test_rglob_symlink_loop(self): # Don't get fooled by symlink loops (Issue #26012). diff --git a/Misc/NEWS.d/next/Library/2023-03-12-03-37-03.gh-issue-77609.aOQttm.rst b/Misc/NEWS.d/next/Library/2023-03-12-03-37-03.gh-issue-77609.aOQttm.rst new file mode 100644 index 00000000000000..35e61088de58a6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-12-03-37-03.gh-issue-77609.aOQttm.rst @@ -0,0 +1,2 @@ +Add *follow_symlinks* argument to :meth:`pathlib.Path.glob` and +:meth:`~pathlib.Path.rglob`, defaulting to ``None`` (follow symlinks except when expanding "``**``" wildcards).