From ec181bb7b24c6601cf70466d54db7214faebcac5 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 4 May 2023 15:10:32 +0100 Subject: [PATCH 1/2] GH-77609: Support following symlinks in `pathlib.Path.glob()` --- Doc/library/pathlib.rst | 11 ++++-- Lib/pathlib.py | 7 ++-- Lib/test/test_pathlib.py | 34 +++++++++++++++++++ ...3-05-04-15-09-46.gh-issue-77609.4wvomR.rst | 3 ++ 4 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-05-04-15-09-46.gh-issue-77609.4wvomR.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 4847ac24c77513..6faa522e9e3c3b 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -866,8 +866,9 @@ call fails (for example because the path doesn't exist). [PosixPath('docs/conf.py')] Patterns are the same as for :mod:`fnmatch`, with the addition of "``**``" - which means "this directory and all subdirectories, recursively". In other - words, it enables recursive globbing:: + which means "this directory and all subdirectories, recursively", and "``***``" + which additionally follows symlinks to directories. These wildcards enable + recursive globbing:: >>> sorted(Path('.').glob('**/*.py')) [PosixPath('build/lib/pathlib.py'), @@ -886,6 +887,9 @@ call fails (for example because the path doesn't exist). Return only directories if *pattern* ends with a pathname components separator (:data:`~os.sep` or :data:`~os.altsep`). + .. versionchanged:: 3.12 + Support for the "``***``" wildcard was added. + .. method:: Path.group() Return the name of the group owning the file. :exc:`KeyError` is raised @@ -1290,6 +1294,9 @@ call fails (for example because the path doesn't exist). Return only directories if *pattern* ends with a pathname components separator (:data:`~os.sep` or :data:`~os.altsep`). + .. versionchanged:: 3.12 + Support for the "``***``" wildcard was added. + .. method:: Path.rmdir() Remove this directory. The directory must be empty. 
diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 8a1651c23d7f4d..00f2711e9d775c 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -67,7 +67,7 @@ def _make_selector(pattern_parts, flavour): child_parts = pattern_parts[1:] if not pat: return _TerminatingSelector() - if pat == '**': + if pat == '**' or pat == '***': cls = _RecursiveWildcardSelector elif pat == '..': cls = _ParentSelector @@ -154,6 +154,7 @@ def _select_from(self, parent_path, scandir): class _RecursiveWildcardSelector(_Selector): def __init__(self, pat, child_parts, flavour): + self.follow_symlinks = pat == '***' _Selector.__init__(self, child_parts, flavour) def _iterate_directories(self, parent_path, scandir): @@ -166,11 +167,11 @@ def _iterate_directories(self, parent_path, scandir): for entry in entries: entry_is_dir = False try: - entry_is_dir = entry.is_dir() + entry_is_dir = entry.is_dir(follow_symlinks=self.follow_symlinks) except OSError as e: if not _ignore_error(e): raise - if entry_is_dir and not entry.is_symlink(): + if entry_is_dir: path = parent_path._make_child_relpath(entry.name) for p in self._iterate_directories(path, scandir): yield p diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 424bb92a87d112..6affab2168373a 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1938,6 +1938,40 @@ def my_scandir(path): subdir.chmod(000) self.assertEqual(len(set(base.glob("*"))), 4) + def test_glob_recurse_symlinks(self): + def _check(glob, expected): + glob = {path for path in glob if "linkD" not in path.parts} + self.assertEqual(glob, { P(BASE, q) for q in expected }) + P = self.cls + + p = P(BASE) + if os_helper.can_symlink(): + _check(p.glob("***/fileB"), ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"]) + _check(p.glob("***/*/fileA"), []) + _check(p.glob("***/*/fileB"), ["dirB/fileB", "linkB/fileB", "dirA/linkC/fileB"]) + _check(p.glob("***/file*"), ["fileA", "dirA/linkC/fileB", "dirB/fileB", "dirC/fileC", + "dirC/dirD/fileD", "linkB/fileB"]) + 
_check(p.glob("***/*/"), ["dirA", "dirA/linkC", "dirB", "dirC", + "dirC/dirD", "dirE", "linkB",]) + _check(p.glob("***"), ["", "dirA", "dirA/linkC", "dirB", "dirC", "dirE", "dirC/dirD", + "linkB"]) + else: + _check(p.glob("***/fileB"), ["dirB/fileB"]) + _check(p.glob("***/*/fileA"), []) + _check(p.glob("***/*/fileB"), ["dirB/fileB"]) + _check(p.glob("***/file*"), ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD"]) + _check(p.glob("***/*/"), ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"]) + _check(p.glob("***"), ["", "dirA", "dirB", "dirC", "dirE", "dirC/dirD"]) + + p = P(BASE, "dirC") + _check(p.glob("***/*"), ["dirC/fileC", "dirC/novel.txt", "dirC/dirD", "dirC/dirD/fileD"]) + _check(p.glob("***/file*"), ["dirC/fileC", "dirC/dirD/fileD"]) + _check(p.glob("***/*/*"), ["dirC/dirD/fileD"]) + _check(p.glob("***/*/"), ["dirC/dirD"]) + _check(p.glob("***"), ["dirC", "dirC/dirD"]) + _check(p.glob("***/*.txt"), ["dirC/novel.txt"]) + _check(p.glob("***/*.*"), ["dirC/novel.txt"]) + def _check_resolve(self, p, expected, strict=True): q = p.resolve(strict) self.assertEqual(q, expected) diff --git a/Misc/NEWS.d/next/Library/2023-05-04-15-09-46.gh-issue-77609.4wvomR.rst b/Misc/NEWS.d/next/Library/2023-05-04-15-09-46.gh-issue-77609.4wvomR.rst new file mode 100644 index 00000000000000..ae1442d5b65acf --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-05-04-15-09-46.gh-issue-77609.4wvomR.rst @@ -0,0 +1,3 @@ +Add support for the "``***``" wildcard in :meth:`pathlib.Path.glob` and +:meth:`~pathlib.Path.rglob`. This wildcard works like "``**``", except that +it also recurses into symlinks to directories. 
From fa10d2889159b903a3ed4a467860699c491cd592 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 7 May 2023 22:26:54 +0100 Subject: [PATCH 2/2] Remove stray newline --- Lib/pathlib.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index f04335ceff1d84..454ac98bbb2e43 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -61,7 +61,6 @@ def _is_case_sensitive(flavour): # Globbing helpers # - @functools.lru_cache() def _make_selector(pattern_parts, flavour, case_sensitive): pat = pattern_parts[0]