Skip to content

Commit ace676e

Browse files
authored
pythonGH-77609: Add follow_symlinks argument to pathlib.Path.glob() (pythonGH-102616)
Add a keyword-only *follow_symlinks* parameter to `pathlib.Path.glob()` and `rglob()`. When *follow_symlinks* is `None` (the default), these methods follow symlinks except when evaluating "`**`" wildcards. When set to true or false, symlinks are always or never followed, respectively.
1 parent 1668b41 commit ace676e

File tree

5 files changed

+130
-21
lines changed

5 files changed

+130
-21
lines changed

Doc/library/pathlib.rst

+18-2
Original file line numberDiff line numberDiff line change
@@ -885,7 +885,7 @@ call fails (for example because the path doesn't exist).
885885
.. versionadded:: 3.5
886886

887887

888-
.. method:: Path.glob(pattern, *, case_sensitive=None)
888+
.. method:: Path.glob(pattern, *, case_sensitive=None, follow_symlinks=None)
889889

890890
Glob the given relative *pattern* in the directory represented by this path,
891891
yielding all matching files (of any kind)::
@@ -911,6 +911,11 @@ call fails (for example because the path doesn't exist).
911911
typically, case-sensitive on POSIX, and case-insensitive on Windows.
912912
Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
913913

914+
By default, or when the *follow_symlinks* keyword-only argument is set to
915+
``None``, this method follows symlinks except when expanding "``**``"
916+
wildcards. Set *follow_symlinks* to ``True`` to always follow symlinks, or
917+
``False`` to treat all symlinks as files.
918+
914919
.. note::
915920
Using the "``**``" pattern in large directory trees may consume
916921
an inordinate amount of time.
@@ -924,6 +929,9 @@ call fails (for example because the path doesn't exist).
924929
.. versionadded:: 3.12
925930
The *case_sensitive* argument.
926931

932+
.. versionadded:: 3.13
933+
The *follow_symlinks* argument.
934+
927935
.. method:: Path.group()
928936

929937
Return the name of the group owning the file. :exc:`KeyError` is raised
@@ -1309,7 +1317,7 @@ call fails (for example because the path doesn't exist).
13091317
.. versionadded:: 3.6
13101318
The *strict* argument (pre-3.6 behavior is strict).
13111319

1312-
.. method:: Path.rglob(pattern, *, case_sensitive=None)
1320+
.. method:: Path.rglob(pattern, *, case_sensitive=None, follow_symlinks=None)
13131321

13141322
Glob the given relative *pattern* recursively. This is like calling
13151323
:func:`Path.glob` with "``**/``" added in front of the *pattern*, where
@@ -1327,6 +1335,11 @@ call fails (for example because the path doesn't exist).
13271335
typically, case-sensitive on POSIX, and case-insensitive on Windows.
13281336
Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
13291337

1338+
By default, or when the *follow_symlinks* keyword-only argument is set to
1339+
``None``, this method follows symlinks except when expanding "``**``"
1340+
wildcards. Set *follow_symlinks* to ``True`` to always follow symlinks, or
1341+
``False`` to treat all symlinks as files.
1342+
13301343
.. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob
13311344

13321345
.. versionchanged:: 3.11
@@ -1336,6 +1349,9 @@ call fails (for example because the path doesn't exist).
13361349
.. versionadded:: 3.12
13371350
The *case_sensitive* argument.
13381351

1352+
.. versionadded:: 3.13
1353+
The *follow_symlinks* argument.
1354+
13391355
.. method:: Path.rmdir()
13401356

13411357
Remove this directory. The directory must be empty.

Doc/whatsnew/3.13.rst

+6
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,12 @@ New Modules
8787
Improved Modules
8888
================
8989

90+
pathlib
91+
-------
92+
93+
* Add *follow_symlinks* keyword-only argument to :meth:`pathlib.Path.glob` and
94+
:meth:`~pathlib.Path.rglob`.
95+
(Contributed by Barney Gale in :gh:`77609`.)
9096

9197
Optimizations
9298
=============

Lib/pathlib.py

+21-19
Original file line numberDiff line numberDiff line change
@@ -105,19 +105,19 @@ def __init__(self, child_parts, flavour, case_sensitive):
105105
self.successor = _TerminatingSelector()
106106
self.dironly = False
107107

108-
def select_from(self, parent_path):
108+
def select_from(self, parent_path, follow_symlinks):
109109
"""Iterate over all child paths of `parent_path` matched by this
110110
selector. This can contain parent_path itself."""
111111
path_cls = type(parent_path)
112112
scandir = path_cls._scandir
113113
if not parent_path.is_dir():
114114
return iter([])
115-
return self._select_from(parent_path, scandir)
115+
return self._select_from(parent_path, scandir, follow_symlinks)
116116

117117

118118
class _TerminatingSelector:
119119

120-
def _select_from(self, parent_path, scandir):
120+
def _select_from(self, parent_path, scandir, follow_symlinks):
121121
yield parent_path
122122

123123

@@ -126,9 +126,9 @@ class _ParentSelector(_Selector):
126126
def __init__(self, name, child_parts, flavour, case_sensitive):
127127
_Selector.__init__(self, child_parts, flavour, case_sensitive)
128128

129-
def _select_from(self, parent_path, scandir):
129+
def _select_from(self, parent_path, scandir, follow_symlinks):
130130
path = parent_path._make_child_relpath('..')
131-
for p in self.successor._select_from(path, scandir):
131+
for p in self.successor._select_from(path, scandir, follow_symlinks):
132132
yield p
133133

134134

@@ -141,7 +141,8 @@ def __init__(self, pat, child_parts, flavour, case_sensitive):
141141
case_sensitive = _is_case_sensitive(flavour)
142142
self.match = _compile_pattern(pat, case_sensitive)
143143

144-
def _select_from(self, parent_path, scandir):
144+
def _select_from(self, parent_path, scandir, follow_symlinks):
145+
follow_dirlinks = True if follow_symlinks is None else follow_symlinks
145146
try:
146147
# We must close the scandir() object before proceeding to
147148
# avoid exhausting file descriptors when globbing deep trees.
@@ -153,14 +154,14 @@ def _select_from(self, parent_path, scandir):
153154
for entry in entries:
154155
if self.dironly:
155156
try:
156-
if not entry.is_dir():
157+
if not entry.is_dir(follow_symlinks=follow_dirlinks):
157158
continue
158159
except OSError:
159160
continue
160161
name = entry.name
161162
if self.match(name):
162163
path = parent_path._make_child_relpath(name)
163-
for p in self.successor._select_from(path, scandir):
164+
for p in self.successor._select_from(path, scandir, follow_symlinks):
164165
yield p
165166

166167

@@ -169,16 +170,17 @@ class _RecursiveWildcardSelector(_Selector):
169170
def __init__(self, pat, child_parts, flavour, case_sensitive):
170171
_Selector.__init__(self, child_parts, flavour, case_sensitive)
171172

172-
def _iterate_directories(self, parent_path):
173+
def _iterate_directories(self, parent_path, follow_symlinks):
173174
yield parent_path
174-
for dirpath, dirnames, _ in parent_path.walk():
175+
for dirpath, dirnames, _ in parent_path.walk(follow_symlinks=follow_symlinks):
175176
for dirname in dirnames:
176177
yield dirpath._make_child_relpath(dirname)
177178

178-
def _select_from(self, parent_path, scandir):
179+
def _select_from(self, parent_path, scandir, follow_symlinks):
180+
follow_dirlinks = False if follow_symlinks is None else follow_symlinks
179181
successor_select = self.successor._select_from
180-
for starting_point in self._iterate_directories(parent_path):
181-
for p in successor_select(starting_point, scandir):
182+
for starting_point in self._iterate_directories(parent_path, follow_dirlinks):
183+
for p in successor_select(starting_point, scandir, follow_symlinks):
182184
yield p
183185

184186

@@ -189,10 +191,10 @@ class _DoubleRecursiveWildcardSelector(_RecursiveWildcardSelector):
189191
multiple non-adjacent '**' segments.
190192
"""
191193

192-
def _select_from(self, parent_path, scandir):
194+
def _select_from(self, parent_path, scandir, follow_symlinks):
193195
yielded = set()
194196
try:
195-
for p in super()._select_from(parent_path, scandir):
197+
for p in super()._select_from(parent_path, scandir, follow_symlinks):
196198
if p not in yielded:
197199
yield p
198200
yielded.add(p)
@@ -994,7 +996,7 @@ def _make_child_relpath(self, name):
994996
path._tail_cached = tail + [name]
995997
return path
996998

997-
def glob(self, pattern, *, case_sensitive=None):
999+
def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
9981000
"""Iterate over this subtree and yield all existing files (of any
9991001
kind, including directories) matching the given relative pattern.
10001002
"""
@@ -1007,10 +1009,10 @@ def glob(self, pattern, *, case_sensitive=None):
10071009
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
10081010
pattern_parts.append('')
10091011
selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive)
1010-
for p in selector.select_from(self):
1012+
for p in selector.select_from(self, follow_symlinks):
10111013
yield p
10121014

1013-
def rglob(self, pattern, *, case_sensitive=None):
1015+
def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
10141016
"""Recursively yield all existing files (of any kind, including
10151017
directories) matching the given relative pattern, anywhere in
10161018
this subtree.
@@ -1022,7 +1024,7 @@ def rglob(self, pattern, *, case_sensitive=None):
10221024
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
10231025
pattern_parts.append('')
10241026
selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive)
1025-
for p in selector.select_from(self):
1027+
for p in selector.select_from(self, follow_symlinks):
10261028
yield p
10271029

10281030
def walk(self, top_down=True, on_error=None, follow_symlinks=False):

Lib/test/test_pathlib.py

+83
Original file line numberDiff line numberDiff line change
@@ -1863,6 +1863,35 @@ def _check(path, pattern, case_sensitive, expected):
18631863
_check(path, "dirb/file*", True, [])
18641864
_check(path, "dirb/file*", False, ["dirB/fileB"])
18651865

1866+
@os_helper.skip_unless_symlink
1867+
def test_glob_follow_symlinks_common(self):
1868+
def _check(path, glob, expected):
1869+
actual = {path for path in path.glob(glob, follow_symlinks=True)
1870+
if "linkD" not in path.parent.parts} # exclude symlink loop.
1871+
self.assertEqual(actual, { P(BASE, q) for q in expected })
1872+
P = self.cls
1873+
p = P(BASE)
1874+
_check(p, "fileB", [])
1875+
_check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"])
1876+
_check(p, "*A", ["dirA", "fileA", "linkA"])
1877+
_check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"])
1878+
_check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"])
1879+
_check(p, "*/", ["dirA", "dirB", "dirC", "dirE", "linkB"])
1880+
1881+
@os_helper.skip_unless_symlink
1882+
def test_glob_no_follow_symlinks_common(self):
1883+
def _check(path, glob, expected):
1884+
actual = {path for path in path.glob(glob, follow_symlinks=False)}
1885+
self.assertEqual(actual, { P(BASE, q) for q in expected })
1886+
P = self.cls
1887+
p = P(BASE)
1888+
_check(p, "fileB", [])
1889+
_check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"])
1890+
_check(p, "*A", ["dirA", "fileA", "linkA"])
1891+
_check(p, "*B/*", ["dirB/fileB", "dirB/linkD"])
1892+
_check(p, "*/fileB", ["dirB/fileB"])
1893+
_check(p, "*/", ["dirA", "dirB", "dirC", "dirE"])
1894+
18661895
def test_rglob_common(self):
18671896
def _check(glob, expected):
18681897
self.assertEqual(sorted(glob), sorted(P(BASE, q) for q in expected))
@@ -1906,6 +1935,60 @@ def _check(glob, expected):
19061935
_check(p.rglob("*.txt"), ["dirC/novel.txt"])
19071936
_check(p.rglob("*.*"), ["dirC/novel.txt"])
19081937

1938+
@os_helper.skip_unless_symlink
1939+
def test_rglob_follow_symlinks_common(self):
1940+
def _check(path, glob, expected):
1941+
actual = {path for path in path.rglob(glob, follow_symlinks=True)
1942+
if 'linkD' not in path.parent.parts} # exclude symlink loop.
1943+
self.assertEqual(actual, { P(BASE, q) for q in expected })
1944+
P = self.cls
1945+
p = P(BASE)
1946+
_check(p, "fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"])
1947+
_check(p, "*/fileA", [])
1948+
_check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"])
1949+
_check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB",
1950+
"dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"])
1951+
_check(p, "*/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD",
1952+
"dirC", "dirC/dirD", "dirE", "linkB", "linkB/linkD"])
1953+
_check(p, "", ["", "dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD",
1954+
"dirC", "dirE", "dirC/dirD", "linkB", "linkB/linkD"])
1955+
1956+
p = P(BASE, "dirC")
1957+
_check(p, "*", ["dirC/fileC", "dirC/novel.txt",
1958+
"dirC/dirD", "dirC/dirD/fileD"])
1959+
_check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"])
1960+
_check(p, "*/*", ["dirC/dirD/fileD"])
1961+
_check(p, "*/", ["dirC/dirD"])
1962+
_check(p, "", ["dirC", "dirC/dirD"])
1963+
# gh-91616, a re module regression
1964+
_check(p, "*.txt", ["dirC/novel.txt"])
1965+
_check(p, "*.*", ["dirC/novel.txt"])
1966+
1967+
@os_helper.skip_unless_symlink
1968+
def test_rglob_no_follow_symlinks_common(self):
1969+
def _check(path, glob, expected):
1970+
actual = {path for path in path.rglob(glob, follow_symlinks=False)}
1971+
self.assertEqual(actual, { P(BASE, q) for q in expected })
1972+
P = self.cls
1973+
p = P(BASE)
1974+
_check(p, "fileB", ["dirB/fileB"])
1975+
_check(p, "*/fileA", [])
1976+
_check(p, "*/fileB", ["dirB/fileB"])
1977+
_check(p, "file*", ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD", ])
1978+
_check(p, "*/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"])
1979+
_check(p, "", ["", "dirA", "dirB", "dirC", "dirE", "dirC/dirD"])
1980+
1981+
p = P(BASE, "dirC")
1982+
_check(p, "*", ["dirC/fileC", "dirC/novel.txt",
1983+
"dirC/dirD", "dirC/dirD/fileD"])
1984+
_check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"])
1985+
_check(p, "*/*", ["dirC/dirD/fileD"])
1986+
_check(p, "*/", ["dirC/dirD"])
1987+
_check(p, "", ["dirC", "dirC/dirD"])
1988+
# gh-91616, a re module regression
1989+
_check(p, "*.txt", ["dirC/novel.txt"])
1990+
_check(p, "*.*", ["dirC/novel.txt"])
1991+
19091992
@os_helper.skip_unless_symlink
19101993
def test_rglob_symlink_loop(self):
19111994
# Don't get fooled by symlink loops (Issue #26012).
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add *follow_symlinks* argument to :meth:`pathlib.Path.glob` and
2+
:meth:`~pathlib.Path.rglob`, defaulting to ``None`` (follow symlinks except when expanding "``**``" wildcards).

0 commit comments

Comments
 (0)