Skip to content

Commit 797edb2

Browse files
authored
gh-89727: Fix os.walk RecursionError on deep trees (#99803)
Use a stack to implement os.walk iteratively instead of recursively to avoid hitting recursion limits on deeply nested trees.
1 parent 702a5bc commit 797edb2

File tree

4 files changed

+139
-83
lines changed

4 files changed

+139
-83
lines changed

Lib/os.py

+83 -77
Original file line number | Diff line number | Diff line change
@@ -340,89 +340,95 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
340340
341341
"""
342342
sys.audit("os.walk", top, topdown, onerror, followlinks)
343-
return _walk(fspath(top), topdown, onerror, followlinks)
344-
345-
def _walk(top, topdown, onerror, followlinks):
346-
dirs = []
347-
nondirs = []
348-
walk_dirs = []
349-
350-
# We may not have read permission for top, in which case we can't
351-
# get a list of the files the directory contains. os.walk
352-
# always suppressed the exception then, rather than blow up for a
353-
# minor reason when (say) a thousand readable directories are still
354-
# left to visit. That logic is copied here.
355-
try:
356-
# Note that scandir is global in this module due
357-
# to earlier import-*.
358-
scandir_it = scandir(top)
359-
except OSError as error:
360-
if onerror is not None:
361-
onerror(error)
362-
return
363343

364-
with scandir_it:
365-
while True:
366-
try:
344+
stack = [(False, fspath(top))]
345+
islink, join = path.islink, path.join
346+
while stack:
347+
must_yield, top = stack.pop()
348+
if must_yield:
349+
yield top
350+
continue
351+
352+
dirs = []
353+
nondirs = []
354+
walk_dirs = []
355+
356+
# We may not have read permission for top, in which case we can't
357+
# get a list of the files the directory contains.
358+
# We suppress the exception here, rather than blow up for a
359+
# minor reason when (say) a thousand readable directories are still
360+
# left to visit.
361+
try:
362+
scandir_it = scandir(top)
363+
except OSError as error:
364+
if onerror is not None:
365+
onerror(error)
366+
continue
367+
368+
cont = False
369+
with scandir_it:
370+
while True:
367371
try:
368-
entry = next(scandir_it)
369-
except StopIteration:
372+
try:
373+
entry = next(scandir_it)
374+
except StopIteration:
375+
break
376+
except OSError as error:
377+
if onerror is not None:
378+
onerror(error)
379+
cont = True
370380
break
371-
except OSError as error:
372-
if onerror is not None:
373-
onerror(error)
374-
return
375381

376-
try:
377-
is_dir = entry.is_dir()
378-
except OSError:
379-
# If is_dir() raises an OSError, consider that the entry is not
380-
# a directory, same behaviour than os.path.isdir().
381-
is_dir = False
382-
383-
if is_dir:
384-
dirs.append(entry.name)
385-
else:
386-
nondirs.append(entry.name)
382+
try:
383+
is_dir = entry.is_dir()
384+
except OSError:
385+
# If is_dir() raises an OSError, consider the entry not to
386+
# be a directory, same behaviour as os.path.isdir().
387+
is_dir = False
387388

388-
if not topdown and is_dir:
389-
# Bottom-up: recurse into sub-directory, but exclude symlinks to
390-
# directories if followlinks is False
391-
if followlinks:
392-
walk_into = True
389+
if is_dir:
390+
dirs.append(entry.name)
393391
else:
394-
try:
395-
is_symlink = entry.is_symlink()
396-
except OSError:
397-
# If is_symlink() raises an OSError, consider that the
398-
# entry is not a symbolic link, same behaviour than
399-
# os.path.islink().
400-
is_symlink = False
401-
walk_into = not is_symlink
402-
403-
if walk_into:
404-
walk_dirs.append(entry.path)
405-
406-
# Yield before recursion if going top down
407-
if topdown:
408-
yield top, dirs, nondirs
409-
410-
# Recurse into sub-directories
411-
islink, join = path.islink, path.join
412-
for dirname in dirs:
413-
new_path = join(top, dirname)
414-
# Issue #23605: os.path.islink() is used instead of caching
415-
# entry.is_symlink() result during the loop on os.scandir() because
416-
# the caller can replace the directory entry during the "yield"
417-
# above.
418-
if followlinks or not islink(new_path):
419-
yield from _walk(new_path, topdown, onerror, followlinks)
420-
else:
421-
# Recurse into sub-directories
422-
for new_path in walk_dirs:
423-
yield from _walk(new_path, topdown, onerror, followlinks)
424-
# Yield after recursion if going bottom up
425-
yield top, dirs, nondirs
392+
nondirs.append(entry.name)
393+
394+
if not topdown and is_dir:
395+
# Bottom-up: traverse into sub-directory, but exclude
396+
# symlinks to directories if followlinks is False
397+
if followlinks:
398+
walk_into = True
399+
else:
400+
try:
401+
is_symlink = entry.is_symlink()
402+
except OSError:
403+
# If is_symlink() raises an OSError, consider the
404+
# entry not to be a symbolic link, same behaviour
405+
# as os.path.islink().
406+
is_symlink = False
407+
walk_into = not is_symlink
408+
409+
if walk_into:
410+
walk_dirs.append(entry.path)
411+
if cont:
412+
continue
413+
414+
if topdown:
415+
# Yield before sub-directory traversal if going top down
416+
yield top, dirs, nondirs
417+
# Traverse into sub-directories
418+
for dirname in reversed(dirs):
419+
new_path = join(top, dirname)
420+
# bpo-23605: os.path.islink() is used instead of caching
421+
# entry.is_symlink() result during the loop on os.scandir() because
422+
# the caller can replace the directory entry during the "yield"
423+
# above.
424+
if followlinks or not islink(new_path):
425+
stack.append((False, new_path))
426+
else:
427+
# Yield after sub-directory traversal if going bottom up
428+
stack.append((True, (top, dirs, nondirs)))
429+
# Traverse into sub-directories
430+
for new_path in reversed(walk_dirs):
431+
stack.append((False, new_path))
426432

427433
__all__.append("walk")
428434

Lib/test/support/__init__.py

+10 -6
Original file line number | Diff line number | Diff line change
@@ -2178,19 +2178,23 @@ def check_disallow_instantiation(testcase, tp, *args, **kwds):
21782178
testcase.assertRaisesRegex(TypeError, msg, tp, *args, **kwds)
21792179

21802180
@contextlib.contextmanager
2181+
def set_recursion_limit(limit):
2182+
"""Temporarily change the recursion limit."""
2183+
original_limit = sys.getrecursionlimit()
2184+
try:
2185+
sys.setrecursionlimit(limit)
2186+
yield
2187+
finally:
2188+
sys.setrecursionlimit(original_limit)
2189+
21812190
def infinite_recursion(max_depth=75):
21822191
"""Set a lower limit for tests that interact with infinite recursions
21832192
(e.g test_ast.ASTHelpers_Test.test_recursion_direct) since on some
21842193
debug windows builds, due to not enough functions being inlined the
21852194
stack size might not handle the default recursion limit (1000). See
21862195
bpo-11105 for details."""
2196+
return set_recursion_limit(max_depth)
21872197

2188-
original_depth = sys.getrecursionlimit()
2189-
try:
2190-
sys.setrecursionlimit(max_depth)
2191-
yield
2192-
finally:
2193-
sys.setrecursionlimit(original_depth)
21942198

21952199
def ignore_deprecations_from(module: str, *, like: str) -> object:
21962200
token = object()

Lib/test/test_os.py

+43
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
from test.support import import_helper
3434
from test.support import os_helper
3535
from test.support import socket_helper
36+
from test.support import set_recursion_limit
3637
from test.support import warnings_helper
3738
from platform import win32_is_iot
3839

@@ -1471,6 +1472,46 @@ def test_walk_many_open_files(self):
14711472
self.assertEqual(next(it), expected)
14721473
p = os.path.join(p, 'd')
14731474

1475+
def test_walk_above_recursion_limit(self):
1476+
depth = 50
1477+
os.makedirs(os.path.join(self.walk_path, *(['d'] * depth)))
1478+
with set_recursion_limit(depth - 5):
1479+
all = list(self.walk(self.walk_path))
1480+
1481+
sub2_path = self.sub2_tree[0]
1482+
for root, dirs, files in all:
1483+
if root == sub2_path:
1484+
dirs.sort()
1485+
files.sort()
1486+
1487+
d_entries = []
1488+
d_path = self.walk_path
1489+
for _ in range(depth):
1490+
d_path = os.path.join(d_path, "d")
1491+
d_entries.append((d_path, ["d"], []))
1492+
d_entries[-1][1].clear()
1493+
1494+
# Sub-sequences where the order is known
1495+
sections = {
1496+
"SUB1": [
1497+
(self.sub1_path, ["SUB11"], ["tmp2"]),
1498+
(self.sub11_path, [], []),
1499+
],
1500+
"SUB2": [self.sub2_tree],
1501+
"d": d_entries,
1502+
}
1503+
1504+
# The ordering of sub-dirs is arbitrary but determines the order in
1505+
# which sub-sequences appear
1506+
dirs = all[0][1]
1507+
expected = [(self.walk_path, dirs, ["tmp1"])]
1508+
for d in dirs:
1509+
expected.extend(sections[d])
1510+
1511+
self.assertEqual(len(all), depth + 4)
1512+
self.assertEqual(sorted(dirs), ["SUB1", "SUB2", "d"])
1513+
self.assertEqual(all, expected)
1514+
14741515

14751516
@unittest.skipUnless(hasattr(os, 'fwalk'), "Test needs os.fwalk()")
14761517
class FwalkTests(WalkTests):
@@ -1545,6 +1586,8 @@ def test_fd_leak(self):
15451586

15461587
# fwalk() keeps file descriptors open
15471588
test_walk_many_open_files = None
1589+
# fwalk() still uses recursion
1590+
test_walk_above_recursion_limit = None
15481591

15491592

15501593
class BytesWalkTests(WalkTests):
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
Fix issue with :func:`os.walk` where a :exc:`RecursionError` would occur on
2+
deep directory structures by adjusting the implementation of
3+
:func:`os.walk` to be iterative instead of recursive.

0 commit comments

Comments
 (0)