-
-
Notifications
You must be signed in to change notification settings - Fork 32.1k
gh-89727: Fix os.walk RecursionError on deep trees #99803
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
99f4691
3552d49
3078ea6
f37bbe8
f26a5b8
955d21b
8fbfb2e
ef46eda
e261b9f
5ec53f7
507b650
1c35610
73138a6
2814cc5
37f3cc7
d9c766c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ | |
|
||
#' | ||
import abc | ||
from collections import deque | ||
import sys | ||
import stat as st | ||
|
||
|
@@ -340,89 +341,97 @@ def walk(top, topdown=True, onerror=None, followlinks=False): | |
|
||
""" | ||
sys.audit("os.walk", top, topdown, onerror, followlinks) | ||
return _walk(fspath(top), topdown, onerror, followlinks) | ||
|
||
def _walk(top, topdown, onerror, followlinks): | ||
dirs = [] | ||
nondirs = [] | ||
walk_dirs = [] | ||
|
||
# We may not have read permission for top, in which case we can't | ||
# get a list of the files the directory contains. os.walk | ||
# always suppressed the exception then, rather than blow up for a | ||
# minor reason when (say) a thousand readable directories are still | ||
# left to visit. That logic is copied here. | ||
try: | ||
# Note that scandir is global in this module due | ||
# to earlier import-*. | ||
scandir_it = scandir(top) | ||
except OSError as error: | ||
if onerror is not None: | ||
onerror(error) | ||
return | ||
|
||
with scandir_it: | ||
while True: | ||
try: | ||
stack = deque([(False, fspath(top))]) | ||
while stack: | ||
must_yield, top = stack.pop() | ||
if must_yield: | ||
yield top | ||
continue | ||
|
||
dirs = [] | ||
nondirs = [] | ||
walk_dirs = [] | ||
|
||
# We may not have read permission for top, in which case we can't | ||
# get a list of the files the directory contains. os.walk | ||
# always suppressed the exception then, rather than blow up for a | ||
# minor reason when (say) a thousand readable directories are still | ||
# left to visit. That logic is copied here. | ||
try: | ||
# Note that scandir is global in this module due | ||
# to earlier import-*. | ||
jonburdo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
scandir_it = scandir(top) | ||
except OSError as error: | ||
if onerror is not None: | ||
onerror(error) | ||
continue | ||
|
||
cont = False | ||
with scandir_it: | ||
while True: | ||
try: | ||
entry = next(scandir_it) | ||
except StopIteration: | ||
try: | ||
entry = next(scandir_it) | ||
except StopIteration: | ||
break | ||
except OSError as error: | ||
if onerror is not None: | ||
onerror(error) | ||
cont = True | ||
break | ||
except OSError as error: | ||
if onerror is not None: | ||
onerror(error) | ||
return | ||
|
||
try: | ||
is_dir = entry.is_dir() | ||
except OSError: | ||
# If is_dir() raises an OSError, consider that the entry is not | ||
# a directory, same behaviour than os.path.isdir(). | ||
is_dir = False | ||
|
||
if is_dir: | ||
dirs.append(entry.name) | ||
else: | ||
nondirs.append(entry.name) | ||
try: | ||
is_dir = entry.is_dir() | ||
except OSError: | ||
# If is_dir() raises an OSError, consider that the entry is not | ||
# a directory, same behaviour than os.path.isdir(). | ||
jonburdo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
is_dir = False | ||
|
||
if not topdown and is_dir: | ||
# Bottom-up: recurse into sub-directory, but exclude symlinks to | ||
# directories if followlinks is False | ||
if followlinks: | ||
walk_into = True | ||
if is_dir: | ||
dirs.append(entry.name) | ||
else: | ||
try: | ||
is_symlink = entry.is_symlink() | ||
except OSError: | ||
# If is_symlink() raises an OSError, consider that the | ||
# entry is not a symbolic link, same behaviour than | ||
# os.path.islink(). | ||
is_symlink = False | ||
walk_into = not is_symlink | ||
|
||
if walk_into: | ||
walk_dirs.append(entry.path) | ||
|
||
# Yield before recursion if going top down | ||
if topdown: | ||
yield top, dirs, nondirs | ||
|
||
# Recurse into sub-directories | ||
islink, join = path.islink, path.join | ||
for dirname in dirs: | ||
new_path = join(top, dirname) | ||
# Issue #23605: os.path.islink() is used instead of caching | ||
# entry.is_symlink() result during the loop on os.scandir() because | ||
# the caller can replace the directory entry during the "yield" | ||
# above. | ||
if followlinks or not islink(new_path): | ||
yield from _walk(new_path, topdown, onerror, followlinks) | ||
else: | ||
# Recurse into sub-directories | ||
for new_path in walk_dirs: | ||
yield from _walk(new_path, topdown, onerror, followlinks) | ||
# Yield after recursion if going bottom up | ||
yield top, dirs, nondirs | ||
nondirs.append(entry.name) | ||
|
||
if not topdown and is_dir: | ||
# Bottom-up: traverse into sub-directory, but exclude symlinks to | ||
# directories if followlinks is False | ||
if followlinks: | ||
walk_into = True | ||
else: | ||
try: | ||
is_symlink = entry.is_symlink() | ||
except OSError: | ||
# If is_symlink() raises an OSError, consider that the | ||
# entry is not a symbolic link, same behaviour than | ||
# os.path.islink(). | ||
is_symlink = False | ||
walk_into = not is_symlink | ||
|
||
if walk_into: | ||
walk_dirs.append(entry.path) | ||
if cont: | ||
continue | ||
|
||
# Yield before sub-directory traversal if going top down | ||
if topdown: | ||
yield top, dirs, nondirs | ||
# Traverse into sub-directories | ||
islink, join = path.islink, path.join | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we're caching these functions in locals, might as well do it outside the main while loop to micro-optimize a bit more. |
||
for dirname in reversed(dirs): | ||
new_path = join(top, dirname) | ||
# Issue #23605: os.path.islink() is used instead of caching | ||
jonburdo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# entry.is_symlink() result during the loop on os.scandir() because | ||
# the caller can replace the directory entry during the "yield" | ||
# above. | ||
if followlinks or not islink(new_path): | ||
stack.append((False, new_path)) | ||
else: | ||
# Yield after sub-directory traversal if going bottom up | ||
stack.append((True, (top, dirs, nondirs))) | ||
# Traverse into sub-directories | ||
for new_path in reversed(walk_dirs): | ||
stack.append((False, new_path)) | ||
JelleZijlstra marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
__all__.append("walk") | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2178,19 +2178,23 @@ def check_disallow_instantiation(testcase, tp, *args, **kwds): | |
testcase.assertRaisesRegex(TypeError, msg, tp, *args, **kwds) | ||
|
||
@contextlib.contextmanager | ||
def temp_recursion_limit(limit): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given that it's a contextmanager, I believe the better name would be "set_recursion_limit" similar to "redirect_stdout" because the nature of a contextmanager implies that it's temporary. |
||
"""Temporarily change the recursion limit.""" | ||
original_limit = sys.getrecursionlimit() | ||
try: | ||
sys.setrecursionlimit(limit) | ||
yield | ||
finally: | ||
sys.setrecursionlimit(original_limit) | ||
|
||
def infinite_recursion(max_depth=75): | ||
"""Set a lower limit for tests that interact with infinite recursions | ||
(e.g test_ast.ASTHelpers_Test.test_recursion_direct) since on some | ||
debug windows builds, due to not enough functions being inlined the | ||
stack size might not handle the default recursion limit (1000). See | ||
bpo-11105 for details.""" | ||
return temp_recursion_limit(max_depth) | ||
|
||
original_depth = sys.getrecursionlimit() | ||
try: | ||
sys.setrecursionlimit(max_depth) | ||
yield | ||
finally: | ||
sys.setrecursionlimit(original_depth) | ||
|
||
def ignore_deprecations_from(module: str, *, like: str) -> object: | ||
token = object() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,6 +33,7 @@ | |
from test.support import import_helper | ||
from test.support import os_helper | ||
from test.support import socket_helper | ||
from test.support import temp_recursion_limit | ||
from test.support import warnings_helper | ||
from platform import win32_is_iot | ||
|
||
|
@@ -1471,6 +1472,12 @@ def test_walk_many_open_files(self): | |
self.assertEqual(next(it), expected) | ||
p = os.path.join(p, 'd') | ||
|
||
def test_walk_above_recursion_limit(self): | ||
os.makedirs(os.path.join(self.walk_path, *(['d'] * 50))) | ||
with temp_recursion_limit(50): | ||
all = list(self.walk(self.walk_path)) | ||
self.assertEqual(len(all), 54) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe also test that the return value is correct? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, good idea |
||
|
||
|
||
@unittest.skipUnless(hasattr(os, 'fwalk'), "Test needs os.fwalk()") | ||
class FwalkTests(WalkTests): | ||
|
@@ -1545,6 +1552,8 @@ def test_fd_leak(self): | |
|
||
# fwalk() keeps file descriptors open | ||
test_walk_many_open_files = None | ||
# fwalk() still uses recursion | ||
test_walk_above_recursion_limit = None | ||
|
||
|
||
class BytesWalkTests(WalkTests): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Fix issue with :func:`os.walk` where a :exc:`RecursionError` would occur on | ||
deep directory structures by adjusting the implementation of | ||
:func:`os.walk` to be iterative instead of recursive. |
Uh oh!
There was an error while loading. Please reload this page.