Skip to content

Commit 694c71e

Browse files
committed Nov 30, 2022
gh-89727: Fix os.walk RecursionError on deep trees
Use a stack to implement os.walk iteratively instead of recursively to avoid hitting recursion limits on deeply nested trees.
1 parent 8bb2303 commit 694c71e

File tree

2 files changed

+87
-74
lines changed

2 files changed

+87
-74
lines changed
 

‎Lib/os.py

+84 -74
Original file line number | Diff line number | Diff line change
@@ -343,86 +343,96 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
343343
return _walk(fspath(top), topdown, onerror, followlinks)
344344

345345
def _walk(top, topdown, onerror, followlinks):
346-
dirs = []
347-
nondirs = []
348-
walk_dirs = []
349-
350-
# We may not have read permission for top, in which case we can't
351-
# get a list of the files the directory contains. os.walk
352-
# always suppressed the exception then, rather than blow up for a
353-
# minor reason when (say) a thousand readable directories are still
354-
# left to visit. That logic is copied here.
355-
try:
356-
# Note that scandir is global in this module due
357-
# to earlier import-*.
358-
scandir_it = scandir(top)
359-
except OSError as error:
360-
if onerror is not None:
361-
onerror(error)
362-
return
346+
stack = [(False, top)]
347+
while stack:
348+
is_result, top = stack.pop()
349+
if is_result:
350+
yield top
351+
continue
363352

364-
with scandir_it:
365-
while True:
366-
try:
353+
dirs = []
354+
nondirs = []
355+
walk_dirs = []
356+
357+
# We may not have read permission for top, in which case we can't
358+
# get a list of the files the directory contains. os.walk
359+
# always suppressed the exception then, rather than blow up for a
360+
# minor reason when (say) a thousand readable directories are still
361+
# left to visit. That logic is copied here.
362+
try:
363+
# Note that scandir is global in this module due
364+
# to earlier import-*.
365+
scandir_it = scandir(top)
366+
except OSError as error:
367+
if onerror is not None:
368+
onerror(error)
369+
continue
370+
371+
cont = False
372+
with scandir_it:
373+
while True:
367374
try:
368-
entry = next(scandir_it)
369-
except StopIteration:
375+
try:
376+
entry = next(scandir_it)
377+
except StopIteration:
378+
break
379+
except OSError as error:
380+
if onerror is not None:
381+
onerror(error)
382+
cont = True
370383
break
371-
except OSError as error:
372-
if onerror is not None:
373-
onerror(error)
374-
return
375384

376-
try:
377-
is_dir = entry.is_dir()
378-
except OSError:
379-
# If is_dir() raises an OSError, consider that the entry is not
380-
# a directory, same behaviour than os.path.isdir().
381-
is_dir = False
382-
383-
if is_dir:
384-
dirs.append(entry.name)
385-
else:
386-
nondirs.append(entry.name)
385+
try:
386+
is_dir = entry.is_dir()
387+
except OSError:
388+
# If is_dir() raises an OSError, consider that the entry is not
389+
# a directory, same behaviour than os.path.isdir().
390+
is_dir = False
387391

388-
if not topdown and is_dir:
389-
# Bottom-up: recurse into sub-directory, but exclude symlinks to
390-
# directories if followlinks is False
391-
if followlinks:
392-
walk_into = True
392+
if is_dir:
393+
dirs.append(entry.name)
393394
else:
394-
try:
395-
is_symlink = entry.is_symlink()
396-
except OSError:
397-
# If is_symlink() raises an OSError, consider that the
398-
# entry is not a symbolic link, same behaviour than
399-
# os.path.islink().
400-
is_symlink = False
401-
walk_into = not is_symlink
402-
403-
if walk_into:
404-
walk_dirs.append(entry.path)
405-
406-
# Yield before recursion if going top down
407-
if topdown:
408-
yield top, dirs, nondirs
409-
410-
# Recurse into sub-directories
411-
islink, join = path.islink, path.join
412-
for dirname in dirs:
413-
new_path = join(top, dirname)
414-
# Issue #23605: os.path.islink() is used instead of caching
415-
# entry.is_symlink() result during the loop on os.scandir() because
416-
# the caller can replace the directory entry during the "yield"
417-
# above.
418-
if followlinks or not islink(new_path):
419-
yield from _walk(new_path, topdown, onerror, followlinks)
420-
else:
421-
# Recurse into sub-directories
422-
for new_path in walk_dirs:
423-
yield from _walk(new_path, topdown, onerror, followlinks)
424-
# Yield after recursion if going bottom up
425-
yield top, dirs, nondirs
395+
nondirs.append(entry.name)
396+
397+
if not topdown and is_dir:
398+
# Bottom-up: traverse into sub-directory, but exclude symlinks to
399+
# directories if followlinks is False
400+
if followlinks:
401+
walk_into = True
402+
else:
403+
try:
404+
is_symlink = entry.is_symlink()
405+
except OSError:
406+
# If is_symlink() raises an OSError, consider that the
407+
# entry is not a symbolic link, same behaviour than
408+
# os.path.islink().
409+
is_symlink = False
410+
walk_into = not is_symlink
411+
412+
if walk_into:
413+
walk_dirs.append(entry.path)
414+
if cont:
415+
continue
416+
417+
# Yield before sub-directory traversal if going top down
418+
if topdown:
419+
yield top, dirs, nondirs
420+
# Traverse into sub-directories
421+
islink, join = path.islink, path.join
422+
for dirname in reversed(dirs):
423+
new_path = join(top, dirname)
424+
# Issue #23605: os.path.islink() is used instead of caching
425+
# entry.is_symlink() result during the loop on os.scandir() because
426+
# the caller can replace the directory entry during the "yield"
427+
# above.
428+
if followlinks or not islink(new_path):
429+
stack.append((False, new_path))
430+
else:
431+
# Yield after sub-directory traversal if going bottom up
432+
stack.append((True, (top, dirs, nondirs)))
433+
# Traverse into sub-directories
434+
for new_path in reversed(walk_dirs):
435+
stack.append((False, new_path))
426436

427437
__all__.append("walk")
428438

Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
Fix issue with :func:`os.walk` where a :exc:`RecursionError` would occur on
2+
deep directory structures by adjusting the implementation of
3+
:func:`os._walk` to be iterative instead of recursive.

0 commit comments

Comments
 (0)