Skip to content

Commit 225c3cd

Browse files
[3.13] GH-89727: Fix shutil.rmtree() recursion error on deep trees (GH-119808) (#119918)
Implement `shutil._rmtree_safe_fd()` using a list as a stack to avoid emitting recursion errors on deeply nested trees. `shutil._rmtree_unsafe()` was fixed in a150679. (cherry picked from commit 53b1981) Co-authored-by: Barney Gale <barney.gale@gmail.com>
1 parent 99d0f51 commit 225c3cd

File tree

3 files changed

+68
-97
lines changed

3 files changed

+68
-97
lines changed

Lib/shutil.py

+66-96
Original file line numberDiff line numberDiff line change
@@ -635,81 +635,76 @@ def onerror(err):
635635
onexc(os.rmdir, path, err)
636636

637637
# Version using fd-based APIs to protect against races
638-
def _rmtree_safe_fd(topfd, path, onexc):
638+
def _rmtree_safe_fd(stack, onexc):
639+
# Each stack item has four elements:
640+
# * func: The first operation to perform: os.lstat, os.close or os.rmdir.
641+
# Walking a directory starts with an os.lstat() to detect symlinks; in
642+
# this case, func is updated before subsequent operations and passed to
643+
# onexc() if an error occurs.
644+
# * dirfd: Open file descriptor, or None if we're processing the top-level
645+
# directory given to rmtree() and the user didn't supply dir_fd.
646+
# * path: Path of file to operate upon. This is passed to onexc() if an
647+
# error occurs.
648+
# * orig_entry: os.DirEntry, or None if we're processing the top-level
649+
# directory given to rmtree(). We used the cached stat() of the entry to
650+
# save a call to os.lstat() when walking subdirectories.
651+
func, dirfd, path, orig_entry = stack.pop()
652+
name = path if orig_entry is None else orig_entry.name
639653
try:
654+
if func is os.close:
655+
os.close(dirfd)
656+
return
657+
if func is os.rmdir:
658+
os.rmdir(name, dir_fd=dirfd)
659+
return
660+
661+
# Note: To guard against symlink races, we use the standard
662+
# lstat()/open()/fstat() trick.
663+
assert func is os.lstat
664+
if orig_entry is None:
665+
orig_st = os.lstat(name, dir_fd=dirfd)
666+
else:
667+
orig_st = orig_entry.stat(follow_symlinks=False)
668+
669+
func = os.open # For error reporting.
670+
topfd = os.open(name, os.O_RDONLY | os.O_NONBLOCK, dir_fd=dirfd)
671+
672+
func = os.path.islink # For error reporting.
673+
try:
674+
if not os.path.samestat(orig_st, os.fstat(topfd)):
675+
# Symlinks to directories are forbidden, see GH-46010.
676+
raise OSError("Cannot call rmtree on a symbolic link")
677+
stack.append((os.rmdir, dirfd, path, orig_entry))
678+
finally:
679+
stack.append((os.close, topfd, path, orig_entry))
680+
681+
func = os.scandir # For error reporting.
640682
with os.scandir(topfd) as scandir_it:
641683
entries = list(scandir_it)
642-
except FileNotFoundError:
643-
return
644-
except OSError as err:
645-
err.filename = path
646-
onexc(os.scandir, path, err)
647-
return
648-
for entry in entries:
649-
fullname = os.path.join(path, entry.name)
650-
try:
651-
is_dir = entry.is_dir(follow_symlinks=False)
652-
except FileNotFoundError:
653-
continue
654-
except OSError:
655-
is_dir = False
656-
else:
657-
if is_dir:
658-
try:
659-
orig_st = entry.stat(follow_symlinks=False)
660-
is_dir = stat.S_ISDIR(orig_st.st_mode)
661-
except FileNotFoundError:
662-
continue
663-
except OSError as err:
664-
onexc(os.lstat, fullname, err)
665-
continue
666-
if is_dir:
684+
for entry in entries:
685+
fullname = os.path.join(path, entry.name)
667686
try:
668-
dirfd = os.open(entry.name, os.O_RDONLY | os.O_NONBLOCK, dir_fd=topfd)
669-
dirfd_closed = False
687+
if entry.is_dir(follow_symlinks=False):
688+
# Traverse into sub-directory.
689+
stack.append((os.lstat, topfd, fullname, entry))
690+
continue
670691
except FileNotFoundError:
671692
continue
672-
except OSError as err:
673-
onexc(os.open, fullname, err)
674-
else:
675-
try:
676-
if os.path.samestat(orig_st, os.fstat(dirfd)):
677-
_rmtree_safe_fd(dirfd, fullname, onexc)
678-
try:
679-
os.close(dirfd)
680-
except OSError as err:
681-
# close() should not be retried after an error.
682-
dirfd_closed = True
683-
onexc(os.close, fullname, err)
684-
dirfd_closed = True
685-
try:
686-
os.rmdir(entry.name, dir_fd=topfd)
687-
except FileNotFoundError:
688-
continue
689-
except OSError as err:
690-
onexc(os.rmdir, fullname, err)
691-
else:
692-
try:
693-
# This can only happen if someone replaces
694-
# a directory with a symlink after the call to
695-
# os.scandir or stat.S_ISDIR above.
696-
raise OSError("Cannot call rmtree on a symbolic "
697-
"link")
698-
except OSError as err:
699-
onexc(os.path.islink, fullname, err)
700-
finally:
701-
if not dirfd_closed:
702-
try:
703-
os.close(dirfd)
704-
except OSError as err:
705-
onexc(os.close, fullname, err)
706-
else:
693+
except OSError:
694+
pass
707695
try:
708696
os.unlink(entry.name, dir_fd=topfd)
709697
except FileNotFoundError:
710698
continue
711699
except OSError as err:
712700
onexc(os.unlink, fullname, err)
701+
except FileNotFoundError as err:
702+
if orig_entry is None or func is os.close:
703+
err.filename = path
704+
onexc(func, path, err)
705+
except OSError as err:
706+
err.filename = path
707+
onexc(func, path, err)
713708

714709
_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
715710
os.supports_dir_fd and
@@ -762,41 +757,16 @@ def onexc(*args):
762757
# While the unsafe rmtree works fine on bytes, the fd based does not.
763758
if isinstance(path, bytes):
764759
path = os.fsdecode(path)
765-
# Note: To guard against symlink races, we use the standard
766-
# lstat()/open()/fstat() trick.
767-
try:
768-
orig_st = os.lstat(path, dir_fd=dir_fd)
769-
except OSError as err:
770-
onexc(os.lstat, path, err)
771-
return
760+
stack = [(os.lstat, dir_fd, path, None)]
772761
try:
773-
fd = os.open(path, os.O_RDONLY | os.O_NONBLOCK, dir_fd=dir_fd)
774-
fd_closed = False
775-
except OSError as err:
776-
onexc(os.open, path, err)
777-
return
778-
try:
779-
if os.path.samestat(orig_st, os.fstat(fd)):
780-
_rmtree_safe_fd(fd, path, onexc)
781-
try:
782-
os.close(fd)
783-
except OSError as err:
784-
# close() should not be retried after an error.
785-
fd_closed = True
786-
onexc(os.close, path, err)
787-
fd_closed = True
788-
try:
789-
os.rmdir(path, dir_fd=dir_fd)
790-
except OSError as err:
791-
onexc(os.rmdir, path, err)
792-
else:
793-
try:
794-
# symlinks to directories are forbidden, see bug #1669
795-
raise OSError("Cannot call rmtree on a symbolic link")
796-
except OSError as err:
797-
onexc(os.path.islink, path, err)
762+
while stack:
763+
_rmtree_safe_fd(stack, onexc)
798764
finally:
799-
if not fd_closed:
765+
# Close any file descriptors still on the stack.
766+
while stack:
767+
func, fd, path, entry = stack.pop()
768+
if func is not os.close:
769+
continue
800770
try:
801771
os.close(fd)
802772
except OSError as err:

Lib/test/test_shutil.py

-1
Original file line numberDiff line numberDiff line change
@@ -741,7 +741,6 @@ def _onexc(fn, path, exc):
741741
shutil.rmtree(TESTFN)
742742
raise
743743

744-
@unittest.skipIf(shutil._use_fd_functions, "fd-based functions remain unfixed (GH-89727)")
745744
def test_rmtree_above_recursion_limit(self):
746745
recursion_limit = 40
747746
# directory_depth > recursion_limit
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix issue with :func:`shutil.rmtree` where a :exc:`RecursionError` is raised
2+
on deep directory trees.

0 commit comments

Comments
 (0)