# End state of the "os.scantree" / "os.fscantree" patch series for Lib/os.py:
#   1. add os.walkdir and os.fwalkdir
#   2. rename walkdir to scantree (and fwalkdir to fscantree)
#   3. create os.DirEntry for top path argument in os.scantree
#   4. support os.scantree directory removal with generator.send
# Block indentation was reconstructed from syntax.  The code relies on names
# that are module globals of Lib/os.py: sys, st, path, scandir, stat, open,
# close, fspath, O_RDONLY, supports_dir_fd, supports_fd and __all__.


def _get_dir_entry(entry_path):
    """Return the os.DirEntry for *entry_path*.

    The parent directory of *entry_path* is scanned and the entry whose
    name equals the last path component is returned.  *entry_path* may be
    str, bytes or a path-like object.  For a bare relative name the parent
    is taken to be "." (so the returned entry's path is built from ".").

    Raises FileNotFoundError when the parent contains no such entry; any
    OSError raised by scandir() propagates to the caller.
    """
    # TODO: Create a single DirEntry properly
    # This is an ugly hack and won't work for fs root dir
    entry_path = fspath(entry_path)
    parent, name = path.split(entry_path)
    if not parent:
        # scandir("") fails, so scan the current directory explicitly.
        parent = b"." if isinstance(entry_path, bytes) else "."
    with scandir(parent) as scandir_it:
        for entry in scandir_it:
            if entry.name == name:
                return entry
    raise FileNotFoundError(f"No such file or directory: {entry_path}")


def scantree(top, top_down=True, on_error=None, follow_symlinks=False):
    """Directory tree generator that yields os.DirEntry objects.

    For every directory in the tree rooted at *top* (including *top*
    itself) a 3-tuple is yielded:

        dir_entry, dirs, nondirs

    *dir_entry* is the DirEntry of the directory, *dirs* the DirEntry
    objects of its sub-directories and *nondirs* the DirEntry objects of
    everything else.  An entry whose is_dir() check raises OSError is
    placed in *nondirs*.

    top_down        -- if true, a directory is yielded before its
                       sub-directories; otherwise after them.
    on_error        -- optional callable invoked with the OSError raised
                       while locating *top* or opening a directory with
                       scandir(); the directory is then skipped.
    follow_symlinks -- passed to DirEntry.is_dir() when classifying
                       entries.

    In top-down mode the *dirs* list handed out is a copy.  To restrict or
    reorder the walk, send() an iterable of directory names into the
    generator after receiving a tuple; only those names are visited, in
    that order.  Names that were not part of the scan are looked up with
    _get_dir_entry() (failures go to *on_error*).  The send() call itself
    returns None.
    """
    sys.audit("os.scantree", top, top_down, on_error, follow_symlinks)
    try:
        root_entry = _get_dir_entry(top)
    except OSError as error:
        if on_error is not None:
            on_error(error)
        return

    # Work stack: a DirEntry means "scan this directory"; a tuple is a
    # finished result waiting to be yielded (bottom-up mode only).
    pending = [root_entry]

    while pending:
        current = pending.pop()
        if isinstance(current, tuple):
            yield current
            continue

        try:
            scandir_it = scandir(current)
        except OSError as error:
            if on_error is not None:
                on_error(error)
            continue

        dirs = []
        nondirs = []
        with scandir_it:
            for entry in scandir_it:
                try:
                    is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
                except OSError:
                    is_dir = False
                (dirs if is_dir else nondirs).append(entry)

        # Reset on every directory: only a top-down yield can receive a
        # selection, and one must not leak into the next iteration.
        selected = None
        if top_down:
            selected = yield current, dirs.copy(), nondirs
        else:
            pending.append((current, dirs, nondirs))

        if selected is not None:
            scanned = {e.name: e for e in dirs}
            dirs = []
            for dirname in selected:
                try:
                    dirs.append(scanned[dirname])
                except KeyError:
                    try:
                        dirs.append(
                            _get_dir_entry(path.join(current.path, dirname)))
                    except OSError as error:
                        if on_error is not None:
                            on_error(error)
            # This value becomes the result of the caller's send(); the
            # next directory tuple is produced by the following next().
            yield None

        # Reversed so that the first directory is popped (visited) first.
        pending.extend(reversed(dirs))

__all__.append("scantree")

if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd:

    class _WalkAction:
        """Sentinels tagging the items on fscantree()'s work stack."""
        YIELD = object()   # value is a result tuple to yield
        CLOSE = object()   # value is a file descriptor to close
        WALK = object()    # value is (dirfd, dirpath) to scan

    def fscantree(top, top_down=True, on_error=None, *,
                  follow_symlinks=False, dir_fd=None):
        """Directory tree generator based on directory file descriptors.

        Behaves like scantree() but yields a 4-tuple

            dirpath, dirs, nondirs, dirfd

        where *dirpath* is the path of the directory (built by joining
        *top* with the names walked), *dirs* and *nondirs* are lists of
        os.DirEntry objects obtained from scandir(dirfd), and *dirfd* is
        the open descriptor of the directory.  Each descriptor is closed
        by the generator once the directory and its sub-directories have
        been processed, and any still open are closed when the generator
        finishes or is closed.

        top_down        -- if true, yield a directory before descending
                           into it (the yielded *dirs* list is the one
                           iterated afterwards); otherwise yield it after
                           its sub-directories.
        on_error        -- optional callable invoked with the OSError
                           raised while stat()ing or opening a
                           sub-directory; that sub-directory is skipped.
        follow_symlinks -- if false, a directory is only entered when the
                           stat taken before open() matches the stat of
                           the opened descriptor.
        dir_fd          -- if not None, *top* is interpreted relative to
                           this directory descriptor.
        """
        sys.audit("os.fscantree", top, top_down, on_error, follow_symlinks,
                  dir_fd)
        top = fspath(top)
        # Note: To guard against symlink races, we use the standard
        # lstat()/open()/fstat() trick.
        if not follow_symlinks:
            orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd)
        topfd = open(top, O_RDONLY, dir_fd=dir_fd)
        stack = [(_WalkAction.CLOSE, topfd)]
        try:
            if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and
                                    path.samestat(orig_st, stat(topfd)))):
                stack.append((_WalkAction.WALK, (topfd, top)))

            while stack:
                action, value = stack.pop()
                if action is _WalkAction.YIELD:
                    yield value
                    continue
                elif action is _WalkAction.CLOSE:
                    close(value)
                    continue
                elif action is _WalkAction.WALK:
                    topfd, top = value
                else:
                    raise AssertionError(f"invalid walk action: {action!r}")

                dirs = []
                nondirs = []
                # The context manager releases the iterator even when
                # iteration is interrupted by an exception.
                with scandir(topfd) as scandir_it:
                    for entry in scandir_it:
                        try:
                            if entry.is_dir(follow_symlinks=follow_symlinks):
                                dirs.append(entry)
                            else:
                                nondirs.append(entry)
                        except OSError:
                            try:
                                # Add dangling symlinks, ignore
                                # disappeared files
                                if entry.is_symlink():
                                    nondirs.append(entry)
                            except OSError:
                                pass

                if top_down:
                    # Yield top immediately, before walking subdirs
                    yield top, dirs, nondirs, topfd
                else:
                    # Yield top after walking subdirs
                    stack.append(
                        (_WalkAction.YIELD, (top, dirs, nondirs, topfd)))

                for name in reversed(dirs):
                    try:
                        if not follow_symlinks:
                            if top_down:
                                # The caller has seen (and may have edited)
                                # dirs, so take a fresh stat by name.
                                orig_st = stat(name, dir_fd=topfd,
                                               follow_symlinks=False)
                            else:
                                # dirs is untouched: use the DirEntry.
                                orig_st = name.stat(follow_symlinks=False)
                        dirfd = open(name, O_RDONLY, dir_fd=topfd)
                    except OSError as err:
                        if on_error is not None:
                            on_error(err)
                        continue
                    # Close dirfd right after all subdirs have been
                    # traversed.  Note that we use a stack, so actions
                    # appended first are executed last.
                    stack.append((_WalkAction.CLOSE, dirfd))
                    # Walk all subdirs
                    if follow_symlinks or path.samestat(orig_st,
                                                        stat(dirfd)):
                        dirpath = path.join(top, name)
                        stack.append((_WalkAction.WALK, (dirfd, dirpath)))
        finally:
            # Best-effort cleanup of descriptors still scheduled to close.
            for action, value in reversed(stack):
                if action is _WalkAction.CLOSE:
                    try:
                        close(value)
                    except OSError:
                        pass

    __all__.append("fscantree")