Skip to content

Commit 0cc71bd

Browse files
authored
GH-117586: Speed up pathlib.Path.walk() by working with strings (#117726)
Move `pathlib.Path.walk()` implementation into `glob._Globber`. The new `glob._Globber.walk()` classmethod works with strings internally, which is a little faster than generating `Path` objects and keeping them normalized. The `pathlib.Path.walk()` method converts the strings back to path objects. In the private pathlib ABCs, our existing subclass of `_Globber` ensures that `PathBase` instances are used throughout. Follow-up to #117589.
1 parent: 6258844; commit: 0cc71bd

File tree

4 files changed: 52 additions (+52) and 71 deletions (-71).

Lib/glob.py

+37
Original file line number | Diff line number | Diff line change
@@ -498,3 +498,40 @@ def select_exists(self, path, exists=False):
498498
yield path
499499
except OSError:
500500
pass
501+
502+
@classmethod
503+
def walk(cls, root, top_down, on_error, follow_symlinks):
504+
"""Walk the directory tree from the given root, similar to os.walk().
505+
"""
506+
paths = [root]
507+
while paths:
508+
path = paths.pop()
509+
if isinstance(path, tuple):
510+
yield path
511+
continue
512+
try:
513+
with cls.scandir(path) as scandir_it:
514+
dirnames = []
515+
filenames = []
516+
if not top_down:
517+
paths.append((path, dirnames, filenames))
518+
for entry in scandir_it:
519+
name = entry.name
520+
try:
521+
if entry.is_dir(follow_symlinks=follow_symlinks):
522+
if not top_down:
523+
paths.append(cls.parse_entry(entry))
524+
dirnames.append(name)
525+
else:
526+
filenames.append(name)
527+
except OSError:
528+
filenames.append(name)
529+
except OSError as error:
530+
if on_error is not None:
531+
on_error(error)
532+
else:
533+
if top_down:
534+
yield path, dirnames, filenames
535+
if dirnames:
536+
prefix = cls.add_slash(path)
537+
paths += [cls.concat_path(prefix, d) for d in reversed(dirnames)]

Lib/pathlib/__init__.py

+6 -14
Original file line number | Diff line number | Diff line change
@@ -586,18 +586,6 @@ def iterdir(self):
586586
"""
587587
return (self._make_child_relpath(name) for name in os.listdir(self))
588588

589-
def _scandir(self):
590-
return os.scandir(self)
591-
592-
def _make_child_direntry(self, entry):
593-
# Transform an entry yielded from _scandir() into a path object.
594-
path_str = entry.name if str(self) == '.' else entry.path
595-
path = self.with_segments(path_str)
596-
path._str = path_str
597-
path._drv = self.drive
598-
path._root = self.root
599-
path._tail_cached = self._tail + [entry.name]
600-
return path
601589

602590
def _make_child_relpath(self, name):
603591
if not name:
@@ -663,8 +651,12 @@ def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=False):
663651
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
664652
"""Walk the directory tree from this directory, similar to os.walk()."""
665653
sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks)
666-
return _abc.PathBase.walk(
667-
self, top_down=top_down, on_error=on_error, follow_symlinks=follow_symlinks)
654+
root_dir = str(self)
655+
results = self._globber.walk(root_dir, top_down, on_error, follow_symlinks)
656+
for path_str, dirnames, filenames in results:
657+
if root_dir == '.':
658+
path_str = path_str[2:]
659+
yield self._from_parsed_string(path_str), dirnames, filenames
668660

669661
def absolute(self):
670662
"""Return an absolute version of this path

Lib/pathlib/_abc.py

+8 -57
Original file line number | Diff line number | Diff line change
@@ -45,9 +45,15 @@ def _is_case_sensitive(parser):
4545

4646
class Globber(glob._Globber):
4747
lstat = operator.methodcaller('lstat')
48-
scandir = operator.methodcaller('_scandir')
4948
add_slash = operator.methodcaller('joinpath', '')
5049

50+
@staticmethod
51+
def scandir(path):
52+
# Emulate os.scandir(), which returns an object that can be used as a
53+
# context manager. This method is called by walk() and glob().
54+
from contextlib import nullcontext
55+
return nullcontext(path.iterdir())
56+
5157
@staticmethod
5258
def concat_path(path, text):
5359
"""Appends text to the given path.
@@ -677,20 +683,6 @@ def iterdir(self):
677683
"""
678684
raise UnsupportedOperation(self._unsupported_msg('iterdir()'))
679685

680-
def _scandir(self):
681-
# Emulate os.scandir(), which returns an object that can be used as a
682-
# context manager. This method is called by walk() and glob().
683-
from contextlib import nullcontext
684-
return nullcontext(self.iterdir())
685-
686-
def _make_child_direntry(self, entry):
687-
# Transform an entry yielded from _scandir() into a path object.
688-
# PathBase._scandir() yields PathBase objects, so this is a no-op.
689-
return entry
690-
691-
def _make_child_relpath(self, name):
692-
return self.joinpath(name)
693-
694686
def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
695687
if case_sensitive is None:
696688
case_sensitive = _is_case_sensitive(self.parser)
@@ -724,48 +716,7 @@ def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
724716

725717
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
726718
"""Walk the directory tree from this directory, similar to os.walk()."""
727-
paths = [self]
728-
729-
while paths:
730-
path = paths.pop()
731-
if isinstance(path, tuple):
732-
yield path
733-
continue
734-
735-
# We may not have read permission for self, in which case we can't
736-
# get a list of the files the directory contains. os.walk()
737-
# always suppressed the exception in that instance, rather than
738-
# blow up for a minor reason when (say) a thousand readable
739-
# directories are still left to visit. That logic is copied here.
740-
try:
741-
scandir_obj = path._scandir()
742-
except OSError as error:
743-
if on_error is not None:
744-
on_error(error)
745-
continue
746-
747-
with scandir_obj as scandir_it:
748-
dirnames = []
749-
filenames = []
750-
if not top_down:
751-
paths.append((path, dirnames, filenames))
752-
for entry in scandir_it:
753-
try:
754-
is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
755-
except OSError:
756-
# Carried over from os.path.isdir().
757-
is_dir = False
758-
759-
if is_dir:
760-
if not top_down:
761-
paths.append(path._make_child_direntry(entry))
762-
dirnames.append(entry.name)
763-
else:
764-
filenames.append(entry.name)
765-
766-
if top_down:
767-
yield path, dirnames, filenames
768-
paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
719+
return self._globber.walk(self, top_down, on_error, follow_symlinks)
769720

770721
def absolute(self):
771722
"""Return an absolute version of this path
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Speed up :meth:`pathlib.Path.walk` by working with strings internally.

0 commit comments

Comments
 (0)