@@ -86,19 +86,29 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match):
86
86
continue
87
87
except OSError :
88
88
continue
89
- if match (entry .name ):
90
- yield parent_path ._make_child_entry (entry )
89
+ # Avoid cost of making a path object for non-matching paths by
90
+ # matching against the os.DirEntry.name string.
91
+ if match is None or match (entry .name ):
92
+ yield parent_path ._make_child_direntry (entry )
91
93
92
94
93
- def _select_recursive (parent_paths , dir_only , follow_symlinks ):
94
- """Yield given paths and all their subdirectories, recursively."""
95
+ def _select_recursive (parent_paths , dir_only , follow_symlinks , match ):
96
+ """Yield given paths and all their children, recursively, filtering by
97
+ string and type.
98
+ """
95
99
if follow_symlinks is None :
96
100
follow_symlinks = False
97
101
for parent_path in parent_paths :
102
+ if match is not None :
103
+ # If we're filtering paths through a regex, record the length of
104
+ # the parent path. We'll pass it to match(path, pos=...) later.
105
+ parent_len = len (str (parent_path ._make_child_relpath ('_' ))) - 1
98
106
paths = [parent_path ._make_child_relpath ('' )]
99
107
while paths :
100
108
path = paths .pop ()
101
- yield path
109
+ if match is None or match (str (path ), parent_len ):
110
+ # Yield *directory* path that matches pattern (if any).
111
+ yield path
102
112
try :
103
113
# We must close the scandir() object before proceeding to
104
114
# avoid exhausting file descriptors when globbing deep trees.
@@ -108,14 +118,22 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks):
108
118
pass
109
119
else :
110
120
for entry in entries :
121
+ # Handle directory entry.
111
122
try :
112
123
if entry .is_dir (follow_symlinks = follow_symlinks ):
113
- paths .append (path ._make_child_entry (entry ))
124
+ # Recurse into this directory.
125
+ paths .append (path ._make_child_direntry (entry ))
114
126
continue
115
127
except OSError :
116
128
pass
129
+
130
+ # Handle file entry.
117
131
if not dir_only :
118
- yield path ._make_child_entry (entry )
132
+ # Avoid cost of making a path object for non-matching
133
+ # files by matching against the os.DirEntry object.
134
+ if match is None or match (path ._direntry_str (entry ), parent_len ):
135
+ # Yield *file* path that matches pattern (if any).
136
+ yield path ._make_child_direntry (entry )
119
137
120
138
121
139
def _select_unique (paths ):
@@ -750,8 +768,14 @@ def _scandir(self):
750
768
from contextlib import nullcontext
751
769
return nullcontext (self .iterdir ())
752
770
753
- def _make_child_entry (self , entry ):
771
+ def _direntry_str (self , entry ):
772
+ # Transform an entry yielded from _scandir() into a path string.
773
+ # PathBase._scandir() yields PathBase objects, so use str().
774
+ return str (entry )
775
+
776
+ def _make_child_direntry (self , entry ):
754
777
# Transform an entry yielded from _scandir() into a path object.
778
+ # PathBase._scandir() yields PathBase objects, so this is a no-op.
755
779
return entry
756
780
757
781
def _make_child_relpath (self , name ):
@@ -769,43 +793,49 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
769
793
770
794
stack = pattern ._pattern_stack
771
795
specials = ('' , '.' , '..' )
772
- filter_paths = False
773
796
deduplicate_paths = False
774
797
sep = self .pathmod .sep
775
798
paths = iter ([self ] if self .is_dir () else [])
776
799
while stack :
777
800
part = stack .pop ()
778
801
if part in specials :
802
+ # Join special component (e.g. '..') onto paths.
779
803
paths = _select_special (paths , part )
804
+
780
805
elif part == '**' :
781
- # Consume adjacent '**' components.
806
+ # Consume following '**' components, which have no effect .
782
807
while stack and stack [- 1 ] == '**' :
783
808
stack .pop ()
784
809
785
- # Consume adjacent non-special components and enable post-walk
786
- # regex filtering, provided we're treating symlinks consistently.
810
+ # Consume following non-special components, provided we're
811
+ # treating symlinks consistently. Each component is joined
812
+ # onto 'part', which is used to generate an re.Pattern object.
787
813
if follow_symlinks is not None :
788
814
while stack and stack [- 1 ] not in specials :
789
- filter_paths = True
790
- stack .pop ()
815
+ part += sep + stack .pop ()
791
816
792
- dir_only = bool (stack )
793
- paths = _select_recursive (paths , dir_only , follow_symlinks )
817
+ # If the previous loop consumed pattern components, compile an
818
+ # re.Pattern object based on those components.
819
+ match = _compile_pattern (part , sep , case_sensitive ) if part != '**' else None
820
+
821
+ # Recursively walk directories, filtering by type and regex.
822
+ paths = _select_recursive (paths , bool (stack ), follow_symlinks , match )
823
+
824
+ # De-duplicate if we've already seen a '**' component.
794
825
if deduplicate_paths :
795
- # De-duplicate if we've already seen a '**' component.
796
826
paths = _select_unique (paths )
797
827
deduplicate_paths = True
828
+
798
829
elif '**' in part :
799
830
raise ValueError ("Invalid pattern: '**' can only be an entire path component" )
831
+
800
832
else :
801
- dir_only = bool (stack )
802
- match = _compile_pattern (part , sep , case_sensitive )
803
- paths = _select_children (paths , dir_only , follow_symlinks , match )
804
- if filter_paths :
805
- # Filter out paths that don't match pattern.
806
- prefix_len = len (str (self ._make_child_relpath ('_' ))) - 1
807
- match = _compile_pattern (pattern ._pattern_str , sep , case_sensitive )
808
- paths = (path for path in paths if match (path ._pattern_str , prefix_len ))
833
+ # If the pattern component isn't '*', compile an re.Pattern
834
+ # object based on the component.
835
+ match = _compile_pattern (part , sep , case_sensitive ) if part != '*' else None
836
+
837
+ # Iterate over directories' children filtering by type and regex.
838
+ paths = _select_children (paths , bool (stack ), follow_symlinks , match )
809
839
return paths
810
840
811
841
def rglob (self , pattern , * , case_sensitive = None , follow_symlinks = None ):
@@ -854,7 +884,7 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False):
854
884
855
885
if is_dir :
856
886
if not top_down :
857
- paths .append (path ._make_child_entry (entry ))
887
+ paths .append (path ._make_child_direntry (entry ))
858
888
dirnames .append (entry .name )
859
889
else :
860
890
filenames .append (entry .name )
0 commit comments