12
12
"""
13
13
14
14
import functools
15
+ import glob
16
+ import operator
15
17
from errno import ENOENT , ENOTDIR , EBADF , ELOOP , EINVAL
16
18
from stat import S_ISDIR , S_ISLNK , S_ISREG , S_ISSOCK , S_ISBLK , S_ISCHR , S_ISFIFO
17
19
@@ -40,109 +42,23 @@ def _ignore_error(exception):
40
42
def _is_case_sensitive (parser ):
41
43
return parser .normcase ('Aa' ) == 'Aa'
42
44
43
- #
44
- # Globbing helpers
45
- #
46
-
47
- re = glob = None
48
-
49
-
50
- @functools .lru_cache (maxsize = 512 )
51
- def _compile_pattern (pat , sep , case_sensitive , recursive = True ):
52
- """Compile given glob pattern to a re.Pattern object (observing case
53
- sensitivity)."""
54
- global re , glob
55
- if re is None :
56
- import re , glob
57
-
58
- flags = re .NOFLAG if case_sensitive else re .IGNORECASE
59
- regex = glob .translate (pat , recursive = recursive , include_hidden = True , seps = sep )
60
- return re .compile (regex , flags = flags ).match
61
45
46
+ class Globber (glob ._Globber ):
47
+ lstat = operator .methodcaller ('lstat' )
48
+ scandir = operator .methodcaller ('_scandir' )
49
+ add_slash = operator .methodcaller ('joinpath' , '' )
62
50
63
- def _select_special (paths , part ):
64
- """Yield special literal children of the given paths."""
65
- for path in paths :
66
- yield path ._make_child_relpath (part )
67
-
68
-
69
- def _select_children (parent_paths , dir_only , match ):
70
- """Yield direct children of given paths, filtering by name and type."""
71
- for parent_path in parent_paths :
72
- try :
73
- # We must close the scandir() object before proceeding to
74
- # avoid exhausting file descriptors when globbing deep trees.
75
- with parent_path ._scandir () as scandir_it :
76
- entries = list (scandir_it )
77
- except OSError :
78
- pass
79
- else :
80
- for entry in entries :
81
- if dir_only :
82
- try :
83
- if not entry .is_dir ():
84
- continue
85
- except OSError :
86
- continue
87
- # Avoid cost of making a path object for non-matching paths by
88
- # matching against the os.DirEntry.name string.
89
- if match is None or match (entry .name ):
90
- yield parent_path ._make_child_direntry (entry )
91
-
51
+ @staticmethod
52
+ def concat_path (path , text ):
53
+ """Appends text to the given path.
54
+ """
55
+ return path .with_segments (path ._raw_path + text )
92
56
93
- def _select_recursive (parent_paths , dir_only , follow_symlinks , match ):
94
- """Yield given paths and all their children, recursively, filtering by
95
- string and type.
96
- """
97
- for parent_path in parent_paths :
98
- if match is not None :
99
- # If we're filtering paths through a regex, record the length of
100
- # the parent path. We'll pass it to match(path, pos=...) later.
101
- parent_len = len (str (parent_path ._make_child_relpath ('_' ))) - 1
102
- paths = [parent_path ._make_child_relpath ('' )]
103
- while paths :
104
- path = paths .pop ()
105
- if match is None or match (str (path ), parent_len ):
106
- # Yield *directory* path that matches pattern (if any).
107
- yield path
108
- try :
109
- # We must close the scandir() object before proceeding to
110
- # avoid exhausting file descriptors when globbing deep trees.
111
- with path ._scandir () as scandir_it :
112
- entries = list (scandir_it )
113
- except OSError :
114
- pass
115
- else :
116
- for entry in entries :
117
- # Handle directory entry.
118
- try :
119
- if entry .is_dir (follow_symlinks = follow_symlinks ):
120
- # Recurse into this directory.
121
- paths .append (path ._make_child_direntry (entry ))
122
- continue
123
- except OSError :
124
- pass
125
-
126
- # Handle file entry.
127
- if not dir_only :
128
- # Avoid cost of making a path object for non-matching
129
- # files by matching against the os.DirEntry object.
130
- if match is None or match (path ._direntry_str (entry ), parent_len ):
131
- # Yield *file* path that matches pattern (if any).
132
- yield path ._make_child_direntry (entry )
133
-
134
-
135
- def _select_unique (paths ):
136
- """Yields the given paths, filtering out duplicates."""
137
- yielded = set ()
138
- try :
139
- for path in paths :
140
- path_str = str (path )
141
- if path_str not in yielded :
142
- yield path
143
- yielded .add (path_str )
144
- finally :
145
- yielded .clear ()
57
+ @staticmethod
58
+ def parse_entry (entry ):
59
+ """Returns the path of an entry yielded from scandir().
60
+ """
61
+ return entry
146
62
147
63
148
64
class UnsupportedOperation (NotImplementedError ):
@@ -218,6 +134,7 @@ class PurePathBase:
218
134
'_resolving' ,
219
135
)
220
136
parser = ParserBase ()
137
+ _globber = Globber
221
138
222
139
def __init__ (self , path , * paths ):
223
140
self ._raw_path = self .parser .join (path , * paths ) if paths else path
@@ -454,14 +371,6 @@ def is_absolute(self):
454
371
a drive)."""
455
372
return self .parser .isabs (self ._raw_path )
456
373
457
- @property
458
- def _pattern_stack (self ):
459
- """Stack of path components, to be used with patterns in glob()."""
460
- anchor , parts = self ._stack
461
- if anchor :
462
- raise NotImplementedError ("Non-relative patterns are unsupported" )
463
- return parts
464
-
465
374
@property
466
375
def _pattern_str (self ):
467
376
"""The path expressed as a string, for use in pattern-matching."""
@@ -487,8 +396,9 @@ def match(self, path_pattern, *, case_sensitive=None):
487
396
return False
488
397
if len (path_parts ) > len (pattern_parts ) and path_pattern .anchor :
489
398
return False
399
+ globber = self ._globber (sep , case_sensitive )
490
400
for path_part , pattern_part in zip (path_parts , pattern_parts ):
491
- match = _compile_pattern (pattern_part , sep , case_sensitive , recursive = False )
401
+ match = globber . compile (pattern_part )
492
402
if match (path_part ) is None :
493
403
return False
494
404
return True
@@ -502,7 +412,8 @@ def full_match(self, pattern, *, case_sensitive=None):
502
412
pattern = self .with_segments (pattern )
503
413
if case_sensitive is None :
504
414
case_sensitive = _is_case_sensitive (self .parser )
505
- match = _compile_pattern (pattern ._pattern_str , pattern .parser .sep , case_sensitive )
415
+ globber = self ._globber (pattern .parser .sep , case_sensitive , recursive = True )
416
+ match = globber .compile (pattern ._pattern_str )
506
417
return match (self ._pattern_str ) is not None
507
418
508
419
@@ -772,11 +683,6 @@ def _scandir(self):
772
683
from contextlib import nullcontext
773
684
return nullcontext (self .iterdir ())
774
685
775
- def _direntry_str (self , entry ):
776
- # Transform an entry yielded from _scandir() into a path string.
777
- # PathBase._scandir() yields PathBase objects, so use str().
778
- return str (entry )
779
-
780
686
def _make_child_direntry (self , entry ):
781
687
# Transform an entry yielded from _scandir() into a path object.
782
688
# PathBase._scandir() yields PathBase objects, so this is a no-op.
@@ -785,62 +691,26 @@ def _make_child_direntry(self, entry):
785
691
def _make_child_relpath (self , name ):
786
692
return self .joinpath (name )
787
693
694
+ def _glob_selector (self , parts , case_sensitive , recurse_symlinks ):
695
+ if case_sensitive is None :
696
+ case_sensitive = _is_case_sensitive (self .parser )
697
+ recursive = True if recurse_symlinks else glob ._no_recurse_symlinks
698
+ globber = self ._globber (self .parser .sep , case_sensitive , recursive )
699
+ return globber .selector (parts )
700
+
788
701
def glob (self , pattern , * , case_sensitive = None , recurse_symlinks = True ):
789
702
"""Iterate over this subtree and yield all existing files (of any
790
703
kind, including directories) matching the given relative pattern.
791
704
"""
792
705
if not isinstance (pattern , PurePathBase ):
793
706
pattern = self .with_segments (pattern )
794
- if case_sensitive is None :
795
- # TODO: evaluate case-sensitivity of each directory in _select_children().
796
- case_sensitive = _is_case_sensitive (self .parser )
797
-
798
- stack = pattern ._pattern_stack
799
- specials = ('' , '.' , '..' )
800
- deduplicate_paths = False
801
- sep = self .parser .sep
802
- paths = iter ([self ] if self .is_dir () else [])
803
- while stack :
804
- part = stack .pop ()
805
- if part in specials :
806
- # Join special component (e.g. '..') onto paths.
807
- paths = _select_special (paths , part )
808
-
809
- elif part == '**' :
810
- # Consume following '**' components, which have no effect.
811
- while stack and stack [- 1 ] == '**' :
812
- stack .pop ()
813
-
814
- # Consume following non-special components, provided we're
815
- # treating symlinks consistently. Each component is joined
816
- # onto 'part', which is used to generate an re.Pattern object.
817
- if recurse_symlinks :
818
- while stack and stack [- 1 ] not in specials :
819
- part += sep + stack .pop ()
820
-
821
- # If the previous loop consumed pattern components, compile an
822
- # re.Pattern object based on those components.
823
- match = _compile_pattern (part , sep , case_sensitive ) if part != '**' else None
824
-
825
- # Recursively walk directories, filtering by type and regex.
826
- paths = _select_recursive (paths , bool (stack ), recurse_symlinks , match )
827
-
828
- # De-duplicate if we've already seen a '**' component.
829
- if deduplicate_paths :
830
- paths = _select_unique (paths )
831
- deduplicate_paths = True
832
-
833
- elif '**' in part :
834
- raise ValueError ("Invalid pattern: '**' can only be an entire path component" )
835
-
836
- else :
837
- # If the pattern component isn't '*', compile an re.Pattern
838
- # object based on the component.
839
- match = _compile_pattern (part , sep , case_sensitive ) if part != '*' else None
840
-
841
- # Iterate over directories' children filtering by type and regex.
842
- paths = _select_children (paths , bool (stack ), match )
843
- return paths
707
+ anchor , parts = pattern ._stack
708
+ if anchor :
709
+ raise NotImplementedError ("Non-relative patterns are unsupported" )
710
+ if not self .is_dir ():
711
+ return iter ([])
712
+ select = self ._glob_selector (parts , case_sensitive , recurse_symlinks )
713
+ return select (self , exists = True )
844
714
845
715
def rglob (self , pattern , * , case_sensitive = None , recurse_symlinks = True ):
846
716
"""Recursively yield all existing files (of any kind, including
0 commit comments