12
12
"""
13
13
14
14
import functools
15
+ import operator
15
16
from errno import ENOENT , ENOTDIR , EBADF , ELOOP , EINVAL
16
17
from stat import S_ISDIR , S_ISLNK , S_ISREG , S_ISSOCK , S_ISBLK , S_ISCHR , S_ISFIFO
17
18
19
+ from . import _glob
20
+
18
21
#
19
22
# Internals
20
23
#
@@ -44,105 +47,21 @@ def _is_case_sensitive(parser):
44
47
# Globbing helpers
45
48
#
46
49
47
- re = glob = None
48
-
49
-
50
- @functools .lru_cache (maxsize = 512 )
51
- def _compile_pattern (pat , sep , case_sensitive , recursive = True ):
52
- """Compile given glob pattern to a re.Pattern object (observing case
53
- sensitivity)."""
54
- global re , glob
55
- if re is None :
56
- import re , glob
57
-
58
- flags = re .NOFLAG if case_sensitive else re .IGNORECASE
59
- regex = glob .translate (pat , recursive = recursive , include_hidden = True , seps = sep )
60
- return re .compile (regex , flags = flags ).match
61
-
62
-
63
- def _select_special (paths , part ):
64
- """Yield special literal children of the given paths."""
65
- for path in paths :
66
- yield path ._make_child_relpath (part )
67
50
51
class Globber(_glob.Globber):
    """Globber whose filesystem hooks delegate to methods of the object
    they are given.

    Each hook is an ``operator.methodcaller``; calling it with an object
    invokes the named method on that object.
    """
    # lstat(obj) calls obj.lstat()
    lstat = operator.methodcaller('lstat')
    # scandir(obj) calls obj._scandir()
    scandir = operator.methodcaller('_scandir')
    # add_slash(obj) calls obj.joinpath('')
    add_slash = operator.methodcaller('joinpath', '')
68
55
69
- def _select_children (parent_paths , dir_only , match ):
70
- """Yield direct children of given paths, filtering by name and type."""
71
- for parent_path in parent_paths :
72
- try :
73
- # We must close the scandir() object before proceeding to
74
- # avoid exhausting file descriptors when globbing deep trees.
75
- with parent_path ._scandir () as scandir_it :
76
- entries = list (scandir_it )
77
- except OSError :
78
- pass
79
- else :
80
- for entry in entries :
81
- if dir_only :
82
- try :
83
- if not entry .is_dir ():
84
- continue
85
- except OSError :
86
- continue
87
- # Avoid cost of making a path object for non-matching paths by
88
- # matching against the os.DirEntry.name string.
89
- if match is None or match (entry .name ):
90
- yield parent_path ._make_child_direntry (entry )
91
-
56
def concat_path(self, path, text):
    """Return a new path made by appending text to the given path.

    The text is added to the path's raw string by plain concatenation and
    the result is built with ``path.with_segments()``.
    """
    combined = path._raw_path + text
    return path.with_segments(combined)
92
60
93
- def _select_recursive (parent_paths , dir_only , follow_symlinks , match ):
94
- """Yield given paths and all their children, recursively, filtering by
95
- string and type.
96
- """
97
- for parent_path in parent_paths :
98
- if match is not None :
99
- # If we're filtering paths through a regex, record the length of
100
- # the parent path. We'll pass it to match(path, pos=...) later.
101
- parent_len = len (str (parent_path ._make_child_relpath ('_' ))) - 1
102
- paths = [parent_path ._make_child_relpath ('' )]
103
- while paths :
104
- path = paths .pop ()
105
- if match is None or match (str (path ), parent_len ):
106
- # Yield *directory* path that matches pattern (if any).
107
- yield path
108
- try :
109
- # We must close the scandir() object before proceeding to
110
- # avoid exhausting file descriptors when globbing deep trees.
111
- with path ._scandir () as scandir_it :
112
- entries = list (scandir_it )
113
- except OSError :
114
- pass
115
- else :
116
- for entry in entries :
117
- # Handle directory entry.
118
- try :
119
- if entry .is_dir (follow_symlinks = follow_symlinks ):
120
- # Recurse into this directory.
121
- paths .append (path ._make_child_direntry (entry ))
122
- continue
123
- except OSError :
124
- pass
125
-
126
- # Handle file entry.
127
- if not dir_only :
128
- # Avoid cost of making a path object for non-matching
129
- # files by matching against the os.DirEntry object.
130
- if match is None or match (path ._direntry_str (entry ), parent_len ):
131
- # Yield *file* path that matches pattern (if any).
132
- yield path ._make_child_direntry (entry )
133
-
134
-
135
- def _select_unique (paths ):
136
- """Yields the given paths, filtering out duplicates."""
137
- yielded = set ()
138
- try :
139
- for path in paths :
140
- path_str = str (path )
141
- if path_str not in yielded :
142
- yield path
143
- yielded .add (path_str )
144
- finally :
145
- yielded .clear ()
61
def parse_entry(self, entry):
    """Return the path for an entry yielded from scandir().

    The entry is handed back unchanged.
    """
    return entry
146
65
147
66
148
67
class UnsupportedOperation (NotImplementedError ):
@@ -218,6 +137,7 @@ class PurePathBase:
218
137
'_resolving' ,
219
138
)
220
139
parser = ParserBase ()
140
+ _globber = Globber
221
141
222
142
def __init__ (self , path , * paths ):
223
143
self ._raw_path = self .parser .join (path , * paths ) if paths else path
@@ -454,14 +374,6 @@ def is_absolute(self):
454
374
a drive)."""
455
375
return self .parser .isabs (self ._raw_path )
456
376
457
- @property
458
- def _pattern_stack (self ):
459
- """Stack of path components, to be used with patterns in glob()."""
460
- anchor , parts = self ._stack
461
- if anchor :
462
- raise NotImplementedError ("Non-relative patterns are unsupported" )
463
- return parts
464
-
465
377
@property
466
378
def _pattern_str (self ):
467
379
"""The path expressed as a string, for use in pattern-matching."""
@@ -487,8 +399,9 @@ def match(self, path_pattern, *, case_sensitive=None):
487
399
return False
488
400
if len (path_parts ) > len (pattern_parts ) and path_pattern .anchor :
489
401
return False
402
+ globber = self ._globber (sep , case_sensitive )
490
403
for path_part , pattern_part in zip (path_parts , pattern_parts ):
491
- match = _compile_pattern (pattern_part , sep , case_sensitive , recursive = False )
404
+ match = globber . compile (pattern_part )
492
405
if match (path_part ) is None :
493
406
return False
494
407
return True
@@ -502,7 +415,8 @@ def full_match(self, pattern, *, case_sensitive=None):
502
415
pattern = self .with_segments (pattern )
503
416
if case_sensitive is None :
504
417
case_sensitive = _is_case_sensitive (self .parser )
505
- match = _compile_pattern (pattern ._pattern_str , pattern .parser .sep , case_sensitive )
418
+ globber = self ._globber (pattern .parser .sep , case_sensitive , recursive = True )
419
+ match = globber .compile (pattern ._pattern_str )
506
420
return match (self ._pattern_str ) is not None
507
421
508
422
@@ -772,11 +686,6 @@ def _scandir(self):
772
686
from contextlib import nullcontext
773
687
return nullcontext (self .iterdir ())
774
688
775
- def _direntry_str (self , entry ):
776
- # Transform an entry yielded from _scandir() into a path string.
777
- # PathBase._scandir() yields PathBase objects, so use str().
778
- return str (entry )
779
-
780
689
def _make_child_direntry (self , entry ):
781
690
# Transform an entry yielded from _scandir() into a path object.
782
691
# PathBase._scandir() yields PathBase objects, so this is a no-op.
@@ -785,62 +694,26 @@ def _make_child_direntry(self, entry):
785
694
def _make_child_relpath (self , name ):
786
695
return self .joinpath (name )
787
696
697
+ def _glob_selector (self , parts , case_sensitive , recurse_symlinks ):
698
+ if not self .is_dir ():
699
+ return iter ([])
700
+ if case_sensitive is None :
701
+ case_sensitive = _is_case_sensitive (self .parser )
702
+ recursive = True if recurse_symlinks else _glob .no_recurse_symlinks
703
+ globber = self ._globber (self .parser .sep , case_sensitive , recursive )
704
+ return globber .selector (parts )
705
+
788
706
def glob (self , pattern , * , case_sensitive = None , recurse_symlinks = True ):
789
707
"""Iterate over this subtree and yield all existing files (of any
790
708
kind, including directories) matching the given relative pattern.
791
709
"""
792
710
if not isinstance (pattern , PurePathBase ):
793
711
pattern = self .with_segments (pattern )
794
- if case_sensitive is None :
795
- # TODO: evaluate case-sensitivity of each directory in _select_children().
796
- case_sensitive = _is_case_sensitive (self .parser )
797
-
798
- stack = pattern ._pattern_stack
799
- specials = ('' , '.' , '..' )
800
- deduplicate_paths = False
801
- sep = self .parser .sep
802
- paths = iter ([self ] if self .is_dir () else [])
803
- while stack :
804
- part = stack .pop ()
805
- if part in specials :
806
- # Join special component (e.g. '..') onto paths.
807
- paths = _select_special (paths , part )
808
-
809
- elif part == '**' :
810
- # Consume following '**' components, which have no effect.
811
- while stack and stack [- 1 ] == '**' :
812
- stack .pop ()
813
-
814
- # Consume following non-special components, provided we're
815
- # treating symlinks consistently. Each component is joined
816
- # onto 'part', which is used to generate an re.Pattern object.
817
- if recurse_symlinks :
818
- while stack and stack [- 1 ] not in specials :
819
- part += sep + stack .pop ()
820
-
821
- # If the previous loop consumed pattern components, compile an
822
- # re.Pattern object based on those components.
823
- match = _compile_pattern (part , sep , case_sensitive ) if part != '**' else None
824
-
825
- # Recursively walk directories, filtering by type and regex.
826
- paths = _select_recursive (paths , bool (stack ), recurse_symlinks , match )
827
-
828
- # De-duplicate if we've already seen a '**' component.
829
- if deduplicate_paths :
830
- paths = _select_unique (paths )
831
- deduplicate_paths = True
832
-
833
- elif '**' in part :
834
- raise ValueError ("Invalid pattern: '**' can only be an entire path component" )
835
-
836
- else :
837
- # If the pattern component isn't '*', compile an re.Pattern
838
- # object based on the component.
839
- match = _compile_pattern (part , sep , case_sensitive ) if part != '*' else None
840
-
841
- # Iterate over directories' children filtering by type and regex.
842
- paths = _select_children (paths , bool (stack ), match )
843
- return paths
712
+ anchor , parts = pattern ._stack
713
+ if anchor :
714
+ raise NotImplementedError ("Non-relative patterns are unsupported" )
715
+ select = self ._glob_selector (parts , case_sensitive , recurse_symlinks )
716
+ return select (self , exists = True )
844
717
845
718
def rglob (self , pattern , * , case_sensitive = None , recurse_symlinks = True ):
846
719
"""Recursively yield all existing files (of any kind, including
0 commit comments