@@ -54,13 +54,30 @@ def _ignore_error(exception):
54
54
getattr (exception , 'winerror' , None ) in _IGNORED_WINERRORS )
55
55
56
56
57
@functools.cache
def _is_case_sensitive(flavour):
    """Return True if *flavour* leaves letter case untouched when normalizing.

    A mixed-case probe string is passed through ``flavour.normcase()``; the
    flavour counts as case-sensitive when the probe comes back unchanged.
    The answer is memoized per flavour by ``functools.cache``.
    """
    # presumably *flavour* is a path module such as posixpath or ntpath;
    # all that is required here is a hashable object with normcase().
    probe = 'Aa'
    return flavour.normcase(probe) == probe
59
60
60
61
#
61
62
# Globbing helpers
62
63
#
63
64
65
+
66
+ # fnmatch.translate() returns a regular expression that includes a prefix and
67
+ # a suffix, which enable matching newlines and ensure the end of the string is
68
+ # matched, respectively. These features are undesirable for our implementation
69
+ # of PurePath.match(), which represents path separators as newlines and joins
70
+ # pattern segments together. As a workaround, we define a slice object that
71
+ # can remove the prefix and suffix from any translate() result. See the
72
+ # _compile_pattern_lines() function for more details.
73
# Literal text that fnmatch.translate() emits before and after the translated
# pattern, recovered by translating a one-character placeholder.
_FNMATCH_PREFIX, _FNMATCH_SUFFIX = fnmatch.translate('_').split('_')

# Slice that strips that prefix and suffix from any translate() result.
_FNMATCH_SLICE = slice(len(_FNMATCH_PREFIX), -len(_FNMATCH_SUFFIX))

# One translation table per supported separator; each table exchanges the
# separator character with '\n' (and '\n' with the separator).
_SWAP_SEP_AND_NEWLINE = {
    sep: str.maketrans({sep: '\n', '\n': sep})
    for sep in ('/', '\\')
}
79
+
80
+
64
81
@functools .lru_cache ()
65
82
def _make_selector (pattern_parts , flavour , case_sensitive ):
66
83
pat = pattern_parts [0 ]
@@ -92,6 +109,51 @@ def _compile_pattern(pat, case_sensitive):
92
109
return re .compile (fnmatch .translate (pat ), flags ).match
93
110
94
111
112
@functools.lru_cache()
def _compile_pattern_lines(pattern_lines, case_sensitive):
    r"""Compile the given pattern lines to an `re.Pattern` object.

    The *pattern_lines* argument is a glob-style pattern (e.g. '**/*.py') with
    its path separators and newlines swapped (e.g. '**\n*.py'). By using
    newlines to separate path components, and not setting `re.DOTALL`, we
    ensure that the `*` wildcard cannot match path separators.

    If *case_sensitive* is false, the pattern is compiled with
    `re.IGNORECASE`.

    The returned `re.Pattern` object may have its `match()` method called to
    match a complete pattern, or `search()` to match from the right. The
    argument supplied to these methods must also have its path separators and
    newlines swapped.

    Raises ValueError if '**' appears as only part of a path component.
    """

    # Break the pattern into components, keeping the trailing '\n' on every
    # component but the last. Only '\n' stands for a path separator here, so
    # we must not use str.splitlines(): it also breaks on '\r', '\x0b',
    # '\x0c', '\x1c'-'\x1e', '\x85', '\u2028' and '\u2029', which would let a
    # component such as 'a\r**' slip through as 'a\r' followed by '**'.
    *leading, last = pattern_lines.split('\n')
    components = [component + '\n' for component in leading]
    if last:
        components.append(last)

    # Match the start of the path, or just after a path separator
    parts = ['^']
    for part in components:
        if part == '**\n':
            # '**/' component: we use '[\s\S]' rather than '.' so that path
            # separators (i.e. newlines) are matched. The trailing '^' ensures
            # we terminate after a path separator (i.e. on a new line).
            part = r'[\s\S]*^'
        elif part == '**':
            # '**' component.
            part = r'[\s\S]*'
        elif '**' in part:
            raise ValueError("Invalid pattern: '**' can only be an entire path component")
        else:
            # Any other component: pass to fnmatch.translate(). We slice off
            # the common prefix and suffix added by translate() to ensure that
            # re.DOTALL is not set, and the end of the string not matched,
            # respectively. With DOTALL not set, '*' wildcards will not match
            # path separators, because the '.' characters in the pattern will
            # not match newlines.
            part = fnmatch.translate(part)[_FNMATCH_SLICE]
        parts.append(part)
    # Match the end of the path, always.
    parts.append(r'\Z')
    flags = re.MULTILINE
    if not case_sensitive:
        flags |= re.IGNORECASE
    return re.compile(''.join(parts), flags=flags)
155
+
156
+
95
157
class _Selector :
96
158
"""A selector matches a specific glob pattern part against the children
97
159
of a given path."""
@@ -276,6 +338,10 @@ class PurePath:
276
338
# to implement comparison methods like `__lt__()`.
277
339
'_parts_normcase_cached' ,
278
340
341
+ # The `_lines_cached` slot stores the string path with path separators
342
+ # and newlines swapped. This is used to implement `match()`.
343
+ '_lines_cached' ,
344
+
279
345
# The `_hash` slot stores the hash of the case-normalized string
280
346
# path. It's set when `__hash__()` is called for the first time.
281
347
'_hash' ,
@@ -441,6 +507,16 @@ def _parts_normcase(self):
441
507
self ._parts_normcase_cached = self ._str_normcase .split (self ._flavour .sep )
442
508
return self ._parts_normcase_cached
443
509
510
@property
def _lines(self):
    """The string form of this path with separators and newlines exchanged.

    The value is computed on first access by translating ``str(self)``
    with the table registered for this flavour's separator, then stored
    in ``_lines_cached`` so later accesses return the stored string.
    """
    try:
        swapped = self._lines_cached
    except AttributeError:
        # First access: build the swapped form and remember it.
        table = _SWAP_SEP_AND_NEWLINE[self._flavour.sep]
        swapped = self._lines_cached = str(self).translate(table)
    return swapped
519
+
444
520
def __eq__ (self , other ):
445
521
if not isinstance (other , PurePath ):
446
522
return NotImplemented
@@ -697,23 +773,18 @@ def match(self, path_pattern, *, case_sensitive=None):
697
773
"""
698
774
Return True if this path matches the given pattern.
699
775
"""
776
+ if not isinstance (path_pattern , PurePath ):
777
+ path_pattern = self .with_segments (path_pattern )
700
778
if case_sensitive is None :
701
779
case_sensitive = _is_case_sensitive (self ._flavour )
702
- pat = self .with_segments (path_pattern )
703
- if not pat .parts :
780
+ pattern = _compile_pattern_lines (path_pattern ._lines , case_sensitive )
781
+ if path_pattern .drive or path_pattern .root :
782
+ return pattern .match (self ._lines ) is not None
783
+ elif path_pattern ._tail :
784
+ return pattern .search (self ._lines ) is not None
785
+ else :
704
786
raise ValueError ("empty pattern" )
705
- pat_parts = pat .parts
706
- parts = self .parts
707
- if pat .drive or pat .root :
708
- if len (pat_parts ) != len (parts ):
709
- return False
710
- elif len (pat_parts ) > len (parts ):
711
- return False
712
- for part , pat in zip (reversed (parts ), reversed (pat_parts )):
713
- match = _compile_pattern (pat , case_sensitive )
714
- if not match (part ):
715
- return False
716
- return True
787
+
717
788
718
789
# Subclassing os.PathLike makes isinstance() checks slower,
719
790
# which in turn makes Path construction slower. Register instead!
0 commit comments