@@ -54,13 +54,30 @@ def _ignore_error(exception):
54
54
getattr (exception , 'winerror' , None ) in _IGNORED_WINERRORS )
55
55
56
56
57
+ @functools .cache
57
58
def _is_case_sensitive (flavour ):
58
59
return flavour .normcase ('Aa' ) == 'Aa'
59
60
60
61
#
61
62
# Globbing helpers
62
63
#
63
64
65
+
66
+ # fnmatch.translate() returns a regular expression that includes a prefix and
67
+ # a suffix, which enable matching newlines and ensure the end of the string is
68
+ # matched, respectively. These features are undesirable for our implementation
69
+ # of PurePatch.match(), which represents path separators as newlines and joins
70
+ # pattern segments together. As a workaround, we define a slice object that
71
+ # can remove the prefix and suffix from any translate() result. See the
72
+ # _compile_pattern_lines() function for more details.
73
+ _FNMATCH_PREFIX , _FNMATCH_SUFFIX = fnmatch .translate ('_' ).split ('_' )
74
+ _FNMATCH_SLICE = slice (len (_FNMATCH_PREFIX ), - len (_FNMATCH_SUFFIX ))
75
+ _SWAP_SEP_AND_NEWLINE = {
76
+ '/' : str .maketrans ({'/' : '\n ' , '\n ' : '/' }),
77
+ '\\ ' : str .maketrans ({'\\ ' : '\n ' , '\n ' : '\\ ' }),
78
+ }
79
+
80
+
64
81
@functools .lru_cache ()
65
82
def _make_selector (pattern_parts , flavour , case_sensitive ):
66
83
pat = pattern_parts [0 ]
@@ -92,6 +109,38 @@ def _compile_pattern(pat, case_sensitive):
92
109
return re .compile (fnmatch .translate (pat ), flags ).match
93
110
94
111
112
@functools.lru_cache()
def _compile_pattern_lines(pattern_lines, case_sensitive):
    r"""Compile the given pattern lines to an `re.Pattern` object.

    The *pattern_lines* argument is a glob-style pattern (e.g. '*/*.py') with
    its path separators and newlines swapped (e.g. '*\n*.py'). By using
    newlines to separate path components, and not setting `re.DOTALL`, we
    ensure that the `*` wildcard cannot match path separators.

    If *case_sensitive* is false, the pattern is compiled with
    `re.IGNORECASE`.

    The returned `re.Pattern` object may have its `match()` method called to
    match a complete pattern, or `search()` to match from the right. The
    argument supplied to these methods must also have its path separators and
    newlines swapped.
    """

    # Split on '\n' only, keeping it attached to the preceding segment.
    # str.splitlines() would also break on '\r', '\x0b', '\x0c', '\x1c',
    # '\x1d', '\x1e', '\x85', '\u2028' and '\u2029'; none of those stand for
    # a path separator here, and cutting a segment at one of them could split
    # a '[...]' expression in two so that translate() escapes it literally.
    *leading, last = pattern_lines.split('\n')
    segments = [line + '\n' for line in leading]
    if last:
        segments.append(last)

    # Match the start of the path, or just after a path separator
    parts = ['^']
    for part in segments:
        # We slice off the common prefix and suffix added by translate() to
        # ensure that re.DOTALL is not set, and the end of the string not
        # matched, respectively. With DOTALL not set, '*' wildcards will not
        # match path separators, because the '.' characters in the pattern
        # will not match newlines.
        parts.append(fnmatch.translate(part)[_FNMATCH_SLICE])
    # Match the end of the path, always.
    parts.append(r'\Z')
    flags = re.MULTILINE
    if not case_sensitive:
        flags |= re.IGNORECASE
    return re.compile(''.join(parts), flags=flags)
142
+
143
+
95
144
class _Selector :
96
145
"""A selector matches a specific glob pattern part against the children
97
146
of a given path."""
@@ -274,6 +323,10 @@ class PurePath(object):
274
323
# to implement comparison methods like `__lt__()`.
275
324
'_parts_normcase_cached' ,
276
325
326
+ # The `_lines_cached` slot stores the string path with path separators
327
+ # and newlines swapped. This is used to implement `match()`.
328
+ '_lines_cached' ,
329
+
277
330
# The `_hash` slot stores the hash of the case-normalized string
278
331
# path. It's set when `__hash__()` is called for the first time.
279
332
'_hash' ,
@@ -439,6 +492,16 @@ def _parts_normcase(self):
439
492
self ._parts_normcase_cached = self ._str_normcase .split (self ._flavour .sep )
440
493
return self ._parts_normcase_cached
441
494
495
@property
def _lines(self):
    """The string path with separators and newlines swapped.

    Computed on first access using the translation table for this path's
    flavour separator, then stored in `_lines_cached` and reused. Used for
    pattern matching.
    """
    try:
        swapped = self._lines_cached
    except AttributeError:
        # First access: the cache attribute has not been set yet.
        table = _SWAP_SEP_AND_NEWLINE[self._flavour.sep]
        swapped = self._lines_cached = str(self).translate(table)
    return swapped
504
+
442
505
def __eq__ (self , other ):
443
506
if not isinstance (other , PurePath ):
444
507
return NotImplemented
def match(self, path_pattern, *, case_sensitive=None):
    """
    Return True if this path matches the given pattern.

    A pattern that has a drive or root is matched against the complete
    path; otherwise it is matched from the right. When *case_sensitive* is
    None, the case sensitivity of this path's flavour is used. Raises
    ValueError if the pattern is empty.
    """
    if not isinstance(path_pattern, PurePath):
        path_pattern = self.with_segments(path_pattern)
    if case_sensitive is None:
        case_sensitive = _is_case_sensitive(self._flavour)
    regex = _compile_pattern_lines(path_pattern._lines, case_sensitive)
    if path_pattern.drive or path_pattern.root:
        # Anchored pattern: it has to account for the whole path.
        found = regex.match(self._lines)
    elif path_pattern._tail:
        # Relative pattern: it only has to match from the right.
        found = regex.search(self._lines)
    else:
        raise ValueError("empty pattern")
    return found is not None
772
+
715
773
716
774
# Can't subclass os.PathLike from PurePath and keep the constructor
717
775
# optimizations in PurePath.__slots__.
0 commit comments