@@ -54,13 +54,30 @@ def _ignore_error(exception):
5454 getattr (exception , 'winerror' , None ) in _IGNORED_WINERRORS )
5555
5656
@functools.cache
def _is_case_sensitive(flavour):
    """Return True if *flavour* compares paths case-sensitively.

    *flavour* must provide a ``normcase()`` callable. The flavour is
    considered case-sensitive when normalising the mixed-case probe string
    'Aa' leaves it unchanged. The result is memoised per flavour by
    ``functools.cache``, so ``normcase()`` is called once per distinct
    flavour object.
    """
    return flavour.normcase('Aa') == 'Aa'
5960
6061#
6162# Globbing helpers
6263#
6364
65+
# fnmatch.translate() returns a regular expression that includes a prefix and
# a suffix, which enable matching newlines and ensure the end of the string is
# matched, respectively. These features are undesirable for our implementation
# of PurePath.match(), which represents path separators as newlines and joins
# pattern segments together. As a workaround, we define a slice object that
# can remove the prefix and suffix from any translate() result. See the
# _compile_pattern_lines() function for more details.
_FNMATCH_PREFIX, _FNMATCH_SUFFIX = fnmatch.translate('_').split('_')
_FNMATCH_SLICE = slice(len(_FNMATCH_PREFIX), -len(_FNMATCH_SUFFIX))

# Translation tables, keyed by path separator, that swap the separator with
# the newline character (and vice versa) in one str.translate() pass. Every
# key must be a single character, otherwise str.maketrans() raises ValueError.
_SWAP_SEP_AND_NEWLINE = {
    '/': str.maketrans({'/': '\n', '\n': '/'}),
    '\\': str.maketrans({'\\': '\n', '\n': '\\'}),
}
79+
80+
6481@functools .lru_cache ()
6582def _make_selector (pattern_parts , flavour , case_sensitive ):
6683 pat = pattern_parts [0 ]
@@ -92,6 +109,38 @@ def _compile_pattern(pat, case_sensitive):
92109 return re .compile (fnmatch .translate (pat ), flags ).match
93110
94111
@functools.lru_cache()
def _compile_pattern_lines(pattern_lines, case_sensitive):
    r"""Compile the given pattern lines to an `re.Pattern` object.

    The *pattern_lines* argument is a glob-style pattern (e.g. '*/*.py') with
    its path separators and newlines swapped (e.g. '*\n*.py'). By using
    newlines to separate path components, and not setting `re.DOTALL`, we
    ensure that the `*` wildcard cannot match path separators.

    If *case_sensitive* is false, the pattern is compiled with
    `re.IGNORECASE`.

    The returned `re.Pattern` object may have its `match()` method called to
    match a complete pattern, or `search()` to match from the right. The
    argument supplied to these methods must also have its path separators and
    newlines swapped.
    """
    # Split on '\n' only. str.splitlines() also breaks on '\r', '\v', '\f',
    # '\x1c'-'\x1e', '\x85', '\u2028' and '\u2029', which would cut a bracket
    # expression containing one of those characters in half and turn it into
    # literal text.
    #
    # We slice off the common prefix and suffix added by translate() to
    # ensure that re.DOTALL is not set, and the end of the string not
    # matched, respectively. With DOTALL not set, '*' wildcards will not
    # match path separators, because the '.' characters in the pattern
    # will not match newlines.
    translated = [
        fnmatch.translate(part)[_FNMATCH_SLICE]
        for part in pattern_lines.split('\n')
    ]
    # '^' matches the start of the path, or (with re.MULTILINE) just after a
    # path separator. Components are re-joined with an escaped newline, and
    # r'\Z' always matches the end of the path.
    regex = '^' + re.escape('\n').join(translated) + r'\Z'
    flags = re.MULTILINE
    if not case_sensitive:
        flags |= re.IGNORECASE
    return re.compile(regex, flags=flags)
142+
143+
95144class _Selector :
96145 """A selector matches a specific glob pattern part against the children
97146 of a given path."""
@@ -274,6 +323,10 @@ class PurePath(object):
274323 # to implement comparison methods like `__lt__()`.
275324 '_parts_normcase_cached' ,
276325
326+ # The `_lines_cached` slot stores the string path with path separators
327+ # and newlines swapped. This is used to implement `match()`.
328+ '_lines_cached' ,
329+
277330 # The `_hash` slot stores the hash of the case-normalized string
278331 # path. It's set when `__hash__()` is called for the first time.
279332 '_hash' ,
@@ -439,6 +492,16 @@ def _parts_normcase(self):
439492 self ._parts_normcase_cached = self ._str_normcase .split (self ._flavour .sep )
440493 return self ._parts_normcase_cached
441494
@property
def _lines(self):
    """The string path with path separators and newlines swapped.

    Used for pattern matching. The value is computed on first access from
    ``str(self)`` using the translation table for this flavour's separator,
    then stored in the ``_lines_cached`` slot and reused afterwards.
    """
    try:
        return self._lines_cached
    except AttributeError:
        # Slot not populated yet: build the swapped form once and cache it.
        trans = _SWAP_SEP_AND_NEWLINE[self._flavour.sep]
        self._lines_cached = str(self).translate(trans)
        return self._lines_cached
504+
442505 def __eq__ (self , other ):
443506 if not isinstance (other , PurePath ):
444507 return NotImplemented
def match(self, path_pattern, *, case_sensitive=None):
    """
    Return True if this path matches the given pattern.

    *path_pattern* may be a PurePath or anything accepted by
    with_segments(). A pattern with a drive or root must match the whole
    path; any other non-empty pattern is matched from the right. When
    *case_sensitive* is None, the flavour's default case sensitivity is
    used. Raises ValueError if the pattern is empty.
    """
    if not isinstance(path_pattern, PurePath):
        path_pattern = self.with_segments(path_pattern)
    anchored = bool(path_pattern.drive or path_pattern.root)
    # Reject an empty pattern before compiling, so that no work is done and
    # no cache entry is created for a pattern that cannot be used.
    if not anchored and not path_pattern._tail:
        raise ValueError("empty pattern")
    if case_sensitive is None:
        case_sensitive = _is_case_sensitive(self._flavour)
    pattern = _compile_pattern_lines(path_pattern._lines, case_sensitive)
    if anchored:
        # Anchored pattern: must match from the very start of the path.
        return pattern.match(self._lines) is not None
    # Relative pattern: may start after any path separator.
    return pattern.search(self._lines) is not None
715773
716774# Can't subclass os.PathLike from PurePath and keep the constructor
717775# optimizations in PurePath.__slots__.
0 commit comments