diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py index 7e8abbf6ffe155..8c5dd18e938633 100644 --- a/Lib/re/__init__.py +++ b/Lib/re/__init__.py @@ -164,17 +164,17 @@ class RegexFlag: def match(pattern, string, flags=0): """Try to apply the pattern at the start of the string, returning a Match object, or None if no match was found.""" - return _compile(pattern, flags).match(string) + return compile(pattern, flags).match(string) def fullmatch(pattern, string, flags=0): """Try to apply the pattern to all of the string, returning a Match object, or None if no match was found.""" - return _compile(pattern, flags).fullmatch(string) + return compile(pattern, flags).fullmatch(string) def search(pattern, string, flags=0): """Scan through string looking for a match to the pattern, returning a Match object, or None if no match was found.""" - return _compile(pattern, flags).search(string) + return compile(pattern, flags).search(string) class _ZeroSentinel(int): pass @@ -205,7 +205,7 @@ def sub(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel DeprecationWarning, stacklevel=2 ) - return _compile(pattern, flags).sub(repl, string, count) + return compile(pattern, flags).sub(repl, string, count) sub.__text_signature__ = '(pattern, repl, string, count=0, flags=0)' def subn(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel): @@ -235,7 +235,7 @@ def subn(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentine DeprecationWarning, stacklevel=2 ) - return _compile(pattern, flags).subn(repl, string, count) + return compile(pattern, flags).subn(repl, string, count) subn.__text_signature__ = '(pattern, repl, string, count=0, flags=0)' def split(pattern, string, *args, maxsplit=_zero_sentinel, flags=_zero_sentinel): @@ -264,7 +264,7 @@ def split(pattern, string, *args, maxsplit=_zero_sentinel, flags=_zero_sentinel) DeprecationWarning, stacklevel=2 ) - return _compile(pattern, flags).split(string, maxsplit) + return compile(pattern, flags).split(string, maxsplit) split.__text_signature__ = '(pattern, string, maxsplit=0, flags=0)' def findall(pattern, string, flags=0): @@ -275,60 +275,17 @@ def findall(pattern, string, flags=0): has more than one group. Empty matches are included in the result.""" - return _compile(pattern, flags).findall(string) + return compile(pattern, flags).findall(string) def finditer(pattern, string, flags=0): """Return an iterator over all non-overlapping matches in the string. For each match, the iterator returns a Match object. Empty matches are included in the result.""" - return _compile(pattern, flags).finditer(string) + return compile(pattern, flags).finditer(string) def compile(pattern, flags=0): - "Compile a regular expression pattern, returning a Pattern object." - return _compile(pattern, flags) - -def purge(): - "Clear the regular expression caches" - _cache.clear() - _cache2.clear() - _compile_template.cache_clear() - - -# SPECIAL_CHARS -# closing ')', '}' and ']' -# '-' (a range in character set) -# '&', '~', (extended character set operations) -# '#' (comment) and WHITESPACE (ignored) in verbose mode -_special_chars_map = {i: '\\' + chr(i) for i in b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f'} - -def escape(pattern): - """ - Escape special characters in a string. - """ - if isinstance(pattern, str): - return pattern.translate(_special_chars_map) - else: - pattern = str(pattern, 'latin1') - return pattern.translate(_special_chars_map).encode('latin1') - -Pattern = type(_compiler.compile('', 0)) -Match = type(_compiler.compile('', 0).match('')) - -# -------------------------------------------------------------------- -# internals - -# Use the fact that dict keeps the insertion order. -# _cache2 uses the simple FIFO policy which has better latency. -# _cache uses the LRU policy which has better hit rate. -_cache = {} # LRU -_cache2 = {} # FIFO -_MAXCACHE = 512 -_MAXCACHE2 = 256 -assert _MAXCACHE2 < _MAXCACHE - -def _compile(pattern, flags): - # internal: compile pattern + """Compile a regular expression pattern, returning a Pattern object.""" if isinstance(flags, RegexFlag): flags = flags.value try: @@ -371,6 +328,45 @@ def _compile(pattern, flags): _cache2[key] = p return p +def purge(): + "Clear the regular expression caches" + _cache.clear() + _cache2.clear() + _compile_template.cache_clear() + + +# SPECIAL_CHARS +# closing ')', '}' and ']' +# '-' (a range in character set) +# '&', '~', (extended character set operations) +# '#' (comment) and WHITESPACE (ignored) in verbose mode +_special_chars_map = {i: '\\' + chr(i) for i in b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f'} + +def escape(pattern): + """ + Escape special characters in a string. + """ + if isinstance(pattern, str): + return pattern.translate(_special_chars_map) + else: + pattern = str(pattern, 'latin1') + return pattern.translate(_special_chars_map).encode('latin1') + +Pattern = type(_compiler.compile('', 0)) +Match = type(_compiler.compile('', 0).match('')) + +# -------------------------------------------------------------------- +# internals + +# Use the fact that dict keeps the insertion order. +# _cache2 uses the simple FIFO policy which has better latency. +# _cache uses the LRU policy which has better hit rate. +_cache = {} # LRU +_cache2 = {} # FIFO +_MAXCACHE = 512 +_MAXCACHE2 = 256 +assert _MAXCACHE2 < _MAXCACHE + @functools.lru_cache(_MAXCACHE) def _compile_template(pattern, repl): # internal: compile replacement pattern @@ -381,9 +377,12 @@ def _compile_template(pattern, repl): import copyreg def _pickle(p): - return _compile, (p.pattern, p.flags) + return compile, (p.pattern, p.flags) + +# compatibility alias to deserialize old pickles +_compile = compile -copyreg.pickle(Pattern, _pickle, _compile) +copyreg.pickle(Pattern, _pickle, compile) # -------------------------------------------------------------------- # experimental stuff (see python-dev discussions for details) diff --git a/Lib/re/_parser.py b/Lib/re/_parser.py index 0990255b22c219..0330805e63804d 100644 --- a/Lib/re/_parser.py +++ b/Lib/re/_parser.py @@ -12,6 +12,7 @@ # XXX: show string offset and offending character for all errors +import os from ._constants import * SPECIAL_CHARS = ".\\[{()*+?^$|" @@ -508,6 +509,8 @@ def _parse_sub(source, state, verbose, nested): subpattern.append((BRANCH, (None, items))) return subpattern +_warn_skips = (os.path.dirname(__file__),) + def _parse(source, state, verbose, nested, first=False): # parse a simple pattern subpattern = SubPattern(state) @@ -557,7 +560,7 @@ def _parse(source, state, verbose, nested, first=False): import warnings warnings.warn( 'Possible nested set at position %d' % source.tell(), - FutureWarning, stacklevel=nested + 6 + FutureWarning, skip_file_prefixes=_warn_skips ) negate = sourcematch("^") # check remaining characters @@ -580,7 +583,7 @@ def _parse(source, state, verbose, nested, first=False): 'symmetric difference' if this == '~' else 'union', source.tell() - 1), - FutureWarning, stacklevel=nested + 6 + FutureWarning, skip_file_prefixes=_warn_skips ) code1 = LITERAL, _ord(this) if sourcematch("-"): @@ -603,7 +606,7 @@ def _parse(source, state, verbose, nested, first=False): warnings.warn( 'Possible set difference at position %d' % ( source.tell() - 2), - FutureWarning, stacklevel=nested + 6 + FutureWarning, skip_file_prefixes=_warn_skips ) code2 = LITERAL, _ord(that) if code1[0] != LITERAL or code2[0] != LITERAL: diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index ff95f54026e172..53739ff6e28121 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1362,8 +1362,23 @@ def test_pickling(self): pickled = pickle.dumps(oldpat, proto) newpat = pickle.loads(pickled) self.assertEqual(newpat, oldpat) - # current pickle expects the _compile() reconstructor in re module + + def test_unpickling(self): + import pickle + pat = re.compile(".*") from re import _compile # noqa: F401 + # previous pickles may expect the _compile() reconstructor in re module. + # the four pickles below are examples of this at various protocol versions. + pickles = [ + b'cre\n_compile\np0\n(V.*\np1\nI32\ntp2\nRp3\n.', + b'cre\n_compile\nq\x00(X\x02\x00\x00\x00.*q\x01K tq\x02Rq\x03.', + b'\x80\x03cre\n_compile\nq\x00X\x02\x00\x00\x00.*q\x01K \x86q\x02Rq\x03.', + b'\x80\x04\x95\x1e\x00\x00\x00\x00\x00\x00\x00\x8c\x02re\x94\x8c\x08' + b'_compile\x94\x93\x94\x8c\x02.*\x94K \x86\x94R\x94.', + ] + for pickled in pickles: + unpickled = pickle.loads(pickled) + self.assertEqual(unpickled, pat) def test_copying(self): import copy diff --git a/Misc/NEWS.d/next/Library/2024-08-04-13-22-32.gh-issue-122358.wpnaq4.rst b/Misc/NEWS.d/next/Library/2024-08-04-13-22-32.gh-issue-122358.wpnaq4.rst new file mode 100644 index 00000000000000..2f66bd07c1a9e1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-08-04-13-22-32.gh-issue-122358.wpnaq4.rst @@ -0,0 +1 @@ +Remove :func:`!re._compile`, leaving a compatibility alias to :func:`re.compile`.