From 4fb942fcb6f54c503e103e7887d3cc8e19a57284 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20K=C5=82oczko?=
Date: Sun, 5 May 2024 07:10:04 +0000
Subject: [PATCH 1/2] really drop python<=3.7 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Filter all code over `pyupgrade --py38-plus`.

Signed-off-by: Tomasz Kłoczko
---
 regex_3/_regex_core.py       |  62 ++++++-------
 regex_3/regex.py             |   6 +-
 regex_3/test_regex.py        | 170 +++++++++++------------------
 tools/build_regex_unicode.py |  65 +++++++-------
 4 files changed, 120 insertions(+), 183 deletions(-)

diff --git a/regex_3/_regex_core.py b/regex_3/_regex_core.py
index b2ffeae..8a0606b 100644
--- a/regex_3/_regex_core.py
+++ b/regex_3/_regex_core.py
@@ -48,7 +48,7 @@ def __init__(self, message, pattern=None, pos=None):
             self.lineno = pattern.count(newline, 0, pos) + 1
             self.colno = pos - pattern.rfind(newline, 0, pos)
 
-            message = "{} at position {}".format(message, pos)
+            message = f"{message} at position {pos}"
 
             if newline in pattern:
                 message += " (line {}, column {})".format(self.lineno,
@@ -1318,7 +1318,7 @@ def parse_hex_escape(source, info, esc, expected_len, in_set, type):
     for i in range(expected_len):
         ch = source.get()
         if ch not in HEX_DIGITS:
-            raise error("incomplete escape \\%s%s" % (type, ''.join(digits)),
+            raise error("incomplete escape \\{}{}".format(type, ''.join(digits)),
              source.string, saved_pos)
         digits.append(ch)
 
@@ -1331,7 +1331,7 @@ def parse_hex_escape(source, info, esc, expected_len, in_set, type):
         return make_character(info, value, in_set)
 
     # Bad hex escape.
-    raise error("bad hex escape \\%s%s" % (esc, ''.join(digits)),
+    raise error("bad hex escape \\{}{}".format(esc, ''.join(digits)),
      source.string, saved_pos)
 
 def parse_group_ref(source, info):
@@ -1617,7 +1617,7 @@ def numeric_to_rational(numeric):
     else:
         raise ValueError()
 
-    result = "{}{}/{}".format(sign, num, den)
+    result = f"{sign}{num}/{den}"
 
     if result.endswith("/1"):
         return result[ : -2]
@@ -1782,7 +1782,7 @@ def parse_repl_hex_escape(source, expected_len, type):
     for i in range(expected_len):
         ch = source.get()
         if ch not in HEX_DIGITS:
-            raise error("incomplete escape \\%s%s" % (type, ''.join(digits)),
+            raise error("incomplete escape \\{}{}".format(type, ''.join(digits)),
              source.string, source.pos)
         digits.append(ch)
 
@@ -1921,7 +1921,7 @@ def __init__(self, positive=True):
         self._key = self.__class__, self.positive
 
     def get_firstset(self, reverse):
-        return set([None])
+        return {None}
 
     def _compile(self, reverse, fuzzy):
         flags = 0
@@ -1954,7 +1954,7 @@ def _compile(self, reverse, fuzzy):
         return [(self._opcode[reverse], flags)]
 
     def dump(self, indent, reverse):
-        print("{}{}".format(INDENT * indent, self._op_name))
+        print(f"{INDENT * indent}{self._op_name}")
 
     def max_width(self):
         return 1
@@ -2007,7 +2007,7 @@ def _compile(self, reverse, fuzzy):
           [(OP.END, )])
 
     def dump(self, indent, reverse):
-        print("{}ATOMIC".format(INDENT * indent))
+        print(f"{INDENT * indent}ATOMIC")
         self.subpattern.dump(indent + 1, reverse)
 
     def is_empty(self):
@@ -2108,7 +2108,7 @@ def get_firstset(self, reverse):
         for b in self.branches:
             fs |= b.get_firstset(reverse)
 
-        return fs or set([None])
+        return fs or {None}
 
     def _compile(self, reverse, fuzzy):
         if not self.branches:
@@ -2124,10 +2124,10 @@ def _compile(self, reverse, fuzzy):
         return code
 
     def dump(self, indent, reverse):
-        print("{}BRANCH".format(INDENT * indent))
+        print(f"{INDENT * indent}BRANCH")
         self.branches[0].dump(indent + 1, reverse)
         for b in self.branches[1 : ]:
-            print("{}OR".format(INDENT * indent))
+            print(f"{INDENT * indent}OR")
             b.dump(indent + 1, reverse)
 
     @staticmethod
@@ -2454,7 +2454,7 @@ def _compile(self, reverse, fuzzy):
         return [(OP.GROUP_CALL, self.call_ref)]
 
     def dump(self, indent, reverse):
-        print("{}GROUP_CALL {}".format(INDENT * indent, self.group))
+        print(f"{INDENT * indent}GROUP_CALL {self.group}")
 
     def __eq__(self, other):
         return type(self) is type(other) and self.group == other.group
@@ -2505,7 +2505,7 @@ def optimise(self, info, reverse, in_set=False):
         return self
 
     def get_firstset(self, reverse):
-        return set([self])
+        return {self}
 
     def has_simple_start(self):
         return True
@@ -2618,10 +2618,10 @@ def _compile(self, reverse, fuzzy):
         return code
 
     def dump(self, indent, reverse):
-        print("{}GROUP_EXISTS {}".format(INDENT * indent, self.group))
+        print(f"{INDENT * indent}GROUP_EXISTS {self.group}")
         self.yes_item.dump(indent + 1, reverse)
         if not self.no_item.is_empty():
-            print("{}OR".format(INDENT * indent))
+            print(f"{INDENT * indent}OR")
             self.no_item.dump(indent + 1, reverse)
 
     def is_empty(self):
@@ -2766,7 +2766,7 @@ def dump(self, indent, reverse):
         constraints = self._constraints_to_string()
         if constraints:
             constraints = " " + constraints
-        print("{}FUZZY{}".format(INDENT * indent, constraints))
+        print(f"{INDENT * indent}FUZZY{constraints}")
         self.subpattern.dump(indent + 1, reverse)
 
     def is_empty(self):
@@ -2790,12 +2790,12 @@ def _constraints_to_string(self):
             con = ""
 
             if min > 0:
-                con = "{}<=".format(min)
+                con = f"{min}<="
 
             con += name
 
             if max is not None:
-                con += "<={}".format(max)
+                con += f"<={max}"
 
             constraints.append(con)
 
@@ -2803,7 +2803,7 @@ def _constraints_to_string(self):
         for name in "ids":
             coeff = self.constraints["cost"][name]
            if coeff > 0:
-                cost.append("{}{}".format(coeff, name))
+                cost.append(f"{coeff}{name}")
 
         limit = self.constraints["cost"]["max"]
         if limit is not None and limit > 0:
@@ -2822,7 +2822,7 @@ def _compile(self, reverse, fuzzy):
         return grapheme_matcher.compile(reverse, fuzzy)
 
     def dump(self, indent, reverse):
-        print("{}GRAPHEME".format(INDENT * indent))
+        print(f"{INDENT * indent}GRAPHEME")
 
     def max_width(self):
         return UNLIMITED
@@ -2942,7 +2942,7 @@ def _compile(self, reverse, fuzzy):
           (OP.END, )])
 
     def dump(self, indent, reverse):
-        print("{}ATOMIC".format(INDENT * indent))
+        print(f"{INDENT * indent}ATOMIC")
 
         if self.max_count is None:
             limit = "INF"
@@ -3019,7 +3019,7 @@ def dump(self, indent, reverse):
         group = self.group
         if group < 0:
             group = private_groups[group]
-        print("{}GROUP {}".format(INDENT * indent, group))
+        print(f"{INDENT * indent}GROUP {group}")
         self.subpattern.dump(indent + 1, reverse)
 
     def __eq__(self, other):
@@ -3082,7 +3082,7 @@ def get_firstset(self, reverse):
         if self.positive and self.behind == reverse:
             return self.subpattern.get_firstset(reverse)
 
-        return set([None])
+        return {None}
 
     def _compile(self, reverse, fuzzy):
         flags = 0
@@ -3176,10 +3176,10 @@ def dump(self, indent, reverse):
         print("{}CONDITIONAL {} {}".format(INDENT * indent,
          self._dir_text[self.behind], POS_TEXT[self.positive]))
         self.subpattern.dump(indent + 1, self.behind)
-        print("{}EITHER".format(INDENT * indent))
+        print(f"{INDENT * indent}EITHER")
         self.yes_item.dump(indent + 1, reverse)
         if not self.no_item.is_empty():
-            print("{}OR".format(INDENT * indent))
+            print(f"{INDENT * indent}OR")
             self.no_item.dump(indent + 1, reverse)
 
     def is_empty(self):
@@ -3228,7 +3228,7 @@ def optimise(self, info, reverse, in_set=False):
         return self
 
     def get_firstset(self, reverse):
-        return set([self])
+        return {self}
 
     def has_simple_start(self):
         return True
@@ -3477,7 +3477,7 @@ def get_firstset(self, reverse):
                 return fs
             fs.discard(None)
 
-        return fs | set([None])
+        return fs | {None}
 
     def has_simple_start(self):
         return bool(self.items) and self.items[0].has_simple_start()
@@ -3624,7 +3624,7 @@ def rebuild(self, positive, case_flags, zerowidth):
          zerowidth).optimise(self.info, False)
 
     def get_firstset(self, reverse):
-        return set([self])
+        return {self}
 
     def has_simple_start(self):
         return True
@@ -3913,8 +3913,8 @@ def get_firstset(self, reverse):
             pos = -1
         else:
             pos = 0
-        return set([Character(self.characters[pos],
-          case_flags=self.case_flags)])
+        return {Character(self.characters[pos],
+          case_flags=self.case_flags)}
 
     def has_simple_start(self):
         return True
@@ -4185,7 +4185,7 @@ def match(self, substring):
 
     def expect(self, substring):
         if not self.match(substring):
-            raise error("missing {}".format(substring), self.string, self.pos)
+            raise error(f"missing {substring}", self.string, self.pos)
 
     def at_end(self):
         string = self.string
diff --git a/regex_3/regex.py b/regex_3/regex.py
index 428a159..ca715be 100644
--- a/regex_3/regex.py
+++ b/regex_3/regex.py
@@ -476,7 +476,7 @@ def complain_unused_args():
         unused_kwargs = set(kwargs) - {k for k, v in args_needed}
         if unused_kwargs:
             any_one = next(iter(unused_kwargs))
-            raise ValueError('unused keyword argument {!a}'.format(any_one))
+            raise ValueError(f'unused keyword argument {any_one!a}')
 
     if cache_it:
         try:
@@ -491,7 +491,7 @@ def complain_unused_args():
             try:
                 args_supplied.add((k, frozenset(kwargs[k])))
             except KeyError:
-                raise error("missing named list: {!r}".format(k))
+                raise error(f"missing named list: {k!r}")
 
         complain_unused_args()
 
@@ -639,7 +639,7 @@ def complain_unused_args():
             pass
 
     # The named capture groups.
-    index_group = dict((v, n) for n, v in info.group_index.items())
+    index_group = {v: n for n, v in info.group_index.items()}
 
     # Create the PatternObject.
     #
diff --git a/regex_3/test_regex.py b/regex_3/test_regex.py
index 8a02751..b473417 100644
--- a/regex_3/test_regex.py
+++ b/regex_3/test_regex.py
@@ -210,10 +210,7 @@ def test_bug_114660(self):
 
     def test_bug_462270(self):
         # Test for empty sub() behaviour, see SF bug #462270
-        if sys.version_info >= (3, 7, 0):
-            self.assertEqual(regex.sub('(?V0)x*', '-', 'abxd'), '-a-b--d-')
-        else:
-            self.assertEqual(regex.sub('(?V0)x*', '-', 'abxd'), '-a-b-d-')
+        self.assertEqual(regex.sub('(?V0)x*', '-', 'abxd'), '-a-b--d-')
         self.assertEqual(regex.sub('(?V1)x*', '-', 'abxd'), '-a-b--d-')
         self.assertEqual(regex.sub('x+', '-', 'abxd'), 'ab-d')
 
@@ -255,23 +252,14 @@ def test_re_subn(self):
 
     def test_re_split(self):
         self.assertEqual(regex.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
-        if sys.version_info >= (3, 7, 0):
-            self.assertEqual(regex.split(":*", ":a:b::c"), ['', '', 'a', '',
-              'b', '', 'c', ''])
-            self.assertEqual(regex.split("(:*)", ":a:b::c"), ['', ':', '', '',
-              'a', ':', '', '', 'b', '::', '', '', 'c', '', ''])
-            self.assertEqual(regex.split("(?::*)", ":a:b::c"), ['', '', 'a',
-              '', 'b', '', 'c', ''])
-            self.assertEqual(regex.split("(:)*", ":a:b::c"), ['', ':', '',
-              None, 'a', ':', '', None, 'b', ':', '', None, 'c', None, ''])
-        else:
-            self.assertEqual(regex.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
-            self.assertEqual(regex.split("(:*)", ":a:b::c"), ['', ':', 'a',
-              ':', 'b', '::', 'c'])
-            self.assertEqual(regex.split("(?::*)", ":a:b::c"), ['', 'a', 'b',
-              'c'])
-            self.assertEqual(regex.split("(:)*", ":a:b::c"), ['', ':', 'a',
-              ':', 'b', ':', 'c'])
+        self.assertEqual(regex.split(":*", ":a:b::c"), ['', '', 'a', '',
+          'b', '', 'c', ''])
+        self.assertEqual(regex.split("(:*)", ":a:b::c"), ['', ':', '', '',
+          'a', ':', '', '', 'b', '::', '', '', 'c', '', ''])
+        self.assertEqual(regex.split("(?::*)", ":a:b::c"), ['', '', 'a',
+          '', 'b', '', 'c', ''])
+        self.assertEqual(regex.split("(:)*", ":a:b::c"), ['', ':', '',
+          None, 'a', ':', '', None, 'b', ':', '', None, 'c', None, ''])
         self.assertEqual(regex.split("([b:]+)", ":a:b::c"), ['', ':', 'a',
          ':b::', 'c'])
         self.assertEqual(regex.split("(b)|(:+)", ":a:b::c"), ['', None, ':',
@@ -310,12 +298,8 @@ def test_qualified_re_split(self):
         self.assertEqual(regex.split("(:)", ":a:b::c", 2), ['', ':', 'a',
          ':', 'b::c'])
 
-        if sys.version_info >= (3, 7, 0):
-            self.assertEqual(regex.split("(:*)", ":a:b::c", 2), ['', ':', '',
-              '', 'a:b::c'])
-        else:
-            self.assertEqual(regex.split("(:*)", ":a:b::c", 2), ['', ':', 'a',
-              ':', 'b::c'])
+        self.assertEqual(regex.split("(:*)", ":a:b::c", 2), ['', ':', '',
+          '', 'a:b::c'])
 
     def test_re_findall(self):
         self.assertEqual(regex.findall(":+", "abc"), [])
@@ -997,7 +981,7 @@ def test_subscripting_match(self):
         if not m:
             self.fail("Failed: expected match but returned None")
         elif m[:] != ('x', 'x'):
-            self.fail("Failed: expected \"('x', 'x')\" but got {} instead".format(ascii(m[:])))
+            self.fail(f"Failed: expected \"('x', 'x')\" but got {ascii(m[:])} instead")
 
     def test_new_named_groups(self):
         m0 = regex.match(r'(?P\w)', 'x')
@@ -1178,9 +1162,9 @@ def test_properties(self):
 
         for pattern, chars, expected in tests:
             try:
                 if chars[ : 0].join(regex.findall(pattern, chars)) != expected:
-                    self.fail("Failed: {}".format(pattern))
+                    self.fail(f"Failed: {pattern}")
             except Exception as e:
-                self.fail("Failed: {} raised {}".format(pattern, ascii(e)))
+                self.fail(f"Failed: {pattern} raised {ascii(e)}")
 
         self.assertEqual(bool(regex.match(r"\p{NumericValue=0}", "0")), True)
@@ -1335,11 +1319,8 @@ def test_possessive(self):
 
     def test_zerowidth(self):
         # Issue 3262.
-        if sys.version_info >= (3, 7, 0):
-            self.assertEqual(regex.split(r"\b", "a b"), ['', 'a', ' ', 'b',
-              ''])
-        else:
-            self.assertEqual(regex.split(r"\b", "a b"), ['a b'])
+        self.assertEqual(regex.split(r"\b", "a b"), ['', 'a', ' ', 'b',
+          ''])
         self.assertEqual(regex.split(r"(?V1)\b", "a b"), ['', 'a', ' ', 'b',
          ''])
 
@@ -1361,25 +1342,15 @@ def test_zerowidth(self):
 
         self.assertEqual([m[0] for m in regex.finditer(r"(?rV1)^|\w+",
          "foo bar")], ['bar', 'foo', ''])
-        if sys.version_info >= (3, 7, 0):
-            self.assertEqual(regex.split("", "xaxbxc"), ['', 'x', 'a', 'x',
-              'b', 'x', 'c', ''])
-            self.assertEqual([m for m in regex.splititer("", "xaxbxc")], ['',
-              'x', 'a', 'x', 'b', 'x', 'c', ''])
-        else:
-            self.assertEqual(regex.split("", "xaxbxc"), ['xaxbxc'])
-            self.assertEqual([m for m in regex.splititer("", "xaxbxc")],
-              ['xaxbxc'])
-
-        if sys.version_info >= (3, 7, 0):
-            self.assertEqual(regex.split("(?r)", "xaxbxc"), ['', 'c', 'x', 'b',
-              'x', 'a', 'x', ''])
-            self.assertEqual([m for m in regex.splititer("(?r)", "xaxbxc")],
-              ['', 'c', 'x', 'b', 'x', 'a', 'x', ''])
-        else:
-            self.assertEqual(regex.split("(?r)", "xaxbxc"), ['xaxbxc'])
-            self.assertEqual([m for m in regex.splititer("(?r)", "xaxbxc")],
-              ['xaxbxc'])
+        self.assertEqual(regex.split("", "xaxbxc"), ['', 'x', 'a', 'x',
+          'b', 'x', 'c', ''])
+        self.assertEqual([m for m in regex.splititer("", "xaxbxc")], ['',
+          'x', 'a', 'x', 'b', 'x', 'c', ''])
+
+        self.assertEqual(regex.split("(?r)", "xaxbxc"), ['', 'c', 'x', 'b',
+          'x', 'a', 'x', ''])
+        self.assertEqual([m for m in regex.splititer("(?r)", "xaxbxc")],
+          ['', 'c', 'x', 'b', 'x', 'a', 'x', ''])
 
         self.assertEqual(regex.split("(?V1)", "xaxbxc"), ['', 'x', 'a', 'x',
          'b', 'x', 'c', ''])
@@ -1465,33 +1436,19 @@ def test_lookbehind(self):
 
     def test_unmatched_in_sub(self):
         # Issue 1519638.
-        if sys.version_info >= (3, 7, 0):
-            self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "xy"),
-              'y-x-')
-        else:
-            self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "xy"),
-              'y-x')
+        self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "xy"),
+          'y-x-')
         self.assertEqual(regex.sub(r"(?V1)(x)?(y)?", r"\2-\1", "xy"), 'y-x-')
-        if sys.version_info >= (3, 7, 0):
-            self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "x"), '-x-')
-        else:
-            self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "x"), '-x')
+        self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "x"), '-x-')
         self.assertEqual(regex.sub(r"(?V1)(x)?(y)?", r"\2-\1", "x"), '-x-')
-        if sys.version_info >= (3, 7, 0):
-            self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "y"), 'y--')
-        else:
-            self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "y"), 'y-')
+        self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "y"), 'y--')
         self.assertEqual(regex.sub(r"(?V1)(x)?(y)?", r"\2-\1", "y"), 'y--')
 
     def test_bug_10328 (self):
         # Issue 10328.
         pat = regex.compile(r'(?mV0)(?P[ \t]+\r*$)|(?P(?<=[^\n])\Z)')
-        if sys.version_info >= (3, 7, 0):
-            self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>',
-              'foobar '), ('foobar', 2))
-        else:
-            self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>',
-              'foobar '), ('foobar', 1))
+        self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>',
+          'foobar '), ('foobar', 2))
         self.assertEqual([m.group() for m in pat.finditer('foobar ')], [' ',
          ''])
         pat = regex.compile(r'(?mV1)(?P[ \t]+\r*$)|(?P(?<=[^\n])\Z)')
@@ -2447,7 +2404,7 @@ def test_various(self):
                 pattern, string, groups, expected, excval = t
             except ValueError:
                 fields = ", ".join([ascii(f) for f in t[ : 3]] + ["..."])
-                self.fail("Incorrect number of test fields: ({})".format(fields))
+                self.fail(f"Incorrect number of test fields: ({fields})")
             else:
                 group_list = []
                 if groups:
@@ -2576,9 +2533,9 @@ def test_named_lists(self):
          bar=["one", "two", "three"]))), self.PATTERN_CLASS)
 
         self.assertEqual(regex.findall(r"^\L<options>", "solid QWERT",
-          options=set(['good', 'brilliant', '+s\\ol[i}d'])), [])
+          options={'good', 'brilliant', '+s\\ol[i}d'}), [])
         self.assertEqual(regex.findall(r"^\L<options>", "+solid QWERT",
-          options=set(['good', 'brilliant', '+solid'])), ['+solid'])
+          options={'good', 'brilliant', '+solid'}), ['+solid'])
 
         options = ["STRASSE"]
         self.assertEqual(regex.match(r"(?fi)\L",
@@ -2967,32 +2924,20 @@ def test_issue_18468(self):
         for string in ":a:b::c", StrSubclass(":a:b::c"):
             self.assertTypedEqual(regex.split(":", string), ['', 'a', 'b', '',
              'c'])
-            if sys.version_info >= (3, 7, 0):
-                self.assertTypedEqual(regex.split(":*", string), ['', '', 'a',
-                  '', 'b', '', 'c', ''])
-                self.assertTypedEqual(regex.split("(:*)", string), ['', ':',
-                  '', '', 'a', ':', '', '', 'b', '::', '', '', 'c', '', ''])
-            else:
-                self.assertTypedEqual(regex.split(":*", string), ['', 'a', 'b',
-                  'c'])
-                self.assertTypedEqual(regex.split("(:*)", string), ['', ':',
-                  'a', ':', 'b', '::', 'c'])
+            self.assertTypedEqual(regex.split(":*", string), ['', '', 'a',
+              '', 'b', '', 'c', ''])
+            self.assertTypedEqual(regex.split("(:*)", string), ['', ':',
+              '', '', 'a', ':', '', '', 'b', '::', '', '', 'c', '', ''])
 
         for string in (b":a:b::c", BytesSubclass(b":a:b::c"),
          bytearray(b":a:b::c"), memoryview(b":a:b::c")):
             self.assertTypedEqual(regex.split(b":", string), [b'', b'a', b'b',
              b'', b'c'])
-            if sys.version_info >= (3, 7, 0):
-                self.assertTypedEqual(regex.split(b":*", string), [b'', b'',
-                  b'a', b'', b'b', b'', b'c', b''])
-                self.assertTypedEqual(regex.split(b"(:*)", string), [b'', b':',
-                  b'', b'', b'a', b':', b'', b'', b'b', b'::', b'', b'', b'c',
-                  b'', b''])
-            else:
-                self.assertTypedEqual(regex.split(b":*", string), [b'', b'a',
-                  b'b', b'c'])
-                self.assertTypedEqual(regex.split(b"(:*)", string), [b'', b':',
-                  b'a', b':', b'b', b'::', b'c'])
+            self.assertTypedEqual(regex.split(b":*", string), [b'', b'',
+              b'a', b'', b'b', b'', b'c', b''])
+            self.assertTypedEqual(regex.split(b"(:*)", string), [b'', b':',
+              b'', b'', b'a', b':', b'', b'', b'b', b'::', b'', b'', b'c',
+              b'', b''])
 
         for string in "a:b::c:::d", StrSubclass("a:b::c:::d"):
             self.assertTypedEqual(regex.findall(":+", string), [":", "::",
@@ -3360,16 +3305,10 @@ def test_hg_bugs(self):
         self.assertEqual(pat.findall(raw), ['xxx'])
 
         # Hg issue 106: * operator not working correctly with sub()
-        if sys.version_info >= (3, 7, 0):
-            self.assertEqual(regex.sub('(?V0).*', 'x', 'test'), 'xx')
-        else:
-            self.assertEqual(regex.sub('(?V0).*', 'x', 'test'), 'x')
+        self.assertEqual(regex.sub('(?V0).*', 'x', 'test'), 'xx')
         self.assertEqual(regex.sub('(?V1).*', 'x', 'test'), 'xx')
 
-        if sys.version_info >= (3, 7, 0):
-            self.assertEqual(regex.sub('(?V0).*?', '|', 'test'), '|||||||||')
-        else:
-            self.assertEqual(regex.sub('(?V0).*?', '|', 'test'), '|t|e|s|t|')
+        self.assertEqual(regex.sub('(?V0).*?', '|', 'test'), '|||||||||')
         self.assertEqual(regex.sub('(?V1).*?', '|', 'test'), '|||||||||')
 
         # Hg issue 112: re: OK, but regex: SystemError
@@ -4441,16 +4380,15 @@ def test_subscripted_captures(self):
          'c a b c c b a')
 
     def test_more_zerowidth(self):
-        if sys.version_info >= (3, 7, 0):
-            self.assertEqual(regex.split(r'\b|:+', 'a::bc'), ['', 'a', '', '',
-              'bc', ''])
-            self.assertEqual(regex.sub(r'\b|:+', '-', 'a::bc'), '-a---bc-')
-            self.assertEqual(regex.findall(r'\b|:+', 'a::bc'), ['', '', '::',
-              '', ''])
-            self.assertEqual([m.span() for m in regex.finditer(r'\b|:+',
-              'a::bc')], [(0, 0), (1, 1), (1, 3), (3, 3), (5, 5)])
-            self.assertEqual([m.span() for m in regex.finditer(r'(?m)^\s*?$',
-              'foo\n\n\nbar')], [(4, 4), (4, 5), (5, 5)])
+        self.assertEqual(regex.split(r'\b|:+', 'a::bc'), ['', 'a', '', '',
+          'bc', ''])
+        self.assertEqual(regex.sub(r'\b|:+', '-', 'a::bc'), '-a---bc-')
+        self.assertEqual(regex.findall(r'\b|:+', 'a::bc'), ['', '', '::',
+          '', ''])
+        self.assertEqual([m.span() for m in regex.finditer(r'\b|:+',
+          'a::bc')], [(0, 0), (1, 1), (1, 3), (3, 3), (5, 5)])
+        self.assertEqual([m.span() for m in regex.finditer(r'(?m)^\s*?$',
+          'foo\n\n\nbar')], [(4, 4), (4, 5), (5, 5)])
 
     def test_line_ending(self):
         self.assertEqual(regex.findall(r'\R', '\r\n\n\x0B\f\r\x85\u2028\u2029'),
diff --git a/tools/build_regex_unicode.py b/tools/build_regex_unicode.py
index 1507bb0..fb0d10a 100644
--- a/tools/build_regex_unicode.py
+++ b/tools/build_regex_unicode.py
@@ -1,5 +1,4 @@
 #! python3.11
-# -*- coding: utf-8 -*-
 #
 # This Python script parses the Unicode data files in the UCD.zip file and
 # generates the C files for the regex module.
@@ -147,7 +146,7 @@ def lowest(self):
 
     def __repr__(self):
         self._normalise()
-        return 'Ranges({!r})'.format(self._ranges)
+        return f'Ranges({self._ranges!r})'
 
     def _normalise(self):
         if self._is_normalised:
@@ -190,7 +189,7 @@ def download_unicode_files(unicode_data_base, data_files, data_folder):
 
         if not exists(path):
             url = urljoin(unicode_data_base, rel_path)
-            print('Downloading {} from {}'.format(rel_path, url),
+            print(f'Downloading {rel_path} from {url}',
              flush=True)
             urlretrieve(url, path)
@@ -687,7 +686,7 @@ def write_summary(unicode_data, unicode_version, tools_folder):
     path = join(tools_folder, 'Unicode %s.txt' % unicode_version)
 
     with open(path, 'w', encoding='ascii') as file:
-        file.write('Version {}\n'.format(unicode_version))
+        file.write(f'Version {unicode_version}\n')
 
         for property in sorted(unique(properties.values(), key=id),
          key=preferred):
@@ -706,9 +705,9 @@ def write_summary(unicode_data, unicode_version, tools_folder):
                 for lower, upper in value.get('codepoints', []):
                     if lower == upper:
-                        file.write('{:04X}\n'.format(lower))
+                        file.write(f'{lower:04X}\n')
                     else:
-                        file.write('{:04X}..{:04X}\n'.format(lower, upper))
+                        file.write(f'{lower:04X}..{upper:04X}\n')
 
         else:
             if 'default' in property:
                 default = values[property['default']]
@@ -720,32 +719,32 @@ def write_summary(unicode_data, unicode_version, tools_folder):
 
                 for lower, upper in value.get('codepoints', []):
                     if lower == upper:
-                        file.write('{:04X}\n'.format(lower))
+                        file.write(f'{lower:04X}\n')
                     else:
-                        file.write('{:04X}..{:04X}\n'.format(lower, upper))
+                        file.write(f'{lower:04X}..{upper:04X}\n')
 
         file.write('SimpleFolding\n')
 
         for delta, ranges in unicode_data['simple_folding'].items():
-            file.write('Value {:04X}\n'.format(delta))
+            file.write(f'Value {delta:04X}\n')
 
             for lower, upper in ranges:
                 if lower == upper:
-                    file.write('{:04X}\n'.format(lower))
+                    file.write(f'{lower:04X}\n')
                 else:
-                    file.write('{:04X}..{:04X}\n'.format(lower, upper))
+                    file.write(f'{lower:04X}..{upper:04X}\n')
 
         file.write('FullFolding\n')
 
         for key, ranges in unicode_data['full_folding'].items():
-            file.write('Value {}\n'.format(' '.join('{:04X}'.format(value) for
+            file.write('Value {}\n'.format(' '.join(f'{value:04X}' for
              value in key)))
 
            for lower, upper in ranges:
                 if lower == upper:
-                    file.write('{:04X}\n'.format(lower))
+                    file.write(f'{lower:04X}\n')
                 else:
-                    file.write('{:04X}..{:04X}\n'.format(lower, upper))
+                    file.write(f'{lower:04X}..{upper:04X}\n')
 
 def make_binary_dict():
     binary_dict = {}
@@ -768,7 +767,7 @@ def collect_strings(properties):
         except KeyError:
             pass
 
-    return sorted(set(munge(string) for string in strings))
+    return sorted({munge(string) for string in strings})
 
 def chunked(iterable, chunk_size):
     sequence = iterable
@@ -787,7 +786,7 @@ def determine_entry_type(iterable):
     if 0 <= lower <= upper <= 0xFFFF:
         return 'RE_UINT16'
 
-    raise ValueError('cannot determine C type for {}..{}'.format(lower, upper))
+    raise ValueError(f'cannot determine C type for {lower}..{upper}')
 
 def is_binary(property):
     return sum(1 for val in val_list if val['id'] != 0) == 1
@@ -819,7 +818,7 @@ def generate_small_lookup(property, c_file):
         ranges.append((lower, upper, val_id))
 
     if len(ranges) == 1 and ranges[0][ : 2] == (0, NUM_CODEPOINTS - 1):
-        c_file.write('    return {};\n}}\n'.format(ranges[0][2]))
+        c_file.write(f'    return {ranges[0][2]};\n}}\n')
     else:
         for lower, upper, val_id in ranges:
             width = 2 if upper <= 0xFF else 4 if upper <= 0xFFFF else 6
@@ -835,15 +834,15 @@ def generate_small_lookup(property, c_file):
         return {};
 '''.format(lower, upper, val_id,
          width=width))
 
-        c_file.write('\n    return {};\n}}\n'.format(default_id))
+        c_file.write(f'\n    return {default_id};\n}}\n')
 
 def generate_table(table_name, values, c_file, max_columns=16, public=False):
     entry_type = determine_entry_type(values)
 
     if public:
-        c_file.write('{} {}[] = {{\n'.format(entry_type, table_name))
+        c_file.write(f'{entry_type} {table_name}[] = {{\n')
     else:
-        c_file.write('static {} {}[] = {{\n'.format(entry_type, table_name))
+        c_file.write(f'static {entry_type} {table_name}[] = {{\n')
 
     entries = [str(value) for value in values]
     max_width = max(len(entry) for entry in entries)
@@ -911,7 +910,7 @@ def generate_lookup(property, c_file):
         if i > 0:
             c_file.write('\n')
 
-        generate_table('re_{}_table_{}'.format(prop_name, 1 + i), table,
+        generate_table(f're_{prop_name}_table_{1 + i}', table,
          c_file)
 
     if binary:
@@ -1005,7 +1004,7 @@ def generate_script_extensions_lookup(properties, property, c_file):
     prop_name = property['names'][0].lower()
 
     for i, table in enumerate([table_0, table_1, table_2]):
-        generate_table('{}_table_{}'.format(prop_name, 1 + i), table, c_file)
+        generate_table(f'{prop_name}_table_{1 + i}', table, c_file)
 
     script_values = properties[munge('Script')]['values']
     ext_dict = {}
@@ -1021,9 +1020,9 @@ def generate_script_extensions_lookup(properties, property, c_file):
         offsets.append(len(entries))
         entries.extend(value + [0])
 
-    generate_table('{}_table_4'.format(prop_name), offsets, c_file)
+    generate_table(f'{prop_name}_table_4', offsets, c_file)
 
-    generate_table('{}_table_5'.format(prop_name), entries, c_file)
+    generate_table(f'{prop_name}_table_5', entries, c_file)
 
     c_file.write('''
 int re_get_{0}(RE_UINT32 codepoint, RE_UINT8* scripts) {{
@@ -1121,7 +1120,7 @@ def generate_all_cases(unicode_data, c_file):
         if i > 0:
             c_file.write('\n')
 
-        generate_table('re_all_cases_table_{}'.format(1 + i), table, c_file)
+        generate_table(f're_all_cases_table_{1 + i}', table, c_file)
 
     c_file.write('\nstatic RE_AllCases re_all_cases_table_4[] = {\n')
 
@@ -1219,7 +1218,7 @@ def generate_simple_case_folding(unicode_data, c_file):
         if i > 0:
             c_file.write('\n')
 
-        generate_table('re_simple_folding_table_{}'.format(1 + i), table, c_file)
+        generate_table(f're_simple_folding_table_{1 + i}', table, c_file)
 
     c_file.write('\nstatic RE_UINT16 re_simple_folding_table_4[] = {\n')
 
@@ -1292,7 +1291,7 @@ def generate_full_case_folding(unicode_data, c_file):
         if i > 0:
             c_file.write('\n')
 
-        generate_table('re_full_folding_table_{}'.format(1 + i), table, c_file)
+        generate_table(f're_full_folding_table_{1 + i}', table, c_file)
 
     c_file.write('\nstatic RE_FullCaseFolding re_full_folding_table_4[] = {\n')
 
@@ -1493,7 +1492,7 @@ def make_key(value):
     lines = []
 
     for string in strings:
-        lines.append('    "{}",\n'.format(string))
+        lines.append(f'    "{string}",\n')
 
     strings_dict = {string: i for i, string in enumerate(strings)}
 
@@ -1580,7 +1579,7 @@ def make_key(names):
         if prop_name == 'script_extensions':
             lines.append('    0,\n')
         else:
-            lines.append('    re_get_{},\n'.format(prop_name))
+            lines.append(f'    re_get_{prop_name},\n')
 
     lines[-1] = lines[-1].rstrip(',\n') + '\n'
 
@@ -1646,7 +1645,7 @@ def make_key(names):
 
     gc_values = properties[munge('General_Category')]['values']
     group_names = set('C L M N P S Z Assigned Cased_Letter'.split())
-    names = set(gc_values) & set(munge(name) for name in group_names)
+    names = set(gc_values) & {munge(name) for name in group_names}
 
     for name in sorted(names, key=lambda name: gc_values[name]['id']):
         h_file.write('#define RE_PROP_{} {}\n'.format(name,
@@ -1670,7 +1669,7 @@ def make_key(names):
         masks[name[0]] |= 1 << val_id
 
     for val_id, name in sorted(val_list):
-        h_file.write('#define RE_PROP_{} {}\n'.format(name, val_id))
+        h_file.write(f'#define RE_PROP_{name} {val_id}\n')
 
     h_file.write('\n')
 
@@ -1745,9 +1744,9 @@ def make_key(names):
 
         prop_name = property['names'][0]
         if prop_name == 'Script_Extensions':
-            h_file.write('int re_get_{}(RE_UINT32 codepoint, RE_UINT8* scripts);\n'.format(prop_name.lower()))
+            h_file.write(f'int re_get_{prop_name.lower()}(RE_UINT32 codepoint, RE_UINT8* scripts);\n')
         else:
-            h_file.write('RE_UINT32 re_get_{}(RE_UINT32 codepoint);\n'.format(prop_name.lower()))
+            h_file.write(f'RE_UINT32 re_get_{prop_name.lower()}(RE_UINT32 codepoint);\n')
 
     h_file.write('int re_get_all_cases(RE_UINT32 codepoint, RE_UINT32* cases);\n')
     h_file.write('RE_UINT32 re_get_simple_case_folding(RE_UINT32 codepoint);\n')

From 9700616b156ed0e2a312dfb7c0a8c2721f504425 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20K=C5=82oczko?=
Date: Sun, 5 May 2024 07:11:45 +0000
Subject: [PATCH 2/2] drop unused imports
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Filter code over ruff.

Signed-off-by: Tomasz Kłoczko
---
 regex_3/test_regex.py        | 1 -
 tools/build_regex_unicode.py | 4 +---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/regex_3/test_regex.py b/regex_3/test_regex.py
index b473417..2a16b65 100644
--- a/regex_3/test_regex.py
+++ b/regex_3/test_regex.py
@@ -3,7 +3,6 @@
 import pickle
 import regex
 import string
-import sys
 import unittest
 
 # String subclasses for issue 18468.
diff --git a/tools/build_regex_unicode.py b/tools/build_regex_unicode.py
index fb0d10a..407e44d 100644
--- a/tools/build_regex_unicode.py
+++ b/tools/build_regex_unicode.py
@@ -5,12 +5,10 @@
 #
 # Written by MRAB.
 #
-from contextlib import contextmanager, suppress
+from contextlib import contextmanager
 from io import TextIOWrapper
 from itertools import chain
-from os import listdir, mkdir
 from os.path import basename, dirname, exists, join, normpath
-from time import time
 from urllib.parse import urljoin
 from urllib.request import urlretrieve
 from zipfile import ZipFile