Skip to content

Commit 029835d

Browse files
gh-91404: Revert "bpo-23689: re module, fix memory leak when a match is terminated by a signal or allocation failure (GH-32283)" (GH-93882)
Revert "bpo-23689: re module, fix memory leak when a match is terminated by a signal or memory allocation failure (GH-32283)" This reverts commit 6e3eee5. Manual fixups to increase the MAGIC number and to handle conflicts with a couple of changes that landed after that. Thanks for reviews by Ma Lin and Serhiy Storchaka. (cherry picked from commit 4beee0c) Co-authored-by: Gregory P. Smith <greg@krypto.org>
1 parent 5ee86d4 commit 029835d

File tree

9 files changed

+74
-146
lines changed

9 files changed

+74
-146
lines changed

Lib/re/_compiler.py

+21-38
Original file line numberDiff line numberDiff line change
@@ -28,21 +28,14 @@
2828
POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE),
2929
}
3030

31-
class _CompileData:
32-
__slots__ = ('code', 'repeat_count')
33-
def __init__(self):
34-
self.code = []
35-
self.repeat_count = 0
36-
3731
def _combine_flags(flags, add_flags, del_flags,
3832
TYPE_FLAGS=_parser.TYPE_FLAGS):
3933
if add_flags & TYPE_FLAGS:
4034
flags &= ~TYPE_FLAGS
4135
return (flags | add_flags) & ~del_flags
4236

43-
def _compile(data, pattern, flags):
37+
def _compile(code, pattern, flags):
4438
# internal: compile a (sub)pattern
45-
code = data.code
4639
emit = code.append
4740
_len = len
4841
LITERAL_CODES = _LITERAL_CODES
@@ -115,19 +108,15 @@ def _compile(data, pattern, flags):
115108
skip = _len(code); emit(0)
116109
emit(av[0])
117110
emit(av[1])
118-
_compile(data, av[2], flags)
111+
_compile(code, av[2], flags)
119112
emit(SUCCESS)
120113
code[skip] = _len(code) - skip
121114
else:
122115
emit(REPEATING_CODES[op][0])
123116
skip = _len(code); emit(0)
124117
emit(av[0])
125118
emit(av[1])
126-
# now op is in (MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT)
127-
if op != POSSESSIVE_REPEAT:
128-
emit(data.repeat_count)
129-
data.repeat_count += 1
130-
_compile(data, av[2], flags)
119+
_compile(code, av[2], flags)
131120
code[skip] = _len(code) - skip
132121
emit(REPEATING_CODES[op][1])
133122
elif op is SUBPATTERN:
@@ -136,7 +125,7 @@ def _compile(data, pattern, flags):
136125
emit(MARK)
137126
emit((group-1)*2)
138127
# _compile_info(code, p, _combine_flags(flags, add_flags, del_flags))
139-
_compile(data, p, _combine_flags(flags, add_flags, del_flags))
128+
_compile(code, p, _combine_flags(flags, add_flags, del_flags))
140129
if group:
141130
emit(MARK)
142131
emit((group-1)*2+1)
@@ -148,7 +137,7 @@ def _compile(data, pattern, flags):
148137
# pop their stack if they reach it
149138
emit(ATOMIC_GROUP)
150139
skip = _len(code); emit(0)
151-
_compile(data, av, flags)
140+
_compile(code, av, flags)
152141
emit(SUCCESS)
153142
code[skip] = _len(code) - skip
154143
elif op in SUCCESS_CODES:
@@ -163,7 +152,7 @@ def _compile(data, pattern, flags):
163152
if lo != hi:
164153
raise error("look-behind requires fixed-width pattern")
165154
emit(lo) # look behind
166-
_compile(data, av[1], flags)
155+
_compile(code, av[1], flags)
167156
emit(SUCCESS)
168157
code[skip] = _len(code) - skip
169158
elif op is AT:
@@ -182,7 +171,7 @@ def _compile(data, pattern, flags):
182171
for av in av[1]:
183172
skip = _len(code); emit(0)
184173
# _compile_info(code, av, flags)
185-
_compile(data, av, flags)
174+
_compile(code, av, flags)
186175
emit(JUMP)
187176
tailappend(_len(code)); emit(0)
188177
code[skip] = _len(code) - skip
@@ -210,12 +199,12 @@ def _compile(data, pattern, flags):
210199
emit(op)
211200
emit(av[0]-1)
212201
skipyes = _len(code); emit(0)
213-
_compile(data, av[1], flags)
202+
_compile(code, av[1], flags)
214203
if av[2]:
215204
emit(JUMP)
216205
skipno = _len(code); emit(0)
217206
code[skipyes] = _len(code) - skipyes + 1
218-
_compile(data, av[2], flags)
207+
_compile(code, av[2], flags)
219208
code[skipno] = _len(code) - skipno
220209
else:
221210
code[skipyes] = _len(code) - skipyes + 1
@@ -582,17 +571,17 @@ def isstring(obj):
582571
def _code(p, flags):
583572

584573
flags = p.state.flags | flags
585-
data = _CompileData()
574+
code = []
586575

587576
# compile info block
588-
_compile_info(data.code, p, flags)
577+
_compile_info(code, p, flags)
589578

590579
# compile the pattern
591-
_compile(data, p.data, flags)
580+
_compile(code, p.data, flags)
592581

593-
data.code.append(SUCCESS)
582+
code.append(SUCCESS)
594583

595-
return data
584+
return code
596585

597586
def _hex_code(code):
598587
return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code)
@@ -693,21 +682,14 @@ def print_2(*args):
693682
else:
694683
print_(FAILURE)
695684
i += 1
696-
elif op in (REPEAT_ONE, MIN_REPEAT_ONE,
685+
elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE,
697686
POSSESSIVE_REPEAT, POSSESSIVE_REPEAT_ONE):
698687
skip, min, max = code[i: i+3]
699688
if max == MAXREPEAT:
700689
max = 'MAXREPEAT'
701690
print_(op, skip, min, max, to=i+skip)
702691
dis_(i+3, i+skip)
703692
i += skip
704-
elif op is REPEAT:
705-
skip, min, max, repeat_index = code[i: i+4]
706-
if max == MAXREPEAT:
707-
max = 'MAXREPEAT'
708-
print_(op, skip, min, max, repeat_index, to=i+skip)
709-
dis_(i+4, i+skip)
710-
i += skip
711693
elif op is GROUPREF_EXISTS:
712694
arg, skip = code[i: i+2]
713695
print_(op, arg, skip, to=i+skip)
@@ -762,11 +744,11 @@ def compile(p, flags=0):
762744
else:
763745
pattern = None
764746

765-
data = _code(p, flags)
747+
code = _code(p, flags)
766748

767749
if flags & SRE_FLAG_DEBUG:
768750
print()
769-
dis(data.code)
751+
dis(code)
770752

771753
# map in either direction
772754
groupindex = p.state.groupdict
@@ -775,6 +757,7 @@ def compile(p, flags=0):
775757
indexgroup[i] = k
776758

777759
return _sre.compile(
778-
pattern, flags | p.state.flags, data.code,
779-
p.state.groups-1, groupindex, tuple(indexgroup),
780-
data.repeat_count)
760+
pattern, flags | p.state.flags, code,
761+
p.state.groups-1,
762+
groupindex, tuple(indexgroup)
763+
)

Lib/re/_constants.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
# update when constants are added or removed
1515

16-
MAGIC = 20220423
16+
MAGIC = 20220615
1717

1818
from _sre import MAXREPEAT, MAXGROUPS
1919

Lib/test/test_re.py

+2-26
Original file line numberDiff line numberDiff line change
@@ -1796,12 +1796,9 @@ def test_dealloc(self):
17961796
long_overflow = 2**128
17971797
self.assertRaises(TypeError, re.finditer, "a", {})
17981798
with self.assertRaises(OverflowError):
1799-
_sre.compile("abc", 0, [long_overflow], 0, {}, (), 0)
1799+
_sre.compile("abc", 0, [long_overflow], 0, {}, ())
18001800
with self.assertRaises(TypeError):
1801-
_sre.compile({}, 0, [], 0, [], [], 0)
1802-
with self.assertRaises(RuntimeError):
1803-
# invalid repeat_count -1
1804-
_sre.compile("abc", 0, [1], 0, {}, (), -1)
1801+
_sre.compile({}, 0, [], 0, [], [])
18051802

18061803
def test_search_dot_unicode(self):
18071804
self.assertTrue(re.search("123.*-", '123abc-'))
@@ -2540,27 +2537,6 @@ def test_possesive_repeat(self):
25402537
14. SUCCESS
25412538
''')
25422539

2543-
def test_repeat_index(self):
2544-
self.assertEqual(get_debug_out(r'(?:ab)*?(?:cd)*'), '''\
2545-
MIN_REPEAT 0 MAXREPEAT
2546-
LITERAL 97
2547-
LITERAL 98
2548-
MAX_REPEAT 0 MAXREPEAT
2549-
LITERAL 99
2550-
LITERAL 100
2551-
2552-
0. INFO 4 0b0 0 MAXREPEAT (to 5)
2553-
5: REPEAT 8 0 MAXREPEAT 0 (to 14)
2554-
10. LITERAL 0x61 ('a')
2555-
12. LITERAL 0x62 ('b')
2556-
14: MIN_UNTIL
2557-
15. REPEAT 8 0 MAXREPEAT 1 (to 24)
2558-
20. LITERAL 0x63 ('c')
2559-
22. LITERAL 0x64 ('d')
2560-
24: MAX_UNTIL
2561-
25. SUCCESS
2562-
''')
2563-
25642540

25652541
class PatternReprTests(unittest.TestCase):
25662542
def check(self, pattern, expected):
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Revert the fix for the :mod:`re` memory leak when a match is terminated by a signal or
2+
memory allocation failure as the implemented fix caused a major performance
3+
regression.

Modules/_sre/clinic/sre.c.h

+7-20
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)