# Maps every member of each group in _equivalences to a tuple of the *other*
# members of that same group (the member itself is excluded).
# NOTE(review): presumably the groups are sets of code points that should
# match each other under IGNORECASE -- confirm against _equivalences.
_ignorecase_fixes = {i: tuple(j for j in t if i != j)
                     for t in _equivalences for i in t}
6969
class _CompileData:
    """Mutable state threaded through the recursive _compile() calls.

    Attributes (declared via __slots__, so no per-instance __dict__):
      code         -- list that the emitted code words are appended to.
      repeat_count -- number of repeat indexes handed out so far; _compile()
                      emits the current value as an operand for each
                      non-possessive repeat and then increments it.
    """

    __slots__ = ('code', 'repeat_count')

    def __init__(self):
        self.code = []
        self.repeat_count = 0
75+
def _combine_flags(flags, add_flags, del_flags,
                   TYPE_FLAGS=_parser.TYPE_FLAGS):
    """Return *flags* with *add_flags* set and *del_flags* cleared.

    If *add_flags* contains any bit of TYPE_FLAGS, every TYPE_FLAGS bit is
    first removed from *flags*, so the type flag being added replaces the
    one that was previously in effect instead of being combined with it.

    TYPE_FLAGS is bound once, at definition time, from _parser.TYPE_FLAGS.
    """
    if add_flags & TYPE_FLAGS:
        flags &= ~TYPE_FLAGS
    return (flags | add_flags) & ~del_flags
7581
76- def _compile (code , pattern , flags ):
82+ def _compile (data , pattern , flags ):
7783 # internal: compile a (sub)pattern
84+ code = data .code
7885 emit = code .append
7986 _len = len
8087 LITERAL_CODES = _LITERAL_CODES
@@ -147,15 +154,19 @@ def _compile(code, pattern, flags):
147154 skip = _len (code ); emit (0 )
148155 emit (av [0 ])
149156 emit (av [1 ])
150- _compile (code , av [2 ], flags )
157+ _compile (data , av [2 ], flags )
151158 emit (SUCCESS )
152159 code [skip ] = _len (code ) - skip
153160 else :
154161 emit (REPEATING_CODES [op ][0 ])
155162 skip = _len (code ); emit (0 )
156163 emit (av [0 ])
157164 emit (av [1 ])
158- _compile (code , av [2 ], flags )
165+ # now op is in (MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT)
166+ if op != POSSESSIVE_REPEAT :
167+ emit (data .repeat_count )
168+ data .repeat_count += 1
169+ _compile (data , av [2 ], flags )
159170 code [skip ] = _len (code ) - skip
160171 emit (REPEATING_CODES [op ][1 ])
161172 elif op is SUBPATTERN :
@@ -164,7 +175,7 @@ def _compile(code, pattern, flags):
164175 emit (MARK )
165176 emit ((group - 1 )* 2 )
166177 # _compile_info(code, p, _combine_flags(flags, add_flags, del_flags))
167- _compile (code , p , _combine_flags (flags , add_flags , del_flags ))
178+ _compile (data , p , _combine_flags (flags , add_flags , del_flags ))
168179 if group :
169180 emit (MARK )
170181 emit ((group - 1 )* 2 + 1 )
@@ -176,7 +187,7 @@ def _compile(code, pattern, flags):
176187 # pop their stack if they reach it
177188 emit (ATOMIC_GROUP )
178189 skip = _len (code ); emit (0 )
179- _compile (code , av , flags )
190+ _compile (data , av , flags )
180191 emit (SUCCESS )
181192 code [skip ] = _len (code ) - skip
182193 elif op in SUCCESS_CODES :
@@ -191,13 +202,13 @@ def _compile(code, pattern, flags):
191202 if lo != hi :
192203 raise error ("look-behind requires fixed-width pattern" )
193204 emit (lo ) # look behind
194- _compile (code , av [1 ], flags )
205+ _compile (data , av [1 ], flags )
195206 emit (SUCCESS )
196207 code [skip ] = _len (code ) - skip
197208 elif op is CALL :
198209 emit (op )
199210 skip = _len (code ); emit (0 )
200- _compile (code , av , flags )
211+ _compile (data , av , flags )
201212 emit (SUCCESS )
202213 code [skip ] = _len (code ) - skip
203214 elif op is AT :
@@ -216,7 +227,7 @@ def _compile(code, pattern, flags):
216227 for av in av [1 ]:
217228 skip = _len (code ); emit (0 )
218229 # _compile_info(code, av, flags)
219- _compile (code , av , flags )
230+ _compile (data , av , flags )
220231 emit (JUMP )
221232 tailappend (_len (code )); emit (0 )
222233 code [skip ] = _len (code ) - skip
@@ -244,12 +255,12 @@ def _compile(code, pattern, flags):
244255 emit (op )
245256 emit (av [0 ]- 1 )
246257 skipyes = _len (code ); emit (0 )
247- _compile (code , av [1 ], flags )
258+ _compile (data , av [1 ], flags )
248259 if av [2 ]:
249260 emit (JUMP )
250261 skipno = _len (code ); emit (0 )
251262 code [skipyes ] = _len (code ) - skipyes + 1
252- _compile (code , av [2 ], flags )
263+ _compile (data , av [2 ], flags )
253264 code [skipno ] = _len (code ) - skipno
254265 else :
255266 code [skipyes ] = _len (code ) - skipyes + 1
@@ -608,17 +619,17 @@ def isstring(obj):
def _code(p, flags):
    """Compile the parsed pattern *p* and return the resulting _CompileData.

    *flags* is OR-ed with the flags recorded on ``p.state``.  The returned
    object's ``code`` list holds, in order, the info block, the compiled
    pattern body and a terminating SUCCESS; its ``repeat_count`` is whatever
    _compile() accumulated while compiling the body.
    """

    flags = p.state.flags | flags
    data = _CompileData()

    # compile info block
    _compile_info(data.code, p, flags)

    # compile the pattern
    _compile(data, p.data, flags)

    data.code.append(SUCCESS)

    return data
622633
def _hex_code(code):
    """Return the items of *code* formatted as a bracketed hex list.

    Each item is rendered with a ``0x`` prefix and zero-padded to
    ``_sre.CODESIZE * 2`` hex digits (the ``+ 2`` in the width accounts for
    the prefix); items are separated by ``', '``.
    """
    return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code)
@@ -719,14 +730,21 @@ def print_2(*args):
719730 else :
720731 print_ (FAILURE )
721732 i += 1
722- elif op in (REPEAT , REPEAT_ONE , MIN_REPEAT_ONE ,
733+ elif op in (REPEAT_ONE , MIN_REPEAT_ONE ,
723734 POSSESSIVE_REPEAT , POSSESSIVE_REPEAT_ONE ):
724735 skip , min , max = code [i : i + 3 ]
725736 if max == MAXREPEAT :
726737 max = 'MAXREPEAT'
727738 print_ (op , skip , min , max , to = i + skip )
728739 dis_ (i + 3 , i + skip )
729740 i += skip
741+ elif op is REPEAT :
742+ skip , min , max , repeat_index = code [i : i + 4 ]
743+ if max == MAXREPEAT :
744+ max = 'MAXREPEAT'
745+ print_ (op , skip , min , max , repeat_index , to = i + skip )
746+ dis_ (i + 4 , i + skip )
747+ i += skip
730748 elif op is GROUPREF_EXISTS :
731749 arg , skip = code [i : i + 2 ]
732750 print_ (op , arg , skip , to = i + skip )
@@ -781,11 +799,11 @@ def compile(p, flags=0):
781799 else :
782800 pattern = None
783801
784- code = _code (p , flags )
802+ data = _code (p , flags )
785803
786804 if flags & SRE_FLAG_DEBUG :
787805 print ()
788- dis (code )
806+ dis (data . code )
789807
790808 # map in either direction
791809 groupindex = p .state .groupdict
@@ -794,7 +812,6 @@ def compile(p, flags=0):
794812 indexgroup [i ] = k
795813
796814 return _sre .compile (
797- pattern , flags | p .state .flags , code ,
798- p .state .groups - 1 ,
799- groupindex , tuple (indexgroup )
800- )
815+ pattern , flags | p .state .flags , data .code ,
816+ p .state .groups - 1 , groupindex , tuple (indexgroup ),
817+ data .repeat_count )
0 commit comments