@@ -4102,16 +4102,12 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
41024102 }
41034103 } else if (_Sav._Loop_idx < _Nr->_Min) { // at least one more rep to reach minimum
41044104 _Next = _Nr->_Next;
4105- // GH-5365: We have to reset the capture groups from the second iteration on.
4106- _Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
41074105 ++_Sav._Loop_idx;
41084106 } else if (_Greedy && !_Longest && _Sav._Loop_idx != _Nr->_Max) { // one more rep to try next
41094107 // set up stack unwinding for greedy matching
41104108 _Push_frame(_Rx_unwind_ops::_Loop_simple_greedy, _Nr);
41114109
41124110 _Next = _Nr->_Next;
4113- // GH-5365: We have to reset the capture groups from the second iteration on.
4114- _Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
41154111 if (_Sav._Loop_idx < INT_MAX) { // avoid overflowing _Loop_idx
41164112 ++_Sav._Loop_idx;
41174113 }
@@ -4294,12 +4290,11 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
42944290 // try tail if matching one more rep failed
42954291 if (_Failed) {
42964292 auto _Node = static_cast<_Node_rep*>(_Frame._Node);
4297- auto& _Sav = _Loop_vals[_Node->_Loop_number];
42984293
42994294 _Increase_complexity_count();
43004295 _Nx = _Node->_End_rep->_Next;
43014296 _Tgt_state._Cur = _Frame._Match_state._Cur;
4302- _Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx] ._Match_state._Grp_valid;
4297+ _Tgt_state._Grp_valid = _Frame ._Match_state._Grp_valid;
43034298 _Failed = false;
43044299 }
43054300 break;
@@ -5356,14 +5351,21 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity(
53565351 for (_Node_if* _Branch = static_cast<_Node_if*>(_Nx)->_Child; _Branch; _Branch = _Branch->_Child) {
53575352 _Calculate_loop_simplicity(_Branch->_Next, _Branch->_Endif, _Outer_rep);
53585353 }
5359-
53605354 break;
5355+
53615356 case _N_assert:
5357+ // A positive lookahead assertion inside a _Node_rep makes the rep not simple
5358+ if (_Outer_rep) {
5359+ _Outer_rep->_Simple_loop = 0;
5360+ }
5361+ _FALLTHROUGH;
5362+
53625363 case _N_neg_assert:
53635364 // visit the assertion body
53645365 // note _Outer_rep being reset: the assertion regex is completely independent
53655366 _Calculate_loop_simplicity(static_cast<_Node_assert*>(_Nx)->_Child, nullptr, nullptr);
53665367 break;
5368+
53675369 case _N_rep:
53685370 // _Node_rep inside another _Node_rep makes both not simple if _Outer_rep can be repeated more than once
53695371 // because the matcher does not reset capture group boundaries when handling simple loops.
@@ -5381,6 +5383,7 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity(
53815383 _Outer_rep = static_cast<_Node_rep*>(_Nx);
53825384 }
53835385 break;
5386+
53845387 case _N_end_rep:
53855388 if (_Outer_rep == static_cast<_Node_end_rep*>(_Nx)->_Begin_rep) {
53865389 // if the _Node_rep is still undetermined when we reach its end, it is simple
@@ -5391,6 +5394,7 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity(
53915394 _Outer_rep = nullptr;
53925395 }
53935396 break;
5397+
53945398 case _N_class:
53955399 if (_Outer_rep) {
53965400 // _Node_rep is not simple if a class can match character sequences of different lengths
@@ -5407,14 +5411,6 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity(
54075411
54085412 case _N_group:
54095413 case _N_capture:
5410- // TRANSITION, requires more research to decide on the subset of loops that we can make simple:
5411- // - Simple mode can square the running time when matching a regex to an input string in the current matcher
5412- // - The optimal subset of simple loops for a non-recursive rewrite of the matcher aren't clear yet
5413- if (_Outer_rep) {
5414- _Outer_rep->_Simple_loop = 0;
5415- }
5416- break;
5417-
54185414 case _N_none:
54195415 case _N_nop:
54205416 case _N_bol:
0 commit comments