Skip to content

Commit cf4bfd3

Browse files
authored
<regex>: Mark more loops as simple (#5889)
1 parent a6b014c commit cf4bfd3

File tree

2 files changed

+12
-22
lines changed

2 files changed

+12
-22
lines changed

stl/inc/regex

Lines changed: 11 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -4102,16 +4102,12 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
41024102
}
41034103
} else if (_Sav._Loop_idx < _Nr->_Min) { // at least one more rep to reach minimum
41044104
_Next = _Nr->_Next;
4105-
// GH-5365: We have to reset the capture groups from the second iteration on.
4106-
_Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
41074105
++_Sav._Loop_idx;
41084106
} else if (_Greedy && !_Longest && _Sav._Loop_idx != _Nr->_Max) { // one more rep to try next
41094107
// set up stack unwinding for greedy matching
41104108
_Push_frame(_Rx_unwind_ops::_Loop_simple_greedy, _Nr);
41114109

41124110
_Next = _Nr->_Next;
4113-
// GH-5365: We have to reset the capture groups from the second iteration on.
4114-
_Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
41154111
if (_Sav._Loop_idx < INT_MAX) { // avoid overflowing _Loop_idx
41164112
++_Sav._Loop_idx;
41174113
}
@@ -4294,12 +4290,11 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
42944290
// try tail if matching one more rep failed
42954291
if (_Failed) {
42964292
auto _Node = static_cast<_Node_rep*>(_Frame._Node);
4297-
auto& _Sav = _Loop_vals[_Node->_Loop_number];
42984293

42994294
_Increase_complexity_count();
43004295
_Nx = _Node->_End_rep->_Next;
43014296
_Tgt_state._Cur = _Frame._Match_state._Cur;
4302-
_Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
4297+
_Tgt_state._Grp_valid = _Frame._Match_state._Grp_valid;
43034298
_Failed = false;
43044299
}
43054300
break;
@@ -5356,14 +5351,21 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity(
53565351
for (_Node_if* _Branch = static_cast<_Node_if*>(_Nx)->_Child; _Branch; _Branch = _Branch->_Child) {
53575352
_Calculate_loop_simplicity(_Branch->_Next, _Branch->_Endif, _Outer_rep);
53585353
}
5359-
53605354
break;
5355+
53615356
case _N_assert:
5357+
// A positive lookahead assertion inside a _Node_rep makes the rep not simple
5358+
if (_Outer_rep) {
5359+
_Outer_rep->_Simple_loop = 0;
5360+
}
5361+
_FALLTHROUGH;
5362+
53625363
case _N_neg_assert:
53635364
// visit the assertion body
53645365
// note _Outer_rep being reset: the assertion regex is completely independent
53655366
_Calculate_loop_simplicity(static_cast<_Node_assert*>(_Nx)->_Child, nullptr, nullptr);
53665367
break;
5368+
53675369
case _N_rep:
53685370
// _Node_rep inside another _Node_rep makes both not simple if _Outer_rep can be repeated more than once
53695371
// because the matcher does not reset capture group boundaries when handling simple loops.
@@ -5381,6 +5383,7 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity(
53815383
_Outer_rep = static_cast<_Node_rep*>(_Nx);
53825384
}
53835385
break;
5386+
53845387
case _N_end_rep:
53855388
if (_Outer_rep == static_cast<_Node_end_rep*>(_Nx)->_Begin_rep) {
53865389
// if the _Node_rep is still undetermined when we reach its end, it is simple
@@ -5391,6 +5394,7 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity(
53915394
_Outer_rep = nullptr;
53925395
}
53935396
break;
5397+
53945398
case _N_class:
53955399
if (_Outer_rep) {
53965400
// _Node_rep is not simple if a class can match character sequences of different lengths
@@ -5407,14 +5411,6 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity(
54075411

54085412
case _N_group:
54095413
case _N_capture:
5410-
// TRANSITION, requires more research to decide on the subset of loops that we can make simple:
5411-
// - Simple mode can square the running time when matching a regex to an input string in the current matcher
5412-
// - The optimal subset of simple loops for a non-recursive rewrite of the matcher aren't clear yet
5413-
if (_Outer_rep) {
5414-
_Outer_rep->_Simple_loop = 0;
5415-
}
5416-
break;
5417-
54185414
case _N_none:
54195415
case _N_nop:
54205416
case _N_bol:

tests/std/tests/VSO_0000000_regex_use/test.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -902,13 +902,7 @@ void test_gh_993() {
902902
void test_gh_997() {
903903
// GH-997: <regex>: Grouping within repetition causes regex stack error
904904
// GH-1528: <regex>: regex_match gets caught in recursive loop until stack overflow occurs
905-
906-
try {
907-
(void) regex_match(string(1025, 'a'), regex("(?:a)+"));
908-
assert(false); // adjust test when matching succeeds
909-
} catch (const regex_error& ex) {
910-
assert(ex.code() == error_stack);
911-
}
905+
g_regexTester.should_match(string(1025, 'a'), "(?:a)+");
912906

913907
{
914908
test_wregex rgx(&g_regexTester, LR"(^http[s]?://([^.]+\.)*example\.com/.*$)", icase);

0 commit comments

Comments
 (0)