@@ -1493,9 +1493,10 @@ public:
14931493 _Node_end_rep& operator=(const _Node_end_rep&) = delete;
14941494};
14951495
1496- struct _Loop_vals_t { // storage for loop administration
1497- int _Loop_idx;
1496+ struct _Loop_vals_v2_t { // storage for loop administration; _Loop_vals holds one entry per _Node_rep::_Loop_number
14981497 void* _Loop_iter; // type-erased pointer to the _It holding the input position saved when the current repetition started
1498+ int _Loop_idx; // number of the repetition currently being attempted (_Do_rep stores _Init_idx + 1 here)
1499+ unsigned int _Group_first; // index of the first capture group inside the loop; 0 = not determined yet, _Ncap = nothing to reset
14991500};
15001501
15011502class _Node_rep : public _Node_base { // node that marks the beginning of a repetition
@@ -1681,13 +1682,15 @@ public:
16811682private:
16821683 _Tgt_state_t<_It> _Tgt_state;
16831684 _Tgt_state_t<_It> _Res;
1684- vector<_Loop_vals_t > _Loop_vals;
1685+ vector<_Loop_vals_v2_t > _Loop_vals;
16851686
16861687 bool _Do_assert(_Node_assert*);
16871688 bool _Do_neg_assert(_Node_assert*);
16881689 bool _Do_if(_Node_if*);
16891690 bool _Do_rep0(_Node_rep*, bool);
16901691 bool _Do_rep(_Node_rep*, bool, int);
1692+ bool _Do_rep_first(_Node_rep*);
1693+ bool _Find_first_inner_capture_group(_Node_base*, _Loop_vals_v2_t*);
16911694 bool _Do_class(_Node_base*);
16921695 bool _Match_pat(_Node_base*);
16931696 bool _Better_match();
@@ -3235,6 +3238,13 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
32353238 _Tgt_state_t<_It> _St = _Tgt_state;
32363239
32373240 for (; _Ix < _Node->_Min; ++_Ix) { // do minimum number of reps
3241+ // GH-5365: We have to reset the capture groups from the second iteration on.
3242+ // We can avoid the reset for the first iteration
3243+ // because we know that a simple repetition was not encountered before.
3244+ if (_Ix > 0) {
3245+ _Tgt_state._Grp_valid = _St._Grp_valid;
3246+ }
3247+
32383248 _It _Cur = _Tgt_state._Cur;
32393249 if (!_Match_pat(_Node->_Next)) { // didn't match minimum number of reps, fail
32403250 _Tgt_state = _St;
@@ -3290,17 +3300,12 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
32903300template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
32913301bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node, bool _Greedy, int _Init_idx) {
32923302 // apply repetition
3293- if (_Node->_Simple_loop == 1) {
3294- return _Do_rep0(_Node, _Greedy);
3295- }
3296-
3297- bool _Matched0 = false;
3298- _Tgt_state_t<_It> _St = _Tgt_state;
3299- _Loop_vals_t* _Psav = &_Loop_vals[_Node->_Loop_number];
3300- int _Loop_idx_sav = _Psav->_Loop_idx;
3301- _It* _Loop_iter_sav = static_cast<_It*>(_Psav->_Loop_iter);
3302-
3303- bool _Progress = _Init_idx == 0 || *_Loop_iter_sav != _St._Cur;
3303+ bool _Matched0 = false;
3304+ _Tgt_state_t<_It> _St = _Tgt_state;
3305+ _Loop_vals_v2_t* _Psav = &_Loop_vals[_Node->_Loop_number];
3306+ int _Loop_idx_sav = _Psav->_Loop_idx;
3307+ _It* _Loop_iter_sav = static_cast<_It*>(_Psav->_Loop_iter);
3308+ bool _Progress = _Init_idx == 0 || *_Loop_iter_sav != _St._Cur;
33043309
33053310 if (0 <= _Node->_Max && _Node->_Max <= _Init_idx) {
33063311 _Matched0 = _Match_pat(_Node->_End_rep->_Next); // reps done, try tail
@@ -3310,7 +3315,9 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node,
33103315 } else { // try another required match
33113316 _Psav->_Loop_idx = _Init_idx + 1;
33123317 _Psav->_Loop_iter = _STD addressof(_St._Cur);
3313- _Matched0 = _Match_pat(_Node->_Next);
3318+ _STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first),
3319+ _Tgt_state._Grp_valid.end(), false);
3320+ _Matched0 = _Match_pat(_Node->_Next);
33143321 }
33153322 } else if (_Longest) { // longest, try any number of repetitions
33163323
@@ -3332,13 +3339,17 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node,
33323339 _Tgt_state = _St;
33333340 _Psav->_Loop_idx = _Init_idx + 1;
33343341 _Psav->_Loop_iter = _STD addressof(_St._Cur);
3335- _Matched0 = _Match_pat(_Node->_Next);
3342+ _STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first),
3343+ _Tgt_state._Grp_valid.end(), false);
3344+ _Matched0 = _Match_pat(_Node->_Next);
33363345 }
33373346 } else { // greedy, favor maximum number of reps
33383347 if (_Progress) { // try another rep
33393348 _Psav->_Loop_idx = _Init_idx + 1;
33403349 _Psav->_Loop_iter = _STD addressof(_St._Cur);
3341- _Matched0 = _Match_pat(_Node->_Next);
3350+ _STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first),
3351+ _Tgt_state._Grp_valid.end(), false);
3352+ _Matched0 = _Match_pat(_Node->_Next);
33423353 }
33433354
33443355 if ((_Progress || 1 >= _Init_idx) && !_Matched0) { // rep failed, try tail
@@ -3358,6 +3369,127 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node,
33583369 return _Matched0;
33593370}
33603371
3372+ template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3373+ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep_first(_Node_rep* _Node) {
3374+ bool _Greedy = (_Node->_Flags & _Fl_greedy) != 0;
3375+ // apply repetition
3376+ if (_Node->_Simple_loop == 1) {
3377+ return _Do_rep0(_Node, _Greedy);
3378+ }
3379+ _Loop_vals_v2_t* _Psav = &_Loop_vals[_Node->_Loop_number];
3380+
3381+ // Determine first capture group in repetition for later capture group reset, if not done so previously.
3382+ // No capture group reset is performed for POSIX regexes,
3383+ // so we prevent any reset by setting the first capture group to the number of capture groups _Ncap.
3384+ if (_Psav->_Group_first == 0) {
3385+ if ((_Sflags
3386+ & (regex_constants::basic | regex_constants::extended | regex_constants::grep | regex_constants::egrep
3387+ | regex_constants::awk))
3388+ || !_Find_first_inner_capture_group(_Node->_Next, _Psav)) {
3389+ _Psav->_Group_first = _Ncap;
3390+ }
3391+ }
3392+
3393+ return _Do_rep(_Node, _Greedy, 0);
3394+ }
3395+
3396+ template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3397+ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Find_first_inner_capture_group(
3398+ _Node_base* _Nx, _Loop_vals_v2_t* _Loop_state) {
3399+ if (0 < _Max_stack_count && --_Max_stack_count <= 0) {
3400+ _Xregex_error(regex_constants::error_stack);
3401+ }
3402+
3403+ bool _Found_group = false;
3404+ while (_Nx) {
3405+ switch (_Nx->_Kind) {
3406+ case _N_nop:
3407+ case _N_bol:
3408+ case _N_eol:
3409+ case _N_wbound:
3410+ case _N_dot:
3411+ case _N_str:
3412+ case _N_class:
3413+ case _N_group:
3414+ case _N_end_group:
3415+ case _N_end_capture:
3416+ case _N_back:
3417+ case _N_begin:
3418+ break;
3419+
3420+ case _N_assert:
3421+ case _N_neg_assert:
3422+ {
3423+ if (_Find_first_inner_capture_group(static_cast<_Node_assert*>(_Nx), _Loop_state)) {
3424+ _Found_group = true;
3425+ _Nx = nullptr;
3426+ }
3427+ break;
3428+ }
3429+
3430+ case _N_capture:
3431+ {
3432+ _Node_capture* _Node = static_cast<_Node_capture*>(_Nx);
3433+ _Loop_state->_Group_first = _Node->_Idx;
3434+ _Found_group = true;
3435+ _Nx = nullptr;
3436+ break;
3437+ }
3438+
3439+ case _N_if:
3440+ {
3441+ _Node_if* _Node = static_cast<_Node_if*>(_Nx);
3442+ for (; _Node != nullptr; _Node = _Node->_Child) {
3443+ if (_Find_first_inner_capture_group(_Node->_Next, _Loop_state)) {
3444+ _Found_group = true;
3445+ _Nx = nullptr;
3446+ break;
3447+ }
3448+ }
3449+
3450+ if (_Nx != nullptr) { // continue search after the branches of the _N_if node
3451+ _Nx = static_cast<_Node_if*>(_Nx)->_Endif;
3452+ }
3453+ break;
3454+ }
3455+
3456+ case _N_rep:
3457+ {
3458+ _Node_rep* _Inner_rep = static_cast<_Node_rep*>(_Nx);
3459+ _Loop_vals_v2_t* _Inner_loop_state = &_Loop_vals[_Inner_rep->_Loop_number];
3460+ if (_Find_first_inner_capture_group(_Inner_rep->_Next, _Inner_loop_state)) {
3461+ _Loop_state->_Group_first = _Inner_loop_state->_Group_first;
3462+ _Found_group = true;
3463+ _Nx = nullptr;
3464+ } else {
3465+ _Inner_loop_state->_Group_first = _Ncap;
3466+ _Nx = _Inner_rep->_End_rep;
3467+ }
3468+ break;
3469+ }
3470+
3471+ case _N_end_assert:
3472+ case _N_endif:
3473+ case _N_end_rep:
3474+ case _N_end:
3475+ case _N_none:
3476+ default:
3477+ _Nx = nullptr;
3478+ break;
3479+ }
3480+
3481+ if (_Nx) {
3482+ _Nx = _Nx->_Next;
3483+ }
3484+ }
3485+
3486+ if (0 < _Max_stack_count) {
3487+ ++_Max_stack_count;
3488+ }
3489+
3490+ return _Found_group;
3491+ }
3492+
33613493template <class _BidIt1, class _BidIt2, class _Pr>
33623494_BidIt1 _Cmp_chrange(_BidIt1 _Begin1, _BidIt1 _End1, _BidIt2 _Begin2, _BidIt2 _End2, _Pr _Pred) {
33633495 // compare character ranges
@@ -3695,15 +3827,6 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
36953827 { // record current position
36963828 _Node_capture* _Node = static_cast<_Node_capture*>(_Nx);
36973829 _Tgt_state._Grps[_Node->_Idx]._Begin = _Tgt_state._Cur;
3698- if (!(_Sflags
3699- & (regex_constants::basic | regex_constants::extended | regex_constants::grep
3700- | regex_constants::egrep | regex_constants::awk))) {
3701- // CodeQL [SM02323] Comparing unchanging unsigned int _Node->_Idx to decreasing size_t _Idx is safe.
3702- for (size_t _Idx = _Tgt_state._Grp_valid.size(); _Node->_Idx < _Idx;) {
3703- _Tgt_state._Grp_valid[--_Idx] = false;
3704- }
3705- }
3706-
37073830 break;
37083831 }
37093832
@@ -3752,7 +3875,7 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
37523875 break;
37533876
37543877 case _N_rep:
3755- if (!_Do_rep (static_cast<_Node_rep*>(_Nx), (_Nx->_Flags & _Fl_greedy) != 0, 0 )) {
3878+ if (!_Do_rep_first (static_cast<_Node_rep*>(_Nx))) {
37563879 _Failed = true;
37573880 }
37583881
@@ -3761,10 +3884,9 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
37613884
37623885 case _N_end_rep:
37633886 {
3764- _Node_rep* _Nr = static_cast<_Node_end_rep*>(_Nx)->_Begin_rep;
3765- _Loop_vals_t* _Psav = &_Loop_vals[_Nr->_Loop_number];
3766-
3767- if (_Nr->_Simple_loop == 0 && !_Do_rep(_Nr, (_Nr->_Flags & _Fl_greedy) != 0, _Psav->_Loop_idx)) {
3887+ _Node_rep* _Nr = static_cast<_Node_end_rep*>(_Nx)->_Begin_rep;
3888+ if (_Nr->_Simple_loop == 0
3889+ && !_Do_rep(_Nr, (_Nr->_Flags & _Fl_greedy) != 0, _Loop_vals[_Nr->_Loop_number]._Loop_idx)) {
37683890 _Failed = true; // recurse only if loop contains if/do
37693891 }
37703892
0 commit comments