@@ -10994,7 +10994,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
1099410994STATIC regnode_offset
1099510995S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
1099610996 I32 *flagp,
10997- char * parse_start ,
10997+ char * backref_parse_start ,
1099810998 char ch
1099910999 )
1100011000{
@@ -11013,7 +11013,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
1101311013 }
1101411014 if (RExC_parse == name_start || *RExC_parse != ch) {
1101511015 /* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
11016- vFAIL2("Sequence %.3s... not terminated", parse_start );
11016+ vFAIL2("Sequence %.3s... not terminated", backref_parse_start );
1101711017 }
1101811018
1101911019 if (sv_dat) {
@@ -11115,8 +11115,16 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
1111511115 SV * max_open; /* Max number of unclosed parens */
1111611116 I32 was_in_lookaround = RExC_in_lookaround;
1111711117
11118- char * parse_start = RExC_parse; /* MJD */
11119- char * const oregcomp_parse = RExC_parse;
11118+ /* The difference between the following variables can be seen with *
11119+ * the broken pattern /(?:foo/ where segment_parse_start will point *
11120+ * at the 'f', and reg_parse_start will point at the '(' */
11121+
11122+ /* the following is used for unmatched '(' errors */
11123+ char * const reg_parse_start = RExC_parse;
11124+
11125+ /* the following is used to track where various segments of
11126+ * the pattern that we parse out started. */
11127+ char * segment_parse_start = RExC_parse;
1112011128
1112111129 DECLARE_AND_GET_RE_DEBUG_FLAGS;
1112211130
@@ -11501,7 +11509,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
1150111509 else if (paren == '=') { /* (?P=...) named backref */
1150211510 RExC_parse++;
1150311511 return handle_named_backref(pRExC_state, flagp,
11504- parse_start , ')');
11512+ segment_parse_start , ')');
1150511513 }
1150611514 RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
1150711515 /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
@@ -11652,7 +11660,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
1165211660 /*notreached*/
1165311661 /* named and numeric backreferences */
1165411662 case '&': /* (?&NAME) */
11655- parse_start = RExC_parse - 1;
11663+ segment_parse_start = RExC_parse - 1;
1165611664 named_recursion:
1165711665 {
1165811666 SV *sv_dat = reg_scan_name(pRExC_state,
@@ -11683,7 +11691,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
1168311691 {
1168411692 bool is_neg = FALSE;
1168511693 UV unum;
11686- parse_start = RExC_parse - 1; /* MJD */
11694+ segment_parse_start = RExC_parse - 1;
1168711695 if (*RExC_parse == '-') {
1168811696 RExC_parse++;
1168911697 is_neg = TRUE;
@@ -12181,7 +12189,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
1218112189
1218212190 parse_rest:
1218312191 /* Pick up the branches, linking them together. */
12184- parse_start = RExC_parse;
12192+ segment_parse_start = RExC_parse;
1218512193 br = regbranch(pRExC_state, &flags, 1, depth+1);
1218612194
1218712195 /* branch_len = (paren != 0); */
@@ -12406,7 +12414,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
1240612414 set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);
1240712415 }
1240812416 if (RExC_parse >= RExC_end || UCHARAT(RExC_parse) != ')') {
12409- RExC_parse = oregcomp_parse ;
12417+ RExC_parse = reg_parse_start ;
1241012418 vFAIL("Unmatched (");
1241112419 }
1241212420 nextchar(pRExC_state);
@@ -13509,7 +13517,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1350913517{
1351013518 regnode_offset ret = 0;
1351113519 I32 flags = 0;
13512- char *parse_start ;
13520+ char *atom_parse_start ;
1351313521 U8 op;
1351413522 int invert = 0;
1351513523
@@ -13522,7 +13530,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1352213530 PERL_ARGS_ASSERT_REGATOM;
1352313531
1352413532 tryagain:
13525- parse_start = RExC_parse;
13533+ atom_parse_start = RExC_parse;
1352613534 assert(RExC_parse < RExC_end);
1352713535 switch ((U8)*RExC_parse) {
1352813536 case '^':
@@ -13553,7 +13561,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1355313561 break;
1355413562 case '[':
1355513563 {
13556- char * const oregcomp_parse = ++RExC_parse;
13564+ char * const cc_parse_start = ++RExC_parse;
1355713565 ret = regclass(pRExC_state, flagp, depth+1,
1355813566 FALSE, /* means parse the whole char class */
1355913567 TRUE, /* allow multi-char folds */
@@ -13567,7 +13575,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1356713575 (UV) *flagp);
1356813576 }
1356913577 if (*RExC_parse != ']') {
13570- RExC_parse = oregcomp_parse ;
13578+ RExC_parse = cc_parse_start ;
1357113579 vFAIL("Unmatched [");
1357213580 }
1357313581 nextchar(pRExC_state);
@@ -13854,7 +13862,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1385413862 /* The escapes above that don't take a parameter can't be
1385513863 * followed by a '{'. But 'pX', 'p{foo}' and
1385613864 * correspondingly 'P' can be */
13857- if ( RExC_parse - parse_start == 1
13865+ if ( RExC_parse - atom_parse_start == 1
1385813866 && UCHARAT(RExC_parse + 1) == '{'
1385913867 && UNLIKELY(! regcurly(RExC_parse + 1, RExC_end, NULL)))
1386013868 {
@@ -13892,7 +13900,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1389213900 RETURN_FAIL_ON_RESTART_FLAGP(flagp);
1389313901
1389413902 /* Here, evaluates to a single code point. Go get that */
13895- RExC_parse = parse_start ;
13903+ RExC_parse = atom_parse_start ;
1389613904 goto defchar;
1389713905
1389813906 case 'k': /* Handle \k<NAME> and \k'NAME' and \k{NAME} */
@@ -13906,7 +13914,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1390613914 {
1390713915 RExC_parse++;
1390813916 /* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
13909- vFAIL2("Sequence %.2s... not terminated", parse_start );
13917+ vFAIL2("Sequence %.2s... not terminated", atom_parse_start );
1391013918 } else {
1391113919 RExC_parse += 2;
1391213920 if (ch == '{') {
@@ -13916,7 +13924,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1391613924 }
1391713925 ret = handle_named_backref(pRExC_state,
1391813926 flagp,
13919- parse_start ,
13927+ atom_parse_start ,
1392013928 (ch == '<')
1392113929 ? '>'
1392213930 : (ch == '{')
@@ -14027,7 +14035,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1402714035 * to be an octal character escape, e.g. \35 or \777.
1402814036 * The above logic should make it obvious why using
1402914037 * octal escapes in patterns is problematic. - Yves */
14030- RExC_parse = parse_start ;
14038+ RExC_parse = atom_parse_start ;
1403114039 goto defchar;
1403214040 }
1403314041 }
@@ -14089,7 +14097,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1408914097 default:
1409014098 /* Do not generate "unrecognized" warnings here, we fall
1409114099 back into the quick-grab loop below */
14092- RExC_parse = parse_start ;
14100+ RExC_parse = atom_parse_start ;
1409314101 goto defchar;
1409414102 } /* end of switch on a \foo sequence */
1409514103 break;
@@ -14328,7 +14336,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1432814336 goto loopdone;
1432914337 }
1433014338 p = RExC_parse;
14331- RExC_parse = parse_start ;
14339+ RExC_parse = atom_parse_start ;
1433214340
1433314341 /* The \N{} means the pattern, if previously /d,
1433414342 * becomes /u. That means it can't be an EXACTF node,
@@ -14518,7 +14526,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1451814526 * string of characters instead of a meta construct */
1451914527 if (len || (p > RExC_start && isALPHA_A(*(p - 1)))) {
1452014528 if ( RExC_strict
14521- || ( p > parse_start + 1
14529+ || ( p > atom_parse_start + 1
1452214530 && isALPHA_A(*(p - 1))
1452314531 && *(p - 2) == '\\'))
1452414532 {
0 commit comments