From 21722f87c0e26b99684d7f30dcd02823f3372ac0 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 23 Oct 2025 15:29:48 -0600 Subject: [PATCH 1/8] gv_magicalize: Length 0 input=>Immediately return false If you trace the execution of what happens to 0 length input, it relies on a NUL terminator in the string, and does nothing. Simply return false immediately instead. --- gv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gv.c b/gv.c index 9856e03a2b7a..4c6127b42444 100644 --- a/gv.c +++ b/gv.c @@ -2086,11 +2086,14 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, PERL_ARGS_ASSERT_GV_MAGICALIZE; + if (len == 0) { + return false; + } + if (stash != PL_defstash) { /* not the main stash */ /* We only have to check for a few names here: a, b, EXPORT, ISA and VERSION. All the others apply only to the main stash or to CORE (which is checked right after this). */ - if (len) { switch (*name) { case 'E': if ( @@ -2124,7 +2127,6 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, goto try_core; } goto ret; - } try_core: if (len > 1 /* shortest is uc */ && HvNAMELEN_get(stash) == 4) { /* Avoid null warning: */ From 3b2f9a1c944a1ca4cbe7b6d883c1c7c93d360554 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 23 Oct 2025 11:49:18 -0600 Subject: [PATCH 2/8] Add "magical" chars to l1_char_class_tab.h Some characters have special meaning to gv_magicalize(). This commit marks those in PL_charclass. This allows the next commit to more quickly than currently rule them out during processing. --- charclass_invlists.inc | 2 +- handy.h | 3 +- l1_char_class_tab.h | 366 ++++++++++++++++++------------------ lib/unicore/uni_keywords.pl | 2 +- regen/mk_PL_charclass.pl | 13 ++ regexp_constants.h | 2 +- uni_keywords.h | 2 +- 7 files changed, 202 insertions(+), 188 deletions(-) diff --git a/charclass_invlists.inc b/charclass_invlists.inc index 855ae3d196f9..cc5ebd75833e 100644 --- a/charclass_invlists.inc +++ b/charclass_invlists.inc @@ -492661,6 +492661,6 @@ static const U8 WB_dfa_table[] = { * 03640d8ad18fc65de766f2034a927f7442960e998d3243845ca9b9fe31bfe1ab lib/unicore/mktables * 8c30575264b2772c7a69c5bb6069a28f0e0a7a0df735871bde2d99ee674316ac lib/unicore/version * 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl - * c7ff8e0d207d3538c7feb4a1a152b159e5e902d20293b303569ea8323e84633e regen/mk_PL_charclass.pl + * 6c52efdee47313cfde75ff86376008ce53320ebc93176caab45c77ce086a256d regen/mk_PL_charclass.pl * 20a6e3d507a66f4594586485568134873485b08e23383f3dc4e6b3047569267b regen/mk_invlists.pl * ex: set ro ft=c: */ diff --git a/handy.h b/handy.h index d06292dd6ca9..0ada57f2d2e3 100644 --- a/handy.h +++ b/handy.h @@ -1542,8 +1542,9 @@ or casts # define CC_BINDIGIT_ 23 # define CC_OCTDIGIT_ 24 # define CC_MNEMONIC_CNTRL_ 25 +# define CC_MAGICAL_ 26 -/* Unused: 26-31 +/* Unused: 27-31 * If more bits are needed, one could add a second word for non-64bit * QUAD_IS_INT systems, using some #ifdefs to distinguish between having a 2nd * word or not. The IS_IN_SOME_FOLD bit is the most easily expendable, as it diff --git a/l1_char_class_tab.h b/l1_char_class_tab.h index 01d77639171a..dd6fc5296945 100644 --- a/l1_char_class_tab.h +++ b/l1_char_class_tab.h @@ -9,29 +9,29 @@ #if 'A' == 65 /* ASCII/Latin1 */ /* U+00 NUL */ (1U<' */ (1U<' */ (1U<' */ (1U<' */ (1U<' */ (1U<' */ (1U< [ ord "\a", ord "\b", ord "\e", ord "\f", ord "\n", ord "\r", ord "\t" ], + MAGICAL => [ 001, 003, 004, 005, 006, 007, 010, 011, 014, 015, + 016, 017, 020, 023, 024, 025, 026, 027, + ord('0'), ord('1'), ord('2'), ord('3'), ord('4'), + ord('5'), ord('6'), ord('7'), ord('8'), ord('9'), + ord('a'), ord('b'), + ord('A'), ord('E'), ord('I'), ord('S'), ord('V'), + ord('!'), ord('#'), ord('%'), ord('&'), ord('('), + ord(')'), ord('*'), ord('+'), ord('-'), ord('.'), + ord('/'), ord(';'), ord(':'), ord('<'), ord('='), + ord('>'), ord('?'), ord('['), ord('\''), ord('\\'), + ord(']'), ord('^'), ord('`'), ord('|'), ord('~'), + ord('$'), + ], ); sub uniques { diff --git a/regexp_constants.h b/regexp_constants.h index 5df5462ef64b..f576861bfe2f 100644 --- a/regexp_constants.h +++ b/regexp_constants.h @@ -82,6 +82,6 @@ * 03640d8ad18fc65de766f2034a927f7442960e998d3243845ca9b9fe31bfe1ab lib/unicore/mktables * 8c30575264b2772c7a69c5bb6069a28f0e0a7a0df735871bde2d99ee674316ac lib/unicore/version * 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl - * c7ff8e0d207d3538c7feb4a1a152b159e5e902d20293b303569ea8323e84633e regen/mk_PL_charclass.pl + * 6c52efdee47313cfde75ff86376008ce53320ebc93176caab45c77ce086a256d regen/mk_PL_charclass.pl * 20a6e3d507a66f4594586485568134873485b08e23383f3dc4e6b3047569267b regen/mk_invlists.pl * ex: set ro ft=c: */ diff --git a/uni_keywords.h b/uni_keywords.h index 6e81a518e32e..4e553e138151 100644 --- a/uni_keywords.h +++ b/uni_keywords.h @@ -8174,7 +8174,7 @@ match_uniprop( const unsigned char * const key, const U16 key_len ) { * 03640d8ad18fc65de766f2034a927f7442960e998d3243845ca9b9fe31bfe1ab lib/unicore/mktables * 8c30575264b2772c7a69c5bb6069a28f0e0a7a0df735871bde2d99ee674316ac lib/unicore/version * 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl - * c7ff8e0d207d3538c7feb4a1a152b159e5e902d20293b303569ea8323e84633e regen/mk_PL_charclass.pl + * 6c52efdee47313cfde75ff86376008ce53320ebc93176caab45c77ce086a256d regen/mk_PL_charclass.pl * 20a6e3d507a66f4594586485568134873485b08e23383f3dc4e6b3047569267b regen/mk_invlists.pl * d6987e01ad538d1567394851cf199f99815f7701bebd6092be4bc7a6d8f147c6 regen/mph.pl * ex: set ro ft=c: */ From addc60f68962947368465c2725ecc7c02bde28df Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 23 Oct 2025 15:43:18 -0600 Subject: [PATCH 3/8] gv_magicalize: Quickly rule non-magical input out This uses the data structure introduced in the previous commit to quickly test the input first character. If it isn't a potential magical one, it could apply to CORE, so move the block that checks for that to here, eliminating a conditional. In either case, no need to look further. --- gv.c | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/gv.c b/gv.c index 4c6127b42444..632965fd0bc3 100644 --- a/gv.c +++ b/gv.c @@ -2090,6 +2090,20 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, return false; } + if (! generic_isCC_(*name, CC_MAGICAL_)) { + + /* If not a magical variable, it could be for CORE */ + try_core: + if (len > 1 /* shortest is uc */ && HvNAMELEN_get(stash) == 4) { + /* Avoid null warning: */ + const char * const stashname = HvNAME(stash); assert(stashname); + if (strBEGINs(stashname, "CORE")) + S_maybe_add_coresub(aTHX_ 0, gv, name, len); + } + + goto ret; + } + if (stash != PL_defstash) { /* not the main stash */ /* We only have to check for a few names here: a, b, EXPORT, ISA and VERSION. All the others apply only to the main stash or to @@ -2127,31 +2141,8 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, goto try_core; } goto ret; - try_core: - if (len > 1 /* shortest is uc */ && HvNAMELEN_get(stash) == 4) { - /* Avoid null warning: */ - const char * const stashname = HvNAME(stash); assert(stashname); - if (strBEGINs(stashname, "CORE")) - S_maybe_add_coresub(aTHX_ 0, gv, name, len); - } } else if (len > 1) { -#ifndef EBCDIC - if (*name > 'V' ) { - NOOP; - /* Nothing else to do. - The compiler will probably turn the switch statement into a - branch table. Make sure we avoid even that small overhead for - the common case of lower case variable names. (On EBCDIC - platforms, we can't just do: - if (NATIVE_TO_ASCII(*name) > NATIVE_TO_ASCII('V') ) { - because cases like '\027' in the switch statement below are - C1 (non-ASCII) controls on those platforms, so the remapping - would make them larger than 'V') - */ - } else -#endif - { switch (*name) { case 'A': if (memEQs(name, len, "ARGV")) { @@ -2313,7 +2304,6 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, goto storeparen; } } - } } else { /* Names of length 1. (Or 0. But name is NUL terminated, so that will be case '\0' in this switch statement (ie a default case) */ From cbce80871dca7ef1c126df9c25d3cb63bfba66a6 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 23 Oct 2025 16:00:12 -0600 Subject: [PATCH 4/8] gv_magicalize: remove three goto's, and dest label The previous commit moved a block, so two of these aren't necessary, and the final one can be removed by adding an else --- gv.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/gv.c b/gv.c index 632965fd0bc3..2625ef9dec44 100644 --- a/gv.c +++ b/gv.c @@ -2100,11 +2100,8 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, if (strBEGINs(stashname, "CORE")) S_maybe_add_coresub(aTHX_ 0, gv, name, len); } - - goto ret; } - - if (stash != PL_defstash) { /* not the main stash */ + else if (stash != PL_defstash) { /* not the main stash */ /* We only have to check for a few names here: a, b, EXPORT, ISA and VERSION. All the others apply only to the main stash or to CORE (which is checked right after this). */ @@ -2140,7 +2137,6 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, default: goto try_core; } - goto ret; } else if (len > 1) { switch (*name) { @@ -2298,7 +2294,7 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, this test */ UV uv; if (!grok_atoUV(name, &uv, NULL) || uv > I32_MAX) - goto ret; + break; /* XXX why are we using a SSize_t? */ paren = (SSize_t)(I32)uv; goto storeparen; @@ -2468,7 +2464,6 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, } } - ret: /* Return true if we actually did something. */ return GvAV(gv) || GvHV(gv) || GvIO(gv) || GvCV(gv) || ( GvSV(gv) && ( From 3ce2956b367381e0669b367150aa5fd88465b34b Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Fri, 24 Oct 2025 05:53:35 -0600 Subject: [PATCH 5/8] gv_magicalize: Replace FALLTHROUGH by what it does This is just to make the next commit differences a bit smaller --- gv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gv.c b/gv.c index 2625ef9dec44..a5208cd812e7 100644 --- a/gv.c +++ b/gv.c @@ -2133,7 +2133,8 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, case 'b': if (len == 1 && sv_type == SVt_PV) GvMULTI_on(gv); - /* FALLTHROUGH */ + goto try_core; + default: goto try_core; } From de90a05435e6c0b47f105c539aa06210a49e7fba Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 23 Oct 2025 16:05:06 -0600 Subject: [PATCH 6/8] gv_magicalize: Refactor This refactors to eliminate redundant code. Some things are magical only if we are using the main stash; others in any stash; one only in PL_debstash. Previously, the switches were structured thusly: 1) if we aren't using the main stash, handle things not requiring the main stash 2) if we are using the main stash, handle all len > 1 things that can be in the main stash. This duplicates much of item 1) 3) if we are using the main stash, handle all len == 1 things that can be in the main stash. This duplicates some of item 1) The new structure is if (len > 1) { 1) handle len > 1 things not requiring main stash, regardless of the stash we are in 2) handle len > 1 things requiring main stash } else { 3) handle len == 1 things, regardless of the stash we are in. } This removes the duplicated code. The case for 'a' and 'b' are special. When 'a' stands for "args" it is len > 1 and that is handled in 1). But 'a' can also mean a single character, as 'b' always does. These cases are handled in 3). These are the only two len == 1 characters that don't have to be in the main package, so there is an extra conditional clause to allow that. --- gv.c | 56 ++++++++++++++++++++++---------------------------------- 1 file changed, 22 insertions(+), 34 deletions(-) diff --git a/gv.c b/gv.c index a5208cd812e7..2cdc8bcddde8 100644 --- a/gv.c +++ b/gv.c @@ -2101,11 +2101,11 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, S_maybe_add_coresub(aTHX_ 0, gv, name, len); } } - else if (stash != PL_defstash) { /* not the main stash */ - /* We only have to check for a few names here: a, b, EXPORT, ISA - and VERSION. All the others apply only to the main stash or to - CORE (which is checked right after this). */ + else if (len > 1) { switch (*name) { + + /* Each in first set doesn't require this to be the main stash */ + case 'E': if ( len >= 6 && name[1] == 'X' && @@ -2129,17 +2129,15 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, GvMULTI_on(gv_AVadd(gv)); break; } - /* FALLTHROUGH */ - case 'b': - if (len == 1 && sv_type == SVt_PV) - GvMULTI_on(gv); goto try_core; default: + + /* The remainder apply only to the main stash */ + if (stash != PL_defstash) { goto try_core; } - } - else if (len > 1) { + switch (*name) { case 'A': if (memEQs(name, len, "ARGV")) { @@ -2149,21 +2147,6 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, GvMULTI_on(gv); } break; - case 'E': - if ( - len >= 6 && name[1] == 'X' && - (memEQs(name, len, "EXPORT") - ||memEQs(name, len, "EXPORT_OK") - ||memEQs(name, len, "EXPORT_FAIL") - ||memEQs(name, len, "EXPORT_TAGS")) - ) - GvMULTI_on(gv); - break; - case 'I': - if (memEQs(name, len, "ISA")) { - gv_magicalize_isa(gv); - } - break; case 'S': if (memEQs(name, len, "SIG")) { HV *hv; @@ -2195,10 +2178,6 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, } } break; - case 'V': - if (memEQs(name, len, "VERSION")) - GvMULTI_on(gv); - break; case '\003': /* $^CHILD_ERROR_NATIVE */ if (memEQs(name, len, "\003HILD_ERROR_NATIVE")) goto magicalize; @@ -2300,10 +2279,17 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, paren = (SSize_t)(I32)uv; goto storeparen; } - } - } else { - /* Names of length 1. (Or 0. But name is NUL terminated, so that will - be case '\0' in this switch statement (ie a default case) */ + } + } + } + else if ( stash == PL_defstash /* Names of length 1. */ + || *name == 'a' || *name == 'b') + { + /* All but the above two length 1 names have to be in the main stash. + * + * Note that nothing failing here can apply to CORE, because the + * minimum length (for things like 'uc') is 2. */ + switch (*name) { case '&': /* $& */ paren = RX_BUFF_IDX_FULLMATCH; @@ -2458,10 +2444,12 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, SvREFCNT_dec(sv); } break; - case 'a': + + case 'a': /* The len > 1 case was handled above */ case 'b': if (sv_type == SVt_PV) GvMULTI_on(gv); + break; } } From 0eb8960b0d590b0cfd8b7a6a0c86d2e2716c7595 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 23 Oct 2025 14:06:51 -0600 Subject: [PATCH 7/8] gv_magicalize: Essentially white space This indents things to nicely align vertically and moves some braces in the process --- gv.c | 267 +++++++++++++++++++++++++++++------------------------------ 1 file changed, 132 insertions(+), 135 deletions(-) diff --git a/gv.c b/gv.c index 2cdc8bcddde8..fde82cd94f36 100644 --- a/gv.c +++ b/gv.c @@ -2102,36 +2102,36 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, } } else if (len > 1) { - switch (*name) { + switch (*name) { /* Each in first set doesn't require this to be the main stash */ - case 'E': - if ( - len >= 6 && name[1] == 'X' && - (memEQs(name, len, "EXPORT") + case 'E': + if ( (len >= 6 && name[1] == 'X') + && ( memEQs(name, len, "EXPORT") ||memEQs(name, len, "EXPORT_OK") ||memEQs(name, len, "EXPORT_FAIL") - ||memEQs(name, len, "EXPORT_TAGS")) - ) - GvMULTI_on(gv); - break; - case 'I': - if (memEQs(name, len, "ISA")) - gv_magicalize_isa(gv); - break; - case 'V': - if (memEQs(name, len, "VERSION")) - GvMULTI_on(gv); + ||memEQs(name, len, "EXPORT_TAGS"))) + { + GvMULTI_on(gv); + } + break; + case 'I': + if (memEQs(name, len, "ISA")) + gv_magicalize_isa(gv); + break; + case 'V': + if (memEQs(name, len, "VERSION")) + GvMULTI_on(gv); + break; + case 'a': + if (stash == PL_debstash && memEQs(name, len, "args")) { + GvMULTI_on(gv_AVadd(gv)); break; - case 'a': - if (stash == PL_debstash && memEQs(name, len, "args")) { - GvMULTI_on(gv_AVadd(gv)); - break; - } - goto try_core; + } + goto try_core; - default: + default: /* The remainder apply only to the main stash */ if (stash != PL_defstash) { @@ -2139,7 +2139,7 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, } switch (*name) { - case 'A': + case 'A': if (memEQs(name, len, "ARGV")) { IoFLAGS(GvIOn(gv)) |= IOf_ARGV|IOf_START; } @@ -2147,7 +2147,7 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, GvMULTI_on(gv); } break; - case 'S': + case 'S': if (memEQs(name, len, "SIG")) { HV *hv; I32 i; @@ -2178,7 +2178,7 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, } } break; - case '\003': /* $^CHILD_ERROR_NATIVE */ + case '\003': /* $^CHILD_ERROR_NATIVE */ if (memEQs(name, len, "\003HILD_ERROR_NATIVE")) goto magicalize; /* @{^CAPTURE} %{^CAPTURE} */ @@ -2192,41 +2192,42 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, require_tie_mod_s(gv, '+', "Tie::Hash::NamedCapture",0); } else /* %{^CAPTURE_ALL} */ - if (memEQs(name, len, "\003APTURE_ALL")) { + if (memEQs(name, len, "\003APTURE_ALL")) { require_tie_mod_s(gv, '-', "Tie::Hash::NamedCapture",0); } break; - case '\005': /* ${^ENCODING} */ + case '\005': /* ${^ENCODING} */ if (memEQs(name, len, "\005NCODING")) goto magicalize; break; - case '\007': /* ${^GLOBAL_PHASE} */ + case '\007': /* ${^GLOBAL_PHASE} */ if (memEQs(name, len, "\007LOBAL_PHASE")) goto ro_magicalize; break; - case '\010': /* %{^HOOK} */ + case '\010': /* %{^HOOK} */ if (memEQs(name, len, "\010OOK")) { GvMULTI_on(gv); HV *hv = GvHVn(gv); hv_magic(hv, NULL, PERL_MAGIC_hook); } break; - case '\014': - if ( memEQs(name, len, "\014AST_FH") || /* ${^LAST_FH} */ - memEQs(name, len, "\014AST_SUCCESSFUL_PATTERN")) /* ${^LAST_SUCCESSFUL_PATTERN} */ + case '\014': + if ( memEQs(name, len, "\014AST_FH") /* ${^LAST_FH} */ + || memEQs(name, len, "\014AST_SUCCESSFUL_PATTERN")) + /* ${^LAST_SUCCESSFUL_PATTERN} */ goto ro_magicalize; break; - case '\015': /* ${^MATCH} */ + case '\015': /* ${^MATCH} */ if (memEQs(name, len, "\015ATCH")) { paren = RX_BUFF_IDX_CARET_FULLMATCH; goto storeparen; } break; - case '\017': /* ${^OPEN} */ + case '\017': /* ${^OPEN} */ if (memEQs(name, len, "\017PEN")) goto magicalize; break; - case '\020': /* ${^PREMATCH} ${^POSTMATCH} */ + case '\020': /* ${^PREMATCH} ${^POSTMATCH} */ if (memEQs(name, len, "\020REMATCH")) { paren = RX_BUFF_IDX_CARET_PREMATCH; goto storeparen; @@ -2236,15 +2237,15 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, goto storeparen; } break; - case '\023': + case '\023': if (memEQs(name, len, "\023AFE_LOCALES")) goto ro_magicalize; break; - case '\024': /* ${^TAINT} */ + case '\024': /* ${^TAINT} */ if (memEQs(name, len, "\024AINT")) goto ro_magicalize; break; - case '\025': /* ${^UNICODE}, ${^UTF8LOCALE} */ + case '\025': /* ${^UNICODE}, ${^UTF8LOCALE} */ if (memEQs(name, len, "\025NICODE")) goto ro_magicalize; if (memEQs(name, len, "\025TF8LOCALE")) @@ -2252,7 +2253,7 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, if (memEQs(name, len, "\025TF8CACHE")) goto magicalize; break; - case '\027': /* $^WARNING_BITS */ + case '\027': /* $^WARNING_BITS */ if (memEQs(name, len, "\027ARNING_BITS")) goto magicalize; #ifdef WIN32 @@ -2260,16 +2261,16 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, goto magicalize; #endif break; - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { /* Ensures that we have an all-digit variable, ${"1foo"} fails this test */ UV uv; @@ -2278,9 +2279,9 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, /* XXX why are we using a SSize_t? */ paren = (SSize_t)(I32)uv; goto storeparen; + } } } - } } else if ( stash == PL_defstash /* Names of length 1. */ || *name == 'a' || *name == 'b') @@ -2291,59 +2292,57 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, * minimum length (for things like 'uc') is 2. */ switch (*name) { - case '&': /* $& */ + case '&': /* $& */ paren = RX_BUFF_IDX_FULLMATCH; goto sawampersand; - case '`': /* $` */ + case '`': /* $` */ paren = RX_BUFF_IDX_PREMATCH; goto sawampersand; - case '\'': /* $' */ + case '\'': /* $' */ paren = RX_BUFF_IDX_POSTMATCH; - sawampersand: + sawampersand: #ifdef PERL_SAWAMPERSAND - if (!( - sv_type == SVt_PVAV || - sv_type == SVt_PVHV || - sv_type == SVt_PVCV || - sv_type == SVt_PVFM || - sv_type == SVt_PVIO - )) { PL_sawampersand |= - (*name == '`') - ? SAWAMPERSAND_LEFT - : (*name == '&') - ? SAWAMPERSAND_MIDDLE - : SAWAMPERSAND_RIGHT; - } + if (! ( sv_type == SVt_PVAV + || sv_type == SVt_PVHV + || sv_type == SVt_PVCV + || sv_type == SVt_PVFM + || sv_type == SVt_PVIO)) + { + PL_sawampersand |= (*name == '`') ? SAWAMPERSAND_LEFT + : (*name == '&') ? SAWAMPERSAND_MIDDLE + : SAWAMPERSAND_RIGHT; + } #endif goto storeparen; - case '1': /* $1 */ - case '2': /* $2 */ - case '3': /* $3 */ - case '4': /* $4 */ - case '5': /* $5 */ - case '6': /* $6 */ - case '7': /* $7 */ - case '8': /* $8 */ - case '9': /* $9 */ + + case '1': /* $1 */ + case '2': /* $2 */ + case '3': /* $3 */ + case '4': /* $4 */ + case '5': /* $5 */ + case '6': /* $6 */ + case '7': /* $7 */ + case '8': /* $8 */ + case '9': /* $9 */ paren = *name - '0'; - storeparen: + storeparen: /* Flag the capture variables with a NULL mg_ptr Use mg_len for the array index to lookup. */ sv_magic(GvSVn(gv), MUTABLE_SV(gv), PERL_MAGIC_sv, NULL, paren); break; - case ':': /* $: */ + case ':': /* $: */ sv_setpv(GvSVn(gv),PL_chopset); goto magicalize; - case '?': /* $? */ + case '?': /* $? */ #ifdef COMPLEX_STATUS SvUPGRADE(GvSVn(gv), SVt_PVLV); #endif goto magicalize; - case '!': /* $! */ + case '!': /* $! */ GvMULTI_on(gv); /* If %! has been used, automatically load Errno.pm. */ @@ -2354,8 +2353,8 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, require_tie_mod_s(gv, '!', "Errno", 1); break; - case '-': /* $-, %-, @- */ - case '+': /* $+, %+, @+ */ + case '-': /* $-, %-, @- */ + case '+': /* $+, %+, @+ */ GvMULTI_on(gv); /* no used once warnings here */ { /* $- $+ */ sv_magic(GvSVn(gv), MUTABLE_SV(gv), PERL_MAGIC_sv, name, len); @@ -2374,79 +2373,80 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, SvREADONLY_on(av); } break; - case '*': /* $* */ - case '#': /* $# */ - if (sv_type == SVt_PV) - /* diag_listed_as: $* is no longer supported as of Perl 5.30 */ - croak("$%c is no longer supported as of Perl 5.30", *name); - break; - case '\010': /* $^H */ + case '*': /* $* */ + case '#': /* $# */ + if (sv_type == SVt_PV) + /* diag_listed_as: $* is no longer supported as of Perl 5.30 */ + croak("$%c is no longer supported as of Perl 5.30", *name); + break; + case '\010': /* $^H */ { HV *const hv = GvHVn(gv); hv_magic(hv, NULL, PERL_MAGIC_hints); } goto magicalize; - case '\023': /* $^S */ - ro_magicalize: + case '\023': /* $^S */ + ro_magicalize: SvREADONLY_on(GvSVn(gv)); /* FALLTHROUGH */ - case '0': /* $0 */ - case '^': /* $^ */ - case '~': /* $~ */ - case '=': /* $= */ - case '%': /* $% */ - case '.': /* $. */ - case '(': /* $( */ - case ')': /* $) */ - case '<': /* $< */ - case '>': /* $> */ - case '\\': /* $\ */ - case '/': /* $/ */ - case '|': /* $| */ - case '$': /* $$ */ - case '[': /* $[ */ - case '\001': /* $^A */ - case '\003': /* $^C */ - case '\004': /* $^D */ - case '\005': /* $^E */ - case '\006': /* $^F */ - case '\011': /* $^I, NOT \t in EBCDIC */ - case '\016': /* $^N */ - case '\017': /* $^O */ - case '\020': /* $^P */ - case '\024': /* $^T */ - case '\027': /* $^W */ - magicalize: + case '0': /* $0 */ + case '^': /* $^ */ + case '~': /* $~ */ + case '=': /* $= */ + case '%': /* $% */ + case '.': /* $. */ + case '(': /* $( */ + case ')': /* $) */ + case '<': /* $< */ + case '>': /* $> */ + case '\\': /* $\ */ + case '/': /* $/ */ + case '|': /* $| */ + case '$': /* $$ */ + case '[': /* $[ */ + case '\001': /* $^A */ + case '\003': /* $^C */ + case '\004': /* $^D */ + case '\005': /* $^E */ + case '\006': /* $^F */ + case '\011': /* $^I, NOT \t in EBCDIC */ + case '\016': /* $^N */ + case '\017': /* $^O */ + case '\020': /* $^P */ + case '\024': /* $^T */ + case '\027': /* $^W */ + magicalize: sv_magic(GvSVn(gv), MUTABLE_SV(gv), PERL_MAGIC_sv, name, len); break; - case '\014': /* $^L */ + case '\014': /* $^L */ sv_setpvs(GvSVn(gv),"\f"); break; - case ';': /* $; */ + case ';': /* $; */ sv_setpvs(GvSVn(gv),"\034"); break; - case ']': /* $] */ - { + case ']': /* $] */ + { SV * const sv = GvSV(gv); if (!sv_derived_from(PL_patchlevel, "version")) upg_version(PL_patchlevel, TRUE); GvSV(gv) = vnumify(PL_patchlevel); SvREADONLY_on(GvSV(gv)); SvREFCNT_dec(sv); - } - break; - case '\026': /* $^V */ - { + break; + } + + case '\026': /* $^V */ + { SV * const sv = GvSV(gv); GvSV(gv) = new_version(PL_patchlevel); SvREADONLY_on(GvSV(gv)); SvREFCNT_dec(sv); - } - break; + break; + } - case 'a': /* The len > 1 case was handled above */ - case 'b': + case 'a': /* The len > 1 case was handled above */ + case 'b': if (sv_type == SVt_PV) GvMULTI_on(gv); break; @@ -2455,10 +2455,7 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, /* Return true if we actually did something. */ return GvAV(gv) || GvHV(gv) || GvIO(gv) || GvCV(gv) - || ( GvSV(gv) && ( - SvOK(GvSV(gv)) || SvMAGICAL(GvSV(gv)) - ) - ); + || (GvSV(gv) && (SvOK(GvSV(gv)) || SvMAGICAL(GvSV(gv)))); } /* If we do ever start using this later on in the file, we need to make From fee8fc37ed4702a9e1662263227509017dff5d20 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Fri, 24 Oct 2025 04:59:05 -0600 Subject: [PATCH 8/8] mg.c: Add asserts These two switch() statements handle magic names. We now have a quick way to determine if the first character of a name is magic. Assert that the cases of the switch match. This will tell us if something gets out-of-sync. --- mg.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mg.c b/mg.c index 194f4bb217d1..4dad61f3e41c 100644 --- a/mg.c +++ b/mg.c @@ -906,6 +906,9 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) } nextchar = *remaining; + + assert(generic_isCC_(*mg->mg_ptr, CC_MAGICAL_)); + switch (*mg->mg_ptr) { case '\001': /* ^A */ if (SvOK(PL_bodytarget)) sv_copypv(sv, PL_bodytarget); @@ -3016,6 +3019,8 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) return 0; } + assert(generic_isCC_(*mg->mg_ptr, CC_MAGICAL_)); + switch (*mg->mg_ptr) { case '\001': /* ^A */ if (SvOK(sv)) sv_copypv(PL_bodytarget, sv);