From 985ed01a1088c955cdac07a439b0b84a679c2d9c Mon Sep 17 00:00:00 2001 From: bulk88 Date: Thu, 10 Oct 2024 20:45:21 -0400 Subject: [PATCH 1/6] sv.c: heavy streamline newSVuv() for speed, tweak newSViv() newSVnv() related to "SvUV() macro 100% of time calls Perl_sv_2uv_flags" https://github.com/Perl/perl5/issues/22653 Until #22653 is solved, clean up newSVuv() and remove branch to "newSViv()" that is unexplained by git blame. BUT, keep original intent and behaviour of "newSViv()" branch for now. Add asserts to guard against 0x8000,0000 == SVf_IVisUV changing. Value of SVf_IVisUV can change in the future, and there might be (I didn't git blame), logic that sign flag and SVf_IVisUV are equal. But these changes depend on SVf_IVisUV being 0x8000,0000 and must be updated if SVf_IVisUV changes. Change SvXXXV_set() to be an explicity bodyless SV head optimization. MSVC 2022 -O1 combined SET_SVANY_FOR_BODYLESS_IV() and SvIV_set(). But instead of hopes and prayers on "UB" or "ISB/IDB" of CCs that could change at random in any previous or future build number of a CC, do it explictly. Bodyless SV head API is defined by P5P, not CC vendors. 915544426781d184e3b057e63a20c089a32d3eba 3/20/2022 3:05:10 PM Perl_newSViv: simplify by using (inline) newSV_type Fix deoptimized Perl_newSViv(). In that commit it forgot about Perl_newSVuv(). Since newSV_type() is a inline fn, and "inline" is CC domain UB optimization. And newSV_type() is far more complex than CPP macro new_SV(), and newSV_type() depends on 100% perfection from CC's LTO engine and ".o" disk format, and possibly depends on the CC breaking ISO C spec with -O3 or -O4. Which turn on extreme SEGV inducing C variable aliasing rules that few C code bases tolerate. Quick examples, a reddit comment (not credible), claims "uint8_t *" and "char *" can not be casted since the CC or CPU has 9 bit bytes or a 9 wire data bus, and ECC parity wire is 9 of 9 for "uint8_t" and 8 of 9 for "signed char" and wire 9 for "char" is the ECC parity wire. The platform's libc's fwritef(), hides the secretly converts 9 bit bytes, to standard 8 bit bytes, making the CC "ISO C compliant". My more realistic scenario, inside newSV_type(). How can the CC know, what if Perl_more_sv() or Perl_more_bodies(), calls mprotect(), modifies "static const struct body_details bodies_by_type [];", calls mprotect() again, and returns execution to newSV_type()? Just switch to new_SV(). Its a CPP macro, not subject to CC UB inlining, and new_SV() only has 1 fn call and is super light weight. Old P5P commits/ML/CPAN dev talk about this area of code being crucial to (CPAN XS) deserializing perf in perl, so perf considerations, with proper asserts, has priority over readability. Links to old core commits in https://github.com/Perl/perl5/issues/22653 briefly discuss deserializing perf as rational, so this patch also follows that design idea. Perl_vnewSVpvf(), "malloc(1ch);" which in reality is "malloc(16ch)" makes no sense, since almost zero chance fmtstr+args+\0 <= 16, and perl malloc() round up, is semi-UB/a build flag default on anyways. Using guesstimate malloc(pat_len), increases chances far higher, that a realloc() inside sv_vcatpvfn_flags(), OS realloc(), will realloc() in place, not changing the ptr, esp assuming OS malloc() does bucket of power of 2 allocator algo. Assume, 40ch malloc() fmt string, bucket to 64ch by OS malloc(), throw in a %u 32b, that is max +10ch-2ch for "%u". So output is 48ch. realloc(48ch) is inplace, therefore it is a win. --- sv.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 20 deletions(-) diff --git a/sv.c b/sv.c index 2de62af3b819..fd0d7b2e9cfa 100644 --- a/sv.c +++ b/sv.c @@ -9994,12 +9994,14 @@ SV * Perl_vnewSVpvf(pTHX_ const char *const pat, va_list *const args) { SV *sv; + STRLEN pat_len = strlen(pat); PERL_ARGS_ASSERT_VNEWSVPVF; - sv = newSV(1); + /* Unlikely output len < input pat len. ("%c",'A')("%s","") is rare. */ + sv = newSV(pat_len+STRLENs("\0")); SvPVCLEAR_FRESH(sv); - sv_vcatpvfn_flags(sv, pat, strlen(pat), args, NULL, 0, NULL, 0); + sv_vcatpvfn_flags(sv, pat, pat_len, args, NULL, 0, NULL, 0); return sv; } @@ -10015,10 +10017,20 @@ The reference count for the SV is set to 1. SV * Perl_newSVnv(pTHX_ const NV n) { - SV *sv = newSV_type(SVt_NV); + SV *sv; +#if NVSIZE <= IVSIZE + /* This bodyless code has been agressively strip for speed. + Do not revise it unless you use disassembler and look at machine code.*/ + new_SV(sv); + SvFLAGS(sv) = SVt_NV | SVf_NOK | SVp_NOK; + SET_SVANY_FOR_BODYLESS_NV(sv); + sv->sv_u.svu_nv = n; +#else + sv = newSV_type(SVt_NV); (void)SvNOK_on(sv); - SvNV_set(sv, n); +#endif + SvTAINT(sv); return sv; @@ -10036,10 +10048,20 @@ SV is set to 1. SV * Perl_newSViv(pTHX_ const IV i) { - SV *sv = newSV_type(SVt_IV); - (void)SvIOK_on(sv); + SV *sv; + new_SV(sv); + + /* We're starting from SVt_FIRST, so provided that's + * actual 0, we don't have to unset any SV type flags + * to promote to SVt_IV. */ + STATIC_ASSERT_STMT(SVt_FIRST == 0); + + /* This bodyless code has been agressively striped for speed. + Do not revise it unless you use disassembler and look at machine code.*/ + SvFLAGS(sv) = SVt_IV | SVf_IOK | SVp_IOK; + SET_SVANY_FOR_BODYLESS_IV(sv); + sv->sv_u.svu_iv = i; - SvIV_set(sv, i); SvTAINT(sv); return sv; @@ -10058,15 +10080,6 @@ SV * Perl_newSVuv(pTHX_ const UV u) { SV *sv; - - /* Inlining ONLY the small relevant subset of sv_setuv here - * for performance. Makes a significant difference. */ - - /* Using ivs is more efficient than using uvs - see sv_setuv */ - if (u <= (UV)IV_MAX) { - return newSViv((IV)u); - } - new_SV(sv); /* We're starting from SVt_FIRST, so provided that's @@ -10074,12 +10087,44 @@ Perl_newSVuv(pTHX_ const UV u) * to promote to SVt_IV. */ STATIC_ASSERT_STMT(SVt_FIRST == 0); + /* This bodyless code has been agressively striped for speed. + Do not revise it unless you use disassembler and look at machine code.*/ + + /* Verify the &~ and |, or &~ >> | or >> |, trick works. Portability. */ + STATIC_ASSERT_STMT( + cBOOL(((UV)SVf_IVisUV) == (((UV)IV_MAX)+1)) + || cBOOL((((UV)SVf_IVisUV)<<32) == (((UV)IV_MAX)+1))); + STATIC_ASSERT_STMT( + STRUCT_OFFSET(SV, sv_u.svu_uv) == STRUCT_OFFSET(SV, sv_u.svu_iv) + && sizeof(sv->sv_u.svu_uv) == sizeof(sv->sv_u.svu_iv)); + + /* branchless SvIsUV_on() replaces former code with former comments: + + * Inlining ONLY the small relevant subset of sv_setuv here + * for performance. Makes a significant difference. + + * Using ivs is more efficient than using uvs - see sv_setuv + if (u <= (UV)IV_MAX) { + return newSViv((IV)u); + } + */ + /* Flags unrolled, since MSVC -O1 optimizer refused to combine + 3 SvFLAGS(s); statements if "SvFLAGS() |= dynamic_var;". + Unroll to guarentee any CC flags, any CCs, do exactly 1 write to + SvFLAGS(). */ + if(((UV)SVf_IVisUV) == (((UV)IV_MAX)+1)) /* UV is 32 */ + SvFLAGS(sv) = + ((U32)(u&(((UV)IV_MAX)+1))) + | (SVt_IV|SVf_IOK|SVp_IOK); + else /* UV is 64 */ + SvFLAGS(sv) = + ((U32)((UV)((u&(((UV)IV_MAX)+1))>>32))) + | (SVt_IV|SVf_IOK|SVp_IOK); + /* Explictly optimize out reading SvANY ptr. Some CCs might optimize + next 2 statements, MSVC did, but some may not. */ SET_SVANY_FOR_BODYLESS_IV(sv); - SvFLAGS(sv) |= SVt_IV; - (void)SvIOK_on(sv); - (void)SvIsUV_on(sv); + sv->sv_u.svu_uv = u; - SvUV_set(sv, u); SvTAINT(sv); return sv; From d6904cf4d6dce9a40fd3643e09ce90c1ab537949 Mon Sep 17 00:00:00 2001 From: Daniel Dragan Date: Sat, 12 Oct 2024 07:16:26 -0400 Subject: [PATCH 2/6] sv.c: heavy streamline newSVuv() newSViv() newSVnv() for speed part 2 -design and rational in src comments, this patch forces MSVC 2022 x64 to use 64b integer math/CPU ops (regs RAX/RDX/RCX), vs 2 sequences/pairs of EAX/EBX/ECX register ops removing a couple CPU instructions in filling out the SV HEAD. This optimization will translate to all OSes. It is broken out into a separate commit for git bisect reasons since it touches the alignment topic. As with part 1, some members of the community care about rapidly creating massive amounts of SVIVs/SVUVs/SVNVs in deserializing wire/protocol/disk formations, or big data sci num crunching. --- sv.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/sv.c b/sv.c index fd0d7b2e9cfa..86ad803a4ee0 100644 --- a/sv.c +++ b/sv.c @@ -10022,7 +10022,13 @@ Perl_newSVnv(pTHX_ const NV n) /* This bodyless code has been agressively strip for speed. Do not revise it unless you use disassembler and look at machine code.*/ new_SV(sv); +#if PTRSIZE == 8 && (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) + *(Size_t *)(&SvREFCNT(sv)) = + ((Size_t)1) + | ((Size_t)(((Size_t)(SVt_NV | SVf_NOK | SVp_NOK)) << 32)); +#else SvFLAGS(sv) = SVt_NV | SVf_NOK | SVp_NOK; +#endif SET_SVANY_FOR_BODYLESS_NV(sv); sv->sv_u.svu_nv = n; #else @@ -10058,7 +10064,12 @@ Perl_newSViv(pTHX_ const IV i) /* This bodyless code has been agressively striped for speed. Do not revise it unless you use disassembler and look at machine code.*/ +#if PTRSIZE == 8 && (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) + *(Size_t *)(&SvREFCNT(sv)) = ((Size_t)1) | + ((Size_t)((((Size_t)(SVt_IV|SVf_IOK|SVp_IOK)) << 32))); +#else SvFLAGS(sv) = SVt_IV | SVf_IOK | SVp_IOK; +#endif SET_SVANY_FOR_BODYLESS_IV(sv); sv->sv_u.svu_iv = i; @@ -10108,6 +10119,44 @@ Perl_newSVuv(pTHX_ const UV u) return newSViv((IV)u); } */ + + /* If 64b CPU, and little endian (x86, x64, modern ARM), + set sv->sv_refcnt and sv->sv_flags, with exactly 1 CPU op. + 'sv->sv_refcnt = 0;' assignment in new_SV() will optimize away. + Assert sv->sv_any is 64b, and sv->sv_refcnt is directly afterwards + in memory layout, and that sv->sv_refcnt and sv->sv_flags are adjacent, + therefore proving this U32* ptr, casted to U64*, is aligned. + Note majority of modern Perl users use LE CPUs, with hardware unaligned + support. But there is no Configure/perlapi macro defines currently, + that config YES/NO for hardware unaligned. Still, because the SV head + struct currently is aligned on all 64b builds, make sure SV head struct + stays aligned unless intentional future refactoring of SV head struct. + + This optimization can't be done on i386, since 64b ints are always + emulated with 32b CPU ops by all CCs AFAIK. And the X32 Linux OS/Kernel + has already been grandfathered. Other than X32 I can't think of any + OS or CPU with native 64b CPU ops, but 32b pointers. + + Doing this trick on 64b big endian OS, is possible, but a BE core dev + must port the code, fix all bit operators, and test it. */ + STATIC_ASSERT_STMT( + (STRUCT_OFFSET(SV,sv_refcnt) == sizeof(sv->sv_any)) + && STRUCT_OFFSET(SV,sv_flags) == STRUCT_OFFSET(SV,sv_refcnt)+U32SIZE); + +#if PTRSIZE == 8 && (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) + STATIC_ASSERT_STMT( + sizeof(sv->sv_refcnt) + sizeof(sv->sv_flags) == PTRSIZE + && STRUCT_OFFSET(SV,sv_refcnt) == PTRSIZE); + + *(Size_t *)(&SvREFCNT(sv)) = + ((Size_t)1) + | ((Size_t) ( + ((Size_t) ( + ((U32)((UV)((u&(((UV)IV_MAX)+1))>>32))) + | (SVt_IV|SVf_IOK|SVp_IOK) + )) << 32 + )); +#else /* Flags unrolled, since MSVC -O1 optimizer refused to combine 3 SvFLAGS(s); statements if "SvFLAGS() |= dynamic_var;". Unroll to guarentee any CC flags, any CCs, do exactly 1 write to @@ -10120,6 +10169,7 @@ Perl_newSVuv(pTHX_ const UV u) SvFLAGS(sv) = ((U32)((UV)((u&(((UV)IV_MAX)+1))>>32))) | (SVt_IV|SVf_IOK|SVp_IOK); +#endif /* Explictly optimize out reading SvANY ptr. Some CCs might optimize next 2 statements, MSVC did, but some may not. */ SET_SVANY_FOR_BODYLESS_IV(sv); From d57c1418ea68a26ec4494da06c282a5dcddd3c13 Mon Sep 17 00:00:00 2001 From: bulk88 Date: Sat, 12 Oct 2024 08:12:50 -0400 Subject: [PATCH 3/6] Revert "sv_taint() can easily be replaced by a macro." This reverts commit aae9cea0a2be05abb857e6b2b32773d3d5fae6d8. Author note, hand editing required to revert since commit was from 2005 and it is 2024. Part 1 of ? to optimize and reduce overhead of SvTAINT() macro inside all SV * allocator fncs. Using taint feature is rare, and "push(sv), push(my_perl), call()" is alot smaller machine code at the many call sites, than "push(0), push(0), push(116), push(0), push(sv), push(my_perl), call()" and using taint at runtime, means the user decided perf is irrelavent vs security. newSViv()/newSVuv()/newSVnv() are malloc()-free but Perl_sv_magicext() contains "sv_upgrade(SVt_PVMG); calloc(1,0x30);" and not for taint-feat, but also a 2nd "malloc(0x****)". Factor out all those sv_magic() calls into a wrapper for the unlikely branch. SvTAINT() has many call sites in hottest parts of perl. --- embed.fnc | 2 +- embed.h | 1 + mathoms.c | 17 ----------------- proto.h | 10 +++++----- sv.c | 17 +++++++++++++++++ 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/embed.fnc b/embed.fnc index f0d1dedb1485..2a76a7c035e2 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3433,7 +3433,7 @@ Adp |SV * |sv_strftime_tm |NN SV *fmt \ Adp |SV * |sv_string_from_errnum \ |int errnum \ |NULLOK SV *tgtsv -CMbdp |void |sv_taint |NN SV *sv +ACdp |void |sv_taint |NN SV *sv CRdp |bool |sv_tainted |NN SV * const sv Adip |bool |SvTRUE |NULLOK SV *sv Cdp |I32 |sv_true |NULLOK SV * const sv diff --git a/embed.h b/embed.h index 8f890fba4df4..82e7327ea4fa 100644 --- a/embed.h +++ b/embed.h @@ -752,6 +752,7 @@ # define sv_strftime_ints(a,b,c,d,e,f,g,h) Perl_sv_strftime_ints(aTHX_ a,b,c,d,e,f,g,h) # define sv_strftime_tm(a,b) Perl_sv_strftime_tm(aTHX_ a,b) # define sv_string_from_errnum(a,b) Perl_sv_string_from_errnum(aTHX_ a,b) +# define sv_taint(a) Perl_sv_taint(aTHX_ a) # define sv_tainted(a) Perl_sv_tainted(aTHX_ a) # define sv_true(a) Perl_sv_true(aTHX_ a) # define sv_uni_display(a,b,c,d) Perl_sv_uni_display(aTHX_ a,b,c,d) diff --git a/mathoms.c b/mathoms.c index 27fa2969d1b1..6057dacaa494 100644 --- a/mathoms.c +++ b/mathoms.c @@ -90,23 +90,6 @@ Perl_sv_unref(pTHX_ SV *sv) sv_unref_flags(sv, 0); } -/* -=for apidoc_section $tainting -=for apidoc sv_taint - -Taint an SV. Use C instead. - -=cut -*/ - -void -Perl_sv_taint(pTHX_ SV *sv) -{ - PERL_ARGS_ASSERT_SV_TAINT; - - sv_magic((sv), NULL, PERL_MAGIC_taint, NULL, 0); -} - /* sv_2iv() is now a macro using Perl_sv_2iv_flags(); * this function provided for binary compatibility only */ diff --git a/proto.h b/proto.h index 65fe5c5bd68c..e07319463604 100644 --- a/proto.h +++ b/proto.h @@ -4916,6 +4916,11 @@ PERL_CALLCONV SV * Perl_sv_string_from_errnum(pTHX_ int errnum, SV *tgtsv); #define PERL_ARGS_ASSERT_SV_STRING_FROM_ERRNUM +PERL_CALLCONV void +Perl_sv_taint(pTHX_ SV *sv); +#define PERL_ARGS_ASSERT_SV_TAINT \ + assert(sv) + PERL_CALLCONV bool Perl_sv_tainted(pTHX_ SV * const sv) __attribute__warn_unused_result__; @@ -5986,11 +5991,6 @@ Perl_sv_setsv(pTHX_ SV *dsv, SV *ssv); # define PERL_ARGS_ASSERT_SV_SETSV \ assert(dsv) -PERL_CALLCONV void -Perl_sv_taint(pTHX_ SV *sv); -# define PERL_ARGS_ASSERT_SV_TAINT \ - assert(sv) - PERL_CALLCONV void Perl_sv_unref(pTHX_ SV *sv); # define PERL_ARGS_ASSERT_SV_UNREF \ diff --git a/sv.c b/sv.c index 86ad803a4ee0..f267ce48a32f 100644 --- a/sv.c +++ b/sv.c @@ -11147,6 +11147,23 @@ Perl_sv_unref_flags(pTHX_ SV *const ref, const U32 flags) sv_2mortal(target); /* Schedule for freeing later */ } +/* +=for apidoc_section $tainting +=for apidoc sv_taint + +Taint an SV. Use C instead. + +=cut +*/ + +void +Perl_sv_taint(pTHX_ SV *sv) +{ + PERL_ARGS_ASSERT_SV_TAINT; + + sv_magic((sv), NULL, PERL_MAGIC_taint, NULL, 0); +} + /* =for apidoc sv_untaint From 7dd92ee8c530c5ade261505797fa7cf4e798399a Mon Sep 17 00:00:00 2001 From: bulk88 Date: Sat, 12 Oct 2024 09:03:17 -0400 Subject: [PATCH 4/6] de-mathom Perl_sv_taint() part 2 -make Perl_sv_taint() return the SV *, useful for a future optimization previous it was void This part 2, along with part 1. Shows improvement. Delta, after 1 & 2. previous miniperl.exe Win64 .text section, VC 2022 -O1 0x12440C bytes long after 0x1240AC bytes long 864 bytes of machine code were removed. A bin analysis tool shows has Perl_sv_taint() 62 callers in miniperl.exe --- embed.fnc | 2 +- proto.h | 2 +- sv.c | 8 +++++--- sv.h | 2 -- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/embed.fnc b/embed.fnc index 2a76a7c035e2..e64a78efd9c2 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3433,7 +3433,7 @@ Adp |SV * |sv_strftime_tm |NN SV *fmt \ Adp |SV * |sv_string_from_errnum \ |int errnum \ |NULLOK SV *tgtsv -ACdp |void |sv_taint |NN SV *sv +ACdp |SV * |sv_taint |NN SV *sv CRdp |bool |sv_tainted |NN SV * const sv Adip |bool |SvTRUE |NULLOK SV *sv Cdp |I32 |sv_true |NULLOK SV * const sv diff --git a/proto.h b/proto.h index e07319463604..e2f595e1e64c 100644 --- a/proto.h +++ b/proto.h @@ -4916,7 +4916,7 @@ PERL_CALLCONV SV * Perl_sv_string_from_errnum(pTHX_ int errnum, SV *tgtsv); #define PERL_ARGS_ASSERT_SV_STRING_FROM_ERRNUM -PERL_CALLCONV void +PERL_CALLCONV SV * Perl_sv_taint(pTHX_ SV *sv); #define PERL_ARGS_ASSERT_SV_TAINT \ assert(sv) diff --git a/sv.c b/sv.c index f267ce48a32f..f4ddf9b8888c 100644 --- a/sv.c +++ b/sv.c @@ -11151,17 +11151,19 @@ Perl_sv_unref_flags(pTHX_ SV *const ref, const U32 flags) =for apidoc_section $tainting =for apidoc sv_taint -Taint an SV. Use C instead. +Taint an SV. Use C instead. Return value is input SV *. +Useful for chaining calls and more efficient C code (tail calling). =cut */ -void +SV * Perl_sv_taint(pTHX_ SV *sv) { PERL_ARGS_ASSERT_SV_TAINT; - sv_magic((sv), NULL, PERL_MAGIC_taint, NULL, 0); + sv_magic(sv, NULL, PERL_MAGIC_taint, NULL, 0); + return sv; } /* diff --git a/sv.h b/sv.h index 47434ad6c0fd..4f8c4a227517 100644 --- a/sv.h +++ b/sv.h @@ -1724,8 +1724,6 @@ attention to precisely which outputs are influenced by which inputs. =cut */ -#define sv_taint(sv) sv_magic((sv), NULL, PERL_MAGIC_taint, NULL, 0) - #ifdef NO_TAINT_SUPPORT # define SvTAINTED(sv) 0 #else From 7f2987841765c01e16b909093e052d3c439046f9 Mon Sep 17 00:00:00 2001 From: Daniel Dragan Date: Sat, 12 Oct 2024 13:12:34 -0400 Subject: [PATCH 5/6] de-mathom Perl_sv_taint() part 3 (SvTAINT() branch remove) "SvTAINT();" contains "if(PL_tainting && PL_tainted) sv_taint(sv);" that is 2 One Byte reads and 2 branches. Collapse the 2 bool chars, to a U16, so it is exactly 1 read, and 1 branch. Strips complexity from the very bottom of the very hot newSVuv/newSViv/newSVuv, and other callers. sv_taint(sv) has 62 callers, not sure how many do the 2 reads, 2 branches SvTAINT(sv);, but the change decreased the size of miniperl.exe and therefore perl541.dll, and branches were removed from the newSVuv/newSViv/newSVuv trio. Delta machine code bytes, between part 2 & 3 (this commit). previous miniperl.exe Win64 .text section, VC 2022 -O1 0x1240AC bytes long after 0x12408C bytes long --- embedvar.h | 3 +-- intrpvar.h | 5 +++-- perl.c | 11 +++++++++++ perl.h | 17 +++++++++++++++++ sv.c | 4 ++-- sv.h | 4 ++-- 6 files changed, 36 insertions(+), 8 deletions(-) diff --git a/embedvar.h b/embedvar.h index 05f9ed6ef11d..e2e9c4681aa4 100644 --- a/embedvar.h +++ b/embedvar.h @@ -319,9 +319,8 @@ # define PL_sv_yes (vTHX->Isv_yes) # define PL_sv_zero (vTHX->Isv_zero) # define PL_sys_intern (vTHX->Isys_intern) +# define PL_taint (vTHX->Itaint) # define PL_taint_warn (vTHX->Itaint_warn) -# define PL_tainted (vTHX->Itainted) -# define PL_tainting (vTHX->Itainting) # define PL_threadhook (vTHX->Ithreadhook) # define PL_tmps_floor (vTHX->Itmps_floor) # define PL_tmps_ix (vTHX->Itmps_ix) diff --git a/intrpvar.h b/intrpvar.h index 482f46d71885..d6a14d477f7c 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -74,8 +74,9 @@ PERLVAR(I, multideref_pc, UNOP_AUX_item *) PERLVAR(I, curpm, PMOP *) /* what to do \ interps in REs from */ PERLVAR(I, curpm_under, PMOP *) /* what to do \ interps in REs from */ -PERLVAR(I, tainting, bool) /* ? doing taint checks */ -PERLVARI(I, tainted, bool, FALSE) /* using variables controlled by $< */ +/* bool PL_tainting --- ? doing taint checks */ +/* bool PL_tainted --- using variables controlled by $< */ +PERLVAR(I, taint, TAINT_U) /* PL_delaymagic is currently used for two purposes: to assure simultaneous * updates in ($<,$>) = ..., and to assure atomic update in push/unshift diff --git a/perl.c b/perl.c index 15127adc50fe..cdb23079b740 100644 --- a/perl.c +++ b/perl.c @@ -249,6 +249,17 @@ perl_construct(pTHXx) SvREADONLY_on(&PL_sv_placeholder); SvREFCNT(&PL_sv_placeholder) = SvREFCNT_IMMORTAL; + STATIC_ASSERT_STMT( + sizeof(((TAINT_U *)0)->both) + == (sizeof(((TAINT_U *)0)->u.tainting) + sizeof(((TAINT_U *)0)->u.tainted)) + ); + STATIC_ASSERT_STMT( + sizeof(((TAINT_U *)0)->both) + == (STRUCT_OFFSET(TAINT_U, u.tainted) + sizeof(((TAINT_U *)0)->u.tainted)) + ); + STATIC_ASSERT_STMT(STRUCT_OFFSET(TAINT_U, both) == STRUCT_OFFSET(TAINT_U, u.tainting)); + /* PL_taint.u.both = 0; */ + PL_sighandlerp = Perl_sighandler; PL_sighandler1p = Perl_sighandler1; PL_sighandler3p = Perl_sighandler3; diff --git a/perl.h b/perl.h index 7641425fcc12..a60a6c0e4895 100644 --- a/perl.h +++ b/perl.h @@ -940,6 +940,8 @@ symbol would not be defined on C> platforms. * know what you're doing: tests and CPAN modules' tests are bound to fail. */ #ifdef NO_TAINT_SUPPORT +# define PL_tainting PL_taint.u.tainting +# define PL_tainted PL_taint.u.tainted # define TAINT NOOP # define TAINT_NOT NOOP # define TAINT_IF(c) NOOP @@ -948,6 +950,7 @@ symbol would not be defined on C> platforms. # define TAINT_set(s) NOOP # define TAINT_get 0 # define TAINTING_get 0 +# define TAINT_AND_TAINTING_get 0 # define TAINTING_set(s) NOOP # define TAINT_WARN_get 0 # define TAINT_WARN_set(s) NOOP @@ -1014,6 +1017,10 @@ violations are fatal. =cut */ + +#define PL_tainting PL_taint.u.tainting +#define PL_tainted PL_taint.u.tainted + /* Set to tainted if we are running under tainting mode */ # define TAINT (PL_tainted = PL_tainting) @@ -1027,6 +1034,8 @@ violations are fatal. # define TAINT_set(s) (PL_tainted = cBOOL(s)) # define TAINT_get (cBOOL(UNLIKELY(PL_tainted))) /* Is something tainted? */ # define TAINTING_get (cBOOL(UNLIKELY(PL_tainting))) +/* Efficient version of (PL_tainted && PL_tainting) */ +# define TAINT_AND_TAINTING_get (UNLIKELY(PL_taint.both == (TRUE | (TRUE << 8)))) # define TAINTING_set(s) (PL_tainting = cBOOL(s)) # define TAINT_WARN_get (PL_taint_warn) # define TAINT_WARN_set(s) (PL_taint_warn = cBOOL(s)) @@ -3309,6 +3318,14 @@ typedef struct padname PADNAME; #include "handy.h" #include "charclass_invlists.h" +typedef union { + U16 both; + struct { + bool tainting; + bool tainted; + } u; +} TAINT_U; + #if defined(USE_LARGE_FILES) && !defined(NO_64_BIT_RAWIO) # if LSEEKSIZE == 8 && !defined(USE_64_BIT_RAWIO) # define USE_64_BIT_RAWIO /* implicit */ diff --git a/sv.c b/sv.c index f4ddf9b8888c..f8e2d42c2da0 100644 --- a/sv.c +++ b/sv.c @@ -15912,7 +15912,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, #ifndef NO_TAINT_SUPPORT /* Set tainting stuff before PerlIO_debug can possibly get called */ - PL_tainting = proto_perl->Itainting; + PL_tainting = proto_perl->Itaint.u.tainting; PL_taint_warn = proto_perl->Itaint_warn; #else PL_tainting = FALSE; @@ -16057,7 +16057,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_statcache = proto_perl->Istatcache; #ifndef NO_TAINT_SUPPORT - PL_tainted = proto_perl->Itainted; + PL_tainted = proto_perl->Itaint.u.tainted; #else PL_tainted = FALSE; #endif diff --git a/sv.h b/sv.h index 4f8c4a227517..4264c21a3470 100644 --- a/sv.h +++ b/sv.h @@ -1735,8 +1735,8 @@ attention to precisely which outputs are influenced by which inputs. #define SvTAINT(sv) \ STMT_START { \ assert(TAINTING_get || !TAINT_get); \ - if (UNLIKELY(TAINT_get)) \ - SvTAINTED_on(sv); \ + if (TAINT_AND_TAINTING_get) \ + sv_taint(sv); \ } STMT_END /* From c77e852a6f837836478e0f3b87e7a04e6076d3d8 Mon Sep 17 00:00:00 2001 From: bulk88 Date: Sun, 13 Oct 2024 09:06:36 -0400 Subject: [PATCH 6/6] de-mathom Perl_sv_taint() part 4 add SvTAINTTC() tailcall API Perl_newSVnv/Perl_newSViv/Perl_newSVuv, currently have to save the fresh SV *, either on C stack, or in non volatile registers, around the possible Perl_sv_taint() fn call inside SvTAINT(). If Perl_sv_taint() returns its SV * argument, and assigns it back to the same C var, now these 3 performance critical SV allocator functions, after plucking the SV head from the arena, these 3 function never ever have to store the fresh SV * back to C stack for any reason during their execution. This optimization removes pop/push pairs of the C compiler saving non-volatile registers and restoring them at function entry and exit since after SvTAINTTC() change, NO variables AT ALL, have to be saved around any function calls in Perl_newSVnv/Perl_newSViv/Perl_newSVuv. Also the SV head *, after being delinked/removed from an areana, can now be stored through the whole function, in the x86 EAX/x64 RAX register, and pass through to the caller, without a final (non vol) reg to (vol retval reg) mov/copy cpu op. Remember eax/rax/retval registers, are always wiped after each fn call, but the refactoring of SvTAINTTC() conviently returns the SV * back to us, in the ABI return register, and we let the fresh SV * glide through on the "heavy" Perl_sv_taint() branch, from Perl_sv_taint() to Perl_newSViv()'s caller, without touching it, 0 machine code ops. Few code sites were changed from SvTAINT() to SvTAINTTC(), to keep this patch smaller, and the Perl_sv_set*vXXX() category of functions, all have void return types and can't be chained. Also the Perl_sv_taint() branch can be tail called or converted to a JMP insted of CALL, if the CC/OS/ABI wants to now. This is the final part of speeding up Perl_newSVnv/Perl_newSViv/Perl_newSVuv there is nothing else to remove or optimze. --- hv.c | 4 ++-- sv.c | 6 +++--- sv.h | 18 ++++++++++++++++++ sv_inline.h | 2 +- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/hv.c b/hv.c index 7eaf6fc97063..63e17c0a0e38 100644 --- a/hv.c +++ b/hv.c @@ -1130,12 +1130,12 @@ Perl_hv_scalar(pTHX_ HV *hv) if (u <= (UV)IV_MAX) { SvIV_set(sv, (IV)u); (void)SvIOK_only(sv); - SvTAINT(sv); + sv = SvTAINTTC(sv); } else { SvIV_set(sv, 0); SvUV_set(sv, u); (void)SvIOK_only_UV(sv); - SvTAINT(sv); + sv = SvTAINTTC(sv); } return sv; diff --git a/sv.c b/sv.c index f8e2d42c2da0..e19a4e7f7d1f 100644 --- a/sv.c +++ b/sv.c @@ -10037,7 +10037,7 @@ Perl_newSVnv(pTHX_ const NV n) SvNV_set(sv, n); #endif - SvTAINT(sv); + sv = SvTAINTTC(sv); return sv; } @@ -10073,7 +10073,7 @@ Perl_newSViv(pTHX_ const IV i) SET_SVANY_FOR_BODYLESS_IV(sv); sv->sv_u.svu_iv = i; - SvTAINT(sv); + sv = SvTAINTTC(sv); return sv; } @@ -10175,7 +10175,7 @@ Perl_newSVuv(pTHX_ const UV u) SET_SVANY_FOR_BODYLESS_IV(sv); sv->sv_u.svu_uv = u; - SvTAINT(sv); + sv = SvTAINTTC(sv); return sv; } diff --git a/sv.h b/sv.h index 4264c21a3470..7297632ac72c 100644 --- a/sv.h +++ b/sv.h @@ -1721,6 +1721,21 @@ inputs such as locale settings. C propagates that taintedness to the outputs of an expression in a pessimistic fashion; i.e., without paying attention to precisely which outputs are influenced by which inputs. +=cut + +=for apidoc Cm|SV* sv|SvTAINTTC|SV* sv +Identical to C, except optimized for for C compilers to do tail calls. +Incoming arg I will be returned as the retval of I. +The return value I pointer will be identical to the incoming +argument I pointer. Ex. I). This way if I is on, and +the slow path branch executes, which has an internal helper function, that +helper function returns the argument passed in, and C compilers can optimize +the slowpath branch to a tail call, or use less registers. This macro is mostly +intended to be used if C is the last or almost last statement +in the caller function, and the caller has a I return type, and will +return C's arg I, to its caller as a return value. Similar idea +to C. + =cut */ @@ -1739,6 +1754,9 @@ attention to precisely which outputs are influenced by which inputs. sv_taint(sv); \ } STMT_END +#define SvTAINTTC(sv) ((assert(TAINTING_get || !TAINT_get), \ + TAINT_AND_TAINTING_get) ? sv_taint((sv)) : (sv)) + /* =for apidoc_section $SV =for apidoc Am|char*|SvPV_force |SV* sv|STRLEN len diff --git a/sv_inline.h b/sv_inline.h index a0fe8ec870c2..d65c784ca677 100644 --- a/sv_inline.h +++ b/sv_inline.h @@ -985,7 +985,7 @@ Perl_sv_setpv_freshbuf(pTHX_ SV *const sv) (void)SvPOK_only_UTF8(sv); /* UTF-8 flag will be 0; This is used instead of 'SvPOK_only' because the other sv_setpv functions use it */ - SvTAINT(sv); + SvTAINTTC(sv); return SvPVX(sv); }