From bc1794cf0df1926d12bf20ddacd1b0700ce879bb Mon Sep 17 00:00:00 2001 From: bulk88 Date: Tue, 15 Oct 2024 03:03:47 -0400 Subject: [PATCH] implement C_BP macro for throwing a C debugger breakpoint WIP -short macro name, less to type -cross platform -makes it easier to work on Perl core or XS CPAN -emits debug info to console for CI/smoke/unattended machine -writes to STDOUT and STDERR, incase one of the 2 FDs are redirected to a disk file or piped to another process, or that disk file is temp flagged, and OS instantly deletes it -breaking TAP testing is good -C_BP; is less to type vs DebugBreak(); or __debugbreak(); on Win32 --- embed.fnc | 1 + embed.h | 1 + ext/XS-APItest/APItest.xs | 14 ++++ proto.h | 5 ++ t/uni/caller.t | 30 +++++++- util.c | 142 ++++++++++++++++++++++++++++++++++++++ util.h | 94 +++++++++++++++++++++++++ 7 files changed, 286 insertions(+), 1 deletion(-) diff --git a/embed.fnc b/embed.fnc index f0d1dedb1485f..733a0922559cb 100644 --- a/embed.fnc +++ b/embed.fnc @@ -802,6 +802,7 @@ CRTp |I32 |cast_i32 |NV f CRTp |IV |cast_iv |NV f CRTp |U32 |cast_ulong |NV f CRTp |UV |cast_uv |NV f +TXdp |void |c_bp |NN const char *file_metadata p |bool |check_utf8_print \ |NN const U8 *s \ |const STRLEN len diff --git a/embed.h b/embed.h index 8f890fba4df45..8ff58d6bfd8f7 100644 --- a/embed.h +++ b/embed.h @@ -917,6 +917,7 @@ # define boot_core_builtin() Perl_boot_core_builtin(aTHX) # define boot_core_mro() Perl_boot_core_mro(aTHX) # define build_infix_plugin(a,b,c) Perl_build_infix_plugin(aTHX_ a,b,c) +# define c_bp Perl_c_bp # define cando(a,b,c) Perl_cando(aTHX_ a,b,c) # define check_utf8_print(a,b) Perl_check_utf8_print(aTHX_ a,b) # define closest_cop(a,b,c,d) Perl_closest_cop(aTHX_ a,b,c,d) diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs index 1676ded76c8dd..8485cb8229943 100644 --- a/ext/XS-APItest/APItest.xs +++ b/ext/XS-APItest/APItest.xs @@ -3152,6 +3152,20 @@ my_cxt_setsv(sv) my_cxt_setsv_p(sv _aMY_CXT); SvREFCNT_inc(sv); +void 
+test_C_BP_breakpoint() + CODE: + { +#ifdef WIN32 + UINT em = GetErrorMode(); + SetErrorMode( SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX ); /* NOTE(review): presumably keeps Windows from showing a fault dialog box on unattended runs - confirm */ +#endif + C_BP; /* deliberately trigger the C breakpoint under test */ +#ifdef WIN32 + SetErrorMode(em); /* restore the error mode saved above */ +#endif + } + bool sv_setsv_cow_hashkey_core() diff --git a/proto.h b/proto.h index 65fe5c5bd68ce..9a7d1448467df 100644 --- a/proto.h +++ b/proto.h @@ -418,6 +418,11 @@ Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *lenp); #define PERL_ARGS_ASSERT_BYTES_TO_UTF8 \ assert(s); assert(lenp) +PERL_CALLCONV void +Perl_c_bp(const char *file_metadata); +#define PERL_ARGS_ASSERT_C_BP \ + assert(file_metadata) + PERL_CALLCONV SSize_t Perl_call_argv(pTHX_ const char *sub_name, I32 flags, char **argv); #define PERL_ARGS_ASSERT_CALL_ARGV \ diff --git a/t/uni/caller.t b/t/uni/caller.t index e05f73735d3ad..cc5f50bef1687 100644 --- a/t/uni/caller.t +++ b/t/uni/caller.t @@ -7,10 +7,11 @@ BEGIN { set_up_inc('../lib'); } +use Config; use utf8; use open qw( :utf8 :std ); -plan( tests => 18 ); +plan( tests => 19 ); package main; @@ -74,3 +75,30 @@ $^P = 16; $^P = $saved_perldb; ::is( eval 'pb()', 'main::pb', 'actually return the right function name even if $^P had been on at some point' ); + +# Skip the OS signal/exception from this faux-SEGV +# code is from cpan/Test-Harness/t/harness.t +SKIP: { + ::skip "No SIGSEGV on $^O", 1 + if $^O ne 'MSWin32' && $Config::Config{'sig_name'} !~ m/SEGV/; + #line below not in cpan/Test-Harness/t/harness.t + ::skip "No SIGTRAP on $^O", 1 + if $^O ne 'MSWin32' && $Config::Config{'sig_name'} !~ m/TRAP/; + + # some people -Dcc="somecc -fsanitize=..." or -Doptimize="-fsanitize=..." + ::skip "ASAN doesn't passthrough SEGV", 1 + if "$Config::Config{cc} $Config::Config{ccflags} $Config::Config{optimize}" =~ /-fsanitize\b/; + + my $out_str = ::fresh_perl('use XS::APItest; XS::APItest::test_C_BP_breakpoint();'); + + # On machines where 'ulimit -c' does not return '0', a perl.core + # file is created here. 
We don't need to examine it, and it's + # annoying to have it subsequently show up as an untracked file in + # `git status`, so simply get rid of it per suggestion by Karen + # Etheridge. + END { unlink 'perl.core' } + + + ::like($out_str, qr/panic: C breakpoint hit file/, + 'C_BP macro and C breakpoint works'); +} diff --git a/util.c b/util.c index fa946b4153c2d..690fc0183b931 100644 --- a/util.c +++ b/util.c @@ -2003,6 +2003,148 @@ Perl_croak_popstack(void) my_exit(1); } +/* +=for apidoc c_bp + +Internal helper for C<C_BP>. Not to be called directly. + +Prints file name, C function name, line number, and the CPU instruction +pointer. Instruction pointer intended to be copied to a C debugger tool or +disassembler or used with core dumps. It is a faux-function pointer to +somewhere in the middle of the caller's C function, this address can never +be casted from C<void *> to a function pointer, then called, a SEGV will +occur. + +=cut +*/ + +void +Perl_c_bp(const char * file_metadata) +{ + /* file_metadata is a string in the format of "XS_my_func*XSModule.c*6789" + The 3 arguments are catted together by CPP, so in the caller, + when using a C debugger, you press "Step One" key 2 times less, when + using step by disassembly view. C_BP macro should never appear in + public Stable/Gold releases of Perl core or any CPAN module. Using + C_BP even in an alpha release, is questionable. Smokers/CI greatly + dislike SEGVs which sometimes require human intervention to unfreeze + the console or unattended CI tool. + */ + + /* XXX improvements, identify which .so/.dll on disk this address is from. + Adjust value to a 0-indexed value to remove ASLR randomizing between + process runs. Better integration with USE_C_BACKTRACE if + USE_C_BACKTRACE enabled on a particular platform.
*/ +#if defined(__GNUC__) || defined(__clang__) + void * ip = __builtin_return_address(0); /* GCC family */ +#elif defined(_MSC_VER) + void * ip = _ReturnAddress(); +#else + /* last resort, seems to work on all CPU archs, guaranteed to work + on all x86/x64 OSes, all CCs, exceptions to last resort, rumor says + Solaris SPARC, call/ret instructions pop and push function pointers + to an array of function pointers, far far away from the C stack as + a security measure so on SPARC this would be the contents of a random + C auto var in the caller. + + IA64, with hardware assistance by the IA64, supposedly appropriate + portions of the C stack are automatically shifted into kernel space on + each function call so no callee can read or write any C auto var in its + caller. Only exception is "other_func(&some_var_this_func);" The shift + factor now excludes some_var_this_func. So the line below would SEGV. + + If any bug reports come in from these old CPUs, implement the correct + platform specific way to get debugging info, or uncomment the fallback */ + void * ip = *(((void **)&file_metadata)-1); + /* fallback +# if PTRSIZE == 4 + void * ip = (void *)0x12345678; +# else + void * ip = (void *)0x123456789ABCDEF0; +# endif + */ +#endif + char buf [sizeof("panic: C breakpoint hit file \"%.*s\", function \"%.*s\" line %.*s CPU IP 0x%p\n") + + (U8_MAX*3) + (PTRSIZE*2) + 1]; + int out_len; + U32 f_len; + const char * file_metadata_end; + const char * p; + char * pbuf; + char * pbuf2; + U8 l; + + const char * fnc_st; + const char * fnc_end; + U8 fnc_len; + + const char * fn_st; + const char * fn_end; + U8 fn_len; + + const char * ln_st; + const char * ln_end; + U8 ln_len; + + PERL_ARGS_ASSERT_C_BP; + + + f_len = (U32)strlen(file_metadata); + file_metadata_end = file_metadata + f_len; + p = file_metadata; + + fnc_st = p; + fnc_end = memchr(fnc_st, '*', file_metadata_end - fnc_st); /* search length is the bytes remaining in the string */ + if(!fnc_end) { + fnc_st = "unknown"; + fnc_end = fnc_st + STRLENs("unknown"); + p
= file_metadata_end; + } + else { + p = fnc_end + 1; + } + fnc_len = (U8)(fnc_end - fnc_st); + + fn_st = p; + fn_end = memchr(fn_st, '*', file_metadata_end - fn_st); + if(!fn_end) { + fn_st = "unknown"; + fn_end = fn_st + STRLENs("unknown"); + p = file_metadata_end; + } + else { + p = fn_end + 1; + } + fn_len = (U8)(fn_end-fn_st); + + ln_st = p; + ln_end = file_metadata_end; + ln_len = (U8)(ln_end - p); + if(!ln_len) { + ln_st = "unknown"; + ln_len = STRLENs("unknown"); + } + out_len = my_snprintf((char*)buf, sizeof(buf)-2, + "panic: C breakpoint hit file \"%.*s\", " + "function \"%.*s\" line %.*s CPU IP 0x%p", + (int)fn_len, fn_st, (int)fnc_len, fnc_st, /* a "%.*s" precision argument must be an int */ + (int)ln_len, ln_st, ip); + buf[out_len] = '\0'; /* MSVCRT bug don't ask, paranoia */ + + STMT_START { + dTHX; /* stderr+stdout, force user to see it */ + Perl_warn(aTHX_ "%s", (char *)buf); /* no "\n" for max diag info */ + PerlIO_flush(PerlIO_stderr()); + PerlIO * out = PerlIO_stdout(); + buf[out_len] = '\n'; /* force shell/terminal to print it, paranoia */ + out_len++; + buf[out_len] = '\0'; + PerlIO_write(out, (char *)buf, out_len); + PerlIO_flush(out); /* force shell/terminal to print it */ + } STMT_END; + return; +} + /* =for apidoc warn_sv diff --git a/util.h b/util.h index 84d0e9f1c8d53..58f1815c6bc3b 100644 --- a/util.h +++ b/util.h @@ -165,6 +165,100 @@ typedef struct { #endif /* USE_C_BACKTRACE */ +/* + +=for apidoc_section $debugging +=for apidoc Amn;||C_BP + +Prints file, C function name, and line, to I<STDOUT> and I<STDERR>. +Then triggers a breakpoint in the OS specific C debugger. If the OS specific +C debugger is not running, not configured correctly, not installed, or Perl +is running on an unattended machine, C<C_BP> is immediately fatal to the +Perl process. "immediately fatal" is similar to a SEGV happening. +If you use a C debugger, and C<C_BP> executes, using your C debugger, you can +resume execution of the Perl process, with no side effects, as if nothing +happened.
+ +C breakpoint implementations are very OS specific, but on most OSes, this +is I<SIGTRAP> or CPU I<interrupt 3>. + +C_BP macro should never appear in public Stable/Gold releases of Perl core +or any CPAN module. Using C_BP even in an alpha release, is questionable. +Smokers/CI greatly dislike SEGVs and SEGV-like abnormal process terminations +which sometimes require human intervention to unfreeze the console or +unattended CI tool of that unattended system. C_BP is intended for personal +hacking and development, or 1 off patches sent by a lead dev to a user +for troubleshooting or bug fixing some very specific problem. + +C<C_BP> has no arguments, no return value. Use it as C<C_BP;>. + +=cut + +*/ + +/* __builtin_debugtrap() and __builtin_trap() for GCC and Clang have + bike shedding drama, supposedly GCC and gdb made an executive decision + that a software (compiled C) triggered C breakpoint, is a NO_RETURN + optimized, unrecoverable hard error, and they will never implement + a software triggered breakpoint that can resume execution. + AFAIK internally GCC implements __builtin_debugtrap() and __builtin_trap() + as an illegal CPU opcode, followed by NO_RETURN optimization. + Supposedly GDB itself, when you set a BP in GDB, GDB will scribble + "illegal opcode" in the process memory space and save whatever prior CPU + op was there before. Then once the OS kernel delivers SIGILL, gdb looks at + its table of breakpoints, scribbles the old good opcode over "illegal op" + and resumes execution or pauses and shows you the frozen process. + + gdb will not and has no way, to repair a foreign "illegal opcode" "problem" + that came with the binary from the disk copy of the binary. + The foreign "illegal opcode" was inserted at compile time by GCC. + + Therefore, __builtin_debugtrap() and __builtin_trap() are not being used here + since they don't allow resuming execution after the breakpoint/pause + in the C debugger.
+ + x86/x64 interrupt 3 allows resuming execution, since while GCC/gdb disagree + with the decisions of Linux Kernel devs, they are reluctantly forced + to allow resuming execution for ABI/API compatibility with the Linux Kernel. + + For ARM, the assembly opcode for a software breakpoint seems to change with + each new iPhone release. If you have a working perl on ARM development + environment, you are welcome to add ARM specific code with correct #ifdefs. + + If anyone wants to test __builtin_debugtrap() and __builtin_trap(), and + see if execution can be resumed in a C debugger on + Clang/Apple/Android/latest GCC/forks of GCC, you're welcome to set #ifdefs + and use __builtin_debugtrap()/__builtin_trap(). NOTE(review): every C_BP definition below pastes FUNCTION__ against string literals, which presumably only compiles where FUNCTION__ expands to a string literal (C99 __func__ is not one), and the one-line "defined(__has_builtin) && __has_builtin(...)" test is a preprocessor error on compilers lacking __has_builtin -- confirm on non-MSVC compilers. +*/ + +#ifdef _MSC_VER +# define C_BP (void)(IsDebuggerPresent() ? __debugbreak() \ + : (Perl_c_bp(FUNCTION__ "*"__FILE__ "*" STRINGIFY(__LINE__)) \ + ,__debugbreak())) +#elif defined(__has_builtin) && __has_builtin(__debugbreak) +# ifdef WIN32 +# define C_BP (void)(IsDebuggerPresent() ? __debugbreak() \ + : (Perl_c_bp(FUNCTION__ "*"__FILE__ "*" STRINGIFY(__LINE__)) \ + ,__debugbreak())) +# else +# define C_BP (void)(Perl_c_bp(FUNCTION__ "*"__FILE__ "*" STRINGIFY(__LINE__)) \ + ,__debugbreak()) +# endif +#elif defined(__i386__) || defined(__x86_64__) +# define C_BP (void)(Perl_c_bp(FUNCTION__ "*"__FILE__ "*" STRINGIFY(__LINE__)) \ + ,__debugbreak_int3()) + +PERL_STATIC_INLINE void __debugbreak_int3(void) { + __asm__ __volatile__("int {$}3":); +} + +#else +/* last resort, has to do something useful on all platforms, and SIGTRAP + in a post mortem log file is very distinct from SIGSEGV */ +# define C_BP (void)(Perl_c_bp(FUNCTION__ "*"__FILE__ "*" STRINGIFY(__LINE__)) \ + ,raise(SIGTRAP)) +#endif + /* Use a packed 32 bit constant "key" to start the handshake. The key defines ABI compatibility, and how to process the vararg list.