From ad607a225b0691f17bbac406fc59d426dabaabe0 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Sun, 31 Oct 2021 23:05:31 -0400 Subject: [PATCH] handle some cases of null Task in segv handling (#42836) Also ensure we do not use round-robin sampling when printing a critical error, only for profiling. --- src/gf.c | 2 +- src/julia_internal.h | 2 +- src/signal-handling.c | 19 +++---- src/signals-mach.c | 2 +- src/signals-unix.c | 123 +++++++++++++++++++++--------------------- src/signals-win.c | 7 +-- 6 files changed, 79 insertions(+), 76 deletions(-) diff --git a/src/gf.c b/src/gf.c index 07594a217364f..1615e13fcc2e6 100644 --- a/src/gf.c +++ b/src/gf.c @@ -1829,7 +1829,7 @@ static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args, jl_static_show((JL_STREAM*)STDERR_FILENO,args); jl_printf((JL_STREAM*)STDERR_FILENO,"\n"); jl_ptls_t ptls = jl_current_task->ptls; ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0); - jl_critical_error(0, NULL); + jl_critical_error(0, NULL, jl_current_task); abort(); } // not reached diff --git a/src/julia_internal.h b/src/julia_internal.h index 70efd91d468fc..798a67246be89 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1013,7 +1013,7 @@ size_t rec_backtrace_ctx(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t size_t rec_backtrace_ctx_dwarf(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t *ctx, jl_gcframe_t *pgcstack) JL_NOTSAFEPOINT; #endif JL_DLLEXPORT jl_value_t *jl_get_backtrace(void); -void jl_critical_error(int sig, bt_context_t *context); +void jl_critical_error(int sig, bt_context_t *context, jl_task_t *ct); JL_DLLEXPORT void jl_raise_debugger(void); int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gdblookup(void* ip) JL_NOTSAFEPOINT; diff --git a/src/signal-handling.c b/src/signal-handling.c index f61588ded0489..aef682e29382e 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -241,19 +241,20 @@ void jl_show_sigill(void *_ctx) } // what to do on a critical error on a thread -void jl_critical_error(int sig, bt_context_t *context) +void jl_critical_error(int sig, bt_context_t *context, jl_task_t *ct) { - jl_task_t *ct = jl_current_task; - jl_bt_element_t *bt_data = ct->ptls->bt_data; - size_t *bt_size = &ct->ptls->bt_size; - size_t i, n = *bt_size; + jl_bt_element_t *bt_data = ct ? ct->ptls->bt_data : NULL; + size_t *bt_size = ct ? &ct->ptls->bt_size : NULL; + size_t i, n = ct ? *bt_size : 0; if (sig) { // kill this task, so that we cannot get back to it accidentally (via an untimely ^C or jlbacktrace in jl_exit) jl_set_safe_restore(NULL); - ct->gcstack = NULL; - ct->eh = NULL; - ct->excstack = NULL; + if (ct) { + ct->gcstack = NULL; + ct->eh = NULL; + ct->excstack = NULL; + } #ifndef _OS_WINDOWS_ sigset_t sset; sigemptyset(&sset); @@ -277,7 +278,7 @@ void jl_critical_error(int sig, bt_context_t *context) jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig)); } jl_safe_printf("in expression starting at %s:%d\n", jl_filename, jl_lineno); - if (context) { + if (context && ct) { // Must avoid extended backtrace frames here unless we're sure bt_data // is properly rooted. *bt_size = n = rec_backtrace_ctx(bt_data, JL_MAX_BT_SIZE, context, NULL); diff --git a/src/signals-mach.c b/src/signals-mach.c index 179f45f187e26..57dcc969068e8 100644 --- a/src/signals-mach.c +++ b/src/signals-mach.c @@ -402,7 +402,7 @@ static void jl_try_deliver_sigint(void) static void JL_NORETURN jl_exit_thread0_cb(int exitstate) { CFI_NORETURN - jl_critical_error(exitstate - 128, NULL); + jl_critical_error(exitstate - 128, NULL, jl_current_task); jl_exit(exitstate); } diff --git a/src/signals-unix.c b/src/signals-unix.c index b56eeb19a3fec..9893bf940227f 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -231,7 +231,7 @@ static void sigdie_handler(int sig, siginfo_t *info, void *context) uv_tty_reset_mode(); if (sig == SIGILL) jl_show_sigill(context); - jl_critical_error(sig, jl_to_bt_context(context)); + jl_critical_error(sig, jl_to_bt_context(context), jl_get_current_task()); if (sig != SIGSEGV && sig != SIGBUS && sig != SIGILL) { @@ -410,7 +410,7 @@ CFI_NORETURN // (unavoidable due to its async nature). // Try harder to exit each time if we get multiple exit requests. if (thread0_exit_count <= 1) { - jl_critical_error(thread0_exit_state - 128, NULL); + jl_critical_error(thread0_exit_state - 128, NULL, jl_current_task); jl_exit(thread0_exit_state); } else if (thread0_exit_count == 2) { @@ -747,71 +747,72 @@ static void *signal_listener(void *arg) unw_context_t *signal_context; // sample each thread, round-robin style in reverse order // (so that thread zero gets notified last) - if (critical || profile) + if (critical || profile) { jl_lock_profile(); - jl_shuffle_int_array_inplace(profile_round_robin_thread_order, jl_n_threads, &profile_cong_rng_seed); - for (int idx = jl_n_threads; idx-- > 0; ) { - // Stop the threads in the random round-robin order. - int i = profile_round_robin_thread_order[idx]; - // notify thread to stop - jl_thread_suspend_and_get_state(i, &signal_context); - - // do backtrace on thread contexts for critical signals - // this part must be signal-handler safe - if (critical) { - bt_size += rec_backtrace_ctx(bt_data + bt_size, - JL_MAX_BT_SIZE / jl_n_threads - 1, - signal_context, NULL); - bt_data[bt_size++].uintptr = 0; - } - - // do backtrace for profiler - if (profile && running) { - if (jl_profile_is_buffer_full()) { - // Buffer full: Delete the timer - jl_profile_stop_timer(); + if (!critical) + jl_shuffle_int_array_inplace(profile_round_robin_thread_order, jl_n_threads, &profile_cong_rng_seed); + for (int idx = jl_n_threads; idx-- > 0; ) { + // Stop the threads in the random round-robin order. + int i = critical ? idx : profile_round_robin_thread_order[idx]; + // notify thread to stop + jl_thread_suspend_and_get_state(i, &signal_context); + + // do backtrace on thread contexts for critical signals + // this part must be signal-handler safe + if (critical) { + bt_size += rec_backtrace_ctx(bt_data + bt_size, + JL_MAX_BT_SIZE / jl_n_threads - 1, + signal_context, NULL); + bt_data[bt_size++].uintptr = 0; } - else { - // unwinding can fail, so keep track of the current state - // and restore from the SEGV handler if anything happens. - jl_jmp_buf *old_buf = jl_get_safe_restore(); - jl_jmp_buf buf; - - jl_set_safe_restore(&buf); - if (jl_setjmp(buf, 0)) { - jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n"); - } else { - // Get backtrace data - bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, - bt_size_max - bt_size_cur - 1, signal_context, NULL); - } - jl_set_safe_restore(old_buf); - - jl_ptls_t ptls = jl_all_tls_states[i]; - - // store threadid but add 1 as 0 is preserved to indicate end of block - bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1; - - // store task id - bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task); - - // store cpu cycle clock - bt_data_prof[bt_size_cur++].uintptr = cycleclock(); - // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block - bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1; - - // Mark the end of this block with two 0's - bt_data_prof[bt_size_cur++].uintptr = 0; - bt_data_prof[bt_size_cur++].uintptr = 0; + // do backtrace for profiler + if (profile && running) { + if (jl_profile_is_buffer_full()) { + // Buffer full: Delete the timer + jl_profile_stop_timer(); + } + else { + // unwinding can fail, so keep track of the current state + // and restore from the SEGV handler if anything happens. + jl_jmp_buf *old_buf = jl_get_safe_restore(); + jl_jmp_buf buf; + + jl_set_safe_restore(&buf); + if (jl_setjmp(buf, 0)) { + jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n"); + } else { + // Get backtrace data + bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, + bt_size_max - bt_size_cur - 1, signal_context, NULL); + } + jl_set_safe_restore(old_buf); + + jl_ptls_t ptls = jl_all_tls_states[i]; + + // store threadid but add 1 as 0 is preserved to indicate end of block + bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1; + + // store task id + bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task); + + // store cpu cycle clock + bt_data_prof[bt_size_cur++].uintptr = cycleclock(); + + // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block + bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1; + + // Mark the end of this block with two 0's + bt_data_prof[bt_size_cur++].uintptr = 0; + bt_data_prof[bt_size_cur++].uintptr = 0; + } } - } - // notify thread to resume - jl_thread_resume(i, sig); - } - if (critical || profile) + // notify thread to resume + jl_thread_resume(i, sig); + } jl_unlock_profile(); + } #ifndef HAVE_MACH if (profile && running) { #if defined(HAVE_TIMER) diff --git a/src/signals-win.c b/src/signals-win.c index 159460cf64082..f4b58570a9c79 100644 --- a/src/signals-win.c +++ b/src/signals-win.c @@ -91,7 +91,7 @@ void __cdecl crt_sig_handler(int sig, int num) RtlCaptureContext(&Context); if (sig == SIGILL) jl_show_sigill(&Context); - jl_critical_error(sig, &Context); + jl_critical_error(sig, &Context, jl_get_current_task()); raise(sig); } } @@ -225,7 +225,8 @@ static BOOL WINAPI sigint_handler(DWORD wsig) //This needs winapi types to guara LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo) { - jl_ptls_t ptls = jl_current_task->ptls; + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; if (ExceptionInfo->ExceptionRecord->ExceptionFlags == 0) { switch (ExceptionInfo->ExceptionRecord->ExceptionCode) { case EXCEPTION_INT_DIVIDE_BY_ZERO: @@ -312,7 +313,7 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo) jl_safe_printf(" at 0x%Ix -- ", (size_t)ExceptionInfo->ExceptionRecord->ExceptionAddress); jl_print_native_codeloc((uintptr_t)ExceptionInfo->ExceptionRecord->ExceptionAddress); - jl_critical_error(0, ExceptionInfo->ContextRecord); + jl_critical_error(0, ExceptionInfo->ContextRecord, ct); static int recursion = 0; if (recursion++) exit(1);