diff --git a/src/gf.c b/src/gf.c index 33ec6003c95f9..43d4cf5112b95 100644 --- a/src/gf.c +++ b/src/gf.c @@ -1817,7 +1817,7 @@ static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args, jl_static_show((JL_STREAM*)STDERR_FILENO,args); jl_printf((JL_STREAM*)STDERR_FILENO,"\n"); jl_ptls_t ptls = jl_get_ptls_states(); ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0); - jl_critical_error(0, NULL, ptls->bt_data, &ptls->bt_size); + jl_critical_error(0, NULL); abort(); } // not reached diff --git a/src/julia_internal.h b/src/julia_internal.h index d1c040d8ee71a..0b766967918e2 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -69,6 +69,36 @@ void __tsan_switch_to_fiber(void *fiber, unsigned flags); # define JL_USE_IFUNC 0 #endif +// If we've smashed the stack, (and not just normal NORETURN) +// this will smash stack-unwind too +#ifdef _OS_WINDOWS_ +#if defined(_CPU_X86_64_) + // install the unhandled exception handler at the top of our stack + // to call directly into our personality handler +#define CFI_NORETURN \ + asm volatile ("\t.seh_handler __julia_personality, @except\n\t.text"); +#else +#define CFI_NORETURN +#endif +#else +// wipe out the call-stack unwind capability beyond this function +// (we are noreturn, so it is not a total lie) +#if defined(_CPU_X86_64_) +// per nongnu libunwind: "x86_64 ABI specifies that end of call-chain is marked with a NULL RBP or undefined return address" +// so we do all 3, to be extra certain of it +#define CFI_NORETURN \ + asm volatile ("\t.cfi_undefined rip"); \ + asm volatile ("\t.cfi_undefined rbp"); \ + asm volatile ("\t.cfi_return_column rbp"); +#else + // per nongnu libunwind: "DWARF spec says undefined return address location means end of stack" + // we use whatever happens to be register 1 on this platform for this +#define CFI_NORETURN \ + asm volatile ("\t.cfi_undefined 1"); \ + asm volatile ("\t.cfi_return_column 1"); +#endif +#endif + // If this is detected in a backtrace of segfault, it means the functions // that use this value must be reworked into their async form with cb arg // provided and with JL_UV_LOCK used around the calls @@ -904,7 +934,7 @@ size_t rec_backtrace_ctx(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t size_t rec_backtrace_ctx_dwarf(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t *ctx, jl_gcframe_t *pgcstack) JL_NOTSAFEPOINT; #endif JL_DLLEXPORT jl_value_t *jl_get_backtrace(void); -void jl_critical_error(int sig, bt_context_t *context, jl_bt_element_t *bt_data, size_t *bt_size); +void jl_critical_error(int sig, bt_context_t *context); JL_DLLEXPORT void jl_raise_debugger(void); int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gdblookup(void* ip) JL_NOTSAFEPOINT; diff --git a/src/signal-handling.c b/src/signal-handling.c index 80dfdb3b2fc21..aa642eeedf2a2 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -231,15 +231,44 @@ void jl_show_sigill(void *_ctx) #endif } -// what to do on a critical error -void jl_critical_error(int sig, bt_context_t *context, jl_bt_element_t *bt_data, size_t *bt_size) +// what to do on a critical error on a thread +void jl_critical_error(int sig, bt_context_t *context) { - // This function is not allowed to reference any TLS variables. - // We need to explicitly pass in the TLS buffer pointer when - // we make `jl_filename` and `jl_lineno` thread local. + + jl_ptls_t ptls = jl_get_ptls_states(); + jl_bt_element_t *bt_data = ptls->bt_data; + size_t *bt_size = &ptls->bt_size; size_t i, n = *bt_size; - if (sig) + if (sig) { + // kill this task, so that we cannot get back to it accidentally (via an untimely ^C or jlbacktrace in jl_exit) + ptls->pgcstack = NULL; + ptls->safe_restore = NULL; + if (ptls->current_task) { + ptls->current_task->eh = NULL; + ptls->current_task->excstack = NULL; + } +#ifndef _OS_WINDOWS_ + sigset_t sset; + sigemptyset(&sset); + // n.b. In `abort()`, Apple's libSystem "helpfully" blocks all signals + // on all threads but SIGABRT. But we also don't know what the thread + // was doing, so unblock all critical signals so that they will crash + // hard, and not just get stuck. + sigaddset(&sset, SIGSEGV); + sigaddset(&sset, SIGBUS); + sigaddset(&sset, SIGILL); + // also unblock fatal signals now, so we won't get back here twice + sigaddset(&sset, SIGTERM); + sigaddset(&sset, SIGABRT); + sigaddset(&sset, SIGQUIT); + // and the original signal is now fatal too, in case it wasn't + // something already listed (?) + if (sig != SIGINT) + sigaddset(&sset, sig); + pthread_sigmask(SIG_UNBLOCK, &sset, NULL); +#endif jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig)); + } jl_safe_printf("in expression starting at %s:%d\n", jl_filename, jl_lineno); if (context) { // Must avoid extended backtrace frames here unless we're sure bt_data diff --git a/src/signals-mach.c b/src/signals-mach.c index 3737bab1002cd..0d97d3b0dce56 100644 --- a/src/signals-mach.c +++ b/src/signals-mach.c @@ -84,6 +84,7 @@ extern boolean_t exc_server(mach_msg_header_t *, mach_msg_header_t *); void *mach_segv_listener(void *arg) { (void)arg; + (void)jl_get_ptls_states(); while (1) { int ret = mach_msg_server(exc_server, 2048, segv_port, MACH_MSG_TIMEOUT_NONE); jl_safe_printf("mach_msg_server: %s\n", mach_error_string(ret)); @@ -91,7 +92,8 @@ void *mach_segv_listener(void *arg) } } -static void allocate_segv_handler() + +static void allocate_mach_handler() { // ensure KEYMGR_GCC3_DW2_OBJ_LIST is initialized, as this requires malloc // and thus can deadlock when used without first initializing it. @@ -122,7 +124,7 @@ static void allocate_segv_handler() jl_error("pthread_create failed"); } pthread_attr_destroy(&attr); - for (int16_t tid = 0;tid < jl_n_threads;tid++) { + for (int16_t tid = 0; tid < jl_n_threads; tid++) { attach_exception_port(pthread_mach_thread_np(jl_all_tls_states[tid]->system_id), 0); } } @@ -164,19 +166,31 @@ typedef arm_exception_state64_t host_exception_state_t; static void jl_call_in_state(jl_ptls_t ptls2, host_thread_state_t *state, void (*fptr)(void)) { - uint64_t rsp = (uint64_t)ptls2->signal_stack + sig_stack_size; +#ifdef _CPU_X86_64_ + uintptr_t rsp = state->__rsp; +#elif defined(_CPU_AARCH64_) + uintptr_t rsp = state->__sp; +#else +#error "julia: throw-in-context not supported on this platform" +#endif + if (ptls2->signal_stack == NULL || is_addr_on_sigstack(ptls2, (void*)rsp)) { + rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment + } + else { + rsp = (uintptr_t)ptls2->signal_stack + sig_stack_size; + } assert(rsp % 16 == 0); - // push (null) $RIP onto the stack - rsp -= sizeof(void*); - *(void**)rsp = NULL; - #ifdef _CPU_X86_64_ + rsp -= sizeof(void*); state->__rsp = rsp; // set stack pointer state->__rip = (uint64_t)fptr; // "call" the function -#else +#elif defined(_CPU_AARCH64_) state->__sp = rsp; state->__pc = (uint64_t)fptr; + state->__lr = 0; +#else +#error "julia: throw-in-context not supported on this platform" #endif } @@ -194,11 +208,22 @@ static void jl_throw_in_thread(int tid, mach_port_t thread, jl_value_t *exceptio ptls2->sig_exception = exception; } jl_call_in_state(ptls2, &state, &jl_sig_throw); - ret = thread_set_state(thread, THREAD_STATE, - (thread_state_t)&state, count); + ret = thread_set_state(thread, THREAD_STATE, (thread_state_t)&state, count); HANDLE_MACH_ERROR("thread_set_state", ret); } +static void segv_handler(int sig, siginfo_t *info, void *context) +{ + jl_ptls_t ptls = jl_get_ptls_states(); + assert(sig == SIGSEGV || sig == SIGBUS); + if (ptls->safe_restore) { // restarting jl_ or jl_unwind_stepn + jl_call_in_state(ptls, (host_thread_state_t*)jl_to_bt_context(context), &jl_sig_throw); + } + else { + sigdie_handler(sig, info, context); + } +} + //exc_server uses dlsym to find symbol JL_DLLEXPORT kern_return_t catch_exception_raise(mach_port_t exception_port, @@ -208,18 +233,16 @@ kern_return_t catch_exception_raise(mach_port_t exception_port, exception_data_t code, mach_msg_type_number_t code_count) { - unsigned int count = THREAD_STATE_COUNT; unsigned int exc_count = HOST_EXCEPTION_STATE_COUNT; host_exception_state_t exc_state; - host_thread_state_t state; -#ifdef LIBOSXUNWIND +#ifdef LLVMLIBUNWIND if (thread == mach_profiler_thread) { return profiler_segv_handler(exception_port, thread, task, exception, code, code_count); } #endif int16_t tid; jl_ptls_t ptls2 = NULL; - for (tid = 0;tid < jl_n_threads;tid++) { + for (tid = 0; tid < jl_n_threads; tid++) { jl_ptls_t _ptls2 = jl_all_tls_states[tid]; if (pthread_mach_thread_np(_ptls2->system_id) == thread) { ptls2 = _ptls2; @@ -288,11 +311,8 @@ kern_return_t catch_exception_raise(mach_port_t exception_port, return KERN_SUCCESS; } else { - kern_return_t ret = thread_get_state(thread, THREAD_STATE, (thread_state_t)&state, &count); - HANDLE_MACH_ERROR("thread_get_state", ret); - jl_critical_error(SIGSEGV, (unw_context_t*)&state, - ptls2->bt_data, &ptls2->bt_size); - return KERN_INVALID_ARGUMENT; + jl_exit_thread0(128 + SIGSEGV, NULL, 0); + return KERN_SUCCESS; } } @@ -307,24 +327,27 @@ static void attach_exception_port(thread_port_t thread, int segv_only) HANDLE_MACH_ERROR("thread_set_exception_ports", ret); } -static void jl_thread_suspend_and_get_state(int tid, unw_context_t **ctx) +static void jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx) { jl_ptls_t ptls2 = jl_all_tls_states[tid]; - mach_port_t tid_port = pthread_mach_thread_np(ptls2->system_id); + mach_port_t thread = pthread_mach_thread_np(ptls2->system_id); - kern_return_t ret = thread_suspend(tid_port); + kern_return_t ret = thread_suspend(thread); HANDLE_MACH_ERROR("thread_suspend", ret); // Do the actual sampling unsigned int count = THREAD_STATE_COUNT; - static unw_context_t state; - memset(&state, 0, sizeof(unw_context_t)); + memset(ctx, 0, sizeof(*ctx)); // Get the state of the suspended thread - ret = thread_get_state(tid_port, THREAD_STATE, (thread_state_t)&state, &count); + ret = thread_get_state(thread, THREAD_STATE, (thread_state_t)ctx, &count); +} - // Initialize the unwind context with the suspend thread's state - *ctx = &state; +static void jl_thread_suspend_and_get_state(int tid, unw_context_t **ctx) +{ + static host_thread_state_t state; + jl_thread_suspend_and_get_state2(tid, &state); + *ctx = (unw_context_t*)&state; } static void jl_thread_resume(int tid, int sig) @@ -366,29 +389,46 @@ static void jl_try_deliver_sigint(void) HANDLE_MACH_ERROR("thread_resume", ret); } -static void jl_exit_thread0(int exitstate) +static void JL_NORETURN jl_exit_thread0_cb(int exitstate) +{ +CFI_NORETURN + jl_critical_error(exitstate - 128, NULL); + jl_exit(exitstate); +} + +static void jl_exit_thread0(int exitstate, jl_bt_element_t *bt_data, size_t bt_size) { jl_ptls_t ptls2 = jl_all_tls_states[0]; mach_port_t thread = pthread_mach_thread_np(ptls2->system_id); - kern_return_t ret = thread_suspend(thread); - HANDLE_MACH_ERROR("thread_suspend", ret); + + host_thread_state_t state; + jl_thread_suspend_and_get_state2(0, &state); + unw_context_t *uc = (unw_context_t*)&state; // This aborts `sleep` and other syscalls. - ret = thread_abort(thread); + kern_return_t ret = thread_abort(thread); HANDLE_MACH_ERROR("thread_abort", ret); - unsigned int count = THREAD_STATE_COUNT; - host_thread_state_t state; - ret = thread_get_state(thread, THREAD_STATE, - (thread_state_t)&state, &count); + if (bt_data == NULL) { + // Must avoid extended backtrace frames here unless we're sure bt_data + // is properly rooted. + ptls2->bt_size = rec_backtrace_ctx(ptls2->bt_data, JL_MAX_BT_SIZE, uc, NULL); + } + else { + ptls2->bt_size = bt_size; // <= JL_MAX_BT_SIZE + memcpy(ptls2->bt_data, bt_data, ptls2->bt_size * sizeof(bt_data[0])); + } void (*exit_func)(int) = &_exit; if (thread0_exit_count <= 1) { - exit_func = &jl_exit; + exit_func = &jl_exit_thread0_cb; } else if (thread0_exit_count == 2) { exit_func = &exit; } + else { + exit_func = &_exit; + } #ifdef _CPU_X86_64_ // First integer argument. Not portable but good enough =) @@ -399,8 +439,8 @@ static void jl_exit_thread0(int exitstate) #error Fill in first integer argument here #endif jl_call_in_state(ptls2, &state, (void (*)(void))exit_func); - ret = thread_set_state(thread, THREAD_STATE, - (thread_state_t)&state, count); + unsigned int count = THREAD_STATE_COUNT; + ret = thread_set_state(thread, THREAD_STATE, (thread_state_t)&state, count); HANDLE_MACH_ERROR("thread_set_state", ret); ret = thread_resume(thread); @@ -498,8 +538,10 @@ void *mach_profile_listener(void *arg) break; } - unw_context_t *uc; - jl_thread_suspend_and_get_state(i, &uc); + host_thread_state_t state; + jl_thread_suspend_and_get_state2(i, &state); + unw_context_t *uc = (unw_context_t*)&state; + if (running) { #ifdef LIBOSXUNWIND /* diff --git a/src/signals-unix.c b/src/signals-unix.c index 57ce2439fcb90..de3b5e13c98df 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -58,7 +58,9 @@ static bt_context_t *jl_to_bt_context(void *sigctx) #endif } + static int thread0_exit_count = 0; +static void jl_exit_thread0(int exitstate, jl_bt_element_t *bt_data, size_t bt_size); static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void *_ctx) { @@ -86,8 +88,17 @@ static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void * #endif } +static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr) +{ + // One guard page for signal_stack. + return !((char*)ptr < (char*)ptls->signal_stack - jl_page_size || + (char*)ptr > (char*)ptls->signal_stack + sig_stack_size); +} + // Modify signal context `_ctx` so that `fptr` will execute when the signal // returns. `fptr` will execute on the signal stack, and must not return. +// jl_call_in_ctx is also currently executing on that signal stack, +// so be careful not to smash it static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_ctx) { // Modifying the ucontext should work but there is concern that @@ -105,30 +116,32 @@ static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_c fptr(); return; } - uintptr_t rsp = (uintptr_t)ptls->signal_stack + sig_stack_size; + uintptr_t rsp = jl_get_rsp_from_ctx(_ctx); + if (is_addr_on_sigstack(ptls, (void*)rsp)) { + rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment + } + else { + rsp = (uintptr_t)ptls->signal_stack + sig_stack_size; + } assert(rsp % 16 == 0); #if defined(_OS_LINUX_) && defined(_CPU_X86_64_) ucontext_t *ctx = (ucontext_t*)_ctx; rsp -= sizeof(void*); - *(void**)rsp = NULL; ctx->uc_mcontext.gregs[REG_RSP] = rsp; ctx->uc_mcontext.gregs[REG_RIP] = (uintptr_t)fptr; #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_) ucontext_t *ctx = (ucontext_t*)_ctx; rsp -= sizeof(void*); - *(void**)rsp = NULL; ctx->uc_mcontext.mc_rsp = rsp; ctx->uc_mcontext.mc_rip = (uintptr_t)fptr; #elif defined(_OS_LINUX_) && defined(_CPU_X86_) ucontext_t *ctx = (ucontext_t*)_ctx; rsp -= sizeof(void*); - *(void**)rsp = NULL; ctx->uc_mcontext.gregs[REG_ESP] = rsp; ctx->uc_mcontext.gregs[REG_EIP] = (uintptr_t)fptr; #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_) ucontext_t *ctx = (ucontext_t*)_ctx; rsp -= sizeof(void*); - *(void**)rsp = NULL; ctx->uc_mcontext.mc_esp = rsp; ctx->uc_mcontext.mc_eip = (uintptr_t)fptr; #elif defined(_OS_LINUX_) && defined(_CPU_AARCH64_) @@ -162,14 +175,14 @@ static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_c // `catch_exception_raise`. It works fine when a signal is received // due to `kill`/`raise` though. ucontext64_t *ctx = (ucontext64_t*)_ctx; - rsp -= sizeof(void*); - *(void**)rsp = NULL; #if defined(_CPU_X86_64_) + rsp -= sizeof(void*); ctx->uc_mcontext64->__ss.__rsp = rsp; ctx->uc_mcontext64->__ss.__rip = (uintptr_t)fptr; #else ctx->uc_mcontext64->__ss.__sp = rsp; ctx->uc_mcontext64->__ss.__pc = (uintptr_t)fptr; + ctx->uc_mcontext64->__ss.__lr = 0; #endif #else #warning "julia: throw-in-context not supported on this platform" @@ -206,16 +219,11 @@ static int is_addr_on_stack(jl_ptls_t ptls, void *addr) static void sigdie_handler(int sig, siginfo_t *info, void *context) { - jl_ptls_t ptls = jl_get_ptls_states(); - sigset_t sset; + signal(sig, SIG_DFL); uv_tty_reset_mode(); if (sig == SIGILL) jl_show_sigill(context); - jl_critical_error(sig, jl_to_bt_context(context), - ptls->bt_data, &ptls->bt_size); - sigfillset(&sset); - sigprocmask(SIG_UNBLOCK, &sset, NULL); - signal(sig, SIG_DFL); + jl_critical_error(sig, jl_to_bt_context(context)); if (sig != SIGSEGV && sig != SIGBUS && sig != SIGILL) { @@ -228,12 +236,6 @@ static void sigdie_handler(int sig, siginfo_t *info, void *context) #include "signals-mach.c" #else -static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr) -{ - // One guard page for signal_stack. - return !((char*)ptr < (char*)ptls->signal_stack - jl_page_size || - (char*)ptr > (char*)ptls->signal_stack + sig_stack_size); -} static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context) { @@ -245,7 +247,6 @@ static void segv_handler(int sig, siginfo_t *info, void *context) { jl_ptls_t ptls = jl_get_ptls_states(); assert(sig == SIGSEGV || sig == SIGBUS); - if (jl_addr_is_safepoint((uintptr_t)info->si_addr)) { jl_set_gc_and_wait(); // Do not raise sigint on worker thread @@ -284,22 +285,6 @@ static void segv_handler(int sig, siginfo_t *info, void *context) } } -static void allocate_segv_handler(void) -{ - struct sigaction act; - memset(&act, 0, sizeof(struct sigaction)); - sigemptyset(&act.sa_mask); - act.sa_sigaction = segv_handler; - act.sa_flags = SA_ONSTACK | SA_SIGINFO; - if (sigaction(SIGSEGV, &act, NULL) < 0) { - jl_errorf("fatal error: sigaction: %s", strerror(errno)); - } - // On AArch64, stack overflow triggers a SIGBUS - if (sigaction(SIGBUS, &act, NULL) < 0) { - jl_errorf("fatal error: sigaction: %s", strerror(errno)); - } -} - #if !defined(JL_DISABLE_LIBUNWIND) static unw_context_t *volatile signal_context; static pthread_mutex_t in_signal_lock; @@ -319,9 +304,8 @@ static void jl_thread_suspend_and_get_state(int tid, unw_context_t **ctx) static void jl_thread_resume(int tid, int sig) { - (void)sig; jl_ptls_t ptls2 = jl_all_tls_states[tid]; - jl_atomic_store_release(&ptls2->signal_request, 1); + jl_atomic_store_release(&ptls2->signal_request, sig == -1 ? 3 : 1); pthread_cond_broadcast(&exit_signal_cond); pthread_cond_wait(&signal_caught_cond, &in_signal_lock); // wait for thread to acknowledge assert(jl_atomic_load_acquire(&ptls2->signal_request) == 0); @@ -344,12 +328,14 @@ static void jl_try_deliver_sigint(void) // Write only by signal handling thread, read only by main thread // no sync necessary. static int thread0_exit_state = 0; -static void jl_exit_thread0_cb(void) +static void JL_NORETURN jl_exit_thread0_cb(void) { +CFI_NORETURN // This can get stuck if it happens at an unfortunate spot // (unavoidable due to its async nature). // Try harder to exit each time if we get multiple exit requests. if (thread0_exit_count <= 1) { + jl_critical_error(thread0_exit_state - 128, NULL); jl_exit(thread0_exit_state); } else if (thread0_exit_count == 2) { @@ -360,12 +346,23 @@ static void jl_exit_thread0_cb(void) } } -static void jl_exit_thread0(int state) +static void jl_exit_thread0(int state, jl_bt_element_t *bt_data, size_t bt_size) { jl_ptls_t ptls2 = jl_all_tls_states[0]; - thread0_exit_state = state; - jl_atomic_store_release(&ptls2->signal_request, 3); - pthread_kill(ptls2->system_id, SIGUSR2); + if (thread0_exit_count <= 1) { + unw_context_t *signal_context; + jl_thread_suspend_and_get_state(0, &signal_context); + thread0_exit_state = state; + ptls2->bt_size = bt_size; // <= JL_MAX_BT_SIZE + memcpy(ptls2->bt_data, bt_data, ptls2->bt_size * sizeof(bt_data[0])); + jl_thread_resume(0, -1); + } + else { + thread0_exit_state = state; + jl_atomic_store_release(&ptls2->signal_request, 3); + // This also makes sure `sleep` is aborted. + pthread_kill(ptls2->system_id, SIGUSR2); + } } // request: @@ -387,12 +384,10 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx) pthread_cond_broadcast(&signal_caught_cond); pthread_cond_wait(&exit_signal_cond, &in_signal_lock); request = jl_atomic_exchange(&ptls->signal_request, 0); - assert(request == 1); - (void)request; + assert(request == 1 || request == 3); pthread_cond_broadcast(&signal_caught_cond); pthread_mutex_unlock(&in_signal_lock); } - else #endif if (request == 2) { int force = jl_check_force_sigint(); @@ -483,43 +478,42 @@ JL_DLLEXPORT void jl_profile_stop_timer(void) #endif #endif // HAVE_MACH -static void *alloc_sigstack(size_t size) +static void allocate_segv_handler(void) { - size_t pagesz = jl_getpagesize(); - // Add one guard page to catch stack overflow in the signal handler - size = LLT_ALIGN(size, pagesz) + pagesz; - void *stackbuff = mmap(0, size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (stackbuff == MAP_FAILED) - jl_errorf("fatal error allocating signal stack: mmap: %s", - strerror(errno)); - mprotect(stackbuff, pagesz, PROT_NONE); - return (void*)((char*)stackbuff + pagesz); + struct sigaction act; + memset(&act, 0, sizeof(struct sigaction)); + sigemptyset(&act.sa_mask); + act.sa_sigaction = segv_handler; + act.sa_flags = SA_ONSTACK | SA_SIGINFO; + if (sigaction(SIGSEGV, &act, NULL) < 0) { + jl_errorf("fatal error: sigaction: %s", strerror(errno)); + } + // On AArch64, stack overflow triggers a SIGBUS + if (sigaction(SIGBUS, &act, NULL) < 0) { + jl_errorf("fatal error: sigaction: %s", strerror(errno)); + } +} + +static void *alloc_sigstack(size_t *ssize) +{ + void *stk = jl_malloc_stack(ssize, NULL); + if (stk == MAP_FAILED) + jl_errorf("fatal error allocating signal stack: mmap: %s", strerror(errno)); + return stk; } void jl_install_thread_signal_handler(jl_ptls_t ptls) { - void *signal_stack = alloc_sigstack(sig_stack_size); + size_t ssize = sig_stack_size; + void *signal_stack = alloc_sigstack(&ssize); + ptls->signal_stack = signal_stack; stack_t ss; ss.ss_flags = 0; - ss.ss_size = sig_stack_size - 16; + ss.ss_size = ssize - 16; ss.ss_sp = signal_stack; if (sigaltstack(&ss, NULL) < 0) { jl_errorf("fatal error: sigaltstack: %s", strerror(errno)); } - -#if !defined(HAVE_MACH) - struct sigaction act; - memset(&act, 0, sizeof(struct sigaction)); - sigemptyset(&act.sa_mask); - act.sa_sigaction = usr2_handler; - act.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTART; - if (sigaction(SIGUSR2, &act, NULL) < 0) { - jl_errorf("fatal error: sigaction: %s", strerror(errno)); - } -#endif - - ptls->signal_stack = signal_stack; } static void jl_sigsetset(sigset_t *sset) @@ -737,10 +731,16 @@ static void *signal_listener(void *arg) // this part is async with the running of the rest of the program // and must be thread-safe, but not necessarily signal-handler safe if (critical) { - jl_critical_error(sig, NULL, bt_data, &bt_size); if (doexit) { thread0_exit_count++; - jl_exit_thread0(128 + sig); + jl_exit_thread0(128 + sig, bt_data, bt_size); + } + else { + jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig)); + size_t i; + for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) { + jl_print_bt_entry_codeloc(bt_data + i); + } } } } @@ -787,7 +787,7 @@ void jl_install_default_signal_handlers(void) memset(&actf, 0, sizeof(struct sigaction)); sigemptyset(&actf.sa_mask); actf.sa_sigaction = fpe_handler; - actf.sa_flags = SA_SIGINFO; + actf.sa_flags = SA_ONSTACK | SA_SIGINFO; if (sigaction(SIGFPE, &actf, NULL) < 0) { jl_errorf("fatal error: sigaction: %s", strerror(errno)); } @@ -806,13 +806,26 @@ void jl_install_default_signal_handlers(void) jl_error("fatal error: Couldn't set SIGTRAP"); } +#if defined(HAVE_MACH) + allocate_mach_handler(); +#else + struct sigaction act; + memset(&act, 0, sizeof(struct sigaction)); + sigemptyset(&act.sa_mask); + act.sa_sigaction = usr2_handler; + act.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTART; + if (sigaction(SIGUSR2, &act, NULL) < 0) { + jl_errorf("fatal error: sigaction: %s", strerror(errno)); + } +#endif + allocate_segv_handler(); struct sigaction act_die; memset(&act_die, 0, sizeof(struct sigaction)); sigemptyset(&act_die.sa_mask); act_die.sa_sigaction = sigdie_handler; - act_die.sa_flags = SA_SIGINFO; + act_die.sa_flags = SA_SIGINFO | SA_RESETHAND; if (sigaction(SIGILL, &act_die, NULL) < 0) { jl_errorf("fatal error: sigaction: %s", strerror(errno)); } @@ -823,7 +836,7 @@ void jl_install_default_signal_handlers(void) jl_errorf("fatal error: sigaction: %s", strerror(errno)); } // need to ensure the following signals are not SIG_IGN, even though they will be blocked - act_die.sa_flags = SA_SIGINFO | SA_RESTART; + act_die.sa_flags = SA_SIGINFO | SA_RESTART | SA_RESETHAND; #if defined(HAVE_ITIMER) if (sigaction(SIGPROF, &act_die, NULL) < 0) { jl_errorf("fatal error: sigaction: %s", strerror(errno)); diff --git a/src/signals-win.c b/src/signals-win.c index c871c59aa1316..ace5a178d483a 100644 --- a/src/signals-win.c +++ b/src/signals-win.c @@ -92,7 +92,7 @@ void __cdecl crt_sig_handler(int sig, int num) RtlCaptureContext(&Context); if (sig == SIGILL) jl_show_sigill(&Context); - jl_critical_error(sig, &Context, ptls->bt_data, &ptls->bt_size); + jl_critical_error(sig, &Context); raise(sig); } } @@ -309,8 +309,7 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo) jl_safe_printf(" at 0x%Ix -- ", (size_t)ExceptionInfo->ExceptionRecord->ExceptionAddress); jl_print_native_codeloc((uintptr_t)ExceptionInfo->ExceptionRecord->ExceptionAddress); - jl_critical_error(0, ExceptionInfo->ContextRecord, - ptls->bt_data, &ptls->bt_size); + jl_critical_error(0, ExceptionInfo->ContextRecord); static int recursion = 0; if (recursion++) exit(1); @@ -384,10 +383,12 @@ JL_DLLEXPORT int jl_profile_start_timer(void) { if (hBtThread == NULL) { - if (MMSYSERR_NOERROR != timeGetDevCaps(&timecaps, sizeof(timecaps))) { + TIMECAPS _timecaps; + if (MMSYSERR_NOERROR != timeGetDevCaps(&_timecaps, sizeof(_timecaps))) { fputs("failed to get timer resolution", stderr); return -2; } + timecaps = _timecaps; hBtThread = CreateThread( NULL, // default security attributes diff --git a/src/stackwalk.c b/src/stackwalk.c index 9150d48b29765..52d95e8fe8de7 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -698,7 +698,10 @@ JL_DLLEXPORT void jl_gdblookup(void* ip) // Print backtrace for current exception in catch block JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT { - jl_excstack_t *s = jl_get_ptls_states()->current_task->excstack; + jl_ptls_t ptls = jl_get_ptls_states(); + if (ptls->current_task == NULL) + return; + jl_excstack_t *s = ptls->current_task->excstack; if (!s) return; size_t bt_size = jl_excstack_bt_size(s, s->top); diff --git a/src/task.c b/src/task.c index 4d94d90fc62b3..d65e7412bb7ab 100644 --- a/src/task.c +++ b/src/task.c @@ -647,8 +647,9 @@ JL_DLLEXPORT void jl_rethrow(void) // Special case throw for errors detected inside signal handlers. This is not // (cannot be) called directly in the signal handler itself, but is returned to // after the signal handler exits. -JL_DLLEXPORT void jl_sig_throw(void) +JL_DLLEXPORT void JL_NORETURN jl_sig_throw(void) { +CFI_NORETURN jl_ptls_t ptls = jl_get_ptls_states(); jl_value_t *e = ptls->sig_exception; ptls->sig_exception = NULL; @@ -800,14 +801,7 @@ void jl_init_tasks(void) JL_GC_DISABLED STATIC_OR_JS void NOINLINE JL_NORETURN start_task(void) { -#ifdef _OS_WINDOWS_ -#if defined(_CPU_X86_64_) - // install the unhandled exception hanlder at the top of our stack - // to call directly into our personality handler - asm volatile ("\t.seh_handler __julia_personality, @except\n\t.text"); -#endif -#endif - +CFI_NORETURN // this runs the first time we switch to a task sanitizer_finish_switch_fiber(); jl_ptls_t ptls = jl_get_ptls_states();