Skip to content

Commit

Permalink
handle some cases of null Task in segv handling (#42836)
Browse files Browse the repository at this point in the history
Also ensure we do not use round-robin sampling when printing a critical
error, only for profiling.
  • Loading branch information
vtjnash authored Nov 1, 2021
1 parent b55fb5f commit ad607a2
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 76 deletions.
2 changes: 1 addition & 1 deletion src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1829,7 +1829,7 @@ static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args,
jl_static_show((JL_STREAM*)STDERR_FILENO,args); jl_printf((JL_STREAM*)STDERR_FILENO,"\n");
jl_ptls_t ptls = jl_current_task->ptls;
ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0);
jl_critical_error(0, NULL);
jl_critical_error(0, NULL, jl_current_task);
abort();
}
// not reached
Expand Down
2 changes: 1 addition & 1 deletion src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1013,7 +1013,7 @@ size_t rec_backtrace_ctx(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t
size_t rec_backtrace_ctx_dwarf(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t *ctx, jl_gcframe_t *pgcstack) JL_NOTSAFEPOINT;
#endif
JL_DLLEXPORT jl_value_t *jl_get_backtrace(void);
void jl_critical_error(int sig, bt_context_t *context);
void jl_critical_error(int sig, bt_context_t *context, jl_task_t *ct);
JL_DLLEXPORT void jl_raise_debugger(void);
int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT;
JL_DLLEXPORT void jl_gdblookup(void* ip) JL_NOTSAFEPOINT;
Expand Down
19 changes: 10 additions & 9 deletions src/signal-handling.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,19 +241,20 @@ void jl_show_sigill(void *_ctx)
}

// what to do on a critical error on a thread
void jl_critical_error(int sig, bt_context_t *context)
void jl_critical_error(int sig, bt_context_t *context, jl_task_t *ct)
{

jl_task_t *ct = jl_current_task;
jl_bt_element_t *bt_data = ct->ptls->bt_data;
size_t *bt_size = &ct->ptls->bt_size;
size_t i, n = *bt_size;
jl_bt_element_t *bt_data = ct ? ct->ptls->bt_data : NULL;
size_t *bt_size = ct ? &ct->ptls->bt_size : NULL;
size_t i, n = ct ? *bt_size : 0;
if (sig) {
// kill this task, so that we cannot get back to it accidentally (via an untimely ^C or jlbacktrace in jl_exit)
jl_set_safe_restore(NULL);
ct->gcstack = NULL;
ct->eh = NULL;
ct->excstack = NULL;
if (ct) {
ct->gcstack = NULL;
ct->eh = NULL;
ct->excstack = NULL;
}
#ifndef _OS_WINDOWS_
sigset_t sset;
sigemptyset(&sset);
Expand All @@ -277,7 +278,7 @@ void jl_critical_error(int sig, bt_context_t *context)
jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig));
}
jl_safe_printf("in expression starting at %s:%d\n", jl_filename, jl_lineno);
if (context) {
if (context && ct) {
// Must avoid extended backtrace frames here unless we're sure bt_data
// is properly rooted.
*bt_size = n = rec_backtrace_ctx(bt_data, JL_MAX_BT_SIZE, context, NULL);
Expand Down
2 changes: 1 addition & 1 deletion src/signals-mach.c
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ static void jl_try_deliver_sigint(void)
static void JL_NORETURN jl_exit_thread0_cb(int exitstate)
{
CFI_NORETURN
jl_critical_error(exitstate - 128, NULL);
jl_critical_error(exitstate - 128, NULL, jl_current_task);
jl_exit(exitstate);
}

Expand Down
123 changes: 62 additions & 61 deletions src/signals-unix.c
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ static void sigdie_handler(int sig, siginfo_t *info, void *context)
uv_tty_reset_mode();
if (sig == SIGILL)
jl_show_sigill(context);
jl_critical_error(sig, jl_to_bt_context(context));
jl_critical_error(sig, jl_to_bt_context(context), jl_get_current_task());
if (sig != SIGSEGV &&
sig != SIGBUS &&
sig != SIGILL) {
Expand Down Expand Up @@ -410,7 +410,7 @@ CFI_NORETURN
// (unavoidable due to its async nature).
// Try harder to exit each time if we get multiple exit requests.
if (thread0_exit_count <= 1) {
jl_critical_error(thread0_exit_state - 128, NULL);
jl_critical_error(thread0_exit_state - 128, NULL, jl_current_task);
jl_exit(thread0_exit_state);
}
else if (thread0_exit_count == 2) {
Expand Down Expand Up @@ -747,71 +747,72 @@ static void *signal_listener(void *arg)
unw_context_t *signal_context;
// sample each thread, round-robin style in reverse order
// (so that thread zero gets notified last)
if (critical || profile)
if (critical || profile) {
jl_lock_profile();
jl_shuffle_int_array_inplace(profile_round_robin_thread_order, jl_n_threads, &profile_cong_rng_seed);
for (int idx = jl_n_threads; idx-- > 0; ) {
// Stop the threads in the random round-robin order.
int i = profile_round_robin_thread_order[idx];
// notify thread to stop
jl_thread_suspend_and_get_state(i, &signal_context);

// do backtrace on thread contexts for critical signals
// this part must be signal-handler safe
if (critical) {
bt_size += rec_backtrace_ctx(bt_data + bt_size,
JL_MAX_BT_SIZE / jl_n_threads - 1,
signal_context, NULL);
bt_data[bt_size++].uintptr = 0;
}

// do backtrace for profiler
if (profile && running) {
if (jl_profile_is_buffer_full()) {
// Buffer full: Delete the timer
jl_profile_stop_timer();
if (!critical)
jl_shuffle_int_array_inplace(profile_round_robin_thread_order, jl_n_threads, &profile_cong_rng_seed);
for (int idx = jl_n_threads; idx-- > 0; ) {
// Stop the threads in the random round-robin order.
int i = critical ? idx : profile_round_robin_thread_order[idx];
// notify thread to stop
jl_thread_suspend_and_get_state(i, &signal_context);

// do backtrace on thread contexts for critical signals
// this part must be signal-handler safe
if (critical) {
bt_size += rec_backtrace_ctx(bt_data + bt_size,
JL_MAX_BT_SIZE / jl_n_threads - 1,
signal_context, NULL);
bt_data[bt_size++].uintptr = 0;
}
else {
// unwinding can fail, so keep track of the current state
// and restore from the SEGV handler if anything happens.
jl_jmp_buf *old_buf = jl_get_safe_restore();
jl_jmp_buf buf;

jl_set_safe_restore(&buf);
if (jl_setjmp(buf, 0)) {
jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
} else {
// Get backtrace data
bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
bt_size_max - bt_size_cur - 1, signal_context, NULL);
}
jl_set_safe_restore(old_buf);

jl_ptls_t ptls = jl_all_tls_states[i];

// store threadid but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;

// store task id
bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);

// store cpu cycle clock
bt_data_prof[bt_size_cur++].uintptr = cycleclock();

// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;

// Mark the end of this block with two 0's
bt_data_prof[bt_size_cur++].uintptr = 0;
bt_data_prof[bt_size_cur++].uintptr = 0;
// do backtrace for profiler
if (profile && running) {
if (jl_profile_is_buffer_full()) {
// Buffer full: Delete the timer
jl_profile_stop_timer();
}
else {
// unwinding can fail, so keep track of the current state
// and restore from the SEGV handler if anything happens.
jl_jmp_buf *old_buf = jl_get_safe_restore();
jl_jmp_buf buf;

jl_set_safe_restore(&buf);
if (jl_setjmp(buf, 0)) {
jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
} else {
// Get backtrace data
bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
bt_size_max - bt_size_cur - 1, signal_context, NULL);
}
jl_set_safe_restore(old_buf);

jl_ptls_t ptls = jl_all_tls_states[i];

// store threadid but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;

// store task id
bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);

// store cpu cycle clock
bt_data_prof[bt_size_cur++].uintptr = cycleclock();

// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;

// Mark the end of this block with two 0's
bt_data_prof[bt_size_cur++].uintptr = 0;
bt_data_prof[bt_size_cur++].uintptr = 0;
}
}
}

// notify thread to resume
jl_thread_resume(i, sig);
}
if (critical || profile)
// notify thread to resume
jl_thread_resume(i, sig);
}
jl_unlock_profile();
}
#ifndef HAVE_MACH
if (profile && running) {
#if defined(HAVE_TIMER)
Expand Down
7 changes: 4 additions & 3 deletions src/signals-win.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ void __cdecl crt_sig_handler(int sig, int num)
RtlCaptureContext(&Context);
if (sig == SIGILL)
jl_show_sigill(&Context);
jl_critical_error(sig, &Context);
jl_critical_error(sig, &Context, jl_get_current_task());
raise(sig);
}
}
Expand Down Expand Up @@ -225,7 +225,8 @@ static BOOL WINAPI sigint_handler(DWORD wsig) //This needs winapi types to guara

LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
{
jl_ptls_t ptls = jl_current_task->ptls;
jl_task_t *ct = jl_current_task;
jl_ptls_t ptls = ct->ptls;
if (ExceptionInfo->ExceptionRecord->ExceptionFlags == 0) {
switch (ExceptionInfo->ExceptionRecord->ExceptionCode) {
case EXCEPTION_INT_DIVIDE_BY_ZERO:
Expand Down Expand Up @@ -312,7 +313,7 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
jl_safe_printf(" at 0x%Ix -- ", (size_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);
jl_print_native_codeloc((uintptr_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);

jl_critical_error(0, ExceptionInfo->ContextRecord);
jl_critical_error(0, ExceptionInfo->ContextRecord, ct);
static int recursion = 0;
if (recursion++)
exit(1);
Expand Down

0 comments on commit ad607a2

Please sign in to comment.