Skip to content

Commit

Permalink
handle some cases of null Task in segv handling
Browse files Browse the repository at this point in the history
Also ensure we do not use round-robin sampling when printing a critical
error, only for profiling.
  • Loading branch information
vtjnash committed Oct 29, 2021
1 parent c054dbc commit 36399c7
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 76 deletions.
2 changes: 1 addition & 1 deletion src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1829,7 +1829,7 @@ static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args,
jl_static_show((JL_STREAM*)STDERR_FILENO,args); jl_printf((JL_STREAM*)STDERR_FILENO,"\n");
jl_ptls_t ptls = jl_current_task->ptls;
ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0);
jl_critical_error(0, NULL);
jl_critical_error(0, NULL, jl_current_task);
abort();
}
// not reached
Expand Down
2 changes: 1 addition & 1 deletion src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1013,7 +1013,7 @@ size_t rec_backtrace_ctx(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t
size_t rec_backtrace_ctx_dwarf(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t *ctx, jl_gcframe_t *pgcstack) JL_NOTSAFEPOINT;
#endif
JL_DLLEXPORT jl_value_t *jl_get_backtrace(void);
void jl_critical_error(int sig, bt_context_t *context);
void jl_critical_error(int sig, bt_context_t *context, jl_task_t *ct);
JL_DLLEXPORT void jl_raise_debugger(void);
int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT;
JL_DLLEXPORT void jl_gdblookup(void* ip) JL_NOTSAFEPOINT;
Expand Down
19 changes: 10 additions & 9 deletions src/signal-handling.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,19 +241,20 @@ void jl_show_sigill(void *_ctx)
}

// what to do on a critical error on a thread
void jl_critical_error(int sig, bt_context_t *context)
void jl_critical_error(int sig, bt_context_t *context, jl_task_t *ct)
{

jl_task_t *ct = jl_current_task;
jl_bt_element_t *bt_data = ct->ptls->bt_data;
size_t *bt_size = &ct->ptls->bt_size;
size_t i, n = *bt_size;
jl_bt_element_t *bt_data = ct ? ct->ptls->bt_data : NULL;
size_t *bt_size = ct ? &ct->ptls->bt_size : NULL;
size_t i, n = ct ? *bt_size : 0;
if (sig) {
// kill this task, so that we cannot get back to it accidentally (via an untimely ^C or jlbacktrace in jl_exit)
jl_set_safe_restore(NULL);
ct->gcstack = NULL;
ct->eh = NULL;
ct->excstack = NULL;
if (ct) {
ct->gcstack = NULL;
ct->eh = NULL;
ct->excstack = NULL;
}
#ifndef _OS_WINDOWS_
sigset_t sset;
sigemptyset(&sset);
Expand All @@ -277,7 +278,7 @@ void jl_critical_error(int sig, bt_context_t *context)
jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig));
}
jl_safe_printf("in expression starting at %s:%d\n", jl_filename, jl_lineno);
if (context) {
if (context && ct) {
// Must avoid extended backtrace frames here unless we're sure bt_data
// is properly rooted.
*bt_size = n = rec_backtrace_ctx(bt_data, JL_MAX_BT_SIZE, context, NULL);
Expand Down
2 changes: 1 addition & 1 deletion src/signals-mach.c
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ static void jl_try_deliver_sigint(void)
static void JL_NORETURN jl_exit_thread0_cb(int exitstate)
{
CFI_NORETURN
jl_critical_error(exitstate - 128, NULL);
jl_critical_error(exitstate - 128, NULL, jl_current_task);
jl_exit(exitstate);
}

Expand Down
123 changes: 62 additions & 61 deletions src/signals-unix.c
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ static void sigdie_handler(int sig, siginfo_t *info, void *context)
uv_tty_reset_mode();
if (sig == SIGILL)
jl_show_sigill(context);
jl_critical_error(sig, jl_to_bt_context(context));
jl_critical_error(sig, jl_to_bt_context(context), jl_get_current_task());
if (sig != SIGSEGV &&
sig != SIGBUS &&
sig != SIGILL) {
Expand Down Expand Up @@ -410,7 +410,7 @@ CFI_NORETURN
// (unavoidable due to its async nature).
// Try harder to exit each time if we get multiple exit requests.
if (thread0_exit_count <= 1) {
jl_critical_error(thread0_exit_state - 128, NULL);
jl_critical_error(thread0_exit_state - 128, NULL, jl_current_task);
jl_exit(thread0_exit_state);
}
else if (thread0_exit_count == 2) {
Expand Down Expand Up @@ -747,71 +747,72 @@ static void *signal_listener(void *arg)
unw_context_t *signal_context;
// sample each thread, round-robin style in reverse order
// (so that thread zero gets notified last)
if (critical || profile)
if (critical || profile) {
jl_lock_profile();
jl_shuffle_int_array_inplace(profile_round_robin_thread_order, jl_n_threads, &profile_cong_rng_seed);
for (int idx = jl_n_threads; idx-- > 0; ) {
// Stop the threads in the random round-robin order.
int i = profile_round_robin_thread_order[idx];
// notify thread to stop
jl_thread_suspend_and_get_state(i, &signal_context);

// do backtrace on thread contexts for critical signals
// this part must be signal-handler safe
if (critical) {
bt_size += rec_backtrace_ctx(bt_data + bt_size,
JL_MAX_BT_SIZE / jl_n_threads - 1,
signal_context, NULL);
bt_data[bt_size++].uintptr = 0;
}

// do backtrace for profiler
if (profile && running) {
if (jl_profile_is_buffer_full()) {
// Buffer full: Delete the timer
jl_profile_stop_timer();
if (!critical)
jl_shuffle_int_array_inplace(profile_round_robin_thread_order, jl_n_threads, &profile_cong_rng_seed);
for (int idx = jl_n_threads; idx-- > 0; ) {
// Stop the threads in the random round-robin order.
int i = critical ? idx : profile_round_robin_thread_order[idx];
// notify thread to stop
jl_thread_suspend_and_get_state(i, &signal_context);

// do backtrace on thread contexts for critical signals
// this part must be signal-handler safe
if (critical) {
bt_size += rec_backtrace_ctx(bt_data + bt_size,
JL_MAX_BT_SIZE / jl_n_threads - 1,
signal_context, NULL);
bt_data[bt_size++].uintptr = 0;
}
else {
// unwinding can fail, so keep track of the current state
// and restore from the SEGV handler if anything happens.
jl_jmp_buf *old_buf = jl_get_safe_restore();
jl_jmp_buf buf;

jl_set_safe_restore(&buf);
if (jl_setjmp(buf, 0)) {
jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
} else {
// Get backtrace data
bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
bt_size_max - bt_size_cur - 1, signal_context, NULL);
}
jl_set_safe_restore(old_buf);

jl_ptls_t ptls = jl_all_tls_states[i];

// store threadid but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;

// store task id
bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);

// store cpu cycle clock
bt_data_prof[bt_size_cur++].uintptr = cycleclock();

// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;

// Mark the end of this block with two 0's
bt_data_prof[bt_size_cur++].uintptr = 0;
bt_data_prof[bt_size_cur++].uintptr = 0;
// do backtrace for profiler
if (profile && running) {
if (jl_profile_is_buffer_full()) {
// Buffer full: Delete the timer
jl_profile_stop_timer();
}
else {
// unwinding can fail, so keep track of the current state
// and restore from the SEGV handler if anything happens.
jl_jmp_buf *old_buf = jl_get_safe_restore();
jl_jmp_buf buf;

jl_set_safe_restore(&buf);
if (jl_setjmp(buf, 0)) {
jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
} else {
// Get backtrace data
bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
bt_size_max - bt_size_cur - 1, signal_context, NULL);
}
jl_set_safe_restore(old_buf);

jl_ptls_t ptls = jl_all_tls_states[i];

// store threadid but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;

// store task id
bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);

// store cpu cycle clock
bt_data_prof[bt_size_cur++].uintptr = cycleclock();

// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;

// Mark the end of this block with two 0's
bt_data_prof[bt_size_cur++].uintptr = 0;
bt_data_prof[bt_size_cur++].uintptr = 0;
}
}
}

// notify thread to resume
jl_thread_resume(i, sig);
}
if (critical || profile)
// notify thread to resume
jl_thread_resume(i, sig);
}
jl_unlock_profile();
}
#ifndef HAVE_MACH
if (profile && running) {
#if defined(HAVE_TIMER)
Expand Down
7 changes: 4 additions & 3 deletions src/signals-win.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ void __cdecl crt_sig_handler(int sig, int num)
RtlCaptureContext(&Context);
if (sig == SIGILL)
jl_show_sigill(&Context);
jl_critical_error(sig, &Context);
jl_critical_error(sig, &Context, jl_get_current_task());
raise(sig);
}
}
Expand Down Expand Up @@ -225,7 +225,8 @@ static BOOL WINAPI sigint_handler(DWORD wsig) //This needs winapi types to guara

LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
{
jl_ptls_t ptls = jl_current_task->ptls;
jl_task_t *ct = jl_current_task;
jl_ptls_t ptls = ct->ptls;
if (ExceptionInfo->ExceptionRecord->ExceptionFlags == 0) {
switch (ExceptionInfo->ExceptionRecord->ExceptionCode) {
case EXCEPTION_INT_DIVIDE_BY_ZERO:
Expand Down Expand Up @@ -312,7 +313,7 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
jl_safe_printf(" at 0x%Ix -- ", (size_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);
jl_print_native_codeloc((uintptr_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);

jl_critical_error(0, ExceptionInfo->ContextRecord);
jl_critical_error(0, ExceptionInfo->ContextRecord, ct);
static int recursion = 0;
if (recursion++)
exit(1);
Expand Down

0 comments on commit 36399c7

Please sign in to comment.