Skip to content

Commit

Permalink
fix always_copy_stacks bugs while ensuring unwinding should not segfault during copy_stacks
Browse files Browse the repository at this point in the history
  • Loading branch information
vtjnash committed Aug 19, 2024
1 parent 6da595c commit e8f7d12
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 133 deletions.
3 changes: 1 addition & 2 deletions src/julia_threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,9 @@ typedef struct _jl_tls_states_t {
struct _jl_task_t *previous_task;
struct _jl_task_t *root_task;
struct _jl_timing_block_t *timing_stack;
// This is the location of our copy_stack
void *stackbase;
size_t stacksize;
// This is needed to support always_copy_stacks:
jl_stack_context_t copy_stack_ctx;
// Temp storage for exception thrown in signal handler. Not rooted.
struct _jl_value_t *sig_exception;
// Temporary backtrace buffer. Scanned for gc roots when bt_size > 0.
Expand Down
195 changes: 64 additions & 131 deletions src/task.c
Original file line number Diff line number Diff line change
Expand Up @@ -209,73 +209,97 @@ static void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt
jl_gc_wb_back(lastt);
}

JL_NO_ASAN static void NOINLINE JL_NORETURN restore_stack(jl_task_t *t, jl_ptls_t ptls, char *p)
JL_NO_ASAN static void NOINLINE JL_NORETURN restore_stack(jl_ucontext_t *t, jl_ptls_t ptls, char *p)
{
size_t nb = t->ctx.copy_stack;
size_t nb = t->copy_stack;
char *_x = (char*)ptls->stackbase - nb;
if (!p) {
// switch to a stackframe that's beyond the bounds of the last switch
p = _x;
if ((char*)&_x > _x) {
p = (char*)alloca((char*)&_x - _x);
// switch to a stackframe that's well beyond the bounds of the last switch
p = _x - 4096;
if ((char*)&_x > p) {
p = (char*)alloca((char*)&_x - p);
}
restore_stack(t, ptls, p); // pass p to ensure the compiler can't tailcall this or avoid the alloca
}
void *_y = t->ctx.stkbuf;
void *_y = t->stkbuf;
assert(_x != NULL && _y != NULL);
#if defined(_OS_WINDOWS_)
#if defined(_CPU_X86_) || defined(_CPU_X86_64_)
void *volatile *return_address = (void *volatile *)__builtin_frame_address(0) + 1;
assert(*return_address == __builtin_return_address(0));
*return_address = NULL;
#else
#warning profiling may segfault in this build
#endif
#else
CFI_NORETURN
#endif
memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe

#if defined(_OS_WINDOWS_)
jl_setcontext(t->ctx.copy_ctx);
jl_setcontext(t->copy_ctx);
#else
jl_longjmp(t->ctx.copy_ctx->uc_mcontext, 1);
jl_longjmp(t->copy_ctx->uc_mcontext, 1);
#endif
abort(); // unreachable
}

JL_NO_ASAN static void restore_stack2(jl_task_t *t, jl_ptls_t ptls, jl_task_t *lastt)
JL_NO_ASAN static void restore_stack2(jl_ucontext_t *t, jl_ptls_t ptls, jl_ucontext_t *lastt)
{
assert(t->ctx.copy_stack && !lastt->ctx.copy_stack);
size_t nb = t->ctx.copy_stack;
assert(t->copy_stack && !lastt->copy_stack);
size_t nb = t->copy_stack;
if (nb > 1) {
char *_x = (char*)ptls->stackbase - nb;
void *_y = t->ctx.stkbuf;
void *_y = t->stkbuf;
assert(_x != NULL && _y != NULL);
memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb);
}
#if defined(_OS_WINDOWS_)
// jl_swapcontext and setjmp are the same on Windows, so we can just use swapcontext directly
tsan_switch_to_ctx(&t->ctx);
jl_swapcontext(lastt->ctx.ctx, t->ctx.copy_ctx);
tsan_switch_to_ctx(t);
jl_swapcontext(lastt->ctx, t->copy_ctx);
#else
#if defined(JL_HAVE_UNW_CONTEXT)
volatile int returns = 0;
int r = unw_getcontext(lastt->ctx.ctx);
int r = unw_getcontext(lastt->ctx);
if (++returns == 2) // r is garbage after the first return
return;
if (r != 0 || returns != 1)
abort();
#elif defined(JL_HAVE_ASM)
if (jl_setjmp(lastt->ctx.ctx->uc_mcontext, 0))
if (jl_setjmp(lastt->ctx->uc_mcontext, 0))
return;
#else
#error COPY_STACKS is incompatible with this platform
#endif
tsan_switch_to_ctx(&t->ctx);
jl_longjmp(t->ctx.copy_ctx->uc_mcontext, 1);
tsan_switch_to_ctx(t);
jl_longjmp(t->copy_ctx->uc_mcontext, 1);
#endif
}

// Launch task `t` via jl_start_fiber_set, first making sure this function's
// own frame sits safely clear of the stack region that the fiber start will
// reuse. Does not return.
//
// p: recursion sentinel — callers pass NULL; the function then re-invokes
//    itself once with an alloca'd pointer so the compiler can neither
//    tail-call it nor elide the stack adjustment.
JL_NO_ASAN static void NOINLINE restore_stack3(jl_ucontext_t *t, jl_ptls_t ptls, char *p)
{
#if !defined(JL_HAVE_ASM)
    char *_x = (char*)ptls->stackbase;
    if (!p) {
        // switch to a stackframe that's beyond the bounds of the last switch
        p = _x - 4096;
        if ((char*)&_x > p) {
            p = (char*)alloca((char*)&_x - p);
        }
        restore_stack3(t, ptls, p); // pass p to ensure the compiler can't tailcall this or avoid the alloca
    }
#endif
#if defined(_CPU_X86_) || defined(_CPU_X86_64_)
    // Null out our own saved return address (the slot just above the frame
    // pointer) so an unwinder/profiler walking this soon-to-be-clobbered
    // stack terminates here instead of chasing stale frames.
    void *volatile *return_address = (void *volatile *)__builtin_frame_address(0) + 1;
    assert(*return_address == __builtin_return_address(0));
    *return_address = NULL;
#else
#warning profiling may segfault in this build
#endif
    CFI_NORETURN
    tsan_switch_to_ctx(t);
    jl_start_fiber_set(t); // (doesn't return)
    abort();
}

#endif

/* Rooted by the base module */
Expand Down Expand Up @@ -523,7 +547,7 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
}
if (!killed && !lastt->ctx.copy_stack) {
sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
restore_stack2(t, ptls, lastt); // half jl_swap_fiber and half restore_stack
restore_stack2(&t->ctx, ptls, &lastt->ctx); // half jl_swap_fiber and half restore_stack
}
else {
tsan_switch_to_ctx(&t->ctx);
Expand All @@ -536,11 +560,11 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
}

if (lastt->ctx.copy_stack) {
restore_stack(t, ptls, NULL); // (doesn't return)
restore_stack(&t->ctx, ptls, NULL); // (doesn't return)
abort();
}
else {
restore_stack(t, ptls, (char*)1); // (doesn't return)
restore_stack(&t->ctx, ptls, (char*)1); // (doesn't return)
abort();
}
}
Expand Down Expand Up @@ -594,37 +618,25 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
if (t->ctx.copy_stack) {
#ifdef COPY_STACKS
tsan_switch_to_ctx(&t->ctx);
if (killed) {
sanitizer_start_switch_fiber_killed(ptls, &t->ctx);
tsan_destroy_ctx(ptls, &lastt->ctx);
}
else {
sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
}
// create a temporary non-copy_stack context for starting this fiber
jl_ucontext_t ctx = t->ctx;
ctx.ctx = NULL;
ctx.stkbuf = (char*)ptls->stackbase - ptls->stacksize;
ctx.bufsz = ptls->stacksize;
ctx.copy_stack = 0;
ctx.started = 0;
if (always_copy_stacks) {
if (lastt->ctx.copy_stack || killed) {
#if defined(_OS_WINDOWS_)
jl_setcontext(&ptls->copy_stack_ctx);
#else
jl_longjmp(ptls->copy_stack_ctx.uc_mcontext, 1);
#endif
abort(); // unreachable
}
else {
t->ctx.copy_ctx = &ptls->copy_stack_ctx;
restore_stack2(t, ptls, lastt);
}
if (killed) {
sanitizer_start_switch_fiber_killed(ptls, &t->ctx);
tsan_destroy_ctx(ptls, &lastt->ctx);
if (lastt->ctx.copy_stack)
restore_stack3(&ctx, ptls, NULL); // (doesn't return)
else
jl_start_fiber_set(&ctx);
abort();
}
else if (lastt->ctx.copy_stack || killed) {
// copy_stack resumes at the jl_setjmp earlier in this function, so don't swap here
jl_start_fiber_set(&ctx); // (doesn't return)
sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
if (lastt->ctx.copy_stack) {
restore_stack3(&ctx, ptls, NULL); // (doesn't return)
abort();
}
else {
Expand Down Expand Up @@ -1233,7 +1245,6 @@ CFI_NORETURN
#endif
jl_ptls_t ptls = ct->ptls;
sanitizer_finish_switch_fiber(&ptls->previous_task->ctx, &ct->ctx);
ct->ctx.ctx = NULL;
_start_task();
}

Expand All @@ -1247,6 +1258,7 @@ CFI_NORETURN
#else
jl_task_t *ct = jl_current_task;
#endif
ct->ctx.ctx = NULL;
jl_ptls_t ptls = ct->ptls;
jl_value_t *res;
assert(ptls->finalizers_inhibited == 0);
Expand Down Expand Up @@ -1468,6 +1480,7 @@ JL_NO_ASAN static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *
}
JL_NO_ASAN static void jl_start_fiber_set(jl_ucontext_t *t)
{
CFI_NORETURN
char *stk = (char*)t->stkbuf;
size_t ssize = t->bufsz;
uintptr_t fn = (uintptr_t)&start_task;
Expand Down Expand Up @@ -1538,81 +1551,6 @@ JL_NO_ASAN static void jl_start_fiber_set(jl_ucontext_t *t)
}
#endif

// One-time setup for always_copy_stacks mode: capture a jump buffer in
// *copy_stack_ctx and return normally. When the saved context is later
// resumed (jl_setjmp returns nonzero), jump to start_task on a fake,
// unwinder-terminated stack frame via per-CPU asm (or call it directly
// when no asm variant is available).
static void NOINLINE init_always_copy_stacks(jl_stack_context_t *copy_stack_ctx)
{
    CFI_NORETURN // Try to prevent the unwinder from seeing our caller before we get a chance to
                 // destroy the frame next time we return here. If JL_HAS_ASM is defined, it
                 // actually might be better to call jl_start_fiber_set with appropriate values
                 // for the stack, so that we don't have to temporarily run with the undefined
                 // stack contents present after the jl_setjmp returns 1.
    if (!jl_setjmp(copy_stack_ctx->uc_mcontext, 0))
        return; // first pass: context recorded, nothing else to do
#if defined(JL_HAVE_ASM) || defined(_OS_WINDOWS_)
    uintptr_t fn = (uintptr_t)&start_task; // sanitizer_finish_switch_fiber is part of start_task
#ifdef _CPU_X86_64_
    asm volatile (
        " movq %0, %%rax;\n"
        " xorq %%rbp, %%rbp;\n"
        " push %%rbp;\n" // instead of RSP
        " jmpq *%%rax;\n" // call `fn` with fake stack frame
        " ud2"
        : : "r"(fn) : "memory" );
#elif defined(_CPU_X86_)
    asm volatile (
        " movl %0, %%eax;\n"
        " xorl %%ebp, %%ebp;\n"
        " push %%ebp;\n" // instead of ESP
        " jmpl *%%eax;\n" // call `fn` with fake stack frame
        " ud2"
        : : "r"(fn) : "memory" );
#elif defined(_CPU_AARCH64_)
    asm volatile(
        " mov x29, xzr;\n" // Clear link register (x29) and frame pointer
        " mov x30, xzr;\n" // (x30) to terminate unwinder.
        " br %0;\n" // call `fn` with fake stack frame
        " brk #0x1" // abort
        : : "r"(fn) : "memory" );
#elif defined(_CPU_ARM_)
    // A "i" constraint on `&start_task` works only on clang and not on GCC.
    asm(" mov lr, #0;\n" // Clear link register (lr) and frame pointer
        " mov fp, #0;\n" // (fp) to terminate unwinder.
        " bx %0;\n" // call `fn` with fake stack frame. While `bx` can change
                    // the processor mode to thumb, this will never happen
                    // because all our addresses are word-aligned.
        " udf #0" // abort
        : : "r"(fn) : "memory" );
#elif defined(_CPU_PPC64_)
    // N.B.: There are two iterations of the PPC64 ABI.
    // v2 is current and used here. Make sure you have the
    // correct version of the ABI reference when working on this code.
    // NOTE(review): the asm below references operand %1, but only one input
    // operand ("r"(fn), i.e. %0) is listed — a second operand may have been
    // lost in transcription; confirm against upstream before relying on this.
    asm volatile(
        // Move stack (-0x30 for initial stack frame) to stack pointer
        " addi 1, 1, -0x30;\n"
        // Build stack frame
        // Skip local variable save area
        " std 2, 0x28(1);\n" // Save TOC
        // Clear link editor/compiler words
        " std 0, 0x20(1);\n"
        " std 0, 0x18(1);\n"
        // Clear LR/CR save area
        " std 0, 0x10(1);\n"
        " std 0, 0x8(1);\n"
        " std 0, 0x0(1); \n" // Clear back link to terminate unwinder
        " mtlr 0; \n" // Clear link register
        " mr 12, %1; \n" // Set up target global entry point
        " mtctr 12; \n" // Move jump target to counter register
        " bctr; \n" // branch to counter (lr update disabled)
        " trap; \n"
        : : "r"(fn) : "memory");
#else
#error JL_HAVE_ASM defined but not implemented for this CPU type
#endif
    __builtin_unreachable();
#else
    start_task();
#endif
}

// Initialize a root task using the given stack.
jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
{
Expand Down Expand Up @@ -1697,13 +1635,8 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
if (always_copy_stacks) {
// when this is set, we will attempt to corrupt the process stack to switch tasks,
// although this is unreliable, and thus not recommended
ptls->stackbase = stack_hi;
ptls->stacksize = ssize;
#ifdef _OS_WINDOWS_
ptls->copy_stack_ctx.uc_stack.ss_sp = stack_hi;
ptls->copy_stack_ctx.uc_stack.ss_size = ssize;
#endif
init_always_copy_stacks(&ptls->copy_stack_ctx);
ptls->stackbase = jl_get_frame_addr();
ptls->stacksize = (char*)ptls->stackbase - (char*)stack_lo;
}
else {
size_t bufsz = JL_STACK_SIZE;
Expand Down

0 comments on commit e8f7d12

Please sign in to comment.