Skip to content

Commit

Permalink
use sigsetjmp / siglongjmp instead so that signal mask behavior is de…
Browse files Browse the repository at this point in the history
…fined consistently across platforms (back port from loladiro fork). should help with #1216, but doesn't yet fix it
  • Loading branch information
vtjnash committed Sep 4, 2012
1 parent 656e0d3 commit b842bf4
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 16 deletions.
6 changes: 6 additions & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ void segv_handler(int sig, siginfo_t *info, void *context)
volatile sig_atomic_t jl_signal_pending = 0;
volatile sig_atomic_t jl_defer_signal = 0;

/* Install an empty signal mask via sigprocmask(SIG_SETMASK, ...), so that
 * no signal is left blocked by the mask afterwards. The previous mask is
 * not retrieved (the old-set argument is a null pointer). */
void restore_signals()
{
    sigset_t unblocked;

    sigemptyset(&unblocked);
    sigprocmask(SIG_SETMASK, &unblocked, 0);
}
void sigint_handler(int sig, siginfo_t *info, void *context)
{
sigset_t sset;
Expand Down Expand Up @@ -124,6 +129,7 @@ extern jmp_buf * volatile jl_jmp_target;

void julia_init(char *imageFile)
{
(void)uv_default_loop(); restore_signals(); //XXX: this needs to be early in load process
jl_page_size = sysconf(_SC_PAGESIZE);
jl_find_stack_bottom();
jl_dl_handle = jl_load_dynamic_library(NULL);
Expand Down
7 changes: 6 additions & 1 deletion src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -1066,10 +1066,15 @@ static inline void jl_eh_restore_state(jl_savestate_t *ss)
DLLEXPORT void jl_enter_handler(jl_savestate_t *ss, jmp_buf *handlr);
DLLEXPORT void jl_pop_handler(int n);

/* Windows fallback: when __WIN32__ is defined, sigsetjmp/siglongjmp are
 * mapped onto plain setjmp/longjmp. The second argument passed to
 * sigsetjmp (b) is discarded by the macro; siglongjmp forwards both of
 * its arguments unchanged. */
#if defined(__WIN32__)
#define sigsetjmp(a,b) setjmp(a)
#define siglongjmp(a,b) longjmp(a,b)
#endif

/* JL_TRY opens a guarded region: it declares the locals i__tr, i__ca, __ss
 * and __handlr, registers __handlr with jl_enter_handler(), and then runs
 * the statement that follows the macro exactly once (the for loop clears
 * i__tr after the first pass). jl_eh_restore_state(&__ss) is called in the
 * loop's increment step, i.e. after the body completes normally.
 *
 * NOTE(review): two consecutive `if` lines (setjmp and sigsetjmp) appear
 * below. This looks like the removed and the added line of a diff hunk
 * shown together -- confirm that only the sigsetjmp line is intended. */
#define JL_TRY \
int i__tr, i__ca; jl_savestate_t __ss; jmp_buf __handlr; \
jl_enter_handler(&__ss, &__handlr); \
if (!setjmp(__handlr)) \
if (!sigsetjmp(__handlr,1)) \
for (i__tr=1; i__tr; i__tr=0, jl_eh_restore_state(&__ss))

#define JL_EH_POP() jl_eh_restore_state(&__ss)
Expand Down
27 changes: 12 additions & 15 deletions src/task.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
*/
#include <stdlib.h>
#include <string.h>
#include <setjmp.h>
#include <assert.h>
#include <sys/mman.h>
#include <signal.h>
Expand All @@ -17,9 +16,7 @@
#include <execinfo.h>
#elif defined(__WIN32__)
#include <Winbase.h>
#include <setjmp.h>
#define sigsetjmp(a,b) setjmp(a)
#define siglongjmp(a,b) longjmp(a,b)
#include <malloc.h>
#else
// This gives unwind only local unwinding options ==> faster code
#define UNW_LOCAL_ONLY
Expand Down Expand Up @@ -57,9 +54,9 @@ static void probe(struct _probe_data *p)
{
p->prior_local = p->probe_local;
p->probe_local = (intptr_t)&p;
setjmp( *(p->ref_probe) );
sigsetjmp( *(p->ref_probe), 1 );
p->ref_probe = &p->probe_env;
setjmp( p->probe_sameAR );
sigsetjmp( p->probe_sameAR, 1 );
boundhigh(p);
}

Expand Down Expand Up @@ -179,7 +176,7 @@ void __attribute__((noinline)) restore_stack(jl_task_t *t, jmp_buf *where, char
if (t->stkbuf != NULL) {
memcpy(_x, t->stkbuf, t->ssize);
}
longjmp(*jl_jmp_target, 1);
siglongjmp(*jl_jmp_target, 1);
}

static void switch_stack(jl_task_t *t, jmp_buf *where)
Expand Down Expand Up @@ -207,7 +204,7 @@ static void ctx_switch(jl_task_t *t, jmp_buf *where)
/*
making task switching interrupt-safe is going to be challenging.
we need JL_SIGATOMIC_BEGIN in jl_enter_handler, and then
JL_SIGATOMIC_END after every JL_TRY setjmp that returns zero.
JL_SIGATOMIC_END after every JL_TRY sigsetjmp that returns zero.
also protect jl_eh_restore_state.
then we need JL_SIGATOMIC_BEGIN at the top of this function (ctx_switch).
the JL_SIGATOMIC_END at the end of this function handles the case
Expand All @@ -217,7 +214,7 @@ static void ctx_switch(jl_task_t *t, jmp_buf *where)
*IF AND ONLY IF* throwing the exception involved a task switch.
*/
//JL_SIGATOMIC_BEGIN();
if (!setjmp(jl_current_task->ctx)) {
if (!sigsetjmp(jl_current_task->ctx, 1)) {
#ifdef COPY_STACKS
jl_task_t *lastt = jl_current_task;
save_stack(lastt);
Expand All @@ -232,9 +229,9 @@ static void ctx_switch(jl_task_t *t, jmp_buf *where)

#ifdef COPY_STACKS
jl_jmp_target = where;
longjmp(lastt->base_ctx, 1);
siglongjmp(lastt->base_ctx, 1);
#else
longjmp(*where, 1);
siglongjmp(*where, 1);
#endif
}
//JL_SIGATOMIC_END();
Expand Down Expand Up @@ -365,7 +362,7 @@ static void start_task(jl_task_t *t)
local_sp += sizeof(jl_gcframe_t);
local_sp += 12*sizeof(void*);
t->stackbase = (void*)(local_sp + _frame_offset);
if (setjmp(t->base_ctx)) {
if (sigsetjmp(t->base_ctx, 1)) {
// we get here to remove our data from the process stack
switch_stack(jl_current_task, jl_jmp_target);
}
Expand Down Expand Up @@ -394,7 +391,7 @@ static void start_task(jl_task_t *t)
#ifndef COPY_STACKS
static void init_task(jl_task_t *t)
{
if (setjmp(t->ctx)) {
if (sigsetjmp(t->ctx, 1)) {
start_task(t);
}
// this runs when the task is created
Expand Down Expand Up @@ -534,8 +531,8 @@ void jl_raise(jl_value_t *e)
jl_exception_in_transit = bt;
JL_GC_POP();
}
if (jl_current_task == eh) {
longjmp(*eh->state.eh_ctx, 1);
if (jl_current_task == eh && eh->state.eh_ctx!=0) {
siglongjmp(*eh->state.eh_ctx, 1);
}
else {
if (eh->done || eh->state.eh_ctx==NULL) {
Expand Down

7 comments on commit b842bf4

@JeffBezanson
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't this be handled by resetting the signal mask in e.g. segv_handler like we do now? What else do we do that changes the signal mask? Can you confirm that this fixes something? If so, it would need to be changed in the code generator too.

@vtjnash
Copy link
Member Author

@vtjnash vtjnash commented on b842bf4 Sep 4, 2012

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently it probably could be. But sigsetjmp / siglongjmp are just a few more assembly instructions than their less well-defined (and older) counterparts setjmp / longjmp, so it's not really a significant change. I've contemplated altering the signal mask during atomic operations (including things like uv_write) that would be bad to have interrupted. I had trouble with libuv modifying the signal mask when I didn't expect it, so I wanted to at least keep those changes limited to a single context while we try to eliminate them. Tracking those is easier if the errors occur in the same threading context as the libuv calls.

Since readline (and other libraries / user code) may also alter the signal mask, I wasn't sure it was safe to simply clear them (plus that would require more function calls during a context switch).

But right, the code generator does this too in the try catch blocks! With that patched too, this change seems to fix #1216

@JeffBezanson
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I'm much more convinced if it actually fixes #1216 :)
Doesn't sigprocmask require a system call though?
We have JL_SIGATOMIC, which uses a lighter-weight mechanism than changing the signal mask.
Readline might mess with signals, but since the problem also happened in the basic repl can we conclude it is caused by libuv?

@JeffBezanson
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find try is about 3x slower, and throw+catch is about 50% slower. Of course try/catch is usually a tiny fraction of runtime, but it would be nice if we could skip the sigprocmask by stopping the problem at the source.

@vtjnash
Copy link
Member Author

@vtjnash vtjnash commented on b842bf4 Sep 4, 2012

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was almost definitely caused by libuv. The internal libev tries to install a signal mask for everything that it was not told to handle. The call to (void)uv_default_loop() forces this to happen early and at a known time, then restore_signals() only needs to be called once to clean up from that. I had tried to just completely disable it, but it is required for spawning child processes now. It may be possible to disable using hacks into the libev backend.

I think I will eventually try to start wrapping the calls to uv_write with JL_SIGATOMIC then (since uv will assert(0) when you try to re-enter that function -- i.e. if the user sends an interrupt while the program is pouring out text). sigprocmask did seem a bit too heavy for that, but it's possible other libraries will eventually want (or need) to make use of it. (I assume the syscall doesn't do much more than update a variable similar to JL_SIGATOMIC, and thus it's probably the slightly faster method when the signal is raised often relative to the calls to sigprocmask.)

@vtjnash
Copy link
Member Author

@vtjnash vtjnash commented on b842bf4 Sep 4, 2012

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I didn't see that they could be so different. On Mac, setjmp == sigsetjmp(*,1) but _setjmp == sigsetjmp(*,0)

I would like to keep using sigsetjmp since it is more consistently defined across architectures. However once we can get libuv to behave and not mess up the signal mask, we can change the second argument to 0 to skip the syscall for sigprocmask

@carlobaldassi
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I get

$ make
    CC src/jltypes.o
    CC src/gf.o
    FLISP src/julia_flisp.boot
    FLISP src/julia_flisp.boot.inc
    CC src/ast.o
    CC src/builtins.o
    CC src/module.o
    CC src/codegen.o
codegen.cpp: In function ‘void init_julia_llvm_env(llvm::Module*)’:
codegen.cpp:2245:63: error: ‘sigsetjmp’ was not declared in this scope
codegen.cpp:2245:63: note: suggested alternative:
/home/carlo/Programmi/julia/usr/include/llvm/Intrinsics.gen:1056:5: note:   ‘llvm::Intrinsic::sigsetjmp’

(even after make clean)
I'm on latest Ubuntu, 64bit, using gcc 4.6.
Anything I could do?

Please sign in to comment.