diff --git a/base/gcutils.jl b/base/gcutils.jl index 1280b4ab71afca..e74752f4f66262 100644 --- a/base/gcutils.jl +++ b/base/gcutils.jl @@ -65,8 +65,8 @@ end Immediately run finalizers registered for object `x`. """ -finalize(@nospecialize(o)) = ccall(:jl_finalize_th, Cvoid, (Ptr{Cvoid}, Any,), - Core.getptls(), o) +finalize(@nospecialize(o)) = ccall(:jl_finalize_th, Cvoid, (Any, Any,), + current_task(), o) """ Base.GC diff --git a/base/task.jl b/base/task.jl index 2c3e5c44e6d0b0..1ed68f70f7ab76 100644 --- a/base/task.jl +++ b/base/task.jl @@ -619,19 +619,22 @@ function enq_work(t::Task) # 1. The Task's stack is currently being used by the scheduler for a certain thread. # 2. There is only 1 thread. # 3. The multiq is full (can be fixed by making it growable). - if t.sticky || tid != 0 || Threads.nthreads() == 1 + if t.sticky || Threads.nthreads() == 1 if tid == 0 tid = Threads.threadid() ccall(:jl_set_task_tid, Cvoid, (Any, Cint), t, tid-1) end push!(Workqueues[tid], t) else - tid = 0 if ccall(:jl_enqueue_task, Cint, (Any,), t) != 0 # if multiq is full, give to a random thread (TODO fix) - tid = mod(time_ns() % Int, Threads.nthreads()) + 1 - ccall(:jl_set_task_tid, Cvoid, (Any, Cint), t, tid-1) + if tid == 0 + tid = mod(time_ns() % Int, Threads.nthreads()) + 1 + ccall(:jl_set_task_tid, Cvoid, (Any, Cint), t, tid-1) + end push!(Workqueues[tid], t) + else + tid = 0 end end ccall(:jl_wakeup_thread, Cvoid, (Int16,), (tid - 1) % Int16) diff --git a/cli/Makefile b/cli/Makefile index c38b9972a243b6..d4a1b2472c24d3 100644 --- a/cli/Makefile +++ b/cli/Makefile @@ -6,7 +6,7 @@ include $(JULIAHOME)/Make.inc include $(JULIAHOME)/deps/llvm-ver.make -HEADERS := $(addprefix $(SRCDIR)/,jl_exports.h loader.h) $(addprefix $(JULIAHOME)/src/,support/platform.h support/dirpath.h jl_exported_data.inc jl_exported_funcs.inc) +HEADERS := $(addprefix $(SRCDIR)/,jl_exports.h loader.h) $(addprefix $(JULIAHOME)/src/,julia_fasttls.h support/platform.h support/dirpath.h jl_exported_data.inc 
jl_exported_funcs.inc) LOADER_CFLAGS = $(JCFLAGS) -I$(BUILDROOT)/src -I$(JULIAHOME)/src -I$(JULIAHOME)/src/support -I$(build_includedir) -ffreestanding LOADER_LDFLAGS = $(JLDFLAGS) -ffreestanding -L$(build_shlibdir) -L$(build_libdir) @@ -116,7 +116,7 @@ endif $(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_DOBJS) $(SRCDIR)/list_strip_symbols.h | $(build_shlibdir) $(build_libdir) @$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -DLIBRARY_EXPORTS -shared $(DEBUGFLAGS) $(LIB_DOBJS) -o $@ \ $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(RPATH_LIB) $(call SONAME_FLAGS,libjulia-debug.$(JL_MAJOR_SHLIB_EXT))) - @$(INSTALL_NAME_CMD)libjulia-debug.$(SHLIB_EXT) $@.tmp + @$(INSTALL_NAME_CMD)libjulia-debug.$(SHLIB_EXT) $@ ifeq ($(OS), WINNT) @$(call PRINT_ANALYZE, $(OBJCOPY) $(build_libdir)/$(notdir $@).tmp.a $(STRIP_EXPORTED_FUNCS) $(build_libdir)/$(notdir $@).a && rm $(build_libdir)/$(notdir $@).tmp.a) endif diff --git a/cli/loader.h b/cli/loader.h index 5b1c10abc99982..6df1557ec2c26b 100644 --- a/cli/loader.h +++ b/cli/loader.h @@ -3,6 +3,7 @@ /* Bring in definitions for `_OS_X_`, `PATH_MAX` and `PATHSEPSTRING`, `jl_ptls_t`, etc... 
*/ #include "../src/support/platform.h" #include "../src/support/dirpath.h" +#include "../src/julia_fasttls.h" #ifdef _OS_WINDOWS_ /* We need to reimplement a bunch of standard library stuff on windows, @@ -43,15 +44,6 @@ #include #endif -// Borrow definitions from `julia.h` -#if defined(__GNUC__) -# define JL_CONST_FUNC __attribute__((const)) -#elif defined(_COMPILER_MICROSOFT_) -# define JL_CONST_FUNC __declspec(noalias) -#else -# define JL_CONST_FUNC -#endif - // Borrow definition from `support/dtypes.h` #ifdef _OS_WINDOWS_ # ifdef LIBRARY_EXPORTS @@ -68,12 +60,6 @@ # endif #define JL_HIDDEN __attribute__ ((visibility("hidden"))) #endif -#ifdef JL_DEBUG_BUILD -#define JL_NAKED __attribute__ ((naked,no_stack_protector)) -#else -#define JL_NAKED __attribute__ ((naked)) -#endif - /* * DEP_LIBS is our list of dependent libraries that must be loaded before `libjulia`. * Note that order matters, as each entry will be opened in-order. We define here a diff --git a/cli/loader_exe.c b/cli/loader_exe.c index e0cfdd93fbee71..dd7561b14d1f64 100644 --- a/cli/loader_exe.c +++ b/cli/loader_exe.c @@ -11,23 +11,14 @@ extern "C" { #include "loader_win_utils.c" #endif -/* Define ptls getter, as this cannot be defined within a shared library. */ -#if !defined(_OS_WINDOWS_) && !defined(_OS_DARWIN_) -JL_DLLEXPORT JL_CONST_FUNC void * jl_get_ptls_states_static(void) -{ - /* Because we can't #include in this file, we define a TLS state object with - * hopefully enough room; at last check, the `jl_tls_states_t` struct was <16KB. 
*/ - static __attribute__((tls_model("local-exec"))) __thread char tls_states[32768]; - return &tls_states; -} -#endif +JULIA_DEFINE_FAST_TLS #ifdef _OS_WINDOWS_ int mainCRTStartup(void) { int argc; LPWSTR * wargv = CommandLineToArgv(GetCommandLine(), &argc); - char ** argv = (char **)malloc(sizeof(char *)*(argc+ 1)); + char ** argv = (char **)malloc(sizeof(char*) * (argc + 1)); setup_stdio(); #else int main(int argc, char * argv[]) @@ -36,7 +27,7 @@ int main(int argc, char * argv[]) // Convert Windows wchar_t values to UTF8 #ifdef _OS_WINDOWS_ - for (int i=0; iptls, tsz, atype); // No allocation or safepoint allowed after this a->flags.how = 0; data = (char*)a + doffs; @@ -129,10 +129,10 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims, data = jl_gc_managed_malloc(tot); // Allocate the Array **after** allocating the data // to make sure the array is still young - a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype); + a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); // No allocation or safepoint allowed after this a->flags.how = 2; - jl_gc_track_malloced_array(ptls, a); + jl_gc_track_malloced_array(ct->ptls, a); } a->flags.pooled = tsz <= GC_MAX_SZCLASS; @@ -213,7 +213,7 @@ static inline int is_ntuple_long(jl_value_t *v) JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, jl_value_t *_dims) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; jl_array_t *a; size_t ndims = jl_nfields(_dims); assert(is_ntuple_long(_dims)); @@ -222,7 +222,7 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, int ndimwords = jl_array_ndimwords(ndims); int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT); - a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype); + a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); // No allocation or safepoint allowed after this a->flags.pooled = tsz <= GC_MAX_SZCLASS; a->flags.ndims = 
ndims; @@ -298,12 +298,12 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; jl_array_t *a; int ndimwords = jl_array_ndimwords(1); int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT); - a = (jl_array_t*)jl_gc_alloc(ptls, tsz, jl_array_uint8_type); + a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, jl_array_uint8_type); a->flags.pooled = tsz <= GC_MAX_SZCLASS; a->flags.ndims = 1; a->offset = 0; @@ -327,7 +327,7 @@ JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str) JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, size_t nel, int own_buffer) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; jl_array_t *a; jl_value_t *eltype = jl_tparam0(atype); @@ -350,7 +350,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, int ndimwords = jl_array_ndimwords(1); int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT); - a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype); + a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); // No allocation or safepoint allowed after this a->flags.pooled = tsz <= GC_MAX_SZCLASS; a->data = data; @@ -365,7 +365,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, a->flags.isaligned = 0; // TODO: allow passing memalign'd buffers if (own_buffer) { a->flags.how = 2; - jl_gc_track_malloced_array(ptls, a); + jl_gc_track_malloced_array(ct->ptls, a); jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 
1 : 0)); } else { @@ -381,7 +381,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, jl_value_t *_dims, int own_buffer) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; size_t nel = 1; jl_array_t *a; size_t ndims = jl_nfields(_dims); @@ -417,7 +417,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, int ndimwords = jl_array_ndimwords(ndims); int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT); - a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype); + a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); // No allocation or safepoint allowed after this a->flags.pooled = tsz <= GC_MAX_SZCLASS; a->data = data; @@ -433,7 +433,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, a->flags.isaligned = 0; if (own_buffer) { a->flags.how = 2; - jl_gc_track_malloced_array(ptls, a); + jl_gc_track_malloced_array(ct->ptls, a); jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 
1 : 0)); } else { @@ -519,7 +519,8 @@ JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len) jl_throw(jl_memory_exception); if (len == 0) return jl_an_empty_string; - jl_value_t *s = jl_gc_alloc_(jl_get_ptls_states(), sz, jl_string_type); // force inlining + jl_task_t *ct = jl_current_task; + jl_value_t *s = jl_gc_alloc_(ct->ptls, sz, jl_string_type); // force inlining *(size_t*)s = len; memcpy((char*)s + sizeof(size_t), str, len); ((char*)s + sizeof(size_t))[len] = 0; @@ -533,7 +534,8 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len) jl_throw(jl_memory_exception); if (len == 0) return jl_an_empty_string; - jl_value_t *s = jl_gc_alloc_(jl_get_ptls_states(), sz, jl_string_type); // force inlining + jl_task_t *ct = jl_current_task; + jl_value_t *s = jl_gc_alloc_(ct->ptls, sz, jl_string_type); // force inlining *(size_t*)s = len; ((char*)s + sizeof(size_t))[len] = 0; return s; @@ -672,7 +674,7 @@ JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i) // the **beginning** of the new buffer. 
static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; assert(!a->flags.isshared || a->flags.how == 3); size_t elsz = a->elsize; size_t nbytes = newlen * elsz; @@ -714,12 +716,12 @@ static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen) newbuf = 1; if (nbytes >= MALLOC_THRESH) { a->data = jl_gc_managed_malloc(nbytes); - jl_gc_track_malloced_array(ptls, a); + jl_gc_track_malloced_array(ct->ptls, a); a->flags.how = 2; a->flags.isaligned = 1; } else { - a->data = jl_gc_alloc_buf(ptls, nbytes); + a->data = jl_gc_alloc_buf(ct->ptls, nbytes); a->flags.how = 1; jl_gc_wb_buf(a, a->data, nbytes); } @@ -1008,8 +1010,9 @@ STATIC_INLINE void jl_array_shrink(jl_array_t *a, size_t dec) typetagdata = (char*)malloc_s(a->nrows); memcpy(typetagdata, jl_array_typetagdata(a), a->nrows); } + jl_task_t *ct = jl_current_task; char *originaldata = (char*) a->data - a->offset * a->elsize; - char *newdata = (char*)jl_gc_alloc_buf(jl_get_ptls_states(), newbytes); + char *newdata = (char*)jl_gc_alloc_buf(ct->ptls, newbytes); jl_gc_wb_buf(a, newdata, newbytes); a->maxsize -= dec; if (isbitsunion) { diff --git a/src/ast.c b/src/ast.c index 307d731109933a..e7048eabc65988 100644 --- a/src/ast.c +++ b/src/ast.c @@ -267,7 +267,7 @@ static jl_ast_context_list_t *jl_ast_ctx_freed = NULL; static jl_ast_context_t *jl_ast_ctx_enter(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; JL_SIGATOMIC_BEGIN(); JL_LOCK_NOGC(&flisp_lock); jl_ast_context_list_t *node; @@ -275,7 +275,7 @@ static jl_ast_context_t *jl_ast_ctx_enter(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOI // First check if the current task is using one of the contexts for (node = jl_ast_ctx_using;node;(node = node->next)) { ctx = jl_ast_context_list_item(node); - if (ctx->task == ptls->current_task) { + if (ctx->task == ct) { ctx->ref++; JL_UNLOCK_NOGC(&flisp_lock); return ctx; 
@@ -287,7 +287,7 @@ static jl_ast_context_t *jl_ast_ctx_enter(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOI jl_ast_context_list_insert(&jl_ast_ctx_using, node); ctx = jl_ast_context_list_item(node); ctx->ref = 1; - ctx->task = ptls->current_task; + ctx->task = ct; ctx->module = NULL; JL_UNLOCK_NOGC(&flisp_lock); return ctx; @@ -295,7 +295,7 @@ static jl_ast_context_t *jl_ast_ctx_enter(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOI // Construct a new one if we can't find any ctx = (jl_ast_context_t*)calloc(1, sizeof(jl_ast_context_t)); ctx->ref = 1; - ctx->task = ptls->current_task; + ctx->task = ct; node = &ctx->list; jl_ast_context_list_insert(&jl_ast_ctx_using, node); JL_UNLOCK_NOGC(&flisp_lock); @@ -318,11 +318,11 @@ static void jl_ast_ctx_leave(jl_ast_context_t *ctx) void jl_init_flisp(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; if (jl_ast_ctx_using || jl_ast_ctx_freed) return; jl_ast_main_ctx.ref = 1; - jl_ast_main_ctx.task = ptls->current_task; + jl_ast_main_ctx.task = ct; jl_ast_context_list_insert(&jl_ast_ctx_using, &jl_ast_main_ctx.list); jl_init_ast_ctx(&jl_ast_main_ctx); // To match the one in jl_ast_ctx_leave @@ -1033,7 +1033,7 @@ int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule, jl_module_t **ctx, size_t world, int throw_load_error) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; JL_TIMING(MACRO_INVOCATION); size_t nargs = jl_array_len(args) + 1; JL_NARGSV("macrocall", 3); // macro name, location, and module @@ -1051,8 +1051,8 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule for (i = 3; i < nargs; i++) margs[i] = jl_array_ptr_ref(args, i - 1); - size_t last_age = ptls->world_age; - ptls->world_age = world < jl_world_counter ? world : jl_world_counter; + size_t last_age = ct->world_age; + ct->world_age = world < jl_world_counter ? 
world : jl_world_counter; jl_value_t *result; JL_TRY { margs[0] = jl_toplevel_eval(*ctx, margs[0]); @@ -1081,7 +1081,7 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule jl_current_exception())); } } - ptls->world_age = last_age; + ct->world_age = last_age; JL_GC_POP(); return result; } @@ -1284,11 +1284,11 @@ JL_DLLEXPORT jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t args[2] = filename; args[3] = jl_box_ulong(offset); args[4] = options; - jl_ptls_t ptls = jl_get_ptls_states(); - size_t last_age = ptls->world_age; - ptls->world_age = jl_world_counter; + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_world_counter; jl_value_t *result = jl_apply(args, 5); - ptls->world_age = last_age; + ct->world_age = last_age; args[0] = result; // root during error checks below JL_TYPECHK(parse, simplevector, result); if (jl_svec_len(result) != 2) diff --git a/src/builtins.c b/src/builtins.c index 37471bbb7a7185..31165ff5569761 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -718,24 +718,24 @@ JL_CALLABLE(jl_f__apply_iterate) // this is like `_apply`, but with quasi-exact checks to make sure it is pure JL_CALLABLE(jl_f__apply_pure) { - jl_ptls_t ptls = jl_get_ptls_states(); - int last_in = ptls->in_pure_callback; + jl_task_t *ct = jl_current_task; + int last_in = ct->ptls->in_pure_callback; jl_value_t *ret = NULL; JL_TRY { - ptls->in_pure_callback = 1; + ct->ptls->in_pure_callback = 1; // because this function was declared pure, // we should be allowed to run it in any world // so we run it in the newest world; // because, why not :) // and `promote` works better this way - size_t last_age = ptls->world_age; - ptls->world_age = jl_world_counter; + size_t last_age = ct->world_age; + ct->world_age = jl_world_counter; ret = do_apply(args, nargs, NULL); - ptls->world_age = last_age; - ptls->in_pure_callback = last_in; + ct->world_age = last_age; + ct->ptls->in_pure_callback = last_in; } 
JL_CATCH { - ptls->in_pure_callback = last_in; + ct->ptls->in_pure_callback = last_in; jl_rethrow(); } return ret; @@ -744,12 +744,12 @@ JL_CALLABLE(jl_f__apply_pure) // this is like a regular call, but always runs in the newest world JL_CALLABLE(jl_f__call_latest) { - jl_ptls_t ptls = jl_get_ptls_states(); - size_t last_age = ptls->world_age; - if (!ptls->in_pure_callback) - ptls->world_age = jl_world_counter; + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + if (!ct->ptls->in_pure_callback) + ct->world_age = jl_world_counter; jl_value_t *ret = jl_apply(args, nargs); - ptls->world_age = last_age; + ct->world_age = last_age; return ret; } @@ -758,15 +758,15 @@ JL_CALLABLE(jl_f__call_latest) JL_CALLABLE(jl_f__call_in_world) { JL_NARGSV(_apply_in_world, 2); - jl_ptls_t ptls = jl_get_ptls_states(); - size_t last_age = ptls->world_age; + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; JL_TYPECHK(_apply_in_world, ulong, args[0]); size_t world = jl_unbox_ulong(args[0]); world = world <= jl_world_counter ? 
world : jl_world_counter; - if (!ptls->in_pure_callback) - ptls->world_age = world; + if (!ct->ptls->in_pure_callback) + ct->world_age = world; jl_value_t *ret = jl_apply(&args[1], nargs - 1); - ptls->world_age = last_age; + ct->world_age = last_age; return ret; } @@ -781,8 +781,8 @@ JL_CALLABLE(jl_f_tuple) JL_GC_PROMISE_ROOTED(tt); // it is a concrete type if (tt->instance != NULL) return tt->instance; - jl_ptls_t ptls = jl_get_ptls_states(); - jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(tt), tt); + jl_task_t *ct = jl_current_task; + jl_value_t *jv = jl_gc_alloc(ct->ptls, jl_datatype_size(tt), tt); for (i = 0; i < nargs; i++) set_nth_field(tt, (void*)jv, i, args[i]); return jv; @@ -1061,7 +1061,7 @@ JL_CALLABLE(jl_f_apply_type) JL_CALLABLE(jl_f_applicable) { JL_NARGSV(applicable, 1); - size_t world = jl_get_ptls_states()->world_age; + size_t world = jl_current_task->world_age; return jl_method_lookup(args, nargs, world) != NULL ? jl_true : jl_false; } @@ -1127,10 +1127,10 @@ JL_CALLABLE(jl_f_invoke_kwsorter) jl_expr_t *jl_exprn(jl_sym_t *head, size_t n) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; jl_array_t *ar = jl_alloc_vec_any(n); JL_GC_PUSH1(&ar); - jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ptls, sizeof(jl_expr_t), + jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ct->ptls, sizeof(jl_expr_t), jl_expr_type); ex->head = head; ex->args = ar; @@ -1140,14 +1140,14 @@ jl_expr_t *jl_exprn(jl_sym_t *head, size_t n) JL_CALLABLE(jl_f__expr) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; JL_NARGSV(Expr, 1); JL_TYPECHK(Expr, symbol, args[0]); jl_array_t *ar = jl_alloc_vec_any(nargs-1); JL_GC_PUSH1(&ar); for(size_t i=0; i < nargs-1; i++) jl_array_ptr_set(ar, i, args[i+1]); - jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ptls, sizeof(jl_expr_t), + jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ct->ptls, sizeof(jl_expr_t), jl_expr_type); ex->head = (jl_sym_t*)args[0]; ex->args = ar; @@ -1162,8 +1162,8 @@ JL_DLLEXPORT 
jl_tvar_t *jl_new_typevar(jl_sym_t *name, jl_value_t *lb, jl_value_ jl_type_error_rt("TypeVar", "lower bound", (jl_value_t *)jl_type_type, lb); if (ub != (jl_value_t *)jl_any_type && !jl_is_type(ub) && !jl_is_typevar(ub)) jl_type_error_rt("TypeVar", "upper bound", (jl_value_t *)jl_type_type, ub); - jl_ptls_t ptls = jl_get_ptls_states(); - jl_tvar_t *tv = (jl_tvar_t *)jl_gc_alloc(ptls, sizeof(jl_tvar_t), jl_tvar_type); + jl_task_t *ct = jl_current_task; + jl_tvar_t *tv = (jl_tvar_t *)jl_gc_alloc(ct->ptls, sizeof(jl_tvar_t), jl_tvar_type); tv->name = name; tv->lb = lb; tv->ub = ub; diff --git a/src/ccall.cpp b/src/ccall.cpp index 00a6403d6434b0..5a37cc289ea43e 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -1287,22 +1287,6 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) }; #define is_libjulia_func(name) _is_libjulia_func((uintptr_t)&(name), #name) - static jl_ptls_t (*ptls_getter)(void) = [] { - // directly accessing the address of an ifunc can cause compile-time linker issues - // on some configurations (e.g. AArch64 + -Bsymbolic-functions), so we guard the - // `&jl_get_ptls_states` within this `#ifdef` guard, and use a more roundabout - // method involving `jl_dlsym()` on Linux platforms instead. 
-#ifdef _OS_LINUX_ - jl_ptls_t (*p)(void); - void *handle = jl_dlopen(nullptr, 0); - jl_dlsym(handle, "jl_get_ptls_states", (void **)&p, 0); - jl_dlclose(handle); - return p; -#else - return &jl_get_ptls_states; -#endif - }(); - // emit arguments jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nccallargs); for (size_t i = 0; i < nccallargs; i++) { @@ -1475,27 +1459,27 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) JL_GC_POP(); ctx.builder.CreateCall(prepare_call(gcroot_flush_func)); emit_signal_fence(ctx); - ctx.builder.CreateLoad(T_size, ctx.signalPage, true); + ctx.builder.CreateLoad(T_size, get_current_signal_page(ctx), true); emit_signal_fence(ctx); return ghostValue(jl_nothing_type); } - else if (_is_libjulia_func((uintptr_t)ptls_getter, "jl_get_ptls_states")) { + else if (is_libjulia_func("jl_get_ptls_states")) { assert(lrt == T_size); assert(!isVa && !llvmcall && nccallargs == 0); JL_GC_POP(); return mark_or_box_ccall_result(ctx, - ctx.builder.CreatePtrToInt(ctx.ptlsStates, lrt), + ctx.builder.CreatePtrToInt(get_current_ptls(ctx), lrt), retboxed, rt, unionall, static_rt); } else if (is_libjulia_func(jl_threadid)) { assert(lrt == T_int16); assert(!isVa && !llvmcall && nccallargs == 0); JL_GC_POP(); - Value *ptls_i16 = emit_bitcast(ctx, ctx.ptlsStates, T_pint16); - const int tid_offset = offsetof(jl_tls_states_t, tid); - Value *ptid = ctx.builder.CreateInBoundsGEP(ptls_i16, ConstantInt::get(T_size, tid_offset / 2)); + Value *ptask_i16 = emit_bitcast(ctx, get_current_task(ctx), T_pint16); + const int tid_offset = offsetof(jl_task_t, tid); + Value *ptid = ctx.builder.CreateInBoundsGEP(ptask_i16, ConstantInt::get(T_size, tid_offset / sizeof(int16_t))); LoadInst *tid = ctx.builder.CreateAlignedLoad(ptid, Align(sizeof(int16_t))); - tbaa_decorate(tbaa_const, tid); + tbaa_decorate(tbaa_gcframe, tid); return mark_or_box_ccall_result(ctx, tid, retboxed, rt, unionall, static_rt); } else if 
(is_libjulia_func(jl_gc_disable_finalizers_internal) @@ -1504,7 +1488,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) #endif ) { JL_GC_POP(); - Value *ptls_i32 = emit_bitcast(ctx, ctx.ptlsStates, T_pint32); + Value *ptls_i32 = emit_bitcast(ctx, get_current_ptls(ctx), T_pint32); const int finh_offset = offsetof(jl_tls_states_t, finalizers_inhibited); Value *pfinh = ctx.builder.CreateInBoundsGEP(ptls_i32, ConstantInt::get(T_size, finh_offset / 4)); LoadInst *finh = ctx.builder.CreateAlignedLoad(pfinh, Align(sizeof(int32_t))); @@ -1524,18 +1508,14 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) assert(lrt == T_prjlvalue); assert(!isVa && !llvmcall && nccallargs == 0); JL_GC_POP(); - Value *ptls_pv = emit_bitcast(ctx, ctx.ptlsStates, T_pprjlvalue); - const int ct_offset = offsetof(jl_tls_states_t, current_task); - Value *pct = ctx.builder.CreateInBoundsGEP(ptls_pv, ConstantInt::get(T_size, ct_offset / sizeof(void*))); - LoadInst *ct = ctx.builder.CreateAlignedLoad(pct, Align(sizeof(void*))); - tbaa_decorate(tbaa_const, ct); + auto ct = track_pjlvalue(ctx, emit_bitcast(ctx, get_current_task(ctx), T_pjlvalue)); return mark_or_box_ccall_result(ctx, ct, retboxed, rt, unionall, static_rt); } else if (is_libjulia_func(jl_set_next_task)) { assert(lrt == T_void); assert(!isVa && !llvmcall && nccallargs == 1); JL_GC_POP(); - Value *ptls_pv = emit_bitcast(ctx, ctx.ptlsStates, T_ppjlvalue); + Value *ptls_pv = emit_bitcast(ctx, get_current_ptls(ctx), T_ppjlvalue); const int nt_offset = offsetof(jl_tls_states_t, next_task); Value *pnt = ctx.builder.CreateInBoundsGEP(ptls_pv, ConstantInt::get(T_size, nt_offset / sizeof(void*))); ctx.builder.CreateStore(emit_pointer_from_objref(ctx, boxed(ctx, argv[0])), pnt); @@ -1576,7 +1556,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) checkBB, contBB); ctx.builder.SetInsertPoint(checkBB); ctx.builder.CreateLoad( - 
ctx.builder.CreateConstInBoundsGEP1_32(T_size, ctx.signalPage, -1), + ctx.builder.CreateConstInBoundsGEP1_32(T_size, get_current_signal_page(ctx), -1), true); ctx.builder.CreateBr(contBB); ctx.f->getBasicBlockList().push_back(contBB); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 7f88d2d2fff4be..c5014fdbdbe17a 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -2746,7 +2746,7 @@ static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std // allocation for known size object static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt) { - Value *ptls_ptr = emit_bitcast(ctx, ctx.ptlsStates, T_pint8); + Value *ptls_ptr = emit_bitcast(ctx, get_current_ptls(ctx), T_pint8); Function *F = prepare_call(jl_alloc_obj_func); auto call = ctx.builder.CreateCall(F, {ptls_ptr, ConstantInt::get(T_size, static_size), maybe_decay_untracked(ctx, jt)}); call->setAttributes(F->getAttributes()); @@ -3072,7 +3072,7 @@ static void emit_signal_fence(jl_codectx_t &ctx) static Value *emit_defer_signal(jl_codectx_t &ctx) { - Value *ptls = emit_bitcast(ctx, ctx.ptlsStates, + Value *ptls = emit_bitcast(ctx, get_current_ptls(ctx), PointerType::get(T_sigatomic, 0)); Constant *offset = ConstantInt::getSigned(T_int32, offsetof(jl_tls_states_t, defer_signal) / sizeof(sig_atomic_t)); diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp index b7cc89f210548a..675afc3453fbad 100644 --- a/src/clangsa/GCChecker.cpp +++ b/src/clangsa/GCChecker.cpp @@ -728,7 +728,6 @@ bool GCChecker::isGCTrackedType(QualType QT) { Name.endswith_lower("jl_module_t") || Name.endswith_lower("jl_tupletype_t") || Name.endswith_lower("jl_gc_tracked_buffer_t") || - Name.endswith_lower("jl_tls_states_t") || Name.endswith_lower("jl_binding_t") || Name.endswith_lower("jl_ordereddict_t") || Name.endswith_lower("jl_tvar_t") || diff --git a/src/codegen.cpp b/src/codegen.cpp index 4944356a73dbb0..3f96a280df5b2a 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -428,13 
+428,14 @@ static const auto jlboxed_uint8_cache = new JuliaVariable{ [](LLVMContext &C) { return (Type*)ArrayType::get(T_pjlvalue, 256); }, }; -static const auto jltls_states_func = new JuliaFunction{ - "julia.ptls_states", +static const auto jlpgcstack_func = new JuliaFunction{ + "julia.get_pgcstack", [](LLVMContext &C) { return FunctionType::get(PointerType::get(T_ppjlvalue, 0), false); }, nullptr, }; + // important functions // Symbols are not gc-tracked, but we'll treat them as callee rooted anyway, // because they may come from a gc-rooted location @@ -1092,8 +1093,7 @@ class jl_codectx_t { int nvargs = -1; bool is_opaque_closure = false; - CallInst *ptlsStates = NULL; - Value *signalPage = NULL; + CallInst *pgcstack = NULL; Value *world_age_field = NULL; bool debug_enabled = false; @@ -1129,6 +1129,9 @@ static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i); static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const std::string &msg); static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0); +static Value *get_current_task(jl_codectx_t &ctx); +static Value *get_current_ptls(jl_codectx_t &ctx); +static Value *get_current_signal_page(jl_codectx_t &ctx); static void CreateTrap(IRBuilder<> &irbuilder); static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF, jl_cgval_t *args, size_t nargs, CallingConv::ID cc); @@ -1202,7 +1205,7 @@ static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_con static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty) { - return new AllocaInst(lty, 0, "", /*InsertBefore=*/ctx.ptlsStates); + return new AllocaInst(lty, 0, "", /*InsertBefore=*/ctx.pgcstack); } static void undef_derived_strct(IRBuilder<> &irbuilder, Value *ptr, jl_datatype_t *sty, MDNode *tbaa) @@ -2016,9 +2019,9 @@ static jl_value_t *static_apply_type(jl_codectx_t &ctx, const jl_cgval_t *args, v[i] = 
args[i].constant; } assert(v[0] == jl_builtin_apply_type); - size_t last_age = jl_get_ptls_states()->world_age; + size_t last_age = jl_current_task->world_age; // call apply_type, but ignore errors. we know that will work in world 1. - jl_get_ptls_states()->world_age = 1; + jl_current_task->world_age = 1; jl_value_t *result; JL_TRY { result = jl_apply(v, nargs); @@ -2026,7 +2029,7 @@ static jl_value_t *static_apply_type(jl_codectx_t &ctx, const jl_cgval_t *args, JL_CATCH { result = NULL; } - jl_get_ptls_states()->world_age = last_age; + jl_current_task->world_age = last_age; return result; } @@ -2102,9 +2105,9 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex) return NULL; } } - size_t last_age = jl_get_ptls_states()->world_age; + size_t last_age = jl_current_task->world_age; // here we know we're calling specific builtin functions that work in world 1. - jl_get_ptls_states()->world_age = 1; + jl_current_task->world_age = 1; jl_value_t *result; JL_TRY { result = jl_apply(v, n+1); @@ -2112,7 +2115,7 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex) JL_CATCH { result = NULL; } - jl_get_ptls_states()->world_age = last_age; + jl_current_task->world_age = last_age; JL_GC_POP(); return result; } @@ -4744,20 +4747,58 @@ JL_GCC_IGNORE_STOP static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0) { // TODO: requires the runtime, but is generated unconditionally - // allocate a placeholder gc instruction - ctx.ptlsStates = ctx.builder.CreateCall(prepare_call(jltls_states_func)); - int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void*); - ctx.signalPage = emit_nthptr_recast(ctx, ctx.ptlsStates, nthfield, tbaa_const, - PointerType::get(T_psize, 0)); + ctx.pgcstack = ctx.builder.CreateCall(prepare_call(jlpgcstack_func)); +} + +static Value *get_current_task(jl_codectx_t &ctx) +{ + const int ptls_offset = offsetof(jl_task_t, gcstack); + return ctx.builder.CreateInBoundsGEP( + T_pjlvalue, emit_bitcast(ctx, ctx.pgcstack, 
T_ppjlvalue), + ConstantInt::get(T_size, -ptls_offset / sizeof(void *)), + "current_task"); } +// Get PTLS through current task. +static Value *get_current_ptls(jl_codectx_t &ctx) +{ + const int ptls_offset = offsetof(jl_task_t, ptls); + Value *pptls = ctx.builder.CreateInBoundsGEP( + T_pjlvalue, get_current_task(ctx), + ConstantInt::get(T_size, ptls_offset / sizeof(void *)), + "ptls_field"); + LoadInst *ptls_load = ctx.builder.CreateAlignedLoad( + emit_bitcast(ctx, pptls, T_ppjlvalue), Align(sizeof(void *)), "ptls_load"); + // Note: Corresponding store (`t->ptls = ptls`) happens in `ctx_switch` of tasks.c. + tbaa_decorate(tbaa_gcframe, ptls_load); + // Using `CastInst::Create` to get an `Instruction*` without explicit cast: + auto ptls = CastInst::Create(Instruction::BitCast, ptls_load, T_ppjlvalue, "ptls"); + ctx.builder.Insert(ptls); + return ptls; +} + +// Store world age at the entry block of the function. This function should be +// called right after `allocate_gc_frame` and there should be no context switch. static void emit_last_age_field(jl_codectx_t &ctx) { + auto ptls = get_current_task(ctx); + assert(ctx.builder.GetInsertBlock() == ctx.pgcstack->getParent()); ctx.world_age_field = ctx.builder.CreateInBoundsGEP( T_size, - ctx.builder.CreateBitCast(ctx.ptlsStates, T_psize), - ConstantInt::get(T_size, offsetof(jl_tls_states_t, world_age) / sizeof(size_t))); + ctx.builder.CreateBitCast(ptls, T_psize), + ConstantInt::get(T_size, offsetof(jl_task_t, world_age) / sizeof(size_t)), + "world_age"); +} + +// Get signal page through current task. 
+static Value *get_current_signal_page(jl_codectx_t &ctx) +{ + // return ctx.builder.CreateCall(prepare_call(reuse_signal_page_func)); + auto ptls = get_current_ptls(ctx); + int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void *); + return emit_nthptr_recast(ctx, ptls, nthfield, tbaa_const, + PointerType::get(T_psize, 0)); } static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_codegen_params_t ¶ms) @@ -4985,14 +5026,11 @@ static Function* gen_cfun_wrapper( emit_last_age_field(ctx); Value *dummy_world = ctx.builder.CreateAlloca(T_size); - Value *have_tls = ctx.builder.CreateIsNotNull(ctx.ptlsStates); + Value *have_tls = ctx.builder.CreateIsNotNull(ctx.pgcstack); // TODO: in the future, try to initialize a full TLS context here // for now, just use a dummy field to avoid a branch in this function ctx.world_age_field = ctx.builder.CreateSelect(have_tls, ctx.world_age_field, dummy_world); Value *last_age = tbaa_decorate(tbaa_gcframe, ctx.builder.CreateAlignedLoad(ctx.world_age_field, Align(sizeof(size_t)))); - Value *valid_tls = ctx.builder.CreateIsNotNull(last_age); - have_tls = ctx.builder.CreateAnd(have_tls, valid_tls); - ctx.world_age_field = ctx.builder.CreateSelect(valid_tls, ctx.world_age_field, dummy_world); Value *world_v = ctx.builder.CreateAlignedLoad(prepare_global_in(jl_Module, jlgetworld_global), Align(sizeof(size_t))); // TODO: cast(world_v)->setOrdering(AtomicOrdering::Monotonic); @@ -6345,9 +6383,9 @@ static std::pair, jl_llvm_functions_t> (va && (int)i == ctx.vaSlot) || // or it's the va arg tuple i == 0) { // or it is the first argument (which isn't in `argArray`) AllocaInst *av = new AllocaInst(T_prjlvalue, 0, - jl_symbol_name(s), /*InsertBefore*/ctx.ptlsStates); + jl_symbol_name(s), /*InsertBefore*/ctx.pgcstack); StoreInst *SI = new StoreInst(V_rnull, av, false, Align(sizeof(void*))); - SI->insertAfter(ctx.ptlsStates); + SI->insertAfter(ctx.pgcstack); varinfo.boxroot = av; if (ctx.debug_enabled && varinfo.dinfo) 
{ DIExpression *expr; @@ -7775,7 +7813,7 @@ static void init_jit_functions(void) global_jlvalue_to_llvm(new JuliaVariable{"jl_undefref_exception", true, get_pjlvalue}, &jl_undefref_exception); add_named_global(jlgetworld_global, &jl_world_counter); add_named_global("__stack_chk_fail", &__stack_chk_fail); - add_named_global(jltls_states_func, (void*)NULL); + add_named_global(jlpgcstack_func, (void*)NULL); add_named_global(jlerror_func, &jl_error); add_named_global(jlthrow_func, &jl_throw); add_named_global(jlundefvarerror_func, &jl_undefined_var_error); diff --git a/src/datatype.c b/src/datatype.c index b65c8a602bfcf5..7e4405dc6235ad 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -42,9 +42,9 @@ static jl_sym_t *jl_demangle_typename(jl_sym_t *s) JL_NOTSAFEPOINT JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *module) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; jl_methtable_t *mt = - (jl_methtable_t*)jl_gc_alloc(ptls, sizeof(jl_methtable_t), + (jl_methtable_t*)jl_gc_alloc(ct->ptls, sizeof(jl_methtable_t), jl_methtable_type); mt->name = jl_demangle_typename(name); mt->module = module; @@ -62,9 +62,9 @@ JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *mo JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *module, int abstract, int mutabl) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; jl_typename_t *tn = - (jl_typename_t*)jl_gc_alloc(ptls, sizeof(jl_typename_t), + (jl_typename_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typename_t), jl_typename_type); tn->name = name; tn->module = module; @@ -91,8 +91,8 @@ jl_datatype_t *jl_new_abstracttype(jl_value_t *name, jl_module_t *module, jl_dat jl_datatype_t *jl_new_uninitialized_datatype(void) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc(ptls, sizeof(jl_datatype_t), jl_datatype_type); + jl_task_t *ct = jl_current_task; + jl_datatype_t *t = 
(jl_datatype_t*)jl_gc_alloc(ct->ptls, sizeof(jl_datatype_t), jl_datatype_type); t->hash = 0; t->hasfreetypevars = 0; t->isdispatchtuple = 0; @@ -232,7 +232,8 @@ STATIC_INLINE void jl_maybe_allocate_singleton_instance(jl_datatype_t *st) if (jl_is_datatype_make_singleton(st)) { // It's possible for st to already have an ->instance if it was redefined if (!st->instance) { - st->instance = jl_gc_alloc(jl_get_ptls_states(), 0, st); + jl_task_t *ct = jl_current_task; + st->instance = jl_gc_alloc(ct->ptls, 0, st); jl_gc_wb(st, st->instance); } } @@ -665,7 +666,6 @@ JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, void *data) { // data may not have the alignment required by the size // but will always have the alignment required by the datatype - jl_ptls_t ptls = jl_get_ptls_states(); assert(jl_is_datatype(dt)); jl_datatype_t *bt = (jl_datatype_t*)dt; size_t nb = jl_datatype_size(bt); @@ -682,7 +682,8 @@ JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, void *data) if (bt == jl_uint16_type) return jl_box_uint16(*(uint16_t*)data); if (bt == jl_char_type) return jl_box_char(*(uint32_t*)data); - jl_value_t *v = jl_gc_alloc(ptls, nb, bt); + jl_task_t *ct = jl_current_task; + jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt); switch (nb) { case 1: *(uint8_t*) v = *(uint8_t*)data; break; case 2: *(uint16_t*)v = jl_load_unaligned_i16(data); break; @@ -700,7 +701,8 @@ JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, void *data) JL_DLLEXPORT jl_value_t *jl_typemax_uint(jl_value_t *bt) { uint64_t data = 0xffffffffffffffffULL; - jl_value_t *v = jl_gc_alloc(jl_get_ptls_states(), sizeof(size_t), bt); + jl_task_t *ct = jl_current_task; + jl_value_t *v = jl_gc_alloc(ct->ptls, sizeof(size_t), bt); memcpy(v, &data, sizeof(size_t)); return v; } @@ -760,14 +762,14 @@ UNBOX_FUNC(float64, double) UNBOX_FUNC(voidpointer, void*) UNBOX_FUNC(uint8pointer, uint8_t*) -#define BOX_FUNC(typ,c_type,pfx,nw) \ - JL_DLLEXPORT jl_value_t *pfx##_##typ(c_type x) \ - { \ - jl_ptls_t ptls = 
jl_get_ptls_states(); \ - jl_value_t *v = jl_gc_alloc(ptls, nw * sizeof(void*), \ - jl_##typ##_type); \ - *(c_type*)jl_data_ptr(v) = x; \ - return v; \ +#define BOX_FUNC(typ,c_type,pfx,nw) \ + JL_DLLEXPORT jl_value_t *pfx##_##typ(c_type x) \ + { \ + jl_task_t *ct = jl_current_task; \ + jl_value_t *v = jl_gc_alloc(ct->ptls, nw * sizeof(void*), \ + jl_##typ##_type); \ + *(c_type*)jl_data_ptr(v) = x; \ + return v; \ } BOX_FUNC(float32, float, jl_box, 1) BOX_FUNC(voidpointer, void*, jl_box, 1) @@ -781,29 +783,29 @@ BOX_FUNC(float64, double, jl_box, 2) #define NBOX_C 1024 #define SIBOX_FUNC(typ,c_type,nw)\ - static jl_value_t *boxed_##typ##_cache[NBOX_C]; \ - JL_DLLEXPORT jl_value_t *jl_box_##typ(c_type x) \ - { \ - jl_ptls_t ptls = jl_get_ptls_states(); \ - c_type idx = x+NBOX_C/2; \ - if ((u##c_type)idx < (u##c_type)NBOX_C) \ - return boxed_##typ##_cache[idx]; \ - jl_value_t *v = jl_gc_alloc(ptls, nw * sizeof(void*), \ - jl_##typ##_type); \ - *(c_type*)jl_data_ptr(v) = x; \ - return v; \ + static jl_value_t *boxed_##typ##_cache[NBOX_C]; \ + JL_DLLEXPORT jl_value_t *jl_box_##typ(c_type x) \ + { \ + jl_task_t *ct = jl_current_task; \ + c_type idx = x+NBOX_C/2; \ + if ((u##c_type)idx < (u##c_type)NBOX_C) \ + return boxed_##typ##_cache[idx]; \ + jl_value_t *v = jl_gc_alloc(ct->ptls, nw * sizeof(void*), \ + jl_##typ##_type); \ + *(c_type*)jl_data_ptr(v) = x; \ + return v; \ } -#define UIBOX_FUNC(typ,c_type,nw) \ - static jl_value_t *boxed_##typ##_cache[NBOX_C]; \ - JL_DLLEXPORT jl_value_t *jl_box_##typ(c_type x) \ - { \ - jl_ptls_t ptls = jl_get_ptls_states(); \ - if (x < NBOX_C) \ - return boxed_##typ##_cache[x]; \ - jl_value_t *v = jl_gc_alloc(ptls, nw * sizeof(void*), \ - jl_##typ##_type); \ - *(c_type*)jl_data_ptr(v) = x; \ - return v; \ +#define UIBOX_FUNC(typ,c_type,nw) \ + static jl_value_t *boxed_##typ##_cache[NBOX_C]; \ + JL_DLLEXPORT jl_value_t *jl_box_##typ(c_type x) \ + { \ + jl_task_t *ct = jl_current_task; \ + if (x < NBOX_C) \ + return 
boxed_##typ##_cache[x]; \ + jl_value_t *v = jl_gc_alloc(ct->ptls, nw * sizeof(void*), \ + jl_##typ##_type); \ + *(c_type*)jl_data_ptr(v) = x; \ + return v; \ } SIBOX_FUNC(int16, int16_t, 1) SIBOX_FUNC(int32, int32_t, 1) @@ -822,11 +824,11 @@ UIBOX_FUNC(uint64, uint64_t, 2) static jl_value_t *boxed_char_cache[128]; JL_DLLEXPORT jl_value_t *jl_box_char(uint32_t x) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; uint32_t u = bswap_32(x); if (u < 128) return boxed_char_cache[(uint8_t)u]; - jl_value_t *v = jl_gc_alloc(ptls, sizeof(void*), jl_char_type); + jl_value_t *v = jl_gc_alloc(ct->ptls, sizeof(void*), jl_char_type); *(uint32_t*)jl_data_ptr(v) = x; return v; } @@ -889,12 +891,12 @@ JL_DLLEXPORT jl_value_t *jl_box_bool(int8_t x) JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; if (type->instance != NULL) return type->instance; va_list args; size_t nf = jl_datatype_nfields(type); va_start(args, type); - jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(type), type); + jl_value_t *jv = jl_gc_alloc(ct->ptls, jl_datatype_size(type), type); for (size_t i = 0; i < nf; i++) { set_nth_field(type, (void*)jv, i, va_arg(args, jl_value_t*)); } @@ -913,7 +915,7 @@ static void init_struct_tail(jl_datatype_t *type, jl_value_t *jv, size_t na) JL_ JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args, uint32_t na) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; if (!jl_is_datatype(type) || type->layout == NULL) { jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type); } @@ -926,7 +928,7 @@ JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args, } if (type->instance != NULL) return type->instance; - jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(type), type); + jl_value_t *jv = jl_gc_alloc(ct->ptls, jl_datatype_size(type), type); for (size_t i = 0; i < na; i++) 
{ set_nth_field(type, (void*)jv, i, args[i]); } @@ -936,7 +938,7 @@ JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args, JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; if (!jl_is_tuple(tup)) jl_type_error("new", (jl_value_t*)jl_tuple_type, tup); if (!jl_is_datatype(type) || type->layout == NULL) @@ -955,7 +957,7 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup) } return type->instance; } - jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(type), type); + jl_value_t *jv = jl_gc_alloc(ct->ptls, jl_datatype_size(type), type); jl_value_t *fi = NULL; if (type->layout->npointers > 0) { // if there are references, zero the space first to prevent the GC @@ -977,10 +979,10 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup) JL_DLLEXPORT jl_value_t *jl_new_struct_uninit(jl_datatype_t *type) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; if (type->instance != NULL) return type->instance; size_t size = jl_datatype_size(type); - jl_value_t *jv = jl_gc_alloc(ptls, size, type); + jl_value_t *jv = jl_gc_alloc(ct->ptls, size, type); if (size > 0) memset(jl_data_ptr(jv), 0, size); return jv; diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp index 95b562311b25ba..ad9ed659cbe0dc 100644 --- a/src/debuginfo.cpp +++ b/src/debuginfo.cpp @@ -214,7 +214,7 @@ class JuliaJITEventListener: public JITEventListener const RuntimeDyld::LoadedObjectInfo &L, RTDyldMemoryManager *memmgr) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; // This function modify codeinst->fptr in GC safe region. // This should be fine since the GC won't scan this field. 
int8_t gc_state = jl_gc_safe_enter(ptls); diff --git a/src/disasm.cpp b/src/disasm.cpp index 28e0178e116544..dfc1e32b56eeb2 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -510,7 +510,6 @@ extern "C" JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, int raw_mc, const char* asm_variant, const char *debuginfo, char binary) { assert(fptr != 0); - jl_ptls_t ptls = jl_get_ptls_states(); std::string code; raw_string_ostream stream(code); @@ -538,6 +537,7 @@ jl_value_t *jl_dump_fptr_asm(uint64_t fptr, int raw_mc, const char* asm_variant, } // Dump assembly code + jl_ptls_t ptls = jl_current_task->ptls; int8_t gc_state = jl_gc_safe_enter(ptls); jl_dump_asm_internal( fptr, symsize, slide, diff --git a/src/dump.c b/src/dump.c index 5d9924497a7c40..9a2627e362b2e4 100644 --- a/src/dump.c +++ b/src/dump.c @@ -1134,10 +1134,11 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp, jl_array_t * if (!unique_func) unique_func = jl_get_global(jl_base_module, jl_symbol("unique")); jl_value_t *uniqargs[2] = {unique_func, (jl_value_t*)deps}; - size_t last_age = jl_get_ptls_states()->world_age; - jl_get_ptls_states()->world_age = jl_world_counter; + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_world_counter; jl_array_t *udeps = (*udepsp = deps && unique_func ? 
(jl_array_t*)jl_apply(uniqargs, 2) : NULL); - jl_get_ptls_states()->world_age = last_age; + ct->world_age = last_age; // write a placeholder for total size so that we can quickly seek past all of the // dependencies if we don't need them @@ -1185,8 +1186,8 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp, jl_array_t * if (toplevel && prefs_hash_func && get_compiletime_prefs_func) { // Temporary invoke in newest world age - size_t last_age = jl_get_ptls_states()->world_age; - jl_get_ptls_states()->world_age = jl_world_counter; + size_t last_age = ct->world_age; + ct->world_age = jl_world_counter; // call get_compiletime_prefs(__toplevel__) jl_value_t *args[3] = {get_compiletime_prefs_func, (jl_value_t*)toplevel, NULL}; @@ -1198,7 +1199,7 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp, jl_array_t * prefs_hash = (jl_value_t*)jl_apply(args, 3); // Reset world age to normal - jl_get_ptls_states()->world_age = last_age; + ct->world_age = last_age; } } @@ -2234,7 +2235,7 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist) jl_serializer_state s = { &f, - jl_get_ptls_states(), + jl_current_task->ptls, mod_array }; jl_serialize_value(&s, worklist); @@ -2545,7 +2546,7 @@ static int trace_method(jl_typemap_entry_t *entry, void *closure) static jl_value_t *_jl_restore_incremental(ios_t *f, jl_array_t *mod_array) { JL_TIMING(LOAD_MODULE); - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; if (ios_eof(f) || !jl_read_verify_header(f)) { ios_close(f); return jl_get_exceptionf(jl_errorexception_type, @@ -2576,7 +2577,7 @@ static jl_value_t *_jl_restore_incremental(ios_t *f, jl_array_t *mod_array) // prepare to deserialize int en = jl_gc_enable(0); - jl_gc_enable_finalizers(ptls, 0); + jl_gc_enable_finalizers(ct, 0); ++jl_world_counter; // reserve a world age for the deserialization arraylist_new(&backref_list, 4000); @@ -2587,7 +2588,7 @@ static jl_value_t *_jl_restore_incremental(ios_t 
*f, jl_array_t *mod_array) jl_serializer_state s = { f, - ptls, + ct->ptls, mod_array }; jl_array_t *restored = (jl_array_t*)jl_deserialize_value(&s, (jl_value_t**)&restored); @@ -2622,7 +2623,7 @@ static jl_value_t *_jl_restore_incremental(ios_t *f, jl_array_t *mod_array) arraylist_free(&backref_list); ios_close(f); - jl_gc_enable_finalizers(ptls, 1); // make sure we don't run any Julia code concurrently before this point + jl_gc_enable_finalizers(ct, 1); // make sure we don't run any Julia code concurrently before this point if (tracee_list) { jl_methtable_t *mt; while ((mt = (jl_methtable_t*)arraylist_pop(tracee_list)) != NULL) { @@ -2667,7 +2668,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *m void jl_init_serializer(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; htable_new(&ser_tag, 0); htable_new(&common_symbol_tag, 0); htable_new(&backref_table, 0); @@ -2712,7 +2713,7 @@ void jl_init_serializer(void) jl_type_type_mt, jl_nonfunction_mt, jl_opaque_closure_type, - ptls->root_task, + ct->ptls->root_task, NULL }; diff --git a/src/gc-debug.c b/src/gc-debug.c index febab095a409c9..3a0e4bf78598b7 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -539,14 +539,14 @@ void gc_scrub_record_task(jl_task_t *t) static void gc_scrub_range(char *low, char *high) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_jmp_buf *old_buf = ptls->safe_restore; + jl_ptls_t ptls = jl_current_task->ptls; + jl_jmp_buf *old_buf = jl_get_safe_restore(); jl_jmp_buf buf; if (jl_setjmp(buf, 0)) { - ptls->safe_restore = old_buf; + jl_set_safe_restore(old_buf); return; } - ptls->safe_restore = &buf; + jl_set_safe_restore(&buf); low = (char*)((uintptr_t)low & ~(uintptr_t)15); for (char **stack_p = ((char**)high) - 1; stack_p > (char**)low; stack_p--) { char *p = *stack_p; @@ -570,13 +570,13 @@ static void gc_scrub_range(char *low, char *high) // set mark to GC_MARKED (young and marked) tag->bits.gc = GC_MARKED; } - 
ptls->safe_restore = old_buf; + jl_set_safe_restore(old_buf); } static void gc_scrub_task(jl_task_t *ta) { int16_t tid = ta->tid; - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; jl_ptls_t ptls2 = NULL; if (tid != -1) ptls2 = jl_all_tls_states[tid]; @@ -1252,12 +1252,12 @@ int gc_slot_to_arrayidx(void *obj, void *_slot) // `pc_offset` will be added to `sp` for convenience in the debugger. NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_offset) { - jl_jmp_buf *old_buf = ptls->safe_restore; + jl_jmp_buf *old_buf = jl_get_safe_restore(); jl_jmp_buf buf; - ptls->safe_restore = &buf; + jl_set_safe_restore(&buf); if (jl_setjmp(buf, 0) != 0) { jl_safe_printf("\n!!! ERROR when unwinding gc mark loop -- ABORTING !!!\n"); - ptls->safe_restore = old_buf; + jl_set_safe_restore(old_buf); return; } void **top = sp.pc + pc_offset; @@ -1378,7 +1378,7 @@ NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_off break; } } - ptls->safe_restore = old_buf; + jl_set_safe_restore(old_buf); } #ifdef __cplusplus diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 934dac2d7d6c6d..3708531e9b4051 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -119,7 +119,8 @@ static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz) { - _jl_free_stack(jl_get_ptls_states(), stkbuf, bufsz); + jl_task_t *ct = jl_current_task; + _jl_free_stack(ct->ptls, stkbuf, bufsz); } @@ -142,7 +143,8 @@ void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task) JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPOINT { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; size_t ssize = *bufsz; void *stk = NULL; if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) { @@ -250,13 +252,14 @@ void sweep_stack_pools(void) JL_DLLEXPORT jl_array_t *jl_live_tasks(void) { - jl_ptls_t ptls = 
jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; arraylist_t *live_tasks = &ptls->heap.live_tasks; size_t i, j, l; jl_array_t *a; do { l = live_tasks->len; - a = jl_alloc_vec_any(l + 1); // may gc + a = jl_alloc_vec_any(l + 1); // may gc, changing the number of tasks } while (l + 1 < live_tasks->len); l = live_tasks->len; void **lst = live_tasks->items; diff --git a/src/gc.c b/src/gc.c index df06d7e5a6e7f6..45a2cb11e30d8d 100644 --- a/src/gc.c +++ b/src/gc.c @@ -265,7 +265,7 @@ static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT jl_gc_have_pending_finalizers = 1; } -static void run_finalizer(jl_ptls_t ptls, jl_value_t *o, jl_value_t *ff) +static void run_finalizer(jl_task_t *ct, jl_value_t *o, jl_value_t *ff) { if (gc_ptr_tag(o, 1)) { ((void (*)(void*))ff)(gc_ptr_clear_tag(o, 1)); @@ -273,10 +273,10 @@ static void run_finalizer(jl_ptls_t ptls, jl_value_t *o, jl_value_t *ff) } jl_value_t *args[2] = {ff,o}; JL_TRY { - size_t last_age = jl_get_ptls_states()->world_age; - jl_get_ptls_states()->world_age = jl_world_counter; + size_t last_age = ct->world_age; + ct->world_age = jl_world_counter; jl_apply(args, 2); - jl_get_ptls_states()->world_age = last_age; + ct->world_age = last_age; } JL_CATCH { jl_printf((JL_STREAM*)STDERR_FILENO, "error in running finalizer: "); @@ -340,36 +340,36 @@ static void finalize_object(arraylist_t *list, jl_value_t *o, // The first two entries are assumed to be empty and the rest are assumed to // be pointers to `jl_value_t` objects -static void jl_gc_push_arraylist(jl_ptls_t ptls, arraylist_t *list) +static void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list) { void **items = list->items; items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2); - items[1] = ptls->pgcstack; - ptls->pgcstack = (jl_gcframe_t*)items; + items[1] = ct->gcstack; + ct->gcstack = (jl_gcframe_t*)items; } // Same assumption as `jl_gc_push_arraylist`. 
Requires the finalizers lock // to be hold for the current thread and will release the lock when the // function returns. -static void jl_gc_run_finalizers_in_list(jl_ptls_t ptls, arraylist_t *list) +static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) { // empty out the first two entries for the GC frame arraylist_push(list, list->items[0]); arraylist_push(list, list->items[1]); - jl_gc_push_arraylist(ptls, list); + jl_gc_push_arraylist(ct, list); jl_value_t **items = (jl_value_t**)list->items; size_t len = list->len; JL_UNLOCK_NOGC(&finalizers_lock); // run finalizers in reverse order they were added, so lower-level finalizers run last for (size_t i = len-4; i >= 2; i -= 2) - run_finalizer(ptls, items[i], items[i + 1]); + run_finalizer(ct, items[i], items[i + 1]); // first entries were moved last to make room for GC frame metadata - run_finalizer(ptls, items[len-2], items[len-1]); + run_finalizer(ct, items[len-2], items[len-1]); // matches the jl_gc_push_arraylist above JL_GC_POP(); } -static void run_finalizers(jl_ptls_t ptls) +static void run_finalizers(jl_task_t *ct) { // Racy fast path: // The race here should be OK since the race can only happen if @@ -391,17 +391,18 @@ static void run_finalizers(jl_ptls_t ptls) jl_gc_have_pending_finalizers = 0; arraylist_new(&to_finalize, 0); // This releases the finalizers lock. 
- jl_gc_run_finalizers_in_list(ptls, &copied_list); + jl_gc_run_finalizers_in_list(ct, &copied_list); arraylist_free(&copied_list); } -JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_ptls_t ptls) +JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) { - if (ptls == NULL) - ptls = jl_get_ptls_states(); + if (ct == NULL) + ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) { ptls->in_finalizer = 1; - run_finalizers(ptls); + run_finalizers(ct); ptls->in_finalizer = 0; } } @@ -409,30 +410,31 @@ JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_ptls_t ptls) JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls) { if (ptls == NULL) - ptls = jl_get_ptls_states(); + ptls = jl_current_task->ptls; return ptls->finalizers_inhibited; } JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; ptls->finalizers_inhibited++; } JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; #ifdef NDEBUG - ptls->finalizers_inhibited--; + ct->ptls->finalizers_inhibited--; #else - jl_gc_enable_finalizers(ptls, 1); + jl_gc_enable_finalizers(ct, 1); #endif } -JL_DLLEXPORT void jl_gc_enable_finalizers(jl_ptls_t ptls, int on) +JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on) { - if (ptls == NULL) - ptls = jl_get_ptls_states(); + if (ct == NULL) + ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; int old_val = ptls->finalizers_inhibited; int new_val = old_val + (on ? 
-1 : 1); if (new_val < 0) { @@ -452,7 +454,7 @@ JL_DLLEXPORT void jl_gc_enable_finalizers(jl_ptls_t ptls, int on) } ptls->finalizers_inhibited = new_val; if (jl_gc_have_pending_finalizers) { - jl_gc_run_pending_finalizers(ptls); + jl_gc_run_pending_finalizers(ct); } } @@ -470,14 +472,14 @@ static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT flist->len = 0; } -void jl_gc_run_all_finalizers(jl_ptls_t ptls) +void jl_gc_run_all_finalizers(jl_task_t *ct) { schedule_all_finalizers(&finalizer_list_marked); for (int i = 0;i < jl_n_threads;i++) { jl_ptls_t ptls2 = jl_all_tls_states[i]; schedule_all_finalizers(&ptls2->finalizers); } - run_finalizers(ptls); + run_finalizers(ct); } static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT @@ -523,7 +525,7 @@ JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_funct } } -JL_DLLEXPORT void jl_finalize_th(jl_ptls_t ptls, jl_value_t *o) +JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o) { JL_LOCK_NOGC(&finalizers_lock); // Copy the finalizers into a temporary list so that code in the finalizer @@ -535,12 +537,12 @@ JL_DLLEXPORT void jl_finalize_th(jl_ptls_t ptls, jl_value_t *o) // still holding a reference to the object for (int i = 0; i < jl_n_threads; i++) { jl_ptls_t ptls2 = jl_all_tls_states[i]; - finalize_object(&ptls2->finalizers, o, &copied_list, ptls != ptls2); + finalize_object(&ptls2->finalizers, o, &copied_list, ct->tid != i); } finalize_object(&finalizer_list_marked, o, &copied_list, 0); if (copied_list.len > 0) { // This releases the finalizers lock. 
- jl_gc_run_finalizers_in_list(ptls, &copied_list); + jl_gc_run_finalizers_in_list(ct, &copied_list); } else { JL_UNLOCK_NOGC(&finalizers_lock); @@ -1047,7 +1049,7 @@ void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; ptls->gc_num.allocd += sz; } @@ -1178,7 +1180,7 @@ static NOINLINE jl_taggedvalue_t *add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT { // Do not pass in `ptls` as argument. This slows down the fast path // in pool_alloc significantly - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; jl_gc_pagemeta_t *pg = jl_gc_alloc_page(); pg->osize = p->osize; pg->ages = (uint8_t*)malloc_s(GC_PAGE_SZ / 8 / p->osize + 1); @@ -1549,7 +1551,7 @@ static void gc_sweep_perm_alloc(void) JL_DLLEXPORT void jl_gc_queue_root(jl_value_t *ptr) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; jl_taggedvalue_t *o = jl_astaggedvalue(ptr); // The modification of the `gc_bits` is not atomic but it // should be safe here since GC is not allowed to run here and we only @@ -1601,7 +1603,7 @@ void jl_gc_queue_multiroot(jl_value_t *parent, jl_value_t *ptr) JL_NOTSAFEPOINT void gc_queue_binding(jl_binding_t *bnd) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; jl_taggedvalue_t *buf = jl_astaggedvalue(bnd); buf->bits.gc = GC_MARKED; arraylist_push(&ptls->heap.rem_bindings, bnd); @@ -2636,40 +2638,31 @@ mark: { jl_task_t *ta = (jl_task_t*)new_obj; gc_scrub_record_task(ta); void *stkbuf = ta->stkbuf; - int16_t tid = ta->tid; - jl_ptls_t ptls2 = NULL; - if (tid != -1) - ptls2 = jl_all_tls_states[tid]; if (gc_cblist_task_scanner) { export_gc_state(ptls, &sp); gc_invoke_callbacks(jl_gc_cb_task_scanner_t, gc_cblist_task_scanner, - (ta, ptls2 != NULL && ta == ptls2->root_task)); + (ta, ta->tid != -1 && ta == 
jl_all_tls_states[ta->tid]->root_task)); import_gc_state(ptls, &sp); } #ifdef COPY_STACKS if (stkbuf && ta->copy_stack) gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz); #endif - jl_gcframe_t *s = NULL; + jl_gcframe_t *s = ta->gcstack; size_t nroots; uintptr_t offset = 0; uintptr_t lb = 0; uintptr_t ub = (uintptr_t)-1; - if (ptls2 && ta == ptls2->current_task) { - s = ptls2->pgcstack; - } - else if (stkbuf) { - s = ta->gcstack; #ifdef COPY_STACKS - if (ta->copy_stack) { - assert(tid != -1 && ptls2 != NULL); - ub = (uintptr_t)ptls2->stackbase; - lb = ub - ta->copy_stack; - offset = (uintptr_t)stkbuf - lb; - } -#endif + if (stkbuf && ta->copy_stack && ta->ptls == NULL) { + assert(ta->tid >= 0); + jl_ptls_t ptls2 = jl_all_tls_states[ta->tid]; + ub = (uintptr_t)ptls2->stackbase; + lb = ub - ta->copy_stack; + offset = (uintptr_t)stkbuf - lb; } +#endif if (s) { nroots = gc_read_stack(&s->nroots, offset, lb, ub); assert(nroots <= UINT32_MAX); @@ -2778,6 +2771,8 @@ static void jl_gc_queue_thread_local(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp gc_mark_queue_obj(gc_cache, sp, ptls2->root_task); if (ptls2->next_task) gc_mark_queue_obj(gc_cache, sp, ptls2->next_task); + if (ptls2->previous_task) // shouldn't be necessary, but no reason not to + gc_mark_queue_obj(gc_cache, sp, ptls2->previous_task); if (ptls2->previous_exception) gc_mark_queue_obj(gc_cache, sp, ptls2->previous_exception); } @@ -2860,11 +2855,11 @@ static void sweep_finalizer_list(arraylist_t *list) } // collector entry point and control -static volatile uint32_t jl_gc_disable_counter = 0; +static volatile uint32_t jl_gc_disable_counter = 1; JL_DLLEXPORT int jl_gc_enable(int on) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; int prev = !ptls->disable_gc; ptls->disable_gc = (on == 0); if (on && !prev) { @@ -2885,7 +2880,7 @@ JL_DLLEXPORT int jl_gc_enable(int on) JL_DLLEXPORT int jl_gc_is_enabled(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = 
jl_current_task->ptls; return !ptls->disable_gc; } @@ -3204,7 +3199,8 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; if (jl_gc_disable_counter) { size_t localbytes = ptls->gc_num.allocd + gc_num.interval; ptls->gc_num.allocd = -(int64_t)gc_num.interval; @@ -3256,7 +3252,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) if (!ptls->finalizers_inhibited && ptls->locks.len == 0) { int8_t was_in_finalizer = ptls->in_finalizer; ptls->in_finalizer = 1; - run_finalizers(ptls); + run_finalizers(ct); ptls->in_finalizer = was_in_finalizer; } gc_invoke_callbacks(jl_gc_cb_post_gc_t, @@ -3285,9 +3281,11 @@ JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { + if (ptls->tid == 0) + ptls->disable_gc = 1; jl_thread_heap_t *heap = &ptls->heap; jl_gc_pool_t *p = heap->norm_pools; - for(int i=0; i < JL_GC_N_POOLS; i++) { + for (int i = 0; i < JL_GC_N_POOLS; i++) { assert((jl_gc_sizeclasses[i] < 16 && jl_gc_sizeclasses[i] % sizeof(void*) == 0) || (jl_gc_sizeclasses[i] % 16 == 0)); @@ -3360,8 +3358,10 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void) JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) { - jl_ptls_t ptls = jl_get_ptls_states(); - if (ptls && ptls->world_age) { + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + if (pgcstack && ct->world_age) { + jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); ptls->gc_num.allocd += sz; ptls->gc_num.malloc++; @@ -3371,8 +3371,10 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) { - jl_ptls_t ptls = jl_get_ptls_states(); - if (ptls && ptls->world_age) { + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = 
jl_current_task; + if (pgcstack && ct->world_age) { + jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); ptls->gc_num.allocd += nm*sz; ptls->gc_num.malloc++; @@ -3382,9 +3384,11 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; free(p); - if (ptls && ptls->world_age) { + if (pgcstack && ct->world_age) { + jl_ptls_t ptls = ct->ptls; ptls->gc_num.freed += sz; ptls->gc_num.freecall++; } @@ -3392,8 +3396,10 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz) { - jl_ptls_t ptls = jl_get_ptls_states(); - if (ptls && ptls->world_age) { + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + if (pgcstack && ct->world_age) { + jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); if (sz < old) ptls->gc_num.freed += (old - sz); @@ -3458,7 +3464,7 @@ JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; maybe_collect(ptls); size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); if (allocsz < sz) // overflow in adding offs, size was "negative" @@ -3521,7 +3527,7 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, int isaligned, jl_value_t *owner) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; return gc_managed_realloc_(ptls, d, sz, oldsz, isaligned, owner, 1); } @@ -3546,7 +3552,7 @@ jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz) if (allocsz < sz) // overflow in adding offs, size was "negative" jl_throw(jl_memory_exception); bigval_t *hdr = bigval_header(v); - 
jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; maybe_collect(ptls); // don't want this to happen during jl_gc_managed_realloc gc_big_object_unlink(hdr); // TODO: this is not safe since it frees the old pointer. ideally we'd like @@ -3652,49 +3658,48 @@ void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; jl_gc_add_finalizer_th(ptls, v, f); } JL_DLLEXPORT void jl_finalize(jl_value_t *o) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_finalize_th(ptls, o); + jl_finalize_th(jl_current_task, o); } JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; return jl_gc_new_weakref_th(ptls, value); } JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; return jl_gc_alloc(ptls, sz, NULL); } JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; return jl_gc_alloc(ptls, 0, NULL); } JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; return jl_gc_alloc(ptls, sizeof(void*), NULL); } JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; return jl_gc_alloc(ptls, sizeof(void*) * 2, NULL); } JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; return jl_gc_alloc(ptls, sizeof(void*) * 3, NULL); } diff --git a/src/gf.c b/src/gf.c index 5bdf7c8ec29b09..0c02fd1602f089 100644 --- a/src/gf.c +++ b/src/gf.c @@ -32,15 +32,15 @@ JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT JL_DLLEXPORT 
size_t jl_get_tls_world_age(void) JL_NOTSAFEPOINT { - return jl_get_ptls_states()->world_age; + return jl_current_task->world_age; } /// ----- Handling for Julia callbacks ----- /// JL_DLLEXPORT int8_t jl_is_in_pure_context(void) { - jl_ptls_t ptls = jl_get_ptls_states(); - return ptls->in_pure_callback; + jl_task_t *ct = jl_current_task; + return ct->ptls->in_pure_callback; } tracer_cb jl_newmeth_tracer = NULL; @@ -51,15 +51,15 @@ JL_DLLEXPORT void jl_register_newmeth_tracer(void (*callback)(jl_method_t *trace void jl_call_tracer(tracer_cb callback, jl_value_t *tracee) { - jl_ptls_t ptls = jl_get_ptls_states(); - int last_in = ptls->in_pure_callback; + jl_task_t *ct = jl_current_task; + int last_in = ct->ptls->in_pure_callback; JL_TRY { - ptls->in_pure_callback = 1; + ct->ptls->in_pure_callback = 1; callback(tracee); - ptls->in_pure_callback = last_in; + ct->ptls->in_pure_callback = last_in; } JL_CATCH { - ptls->in_pure_callback = last_in; + ct->ptls->in_pure_callback = last_in; jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: tracer callback function threw an error:\n"); jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); @@ -282,13 +282,13 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) jl_printf(JL_STDERR, "\n"); } #endif - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; int last_errno = errno; #ifdef _OS_WINDOWS_ DWORD last_error = GetLastError(); #endif - size_t last_age = ptls->world_age; - ptls->world_age = jl_typeinf_world; + size_t last_age = ct->world_age; + ct->world_age = jl_typeinf_world; mi->inInference = 1; in_inference++; JL_TRY { @@ -301,7 +301,7 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) jlbacktrace(); // written to STDERR_FILENO src = NULL; } - ptls->world_age = last_age; + ct->world_age = last_age; in_inference--; mi->inInference = 0; #ifdef _OS_WINDOWS_ @@ -319,11 +319,11 @@ 
jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) JL_DLLEXPORT jl_value_t *jl_call_in_typeinf_world(jl_value_t **args, int nargs) { - jl_ptls_t ptls = jl_get_ptls_states(); - size_t last_age = ptls->world_age; - ptls->world_age = jl_typeinf_world; + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_typeinf_world; jl_value_t *ret = jl_apply(args, nargs); - ptls->world_age = last_age; + ct->world_age = last_age; return ret; } @@ -368,9 +368,9 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst( int32_t const_flags, size_t min_world, size_t max_world /*, jl_array_t *edges, int absolute_max*/) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; assert(min_world <= max_world && "attempting to set invalid world constraints"); - jl_code_instance_t *codeinst = (jl_code_instance_t*)jl_gc_alloc(ptls, sizeof(jl_code_instance_t), + jl_code_instance_t *codeinst = (jl_code_instance_t*)jl_gc_alloc(ct->ptls, sizeof(jl_code_instance_t), jl_code_instance_type); codeinst->def = mi; codeinst->min_world = min_world; @@ -1335,15 +1335,16 @@ static void invalidate_external(jl_method_instance_t *mi, size_t max_world) { args[1] = (jl_value_t*)mi; args[2] = jl_box_uint32(max_world); - size_t last_age = jl_get_ptls_states()->world_age; - jl_get_ptls_states()->world_age = jl_get_world_counter(); + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_get_world_counter(); jl_value_t **cbs = (jl_value_t**)jl_array_ptr_data(callbacks); for (i = 0; i < l; i++) { args[0] = cbs[i]; jl_apply(args, 3); } - jl_get_ptls_states()->world_age = last_age; + ct->world_age = last_age; JL_GC_POP(); } JL_CATCH { @@ -1821,7 +1822,7 @@ static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args, jl_printf((JL_STREAM*)STDERR_FILENO, "A method error occurred before the base MethodError type was defined. 
Aborting...\n"); jl_static_show((JL_STREAM*)STDERR_FILENO,(jl_value_t*)f); jl_printf((JL_STREAM*)STDERR_FILENO," world %u\n", (unsigned)world); jl_static_show((JL_STREAM*)STDERR_FILENO,args); jl_printf((JL_STREAM*)STDERR_FILENO,"\n"); - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0); jl_critical_error(0, NULL); abort(); @@ -2246,7 +2247,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t JL_DLLEXPORT jl_value_t *jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t nargs, jl_method_instance_t *mfunc) { - size_t world = jl_get_ptls_states()->world_age; + size_t world = jl_current_task->world_age; return _jl_invoke(F, args, nargs, mfunc, world); } @@ -2417,7 +2418,7 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t JL_DLLEXPORT jl_value_t *jl_apply_generic(jl_value_t *F, jl_value_t **args, uint32_t nargs) { - size_t world = jl_get_ptls_states()->world_age; + size_t world = jl_current_task->world_age; jl_method_instance_t *mfunc = jl_lookup_generic_(F, args, nargs, jl_int32hash_fast(jl_return_address()), world); @@ -2471,7 +2472,7 @@ JL_DLLEXPORT jl_value_t *jl_gf_invoke_lookup_worlds(jl_value_t *types, size_t wo // NOTE: assumes argument type is a subtype of the lookup type. 
jl_value_t *jl_gf_invoke(jl_value_t *types0, jl_value_t *gf, jl_value_t **args, size_t nargs) { - size_t world = jl_get_ptls_states()->world_age; + size_t world = jl_current_task->world_age; jl_value_t *types = NULL; JL_GC_PUSH1(&types); types = jl_argtype_with_function(gf, types0); @@ -2520,7 +2521,7 @@ jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation } JL_GC_PROMISE_ROOTED(mfunc); - size_t world = jl_get_ptls_states()->world_age; + size_t world = jl_current_task->world_age; return _jl_invoke(gf, args, nargs - 1, mfunc, world); } @@ -2607,8 +2608,8 @@ enum SIGNATURE_FULLY_COVERS { static jl_method_match_t *make_method_match(jl_tupletype_t *spec_types, jl_svec_t *sparams, jl_method_t *method, enum SIGNATURE_FULLY_COVERS fully_covers) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_method_match_t *match = (jl_method_match_t*)jl_gc_alloc(ptls, sizeof(jl_method_match_t), jl_method_match_type); + jl_task_t *ct = jl_current_task; + jl_method_match_t *match = (jl_method_match_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_match_t), jl_method_match_type); match->spec_types = spec_types; match->sparams = sparams; match->method = method; diff --git a/src/init.c b/src/init.c index 7ed1a200eb2a5d..5f49e2becd7c51 100644 --- a/src/init.c +++ b/src/init.c @@ -205,7 +205,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) if (jl_all_tls_states == NULL) return; - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; if (exitcode == 0) jl_write_compiler_output(); @@ -218,10 +218,10 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) jl_value_t *f = jl_get_global(jl_base_module, jl_symbol("_atexit")); if (f != NULL) { JL_TRY { - size_t last_age = ptls->world_age; - ptls->world_age = jl_get_world_counter(); + size_t last_age = ct->world_age; + ct->world_age = jl_get_world_counter(); jl_apply(&f, 1); - ptls->world_age = last_age; + ct->world_age = last_age; } 
JL_CATCH { jl_printf((JL_STREAM*)STDERR_FILENO, "\natexit hook threw an error: "); @@ -237,7 +237,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_STDOUT = (uv_stream_t*) STDOUT_FILENO; JL_STDERR = (uv_stream_t*) STDERR_FILENO; - jl_gc_run_all_finalizers(ptls); + jl_gc_run_all_finalizers(ct); uv_loop_t *loop = jl_global_event_loop(); @@ -249,7 +249,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_UV_LOCK(); uv_walk(loop, jl_uv_exitcleanup_walk, &queue); struct uv_shutdown_queue_item *item = queue.first; - if (ptls->current_task != NULL) { + if (ct != NULL) { while (item) { JL_TRY { while (item) { @@ -615,8 +615,8 @@ static void jl_resolve_sysimg_location(JL_IMAGE_SEARCH rel) static void jl_set_io_wait(int v) { - jl_ptls_t ptls = jl_get_ptls_states(); - ptls->io_wait = v; + jl_task_t *ct = jl_current_task; + ct->ptls->io_wait = v; } extern jl_mutex_t jl_modules_mutex; @@ -632,9 +632,7 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) { jl_init_timing(); // Make sure we finalize the tls callback before starting any threads. 
- jl_get_ptls_states_getter(); - jl_ptls_t ptls = jl_get_ptls_states(); - (void)ptls; assert(ptls); // make sure early that we have initialized ptls + (void)jl_get_pgcstack(); jl_safepoint_init(); libsupport_init(); htable_new(&jl_current_modules, 0); @@ -646,6 +644,7 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) init_stdio(); restore_fp_env(); restore_signals(); + jl_init_intrinsic_properties(); jl_page_size = jl_getpagesize(); uint64_t total_mem = uv_get_total_memory(); @@ -727,11 +726,14 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) } jl_gc_init(); - + jl_init_tasks(); jl_init_threading(); - jl_init_intrinsic_properties(); - jl_gc_enable(0); + jl_ptls_t ptls = jl_init_threadtls(0); + jl_init_root_task(ptls, stack_lo, stack_hi); + jl_task_t *ct = jl_current_task; + + jl_init_threadinginfra(); jl_resolve_sysimg_location(rel); // loads sysimg if available, and conditionally sets jl_options.cpu_target @@ -740,16 +742,12 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) if (jl_options.cpu_target == NULL) jl_options.cpu_target = "native"; - if (jl_options.image_file) { + if (jl_options.image_file) jl_restore_system_image(jl_options.image_file); - } - else { + else jl_init_types(); - jl_init_codegen(); - } - jl_init_tasks(); - jl_init_root_task(stack_lo, stack_hi); + jl_init_codegen(); jl_init_common_symbols(); jl_init_flisp(); jl_init_serializer(); @@ -770,10 +768,10 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) // Do initialization needed before starting child threads jl_value_t *f = jl_get_global(jl_base_module, jl_symbol("__preinit_threads__")); if (f) { - size_t last_age = ptls->world_age; - ptls->world_age = jl_get_world_counter(); + size_t last_age = ct->world_age; + ct->world_age = jl_get_world_counter(); jl_apply(&f, 1); - ptls->world_age = last_age; + ct->world_age = last_age; } } else { diff --git a/src/interpreter.c b/src/interpreter.c index 008886f1c99c9b..6df5d122a7695d 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ 
-405,13 +405,14 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, { jl_handler_t __eh; size_t ns = jl_array_len(stmts); + jl_task_t *ct = jl_current_task; while (1) { s->ip = ip; if (ip >= ns) jl_error("`body` expression must terminate in `return`. Use `block` instead."); if (toplevel) - jl_get_ptls_states()->world_age = jl_world_counter; + ct->world_age = jl_world_counter; jl_value_t *stmt = jl_array_ptr_ref(stmts, ip); assert(!jl_is_phinode(stmt)); size_t next_ip = ip + 1; @@ -516,8 +517,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, int hand_n_leave = jl_unbox_long(jl_exprarg(stmt, 0)); assert(hand_n_leave > 0); // equivalent to jl_pop_handler(hand_n_leave), but retaining eh for longjmp: - jl_ptls_t ptls = jl_get_ptls_states(); - jl_handler_t *eh = ptls->current_task->eh; + jl_handler_t *eh = ct->eh; while (--hand_n_leave > 0) eh = eh->prev; jl_eh_restore_state(eh); @@ -714,9 +714,10 @@ jl_value_t *NOINLINE jl_interpret_toplevel_thunk(jl_module_t *m, jl_code_info_t s->continue_at = 0; s->mi = NULL; JL_GC_ENABLEFRAME(s); - size_t last_age = jl_get_ptls_states()->world_age; + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; jl_value_t *r = eval_body(stmts, s, 0, 1); - jl_get_ptls_states()->world_age = last_age; + ct->world_age = last_age; JL_GC_POP(); return r; } diff --git a/src/ircode.c b/src/ircode.c index da78a3a8a327ae..e86f022df04ec4 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -699,7 +699,7 @@ JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) jl_ircode_state s = { &dest, m, - jl_get_ptls_states() + jl_current_task->ptls }; uint8_t flags = (code->aggressive_constprop << 4) @@ -783,7 +783,7 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t jl_ircode_state s = { &src, m, - jl_get_ptls_states() + jl_current_task->ptls }; jl_code_info_t *code = jl_new_code_info_uninit(); diff --git 
a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index d37dd43bfb04ac..82611bfb50dfab 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -436,7 +436,6 @@ XX(jl_set_module_uuid) \ XX(jl_set_next_task) \ XX(jl_set_nth_field) \ - XX(jl_set_ptls_states_getter) \ XX(jl_set_safe_restore) \ XX(jl_set_sysimg_so) \ XX(jl_set_task_tid) \ diff --git a/src/jl_uv.c b/src/jl_uv.c index 78238bf07e8ebe..a2dce926a924f6 100644 --- a/src/jl_uv.c +++ b/src/jl_uv.c @@ -105,11 +105,11 @@ static void jl_uv_closeHandle(uv_handle_t *handle) JL_STDERR = (JL_STREAM*)STDERR_FILENO; // also let the client app do its own cleanup if (handle->type != UV_FILE && handle->data) { - jl_ptls_t ptls = jl_get_ptls_states(); - size_t last_age = ptls->world_age; - ptls->world_age = jl_world_counter; + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_world_counter; jl_uv_call_close_callback((jl_value_t*)handle->data); - ptls->world_age = last_age; + ct->world_age = last_age; } if (handle == (uv_handle_t*)&signal_async) return; @@ -205,17 +205,17 @@ extern volatile unsigned _threadedregion; JL_DLLEXPORT int jl_process_events(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; uv_loop_t *loop = jl_io_loop; - jl_gc_safepoint_(ptls); - if (loop && (_threadedregion || ptls->tid == 0)) { + jl_gc_safepoint_(ct->ptls); + if (loop && (_threadedregion || ct->tid == 0)) { if (jl_atomic_load(&jl_uv_n_waiters) == 0 && jl_mutex_trylock(&jl_uv_mutex)) { loop->stop_flag = 0; int r = uv_run(loop, UV_RUN_NOWAIT); JL_UV_UNLOCK(); return r; } - jl_gc_safepoint_(ptls); + jl_gc_safepoint_(ct->ptls); } return 0; } @@ -404,9 +404,9 @@ JL_DLLEXPORT int jl_fs_access(char *path, int mode) JL_DLLEXPORT int jl_fs_write(uv_os_fd_t handle, const char *data, size_t len, int64_t offset) JL_NOTSAFEPOINT { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_get_current_task(); // TODO: fix this cheating - if (ptls->safe_restore || 
ptls->tid != 0) + if (jl_get_safe_restore() || ct == NULL || ct->tid != 0) #ifdef _OS_WINDOWS_ return WriteFile(handle, data, len, NULL, NULL); #else @@ -505,8 +505,8 @@ JL_DLLEXPORT void jl_uv_puts(uv_stream_t *stream, const char *str, size_t n) } // TODO: Hack to make CoreIO thread-safer - jl_ptls_t ptls = jl_get_ptls_states(); - if (ptls->tid != 0) { + jl_task_t *ct = jl_get_current_task(); + if (ct == NULL || ct->tid != 0) { if (stream == JL_STDOUT) { fd = UV_STDOUT_FD; } diff --git a/src/jlapi.c b/src/jlapi.c index 669cce270c2d77..2192d4cee665f0 100644 --- a/src/jlapi.c +++ b/src/jlapi.c @@ -117,7 +117,7 @@ JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str) jl_exception_clear(); } JL_CATCH { - jl_get_ptls_states()->previous_exception = jl_current_exception(); + jl_current_task->ptls->previous_exception = jl_current_exception(); r = NULL; } return r; @@ -125,18 +125,18 @@ JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str) JL_DLLEXPORT jl_value_t *jl_current_exception(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT { - jl_excstack_t *s = jl_get_ptls_states()->current_task->excstack; + jl_excstack_t *s = jl_current_task->excstack; return s && s->top != 0 ? 
jl_excstack_exception(s, s->top) : jl_nothing; } JL_DLLEXPORT jl_value_t *jl_exception_occurred(void) { - return jl_get_ptls_states()->previous_exception; + return jl_current_task->ptls->previous_exception; } JL_DLLEXPORT void jl_exception_clear(void) { - jl_get_ptls_states()->previous_exception = NULL; + jl_current_task->ptls->previous_exception = NULL; } // get the name of a type as a string @@ -176,6 +176,7 @@ JL_DLLEXPORT const char *jl_string_ptr(jl_value_t *s) JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, int32_t nargs) { jl_value_t *v; + jl_task_t *ct = jl_current_task; nargs++; // add f to args JL_TRY { jl_value_t **argv; @@ -183,15 +184,15 @@ JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, int32_t na argv[0] = (jl_value_t*)f; for (int i = 1; i < nargs; i++) argv[i] = args[i - 1]; - size_t last_age = jl_get_ptls_states()->world_age; - jl_get_ptls_states()->world_age = jl_get_world_counter(); + size_t last_age = ct->world_age; + ct->world_age = jl_get_world_counter(); v = jl_apply(argv, nargs); - jl_get_ptls_states()->world_age = last_age; + ct->world_age = last_age; JL_GC_POP(); jl_exception_clear(); } JL_CATCH { - jl_get_ptls_states()->previous_exception = jl_current_exception(); + ct->ptls->previous_exception = jl_current_exception(); v = NULL; } return v; @@ -200,17 +201,18 @@ JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, int32_t na JL_DLLEXPORT jl_value_t *jl_call0(jl_function_t *f) { jl_value_t *v; + jl_task_t *ct = jl_current_task; JL_TRY { JL_GC_PUSH1(&f); - size_t last_age = jl_get_ptls_states()->world_age; - jl_get_ptls_states()->world_age = jl_get_world_counter(); + size_t last_age = ct->world_age; + ct->world_age = jl_get_world_counter(); v = jl_apply_generic(f, NULL, 0); - jl_get_ptls_states()->world_age = last_age; + ct->world_age = last_age; JL_GC_POP(); jl_exception_clear(); } JL_CATCH { - jl_get_ptls_states()->previous_exception = jl_current_exception(); + 
ct->ptls->previous_exception = jl_current_exception(); v = NULL; } return v; @@ -219,20 +221,21 @@ JL_DLLEXPORT jl_value_t *jl_call0(jl_function_t *f) JL_DLLEXPORT jl_value_t *jl_call1(jl_function_t *f, jl_value_t *a) { jl_value_t *v; + jl_task_t *ct = jl_current_task; JL_TRY { jl_value_t **argv; JL_GC_PUSHARGS(argv, 2); argv[0] = f; argv[1] = a; - size_t last_age = jl_get_ptls_states()->world_age; - jl_get_ptls_states()->world_age = jl_get_world_counter(); + size_t last_age = ct->world_age; + ct->world_age = jl_get_world_counter(); v = jl_apply(argv, 2); - jl_get_ptls_states()->world_age = last_age; + ct->world_age = last_age; JL_GC_POP(); jl_exception_clear(); } JL_CATCH { - jl_get_ptls_states()->previous_exception = jl_current_exception(); + ct->ptls->previous_exception = jl_current_exception(); v = NULL; } return v; @@ -241,21 +244,22 @@ JL_DLLEXPORT jl_value_t *jl_call1(jl_function_t *f, jl_value_t *a) JL_DLLEXPORT jl_value_t *jl_call2(jl_function_t *f, jl_value_t *a, jl_value_t *b) { jl_value_t *v; + jl_task_t *ct = jl_current_task; JL_TRY { jl_value_t **argv; JL_GC_PUSHARGS(argv, 3); argv[0] = f; argv[1] = a; argv[2] = b; - size_t last_age = jl_get_ptls_states()->world_age; - jl_get_ptls_states()->world_age = jl_get_world_counter(); + size_t last_age = ct->world_age; + ct->world_age = jl_get_world_counter(); v = jl_apply(argv, 3); - jl_get_ptls_states()->world_age = last_age; + ct->world_age = last_age; JL_GC_POP(); jl_exception_clear(); } JL_CATCH { - jl_get_ptls_states()->previous_exception = jl_current_exception(); + ct->ptls->previous_exception = jl_current_exception(); v = NULL; } return v; @@ -272,15 +276,16 @@ JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f, jl_value_t *a, argv[1] = a; argv[2] = b; argv[3] = c; - size_t last_age = jl_get_ptls_states()->world_age; - jl_get_ptls_states()->world_age = jl_get_world_counter(); + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_get_world_counter(); v = 
jl_apply(argv, 4); - jl_get_ptls_states()->world_age = last_age; + ct->world_age = last_age; JL_GC_POP(); jl_exception_clear(); } JL_CATCH { - jl_get_ptls_states()->previous_exception = jl_current_exception(); + jl_current_task->ptls->previous_exception = jl_current_exception(); v = NULL; } return v; @@ -305,7 +310,7 @@ JL_DLLEXPORT jl_value_t *jl_get_field(jl_value_t *o, const char *fld) jl_exception_clear(); } JL_CATCH { - jl_get_ptls_states()->previous_exception = jl_current_exception(); + jl_current_task->ptls->previous_exception = jl_current_exception(); v = NULL; } return v; @@ -318,8 +323,8 @@ JL_DLLEXPORT void jl_sigatomic_begin(void) JL_DLLEXPORT void jl_sigatomic_end(void) { - jl_ptls_t ptls = jl_get_ptls_states(); - if (ptls->defer_signal == 0) + jl_task_t *ct = jl_current_task; + if (ct->ptls->defer_signal == 0) jl_error("sigatomic_end called in non-sigatomic region"); JL_SIGATOMIC_END(); } @@ -431,33 +436,33 @@ JL_DLLEXPORT jl_value_t *(jl_get_fieldtypes)(jl_value_t *v) #ifndef __clang_analyzer__ JL_DLLEXPORT int8_t (jl_gc_unsafe_enter)(void) { - jl_ptls_t ptls = jl_get_ptls_states(); - return jl_gc_unsafe_enter(ptls); + jl_task_t *ct = jl_current_task; + return jl_gc_unsafe_enter(ct->ptls); } JL_DLLEXPORT void (jl_gc_unsafe_leave)(int8_t state) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_gc_unsafe_leave(ptls, state); + jl_task_t *ct = jl_current_task; + jl_gc_unsafe_leave(ct->ptls, state); } JL_DLLEXPORT int8_t (jl_gc_safe_enter)(void) { - jl_ptls_t ptls = jl_get_ptls_states(); - return jl_gc_safe_enter(ptls); + jl_task_t *ct = jl_current_task; + return jl_gc_safe_enter(ct->ptls); } JL_DLLEXPORT void (jl_gc_safe_leave)(int8_t state) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_gc_safe_leave(ptls, state); + jl_task_t *ct = jl_current_task; + jl_gc_safe_leave(ct->ptls, state); } #endif JL_DLLEXPORT void (jl_gc_safepoint)(void) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_gc_safepoint_(ptls); + jl_task_t *ct = jl_current_task; + 
jl_gc_safepoint_(ct->ptls); } JL_DLLEXPORT void (jl_cpu_pause)(void) @@ -548,10 +553,11 @@ static NOINLINE int true_main(int argc, char *argv[]) if (start_client) { JL_TRY { - size_t last_age = jl_get_ptls_states()->world_age; - jl_get_ptls_states()->world_age = jl_get_world_counter(); + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_get_world_counter(); jl_apply(&start_client, 1); - jl_get_ptls_states()->world_age = last_age; + ct->world_age = last_age; } JL_CATCH { jl_no_exc_handler(jl_current_exception()); @@ -688,7 +694,7 @@ JL_DLLEXPORT int jl_repl_entrypoint(int argc, char *argv[]) julia_init(jl_options.image_file_specified ? JL_IMAGE_CWD : JL_IMAGE_JULIA_HOME); if (lisp_prompt) { - jl_get_ptls_states()->world_age = jl_get_world_counter(); + jl_current_task->world_age = jl_get_world_counter(); jl_lisp_prompt(); return 0; } diff --git a/src/jltypes.c b/src/jltypes.c index 496ce3dbe92862..12a20dfe882065 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -1841,8 +1841,8 @@ jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n) jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t); } } - jl_ptls_t ptls = jl_get_ptls_states(); - jl_vararg_t *vm = (jl_vararg_t *)jl_gc_alloc(ptls, sizeof(jl_vararg_t), jl_vararg_type); + jl_task_t *ct = jl_current_task; + jl_vararg_t *vm = (jl_vararg_t *)jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type); vm->T = t; vm->N = n; return vm; @@ -1943,7 +1943,6 @@ void jl_init_types(void) JL_GC_DISABLED jl_symbol_type = jl_new_uninitialized_datatype(); jl_simplevector_type = jl_new_uninitialized_datatype(); jl_methtable_type = jl_new_uninitialized_datatype(); - jl_nothing = jl_gc_permobj(0, NULL); jl_emptysvec = (jl_svec_t*)jl_gc_permobj(sizeof(void*), jl_simplevector_type); jl_svec_set_len_unsafe(jl_emptysvec, 0); @@ -2550,6 +2549,7 @@ void jl_init_types(void) JL_GC_DISABLED 0, 1, 6); jl_value_t *listt = jl_new_struct(jl_uniontype_type, jl_task_type, jl_nothing_type); 
jl_svecset(jl_task_type->types, 0, listt); + jl_astaggedvalue(jl_current_task)->header = (uintptr_t)jl_task_type | jl_astaggedvalue(jl_current_task)->header; jl_value_t *pointer_void = jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_nothing_type); diff --git a/src/julia.h b/src/julia.h index 1d9d735c10d06b..c8ab34605c5149 100644 --- a/src/julia.h +++ b/src/julia.h @@ -9,6 +9,7 @@ #define STORE_ARRAY_LEN //** End Configuration options **// +#include "julia_fasttls.h" #include "libsupport.h" #include #include @@ -90,7 +91,6 @@ typedef struct _jl_taggedvalue_t jl_taggedvalue_t; #include "atomics.h" -#include "tls.h" #include "julia_threads.h" #include "julia_assert.h" @@ -746,11 +746,11 @@ extern JL_DLLIMPORT jl_value_t *jl_nothing JL_GLOBALLY_ROOTED; // gc ------------------------------------------------------------------------- -typedef struct _jl_gcframe_t { +struct _jl_gcframe_t { size_t nroots; struct _jl_gcframe_t *prev; // actual roots go here -} jl_gcframe_t; +}; // NOTE: it is the caller's responsibility to make sure arguments are // rooted such that the gc can see them on the stack. 
@@ -761,7 +761,7 @@ typedef struct _jl_gcframe_t { // jl_value_t *x=NULL, *y=NULL; JL_GC_PUSH2(&x, &y); // x = f(); y = g(); foo(x, y) -#define jl_pgcstack (jl_get_ptls_states()->pgcstack) +#define jl_pgcstack (jl_current_task->gcstack) #define JL_GC_ENCODE_PUSHARGS(n) (((size_t)(n))<<2) #define JL_GC_ENCODE_PUSH(n) ((((size_t)(n))<<2)|1) @@ -1807,6 +1807,11 @@ typedef struct _jl_task_t { uint8_t _isexception; // set if `result` is an exception to throw or that we exited with // hidden state: + // saved gc stack top for context switches + jl_gcframe_t *gcstack; + size_t world_age; + // quick lookup for current ptls + jl_tls_states_t *ptls; // == jl_all_tls_states[tid] // id of owning thread - does not need to be defined until the task runs int16_t tid; // multiqueue priority @@ -1831,9 +1836,6 @@ typedef struct _jl_task_t { size_t bufsz; // actual sizeof stkbuf unsigned int copy_stack:31; // sizeof stack for copybuf unsigned int started:1; - - // saved gc stack top for context switches - jl_gcframe_t *gcstack; } jl_task_t; #define JL_TASK_STATE_RUNNABLE 0 @@ -1842,11 +1844,14 @@ typedef struct _jl_task_t { JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t*, jl_value_t*, size_t); JL_DLLEXPORT void jl_switchto(jl_task_t **pt); +JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int tid) JL_NOTSAFEPOINT; JL_DLLEXPORT void JL_NORETURN jl_throw(jl_value_t *e JL_MAYBE_UNROOTED); JL_DLLEXPORT void JL_NORETURN jl_rethrow(void); JL_DLLEXPORT void JL_NORETURN jl_sig_throw(void); JL_DLLEXPORT void JL_NORETURN jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED); JL_DLLEXPORT void JL_NORETURN jl_no_exc_handler(jl_value_t *e); +JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT; +#define jl_current_task (container_of(jl_get_pgcstack(), jl_task_t, gcstack)) #include "locks.h" // requires jl_task_t definition @@ -2111,13 +2116,13 @@ typedef struct { float value; } jl_nullable_float32_t; -#define jl_current_task 
(jl_get_ptls_states()->current_task) #define jl_root_task (jl_get_ptls_states()->root_task) -JL_DLLEXPORT jl_value_t *jl_get_current_task(void); +JL_DLLEXPORT jl_task_t *jl_get_current_task(void) JL_NOTSAFEPOINT; -JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void); -JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *); +// TODO: we need to pin the task while using this (set pure bit) +JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *) JL_NOTSAFEPOINT; // codegen interface ---------------------------------------------------------- // The root propagation here doesn't have to be literal, but callers should @@ -2144,23 +2149,6 @@ typedef struct { extern JL_DLLEXPORT jl_cgparams_t jl_default_cgparams; extern JL_DLLEXPORT int jl_default_debug_info_kind; -#if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_) -#define JULIA_DEFINE_FAST_TLS() \ -JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t jl_get_ptls_states_static(void) \ -{ \ - static __attribute__((tls_model("local-exec"))) __thread jl_tls_states_t tls_states; \ - return &tls_states; \ -} \ -__attribute__((constructor)) void jl_register_ptls_states_getter(void) \ -{ \ - /* We need to make sure this function is called before any reference to */ \ - /* TLS variables. */ \ - jl_set_ptls_states_getter(jl_get_ptls_states_static); \ -} -#else -#define JULIA_DEFINE_FAST_TLS() -#endif - #ifdef __cplusplus } #endif diff --git a/src/julia_fasttls.h b/src/julia_fasttls.h new file mode 100644 index 00000000000000..0dc0c05c82e106 --- /dev/null +++ b/src/julia_fasttls.h @@ -0,0 +1,44 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#ifndef JL_FASTTLS_H +#define JL_FASTTLS_H + +// Thread-local storage access + +#ifdef __cplusplus +extern "C" { +#endif + +/* Bring in definitions for `_OS_X_`, `PATH_MAX` and `PATHSEPSTRING`, `jl_ptls_t`, etc... 
*/ +#include "support/platform.h" +#include "support/dirpath.h" + +typedef struct _jl_gcframe_t jl_gcframe_t; + +#if defined(_OS_DARWIN_) +#include +typedef void *(jl_get_pgcstack_func)(pthread_key_t); // aka typeof(pthread_getspecific) +#else +typedef jl_gcframe_t **(jl_get_pgcstack_func)(void); +#endif + +#if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_) +#define JULIA_DEFINE_FAST_TLS \ +static __attribute__((tls_model("local-exec"))) __thread jl_gcframe_t **jl_pgcstack_localexec; \ +JL_DLLEXPORT jl_gcframe_t **jl_get_pgcstack_static(void) \ +{ \ + return jl_pgcstack_localexec; \ +} \ +JL_DLLEXPORT jl_gcframe_t ***jl_pgcstack_addr_static(void) \ +{ \ + return &jl_pgcstack_localexec; \ +} +#else +#define JULIA_DEFINE_FAST_TLS +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/julia_internal.h b/src/julia_internal.h index a07c8e8568ab67..eadad66392d639 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -396,7 +396,7 @@ JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT; JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT; void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT; void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT; -void jl_gc_run_all_finalizers(jl_ptls_t ptls); +void jl_gc_run_all_finalizers(jl_task_t *ct); void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task); void gc_queue_binding(jl_binding_t *bnd) JL_NOTSAFEPOINT; @@ -413,8 +413,8 @@ STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOT { // if parent is marked and buf is not if (__unlikely(jl_astaggedvalue(parent)->bits.gc & 1)) { - jl_ptls_t ptls = jl_get_ptls_states(); - gc_setmark_buf(ptls, bufptr, 3, minsz); + jl_task_t *ct = jl_current_task; + gc_setmark_buf(ct->ptls, bufptr, 3, minsz); } } @@ -669,7 +669,7 @@ void jl_init_intrinsic_functions(void); void jl_init_intrinsic_properties(void); void jl_init_tasks(void) JL_GC_DISABLED; void jl_init_stack_limits(int 
ismaster, void **stack_hi, void **stack_lo); -void jl_init_root_task(void *stack_lo, void *stack_hi); +void jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi); void jl_init_serializer(void); void jl_gc_init(void); void jl_init_uv(void); @@ -730,17 +730,26 @@ void jl_safepoint_defer_sigint(void); int jl_safepoint_consume_sigint(void); void jl_wake_libuv(void); +void jl_set_pgcstack(jl_gcframe_t **) JL_NOTSAFEPOINT; +#if defined(_OS_DARWIN_) +typedef pthread_key_t jl_pgcstack_key_t; +#elif defined(_OS_WINDOWS_) +typedef DWORD jl_pgcstack_key_t; +#else +typedef jl_gcframe_t ***(*jl_pgcstack_key_t)(void) JL_NOTSAFEPOINT; +#endif +void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k); + #if !defined(__clang_analyzer__) -jl_get_ptls_states_func jl_get_ptls_states_getter(void); static inline void jl_set_gc_and_wait(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; // reading own gc state doesn't need atomic ops since no one else // should store to it. 
- int8_t state = ptls->gc_state; - jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING); + int8_t state = ct->ptls->gc_state; + jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING); jl_safepoint_wait_gc(); - jl_atomic_store_release(&ptls->gc_state, state); + jl_atomic_store_release(&ct->ptls->gc_state, state); } #endif void jl_gc_set_permalloc_region(void *start, void *end); diff --git a/src/julia_threads.h b/src/julia_threads.h index b832d5c55b440f..d0d70f88c79dba 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -7,6 +7,14 @@ #include // threading ------------------------------------------------------------------ +#ifdef __cplusplus +extern "C" { +#endif + + +JL_DLLEXPORT int16_t jl_threadid(void); +JL_DLLEXPORT void jl_threading_profile(void); + // JULIA_ENABLE_THREADING may be controlled by altering JULIA_THREADS in Make.user // When running into scheduler issues, this may help provide information on the @@ -182,9 +190,7 @@ struct _jl_bt_element_t; // This includes all the thread local states we care about for a thread. // Changes to TLS field types must be reflected in codegen. #define JL_MAX_BT_SIZE 80000 -struct _jl_tls_states_t { - struct _jl_gcframe_t *pgcstack; - size_t world_age; +typedef struct _jl_tls_states_t { int16_t tid; uint64_t rngseed; volatile size_t *safepoint; @@ -197,18 +203,23 @@ struct _jl_tls_states_t { // gc_state = 2 means the thread is running unmanaged code that can be // execute at the same time with the GC. 
int8_t gc_state; // read from foreign threads + // execution of certain impure + // statements is prohibited from certain + // callbacks (such as generated functions) + // as it may make compilation undecidable + int8_t in_pure_callback; int8_t in_finalizer; int8_t disable_gc; - jl_thread_heap_t heap; + // Counter to disable finalizer **on the current thread** + int finalizers_inhibited; + jl_thread_heap_t heap; // this is very large, and the offset is baked into codegen jl_thread_gc_num_t gc_num; uv_mutex_t sleep_lock; uv_cond_t wake_signal; volatile sig_atomic_t defer_signal; struct _jl_task_t *current_task; struct _jl_task_t *next_task; -#ifdef MIGRATE_TASKS struct _jl_task_t *previous_task; -#endif struct _jl_task_t *root_task; struct _jl_timing_block_t *timing_stack; void *stackbase; @@ -222,7 +233,6 @@ struct _jl_tls_states_t { struct jl_stack_context_t copy_stack_ctx; #endif }; - jl_jmp_buf *safe_restore; // Temp storage for exception thrown in signal handler. Not rooted. struct _jl_value_t *sig_exception; // Temporary backtrace buffer. Scanned for gc roots when bt_size > 0. @@ -240,18 +250,11 @@ struct _jl_tls_states_t { void *signal_stack; #endif jl_thread_t system_id; - // execution of certain certain impure - // statements is prohibited from certain - // callbacks (such as generated functions) - // as it may make compilation undecidable - int in_pure_callback; - // Counter to disable finalizer **on the current thread** - int finalizers_inhibited; arraylist_t finalizers; jl_gc_mark_cache_t gc_cache; arraylist_t sweep_objs; jl_gc_mark_sp_t gc_mark_sp; - // Saved exception for previous external API call or NULL if cleared. + // Saved exception for previous *external* API call or NULL if cleared. // Access via jl_exception_occurred().
struct _jl_value_t *previous_exception; @@ -264,7 +267,9 @@ struct _jl_tls_states_t { uint64_t sleep_enter; uint64_t sleep_leave; ) -}; +} jl_tls_states_t; + +typedef jl_tls_states_t *jl_ptls_t; // Update codegen version in `ccall.cpp` after changing either `pause` or `wake` #ifdef __MIC__ @@ -285,10 +290,6 @@ struct _jl_tls_states_t { # define JL_CPU_WAKE_NOOP 1 #endif -#ifdef __cplusplus -extern "C" { -#endif - JL_DLLEXPORT void (jl_cpu_pause)(void); JL_DLLEXPORT void (jl_cpu_wake)(void); @@ -342,10 +343,10 @@ int8_t jl_gc_safe_leave(jl_ptls_t ptls, int8_t state); // Can be a safepoint #endif JL_DLLEXPORT void (jl_gc_safepoint)(void); -JL_DLLEXPORT void jl_gc_enable_finalizers(jl_ptls_t ptls, int on); +JL_DLLEXPORT void jl_gc_enable_finalizers(struct _jl_task_t *ct, int on); JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void); JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void); -JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_ptls_t ptls); +JL_DLLEXPORT void jl_gc_run_pending_finalizers(struct _jl_task_t *ct); extern JL_DLLEXPORT int jl_gc_have_pending_finalizers; JL_DLLEXPORT void jl_wakeup_thread(int16_t tid); diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index e11df11dcc9762..bc68edda2cad78 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -37,7 +37,7 @@ struct FinalLowerGC: public FunctionPass, private JuliaPassContext { Function *queueRootFunc; Function *poolAllocFunc; Function *bigAllocFunc; - CallInst *ptlsStates; + Instruction *pgcstack; bool doInitialization(Module &M) override; bool doFinalization(Module &M) override; @@ -60,8 +60,6 @@ struct FinalLowerGC: public FunctionPass, private JuliaPassContext { // Lowers a `julia.queue_gc_root` intrinsic. 
Value *lowerQueueGCRoot(CallInst *target, Function &F); - - Instruction *getPgcstack(Instruction *ptlsStates); }; Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F) @@ -111,7 +109,6 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F) T_size->getPointerTo()), Align(sizeof(void*))); inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); - Value *pgcstack = builder.Insert(getPgcstack(ptlsStates)); inst = builder.CreateAlignedStore( builder.CreateAlignedLoad(pgcstack, Align(sizeof(void*))), builder.CreatePointerCast( @@ -138,8 +135,7 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F) inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); inst = builder.CreateAlignedStore( inst, - builder.CreateBitCast( - builder.Insert(getPgcstack(ptlsStates)), + builder.CreateBitCast(pgcstack, PointerType::get(T_prjlvalue, 0)), Align(sizeof(void*))); inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); @@ -171,16 +167,6 @@ Value *FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F) return target; } -Instruction *FinalLowerGC::getPgcstack(Instruction *ptlsStates) -{ - Constant *offset = ConstantInt::getSigned(T_int32, offsetof(jl_tls_states_t, pgcstack) / sizeof(void*)); - return GetElementPtrInst::CreateInBounds( - T_ppjlvalue, - ptlsStates, - ArrayRef(offset), - "jl_pgcstack"); -} - Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) { assert(target->getNumArgOperands() == 2); @@ -282,13 +268,13 @@ bool FinalLowerGC::runOnFunction(Function &F) LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n"); // Check availability of functions again since they might have been deleted. initFunctions(*F.getParent()); - if (!ptls_getter) - return true; + if (!pgcstack_getter) + return false; - // Look for a call to 'julia.ptls_states'. - ptlsStates = getPtls(F); - if (!ptlsStates) - return true; + // Look for a call to 'julia.get_pgcstack'. 
+ pgcstack = getPGCstack(F); + if (!pgcstack) + return false; // Acquire intrinsic functions. auto newGCFrameFunc = getOrNull(jl_intrinsics::newGCFrame); diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index ef30966b4cc5a9..c731b7cb0254df 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -321,7 +321,7 @@ struct LateLowerGCFrame: public FunctionPass, private JuliaPassContext { } private: - CallInst *ptlsStates; + CallInst *pgcstack; void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector &SafepointsSoFar, SmallVector &&RefinedPtr = SmallVector()); void NoteUse(State &S, BBState &BBS, Value *V, BitVector &Uses); @@ -1148,7 +1148,7 @@ static bool isLoadFromConstGV(Value *v, bool &task_local) if (callee && callee->getName() == "julia.typeof") { return true; } - if (callee && callee->getName() == "julia.ptls_states") { + if (callee && callee->getName() == "julia.get_pgcstack") { task_local = true; return true; } @@ -1514,7 +1514,7 @@ State LateLowerGCFrame::LocalScan(Function &F) { // Known functions emitted in codegen that are not safepoints if (callee == pointer_from_objref_func || callee == gc_preserve_begin_func || callee == gc_preserve_end_func || callee == typeof_func || - callee == ptls_getter || + callee == pgcstack_getter || callee == write_barrier_func || callee->getName() == "memcmp") { continue; } @@ -2519,7 +2519,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State auto pushGcframe = CallInst::Create( getOrDeclare(jl_intrinsics::pushGCFrame), {gcframe, ConstantInt::get(T_int32, 0)}); - pushGcframe->insertAfter(ptlsStates); + pushGcframe->insertAfter(pgcstack); // Replace Allocas unsigned AllocaSlot = 2; // first two words are metadata @@ -2616,11 +2616,11 @@ bool LateLowerGCFrame::runOnFunction(Function &F) { LLVM_DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n"); // Check availability of functions again since they might have 
been deleted. initFunctions(*F.getParent()); - if (!ptls_getter) + if (!pgcstack_getter) return CleanupIR(F); - ptlsStates = getPtls(F); - if (!ptlsStates) + pgcstack = getPGCstack(F); + if (!pgcstack) return CleanupIR(F); State S = LocalScan(F); diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index d594408a20992a..0eed7aec98f0bc 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -24,7 +24,7 @@ JuliaPassContext::JuliaPassContext() : T_size(nullptr), T_int8(nullptr), T_int32(nullptr), T_pint8(nullptr), T_jlvalue(nullptr), T_prjlvalue(nullptr), T_ppjlvalue(nullptr), T_pjlvalue(nullptr), T_pjlvalue_der(nullptr), - T_ppjlvalue_der(nullptr), ptls_getter(nullptr), gc_flush_func(nullptr), + T_ppjlvalue_der(nullptr), pgcstack_getter(nullptr), gc_flush_func(nullptr), gc_preserve_begin_func(nullptr), gc_preserve_end_func(nullptr), pointer_from_objref_func(nullptr), alloc_obj_func(nullptr), typeof_func(nullptr), write_barrier_func(nullptr), module(nullptr) @@ -40,7 +40,7 @@ void JuliaPassContext::initFunctions(Module &M) { module = &M; - ptls_getter = M.getFunction("julia.ptls_states"); + pgcstack_getter = M.getFunction("julia.get_pgcstack"); gc_flush_func = M.getFunction("julia.gcroot_flush"); gc_preserve_begin_func = M.getFunction("llvm.julia.gc_preserve_begin"); gc_preserve_end_func = M.getFunction("llvm.julia.gc_preserve_end"); @@ -69,14 +69,15 @@ void JuliaPassContext::initAll(Module &M) T_ppjlvalue = PointerType::get(T_pjlvalue, 0); T_pjlvalue_der = PointerType::get(T_jlvalue, AddressSpace::Derived); T_ppjlvalue_der = PointerType::get(T_prjlvalue, AddressSpace::Derived); + T_pppjlvalue = PointerType::get(T_ppjlvalue, 0); } -llvm::CallInst *JuliaPassContext::getPtls(llvm::Function &F) const +llvm::CallInst *JuliaPassContext::getPGCstack(llvm::Function &F) const { for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); - ptls_getter && I != E; ++I) { + pgcstack_getter && I != E; ++I) { if (CallInst *callInst = 
dyn_cast(&*I)) { - if (callInst->getCalledOperand() == ptls_getter) { + if (callInst->getCalledOperand() == pgcstack_getter) { return callInst; } } diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h index 71cab27e76ceba..f80786d1e71499 100644 --- a/src/llvm-pass-helpers.h +++ b/src/llvm-pass-helpers.h @@ -49,6 +49,7 @@ struct JuliaPassContext { // Types derived from 'jl_value_t'. llvm::Type *T_jlvalue; llvm::PointerType *T_prjlvalue; + llvm::PointerType *T_pppjlvalue; llvm::PointerType *T_ppjlvalue; llvm::PointerType *T_pjlvalue; llvm::PointerType *T_pjlvalue_der; @@ -59,7 +60,7 @@ struct JuliaPassContext { llvm::MDNode *tbaa_tag; // Intrinsics. - llvm::Function *ptls_getter; + llvm::Function *pgcstack_getter; llvm::Function *gc_flush_func; llvm::Function *gc_preserve_begin_func; llvm::Function *gc_preserve_end_func; @@ -86,10 +87,10 @@ struct JuliaPassContext { return module->getContext(); } - // Gets a call to the `julia.ptls_states` intrinisc in the entry + // Gets a call to the `julia.get_pgcstack` intrinsic in the entry // point of the given function, if there exists such a call. // Otherwise, `nullptr` is returned. - llvm::CallInst *getPtls(llvm::Function &F) const; + llvm::CallInst *getPGCstack(llvm::Function &F) const; // Gets the intrinsic or well-known function that conforms to // the given description if it exists in the module.
If not, diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index 6fbc40ceff0c4b..9cecceac9a1875 100644 --- a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -47,33 +47,36 @@ struct LowerPTLS: public ModulePass { private: const bool imaging_mode; Module *M; - Function *ptls_getter; + Function *pgcstack_getter; LLVMContext *ctx; MDNode *tbaa_const; - PointerType *T_ptls_getter; + FunctionType *FT_pgcstack_getter; + PointerType *T_pgcstack_getter; PointerType *T_ppjlvalue; PointerType *T_pppjlvalue; Type *T_int8; Type *T_size; PointerType *T_pint8; - GlobalVariable *ptls_slot{nullptr}; - GlobalVariable *ptls_offset{nullptr}; - void set_ptls_attrs(CallInst *ptlsStates) const; - Instruction *emit_ptls_tp(Value *offset, Instruction *insertBefore) const; + GlobalVariable *pgcstack_func_slot{nullptr}; + GlobalVariable *pgcstack_key_slot{nullptr}; + GlobalVariable *pgcstack_offset{nullptr}; + void set_pgcstack_attrs(CallInst *pgcstack) const; + Instruction *emit_pgcstack_tp(Value *offset, Instruction *insertBefore) const; template T *add_comdat(T *G) const; GlobalVariable *create_aliased_global(Type *T, StringRef name) const; - void fix_ptls_use(CallInst *ptlsStates); + void fix_pgcstack_use(CallInst *pgcstack); bool runOnModule(Module &M) override; }; -void LowerPTLS::set_ptls_attrs(CallInst *ptlsStates) const +void LowerPTLS::set_pgcstack_attrs(CallInst *pgcstack) const { - ptlsStates->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone); - ptlsStates->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); + pgcstack->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone); + pgcstack->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); } -Instruction *LowerPTLS::emit_ptls_tp(Value *offset, Instruction *insertBefore) const +Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefore) const { + Value *tls; #if defined(_CPU_X86_64_) || defined(_CPU_X86_) if 
(insertBefore->getFunction()->callsFunctionThatReturnsTwice()) { // Workaround LLVM bug by hiding the offset computation @@ -95,47 +98,49 @@ Instruction *LowerPTLS::emit_ptls_tp(Value *offset, Instruction *insertBefore) c # endif // The add instruction clobbers flags - Value *tls; if (offset) { std::vector args(0); args.push_back(offset->getType()); auto tp = InlineAsm::get(FunctionType::get(T_pint8, args, false), dyn_asm_str, "=&r,r,~{dirflag},~{fpsr},~{flags}", false); - tls = CallInst::Create(tp, offset, "ptls_i8", insertBefore); + tls = CallInst::Create(tp, offset, "pgcstack_i8", insertBefore); } else { auto tp = InlineAsm::get(FunctionType::get(T_pint8, false), const_asm_str.c_str(), "=r,~{dirflag},~{fpsr},~{flags}", false); - tls = CallInst::Create(tp, "ptls_i8", insertBefore); + tls = CallInst::Create(tp, "pgcstack_i8", insertBefore); } - return new BitCastInst(tls, T_pppjlvalue, "ptls", insertBefore); } + else #endif - // AArch64/ARM doesn't seem to have this issue. - // (Possibly because there are many more registers and the offset is - // positive and small) - // It's also harder to emit the offset in a generic way on ARM/AArch64 - // (need to generate one or two `add` with shift) so let llvm emit - // the add for now. + { + // AArch64/ARM doesn't seem to have this issue. + // (Possibly because there are many more registers and the offset is + // positive and small) + // It's also harder to emit the offset in a generic way on ARM/AArch64 + // (need to generate one or two `add` with shift) so let llvm emit + // the add for now. 
#if defined(_CPU_AARCH64_) - const char *asm_str = "mrs $0, tpidr_el0"; + const char *asm_str = "mrs $0, tpidr_el0"; #elif defined(__ARM_ARCH) && __ARM_ARCH >= 7 - const char *asm_str = "mrc p15, 0, $0, c13, c0, 3"; + const char *asm_str = "mrc p15, 0, $0, c13, c0, 3"; #elif defined(_CPU_X86_64_) - const char *asm_str = "movq %fs:0, $0"; + const char *asm_str = "movq %fs:0, $0"; #elif defined(_CPU_X86_) - const char *asm_str = "movl %gs:0, $0"; + const char *asm_str = "movl %gs:0, $0"; #else - const char *asm_str = nullptr; - assert(0 && "Cannot emit thread pointer for this architecture."); + const char *asm_str = nullptr; + assert(0 && "Cannot emit thread pointer for this architecture."); #endif - if (!offset) - offset = ConstantInt::getSigned(T_size, jl_tls_offset); - auto tp = InlineAsm::get(FunctionType::get(T_pint8, false), asm_str, "=r", false); - Value *tls = CallInst::Create(tp, "thread_ptr", insertBefore); - tls = GetElementPtrInst::Create(T_int8, tls, {offset}, "ptls_i8", insertBefore); - return new BitCastInst(tls, T_pppjlvalue, "ptls", insertBefore); + if (!offset) + offset = ConstantInt::getSigned(T_size, jl_tls_offset); + auto tp = InlineAsm::get(FunctionType::get(T_pint8, false), asm_str, "=r", false); + tls = CallInst::Create(tp, "thread_ptr", insertBefore); + tls = GetElementPtrInst::Create(T_int8, tls, {offset}, "ppgcstack_i8", insertBefore); + } + tls = new BitCastInst(tls, T_pppjlvalue->getPointerTo(), "ppgcstack", insertBefore); + return new LoadInst(T_pppjlvalue, tls, "pgcstack", false, insertBefore); } GlobalVariable *LowerPTLS::create_aliased_global(Type *T, StringRef name) const @@ -173,98 +178,128 @@ inline T *LowerPTLS::add_comdat(T *G) const return G; } -void LowerPTLS::fix_ptls_use(CallInst *ptlsStates) +void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack) { - if (ptlsStates->use_empty()) { - ptlsStates->eraseFromParent(); + if (pgcstack->use_empty()) { + pgcstack->eraseFromParent(); return; } if (imaging_mode) { if (jl_tls_elf_support) 
{ // if (offset != 0) - // ptls = tp + offset; + // pgcstack = tp + offset; // else - // ptls = getter(); - auto offset = new LoadInst(T_size, ptls_offset, "", false, ptlsStates); + // pgcstack = getter(); + auto offset = new LoadInst(T_size, pgcstack_offset, "", false, pgcstack); offset->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); offset->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(*ctx, None)); - auto cmp = new ICmpInst(ptlsStates, CmpInst::ICMP_NE, offset, + auto cmp = new ICmpInst(pgcstack, CmpInst::ICMP_NE, offset, Constant::getNullValue(offset->getType())); MDBuilder MDB(*ctx); SmallVector Weights{9, 1}; TerminatorInst *fastTerm; TerminatorInst *slowTerm; - SplitBlockAndInsertIfThenElse(cmp, ptlsStates, &fastTerm, &slowTerm, + SplitBlockAndInsertIfThenElse(cmp, pgcstack, &fastTerm, &slowTerm, MDB.createBranchWeights(Weights)); - auto fastTLS = emit_ptls_tp(offset, fastTerm); - auto phi = PHINode::Create(T_pppjlvalue, 2, "", ptlsStates); - ptlsStates->replaceAllUsesWith(phi); - ptlsStates->moveBefore(slowTerm); - auto getter = new LoadInst(T_ptls_getter, ptls_slot, "", false, ptlsStates); + auto fastTLS = emit_pgcstack_tp(offset, fastTerm); + auto phi = PHINode::Create(T_pppjlvalue, 2, "", pgcstack); + pgcstack->replaceAllUsesWith(phi); + pgcstack->moveBefore(slowTerm); + auto getter = new LoadInst(T_pgcstack_getter, pgcstack_func_slot, "", false, pgcstack); getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(*ctx, None)); - ptlsStates->setCalledFunction(ptlsStates->getFunctionType(), getter); - set_ptls_attrs(ptlsStates); + pgcstack->setCalledFunction(pgcstack->getFunctionType(), getter); + set_pgcstack_attrs(pgcstack); phi->addIncoming(fastTLS, fastTLS->getParent()); - phi->addIncoming(ptlsStates, ptlsStates->getParent()); + phi->addIncoming(pgcstack, pgcstack->getParent()); return; } // In imaging mode, we emit the function address as a load of a static 
// variable to be filled (in `staticdata.c`) at initialization time of the sysimg. - // This way we can by pass the extra indirection in `jl_get_ptls_states` + // This way we can bypass the extra indirection in `jl_get_pgcstack` // since we may not know which getter function to use ahead of time. - auto getter = new LoadInst(T_ptls_getter, ptls_slot, "", false, ptlsStates); + auto getter = new LoadInst(T_pgcstack_getter, pgcstack_func_slot, "", false, pgcstack); getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(*ctx, None)); - ptlsStates->setCalledFunction(ptlsStates->getFunctionType(), getter); - set_ptls_attrs(ptlsStates); +#if defined(_OS_DARWIN_) + auto key = new LoadInst(T_size, pgcstack_key_slot, "", false, pgcstack); + key->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); + key->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(*ctx, None)); + auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, getter, {key}, "", pgcstack); + new_pgcstack->takeName(pgcstack); + pgcstack->replaceAllUsesWith(new_pgcstack); + pgcstack->eraseFromParent(); + pgcstack = new_pgcstack; +#else + pgcstack->setCalledFunction(pgcstack->getFunctionType(), getter); +#endif + set_pgcstack_attrs(pgcstack); } else if (jl_tls_offset != -1) { - ptlsStates->replaceAllUsesWith(emit_ptls_tp(nullptr, ptlsStates)); - ptlsStates->eraseFromParent(); + pgcstack->replaceAllUsesWith(emit_pgcstack_tp(nullptr, pgcstack)); + pgcstack->eraseFromParent(); } else { // use the address of the actual getter function directly - auto val = ConstantInt::get(T_size, (uintptr_t)jl_get_ptls_states_getter()); - ptlsStates->setCalledFunction(ptlsStates->getFunctionType(), ConstantExpr::getIntToPtr(val, T_ptls_getter)); - set_ptls_attrs(ptlsStates); + jl_get_pgcstack_func *f; + jl_pgcstack_key_t k; + jl_pgcstack_getkey(&f, &k); + Constant *val = ConstantInt::get(T_size, (uintptr_t)f); + val = 
ConstantExpr::getIntToPtr(val, T_pgcstack_getter); +#if defined(_OS_DARWIN_) + assert(sizeof(k) == sizeof(uintptr_t)); + Constant *key = ConstantInt::get(T_size, (uintptr_t)k); + auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, val, {key}, "", pgcstack); + new_pgcstack->takeName(pgcstack); + pgcstack->replaceAllUsesWith(new_pgcstack); + pgcstack->eraseFromParent(); + pgcstack = new_pgcstack; +#else + pgcstack->setCalledFunction(pgcstack->getFunctionType(), val); +#endif + set_pgcstack_attrs(pgcstack); } } bool LowerPTLS::runOnModule(Module &_M) { M = &_M; - ptls_getter = M->getFunction("julia.ptls_states"); - if (!ptls_getter) + pgcstack_getter = M->getFunction("julia.get_pgcstack"); + if (!pgcstack_getter) return false; ctx = &M->getContext(); tbaa_const = tbaa_make_child("jtbaa_const", nullptr, true).first; - auto FT_ptls_getter = ptls_getter->getFunctionType(); - T_ptls_getter = FT_ptls_getter->getPointerTo(); - T_pppjlvalue = cast(FT_ptls_getter->getReturnType()); - T_ppjlvalue = cast(T_pppjlvalue->getElementType()); T_int8 = Type::getInt8Ty(*ctx); T_size = sizeof(size_t) == 8 ? 
Type::getInt64Ty(*ctx) : Type::getInt32Ty(*ctx); T_pint8 = T_int8->getPointerTo(); + FT_pgcstack_getter = pgcstack_getter->getFunctionType(); +#if defined(_OS_DARWIN_) + assert(sizeof(jl_pgcstack_key_t) == sizeof(uintptr_t)); + FT_pgcstack_getter = FunctionType::get(FT_pgcstack_getter->getReturnType(), {T_size}, false); +#endif + T_pgcstack_getter = FT_pgcstack_getter->getPointerTo(); + T_pppjlvalue = cast(FT_pgcstack_getter->getReturnType()); + T_ppjlvalue = cast(T_pppjlvalue->getElementType()); if (imaging_mode) { - ptls_slot = create_aliased_global(T_ptls_getter, "jl_get_ptls_states_slot"); - ptls_offset = create_aliased_global(T_size, "jl_tls_offset"); + pgcstack_func_slot = create_aliased_global(T_pgcstack_getter, "jl_pgcstack_func_slot"); + pgcstack_key_slot = create_aliased_global(T_size, "jl_pgcstack_key_slot"); // >= sizeof(jl_pgcstack_key_t) + pgcstack_offset = create_aliased_global(T_size, "jl_tls_offset"); } - for (auto it = ptls_getter->user_begin(); it != ptls_getter->user_end();) { + for (auto it = pgcstack_getter->user_begin(); it != pgcstack_getter->user_end();) { auto call = cast(*it); ++it; - assert(call->getCalledOperand() == ptls_getter); - fix_ptls_use(call); + assert(call->getCalledOperand() == pgcstack_getter); + fix_pgcstack_use(call); } - assert(ptls_getter->use_empty()); - ptls_getter->eraseFromParent(); + assert(pgcstack_getter->use_empty()); + pgcstack_getter->eraseFromParent(); return true; } diff --git a/src/locks.h b/src/locks.h index 262390bb718f31..d993f71beefddf 100644 --- a/src/locks.h +++ b/src/locks.h @@ -22,6 +22,7 @@ static inline void jl_mutex_wait(jl_mutex_t *lock, int safepoint) { jl_thread_t self = jl_thread_self(); jl_thread_t owner = jl_atomic_load_relaxed(&lock->owner); + jl_task_t *ct = jl_current_task; if (owner == self) { lock->count++; return; @@ -33,8 +34,7 @@ static inline void jl_mutex_wait(jl_mutex_t *lock, int safepoint) return; } if (safepoint) { - jl_ptls_t ptls = jl_get_ptls_states(); - 
jl_gc_safepoint_(ptls); + jl_gc_safepoint_(ct->ptls); } jl_cpu_pause(); owner = jl_atomic_load_relaxed(&lock->owner); @@ -53,7 +53,7 @@ static inline void jl_mutex_lock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT static inline void jl_lock_frame_push(jl_mutex_t *lock) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; small_arraylist_t *locks = &ptls->locks; uint32_t len = locks->len; if (__unlikely(len >= locks->max)) { @@ -66,19 +66,19 @@ static inline void jl_lock_frame_push(jl_mutex_t *lock) } static inline void jl_lock_frame_pop(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; assert(ptls->locks.len > 0); ptls->locks.len--; } #define JL_SIGATOMIC_BEGIN() do { \ - jl_get_ptls_states()->defer_signal++; \ + jl_current_task->ptls->defer_signal++; \ jl_signal_fence(); \ } while (0) #define JL_SIGATOMIC_END() do { \ jl_signal_fence(); \ - if (--jl_get_ptls_states()->defer_signal == 0) { \ - jl_sigint_safepoint(jl_get_ptls_states()); \ + if (--jl_current_task->ptls->defer_signal == 0) { \ + jl_sigint_safepoint(jl_current_task->ptls); \ } \ } while (0) @@ -128,12 +128,11 @@ static inline void jl_mutex_unlock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT static inline void jl_mutex_unlock(jl_mutex_t *lock) { - jl_ptls_t ptls = jl_get_ptls_states(); jl_mutex_unlock_nogc(lock); jl_lock_frame_pop(); JL_SIGATOMIC_END(); if (jl_gc_have_pending_finalizers) { - jl_gc_run_pending_finalizers(ptls); // may GC + jl_gc_run_pending_finalizers(jl_current_task); // may GC } } diff --git a/src/method.c b/src/method.c index b972d98ef12606..ed0593cb77d7df 100644 --- a/src/method.c +++ b/src/method.c @@ -340,9 +340,9 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; jl_method_instance_t *li = - (jl_method_instance_t*)jl_gc_alloc(ptls, 
sizeof(jl_method_instance_t), + (jl_method_instance_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_instance_t), jl_method_instance_type); li->def.value = NULL; li->specTypes = NULL; @@ -357,9 +357,9 @@ JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void) JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; jl_code_info_t *src = - (jl_code_info_t*)jl_gc_alloc(ptls, sizeof(jl_code_info_t), + (jl_code_info_t*)jl_gc_alloc(ct->ptls, sizeof(jl_code_info_t), jl_code_info_type); src->code = NULL; src->codelocs = NULL; @@ -467,15 +467,15 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo) jl_code_info_t *func = NULL; jl_value_t *ex = NULL; JL_GC_PUSH2(&ex, &func); - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; int last_lineno = jl_lineno; - int last_in = ptls->in_pure_callback; - size_t last_age = jl_get_ptls_states()->world_age; + int last_in = ct->ptls->in_pure_callback; + size_t last_age = ct->world_age; JL_TRY { - ptls->in_pure_callback = 1; + ct->ptls->in_pure_callback = 1; // and the right world - ptls->world_age = def->primary_world; + ct->world_age = def->primary_world; // invoke code generator jl_tupletype_t *ttdt = (jl_tupletype_t*)jl_unwrap_unionall(tt); @@ -492,7 +492,7 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo) if (!jl_is_code_info(func)) { if (jl_is_expr(func) && ((jl_expr_t*)func)->head == error_sym) { - ptls->in_pure_callback = 0; + ct->ptls->in_pure_callback = 0; jl_toplevel_eval(def->module, (jl_value_t*)func); } jl_error("The function body AST defined by this @generated function is not pure. 
This likely means it contains a closure, a comprehension or a generator."); @@ -510,13 +510,13 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo) } } - ptls->in_pure_callback = last_in; + ct->ptls->in_pure_callback = last_in; jl_lineno = last_lineno; - ptls->world_age = last_age; + ct->world_age = last_age; jl_add_function_name_to_lineinfo(func, (jl_value_t*)def->name); } JL_CATCH { - ptls->in_pure_callback = last_in; + ct->ptls->in_pure_callback = last_in; jl_lineno = last_lineno; jl_rethrow(); } @@ -526,9 +526,9 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo) JL_DLLEXPORT jl_code_info_t *jl_copy_code_info(jl_code_info_t *src) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; jl_code_info_t *newsrc = - (jl_code_info_t*)jl_gc_alloc(ptls, sizeof(jl_code_info_t), + (jl_code_info_t*)jl_gc_alloc(ct->ptls, sizeof(jl_code_info_t), jl_code_info_type); *newsrc = *src; return newsrc; @@ -655,9 +655,9 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; jl_method_t *m = - (jl_method_t*)jl_gc_alloc(ptls, sizeof(jl_method_t), jl_method_type); + (jl_method_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_t), jl_method_type); m->specializations = jl_emptysvec; m->speckeyset = (jl_array_t*)jl_an_empty_vec_any; m->sig = NULL; diff --git a/src/module.c b/src/module.c index 231efbb357653d..dc71fb86c036a3 100644 --- a/src/module.c +++ b/src/module.c @@ -13,9 +13,9 @@ extern "C" { JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; const jl_uuid_t uuid_zero = {0, 0}; - jl_module_t *m = (jl_module_t*)jl_gc_alloc(ptls, sizeof(jl_module_t), + jl_module_t *m = (jl_module_t*)jl_gc_alloc(ct->ptls, sizeof(jl_module_t), jl_module_type); assert(jl_is_symbol(name)); 
m->name = name; @@ -133,9 +133,9 @@ JL_DLLEXPORT uint8_t jl_istopmod(jl_module_t *mod) static jl_binding_t *new_binding(jl_sym_t *name) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; assert(jl_is_symbol(name)); - jl_binding_t *b = (jl_binding_t*)jl_gc_alloc_buf(ptls, sizeof(jl_binding_t)); + jl_binding_t *b = (jl_binding_t*)jl_gc_alloc_buf(ct->ptls, sizeof(jl_binding_t)); b->name = name; b->value = NULL; b->owner = NULL; diff --git a/src/opaque_closure.c b/src/opaque_closure.c index 9f2662f32eb25b..8b82b0ca1a7371 100644 --- a/src/opaque_closure.c +++ b/src/opaque_closure.c @@ -5,12 +5,12 @@ JL_DLLEXPORT jl_value_t *jl_invoke_opaque_closure(jl_opaque_closure_t *oc, jl_va { jl_value_t *ret = NULL; JL_GC_PUSH1(&ret); - jl_ptls_t ptls = jl_get_ptls_states(); - size_t last_age = ptls->world_age; - ptls->world_age = oc->world; + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = oc->world; ret = jl_interpret_opaque_closure(oc, args, nargs); jl_typeassert(ret, jl_tparam1(jl_typeof(oc))); - ptls->world_age = last_age; + ct->world_age = last_age; JL_GC_POP(); return ret; } @@ -25,14 +25,14 @@ jl_opaque_closure_t *jl_new_opaque_closure(jl_tupletype_t *argt, jl_value_t *isv JL_TYPECHK(new_opaque_closure, type, rt_lb); JL_TYPECHK(new_opaque_closure, type, rt_ub); JL_TYPECHK(new_opaque_closure, method, source); - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; jl_value_t *oc_type JL_ALWAYS_LEAFTYPE; oc_type = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, (jl_value_t*)argt, rt_ub); JL_GC_PROMISE_ROOTED(oc_type); jl_value_t *captures = NULL; JL_GC_PUSH1(&captures); captures = jl_f_tuple(NULL, env, nenv); - jl_opaque_closure_t *oc = (jl_opaque_closure_t*)jl_gc_alloc(ptls, sizeof(jl_opaque_closure_t), oc_type); + jl_opaque_closure_t *oc = (jl_opaque_closure_t*)jl_gc_alloc(ct->ptls, sizeof(jl_opaque_closure_t), oc_type); JL_GC_POP(); oc->source = (jl_method_t*)source; oc->isva = 
jl_unbox_bool(isva); diff --git a/src/options.h b/src/options.h index 3ffbf05b2249ff..5ea220900b5eb9 100644 --- a/src/options.h +++ b/src/options.h @@ -114,7 +114,7 @@ #endif // allow a suspended Task to restart on a different thread -//#define MIGRATE_TASKS +#define MIGRATE_TASKS // threading options ---------------------------------------------------------- diff --git a/src/partr.c b/src/partr.c index 782b418ebd882b..c9d4885cc34b56 100644 --- a/src/partr.c +++ b/src/partr.c @@ -126,7 +126,7 @@ static inline void sift_down(taskheap_t *heap, int32_t idx) static inline int multiq_insert(jl_task_t *task, int16_t priority) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; uint64_t rn; task->prio = priority; @@ -153,7 +153,7 @@ static inline int multiq_insert(jl_task_t *task, int16_t priority) static inline jl_task_t *multiq_deletemin(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; uint64_t rn1 = 0, rn2; int32_t i; int16_t prio1, prio2; @@ -228,7 +228,8 @@ void jl_init_threadinginfra(void) /* initialize the synchronization trees pool and the multiqueue */ multiq_init(); - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; + jl_install_thread_signal_handler(ptls); uv_mutex_init(&ptls->sleep_lock); uv_cond_init(&ptls->wake_signal); } @@ -242,12 +243,11 @@ void jl_threadfun(void *arg) jl_threadarg_t *targ = (jl_threadarg_t*)arg; // initialize this thread (set tid, create heap, set up root task) - jl_init_threadtls(targ->tid); + jl_ptls_t ptls = jl_init_threadtls(targ->tid); void *stack_lo, *stack_hi; jl_init_stack_limits(0, &stack_lo, &stack_hi); - jl_init_root_task(stack_lo, stack_hi); - - jl_ptls_t ptls = jl_get_ptls_states(); + jl_init_root_task(ptls, stack_lo, stack_hi); + jl_install_thread_signal_handler(ptls); // set up sleep mechanism for this thread uv_mutex_init(&ptls->sleep_lock); @@ -347,7 +347,7 @@ static void wake_libuv(void) /* ensure thread tid is 
awake if necessary */ JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; jl_thread_t uvlock = jl_atomic_load(&jl_uv_mutex.owner); int16_t self = ptls->tid; jl_thread_t system_self = jl_all_tls_states[self]->system_id; @@ -391,7 +391,7 @@ static jl_task_t *get_next_task(jl_value_t *trypoptask, jl_value_t *q) jl_value_t *args[2] = { trypoptask, q }; jl_task_t *task = (jl_task_t*)jl_apply(args, 2); if (jl_typeis(task, jl_task_type)) { - int self = jl_get_ptls_states()->tid; + int self = jl_current_task->tid; jl_set_task_tid(task, self); return task; } @@ -411,12 +411,11 @@ extern volatile unsigned _threadedregion; JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; uint64_t start_cycles = 0; - jl_task_t *task; while (1) { - task = get_next_task(trypoptask, q); + jl_task_t *task = get_next_task(trypoptask, q); if (task) return task; @@ -428,6 +427,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q) } jl_cpu_pause(); + jl_ptls_t ptls = ct->ptls; if (sleep_check_after_threshold(&start_cycles) || (!_threadedregion && ptls->tid == 0)) { jl_atomic_store(&ptls->sleep_check_state, sleeping); // acquire sleep-check lock if (!multiq_check_empty()) { @@ -435,7 +435,9 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q) jl_atomic_store(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us continue; } - task = get_next_task(trypoptask, q); + task = get_next_task(trypoptask, q); // WARNING: this should not yield + if (ptls != ct->ptls) + continue; if (task) { if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) jl_atomic_store(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us diff --git a/src/rtutils.c b/src/rtutils.c index 
98dc68dfa02f3b..07e4c969ddbfa5 100644 --- a/src/rtutils.c +++ b/src/rtutils.c @@ -210,18 +210,17 @@ JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t) JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_task_t *current_task = ptls->current_task; + jl_task_t *ct = jl_current_task; // Must have no safepoint - eh->prev = current_task->eh; - eh->gcstack = ptls->pgcstack; - eh->gc_state = ptls->gc_state; - eh->locks_len = ptls->locks.len; - eh->defer_signal = ptls->defer_signal; - eh->world_age = ptls->world_age; - current_task->eh = eh; + eh->prev = ct->eh; + eh->gcstack = ct->gcstack; + eh->gc_state = ct->ptls->gc_state; + eh->locks_len = ct->ptls->locks.len; + eh->defer_signal = ct->ptls->defer_signal; + eh->world_age = ct->world_age; + ct->eh = eh; #ifdef ENABLE_TIMINGS - eh->timing_stack = ptls->timing_stack; + eh->timing_stack = ct->ptls->timing_stack; #endif } @@ -232,50 +231,49 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh) // there's additional cleanup required, eg pushing the exception stack. JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; #ifdef _OS_WINDOWS_ - if (ptls->needs_resetstkoflw) { + if (ct->ptls->needs_resetstkoflw) { _resetstkoflw(); - ptls->needs_resetstkoflw = 0; + ct->ptls->needs_resetstkoflw = 0; } #endif - jl_task_t *current_task = ptls->current_task; - // `eh` may be not equal to `ptls->current_task->eh`. See `jl_pop_handler` + // `eh` may be not equal to `ct->eh`. See `jl_pop_handler` // This function should **NOT** have any safepoint before the ones at the // end. 
- sig_atomic_t old_defer_signal = ptls->defer_signal; - int8_t old_gc_state = ptls->gc_state; - current_task->eh = eh->prev; - ptls->pgcstack = eh->gcstack; - small_arraylist_t *locks = &ptls->locks; + sig_atomic_t old_defer_signal = ct->ptls->defer_signal; + int8_t old_gc_state = ct->ptls->gc_state; + ct->eh = eh->prev; + ct->gcstack = eh->gcstack; + small_arraylist_t *locks = &ct->ptls->locks; int unlocks = locks->len > eh->locks_len; if (unlocks) { for (size_t i = locks->len; i > eh->locks_len; i--) jl_mutex_unlock_nogc((jl_mutex_t*)locks->items[i - 1]); locks->len = eh->locks_len; } - ptls->world_age = eh->world_age; - ptls->defer_signal = eh->defer_signal; + ct->world_age = eh->world_age; + ct->ptls->defer_signal = eh->defer_signal; if (old_gc_state != eh->gc_state) { - jl_atomic_store_release(&ptls->gc_state, eh->gc_state); + jl_atomic_store_release(&ct->ptls->gc_state, eh->gc_state); if (old_gc_state) { - jl_gc_safepoint_(ptls); + jl_gc_safepoint_(ct->ptls); } } if (old_defer_signal && !eh->defer_signal) { - jl_sigint_safepoint(ptls); + jl_sigint_safepoint(ct->ptls); } if (jl_gc_have_pending_finalizers && unlocks && eh->locks_len == 0) { - jl_gc_run_pending_finalizers(ptls); + jl_gc_run_pending_finalizers(ct); } } JL_DLLEXPORT void jl_pop_handler(int n) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; if (__unlikely(n <= 0)) return; - jl_handler_t *eh = ptls->current_task->eh; + jl_handler_t *eh = ct->eh; while (--n > 0) eh = eh->prev; jl_eh_restore_state(eh); @@ -283,15 +281,15 @@ JL_DLLEXPORT void jl_pop_handler(int n) JL_DLLEXPORT size_t jl_excstack_state(void) JL_NOTSAFEPOINT { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_excstack_t *s = ptls->current_task->excstack; + jl_task_t *ct = jl_current_task; + jl_excstack_t *s = ct->excstack; return s ? 
s->top : 0; } JL_DLLEXPORT void jl_restore_excstack(size_t state) JL_NOTSAFEPOINT { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_excstack_t *s = ptls->current_task->excstack; + jl_task_t *ct = jl_current_task; + jl_excstack_t *s = ct->excstack; if (s) { assert(s->top >= state); s->top = state; @@ -312,7 +310,8 @@ static void jl_reserve_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT, if (s && s->reserved_size >= reserved_size) return; size_t bufsz = sizeof(jl_excstack_t) + sizeof(uintptr_t)*reserved_size; - jl_excstack_t *new_s = (jl_excstack_t*)jl_gc_alloc_buf(jl_get_ptls_states(), bufsz); + jl_task_t *ct = jl_current_task; + jl_excstack_t *new_s = (jl_excstack_t*)jl_gc_alloc_buf(ct->ptls, bufsz); new_s->top = 0; new_s->reserved_size = reserved_size; if (s) @@ -1270,10 +1269,9 @@ JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_N JL_DLLEXPORT void jl_(void *jl_value) JL_NOTSAFEPOINT { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_jmp_buf *old_buf = ptls->safe_restore; + jl_jmp_buf *old_buf = jl_get_safe_restore(); jl_jmp_buf buf; - ptls->safe_restore = &buf; + jl_set_safe_restore(&buf); if (!jl_setjmp(buf, 0)) { jl_static_show((JL_STREAM*)STDERR_FILENO, (jl_value_t*)jl_value); jl_printf((JL_STREAM*)STDERR_FILENO,"\n"); @@ -1281,7 +1279,7 @@ JL_DLLEXPORT void jl_(void *jl_value) JL_NOTSAFEPOINT else { jl_printf((JL_STREAM*)STDERR_FILENO, "\n!!! 
ERROR in jl_ -- ABORTING !!!\n"); } - ptls->safe_restore = old_buf; + jl_set_safe_restore(old_buf); } JL_DLLEXPORT void jl_breakpoint(jl_value_t *v) diff --git a/src/runtime_ccall.cpp b/src/runtime_ccall.cpp index 050347513aa45d..ba265eb67be764 100644 --- a/src/runtime_ccall.cpp +++ b/src/runtime_ccall.cpp @@ -340,8 +340,8 @@ jl_value_t *jl_get_cfunction_trampoline( ((void**)result)[1] = (void*)fobj; } if (!permanent) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_gc_add_ptr_finalizer(ptls, result, (void*)(uintptr_t)&trampoline_deleter); + jl_task_t *ct = jl_current_task; + jl_gc_add_ptr_finalizer(ct->ptls, result, (void*)(uintptr_t)&trampoline_deleter); ((void**)result)[2] = (void*)cache; ((void**)result)[3] = (void*)nval; } diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 22c3a8988b5801..e284fa5acfff63 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -414,7 +414,7 @@ static inline jl_value_t *jl_intrinsiclambda_ty1(jl_value_t *ty, void *pa, unsig static inline jl_value_t *jl_intrinsiclambda_u1(jl_value_t *ty, void *pa, unsigned osize, unsigned osize2, const void *voidlist) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; intrinsic_u1_t op = select_intrinsic_u1(osize2, (const intrinsic_u1_t*)voidlist); uint64_t cnt = op(osize * host_char_bit, pa); // TODO: the following assume little-endian @@ -422,7 +422,7 @@ static inline jl_value_t *jl_intrinsiclambda_u1(jl_value_t *ty, void *pa, unsign if (osize <= sizeof(cnt)) { return jl_new_bits(ty, &cnt); } - jl_value_t *newv = jl_gc_alloc(ptls, osize, ty); + jl_value_t *newv = jl_gc_alloc(ct->ptls, osize, ty); // perform zext, if needed memset((char*)jl_data_ptr(newv) + sizeof(cnt), 0, osize - sizeof(cnt)); memcpy(jl_data_ptr(newv), &cnt, sizeof(cnt)); @@ -478,13 +478,13 @@ typedef void (fintrinsic_op1)(unsigned, void*, void*); static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const char *name, fintrinsic_op1 *halfop, 
fintrinsic_op1 *floatop, fintrinsic_op1 *doubleop) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; if (!jl_is_primitivetype(jl_typeof(a))) jl_errorf("%s: value is not a primitive type", name); if (!jl_is_primitivetype(ty)) jl_errorf("%s: type is not a primitive type", name); unsigned sz2 = jl_datatype_size(ty); - jl_value_t *newv = jl_gc_alloc(ptls, sz2, ty); + jl_value_t *newv = jl_gc_alloc(ct->ptls, sz2, ty); void *pa = jl_data_ptr(a), *pr = jl_data_ptr(newv); unsigned sz = jl_datatype_size(jl_typeof(a)); switch (sz) { @@ -645,8 +645,8 @@ static inline jl_value_t *jl_intrinsiclambda_checked(jl_value_t *ty, void *pa, v params[1] = (jl_value_t*)jl_bool_type; jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2); JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALAWYS_LEAFTYPE) - jl_ptls_t ptls = jl_get_ptls_states(); - jl_value_t *newv = jl_gc_alloc(ptls, ((jl_datatype_t*)tuptyp)->size, tuptyp); + jl_task_t *ct = jl_current_task; + jl_value_t *newv = jl_gc_alloc(ct->ptls, ((jl_datatype_t*)tuptyp)->size, tuptyp); intrinsic_checked_t op = select_intrinsic_checked(sz2, (const intrinsic_checked_t*)voidlist); int ovflw = op(sz * host_char_bit, pa, pb, jl_data_ptr(newv)); @@ -673,14 +673,14 @@ static inline jl_value_t *jl_intrinsiclambda_checkeddiv(jl_value_t *ty, void *pa bi_intrinsic_ctype(OP, name, 64, double) \ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ { \ - jl_ptls_t ptls = jl_get_ptls_states();\ + jl_task_t *ct = jl_current_task; \ jl_value_t *ty = jl_typeof(a); \ if (jl_typeof(b) != ty) \ jl_error(#name ": types of a and b must match"); \ if (!jl_is_primitivetype(ty)) \ jl_error(#name ": values are not primitive types"); \ int sz = jl_datatype_size(ty); \ - jl_value_t *newv = jl_gc_alloc(ptls, sz, ty); \ + jl_value_t *newv = jl_gc_alloc(ct->ptls, sz, ty); \ void *pa = jl_data_ptr(a), *pb = jl_data_ptr(b), *pr = jl_data_ptr(newv); \ switch (sz) { \ /* choose the right size c-type operation */ \ @@ -736,14 +736,14 @@ 
JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ ter_intrinsic_ctype(OP, name, 64, double) \ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b, jl_value_t *c) \ { \ - jl_ptls_t ptls = jl_get_ptls_states();\ + jl_task_t *ct = jl_current_task; \ jl_value_t *ty = jl_typeof(a); \ if (jl_typeof(b) != ty || jl_typeof(c) != ty) \ jl_error(#name ": types of a, b, and c must match"); \ if (!jl_is_primitivetype(ty)) \ jl_error(#name ": values are not primitive types"); \ - int sz = jl_datatype_size(ty); \ - jl_value_t *newv = jl_gc_alloc(ptls, sz, ty); \ + int sz = jl_datatype_size(ty); \ + jl_value_t *newv = jl_gc_alloc(ct->ptls, sz, ty); \ void *pa = jl_data_ptr(a), *pb = jl_data_ptr(b), *pc = jl_data_ptr(c), *pr = jl_data_ptr(newv); \ switch (sz) { \ /* choose the right size c-type operation */ \ diff --git a/src/safepoint.c b/src/safepoint.c index 2f90afaf508e06..fcd09ffef0334f 100644 --- a/src/safepoint.c +++ b/src/safepoint.c @@ -115,7 +115,7 @@ int jl_safepoint_start_gc(void) return 1; } // The thread should have set this already - assert(jl_get_ptls_states()->gc_state == JL_GC_STATE_WAITING); + assert(jl_current_task->ptls->gc_state == JL_GC_STATE_WAITING); jl_mutex_lock_nogc(&safepoint_lock); // In case multiple threads enter the GC at the same time, only allow // one of them to actually run the collection. We can't just let the @@ -156,7 +156,7 @@ void jl_safepoint_end_gc(void) void jl_safepoint_wait_gc(void) { // The thread should have set this is already - assert(jl_get_ptls_states()->gc_state != 0); + assert(jl_current_task->ptls->gc_state != 0); // Use normal volatile load in the loop for speed until GC finishes. // Then use an acquire load to make sure the GC result is visible on this thread. 
while (jl_atomic_load_relaxed(&jl_gc_running) || jl_atomic_load_acquire(&jl_gc_running)) { diff --git a/src/signal-handling.c b/src/signal-handling.c index aa642eeedf2a2a..8011c62934d289 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -106,17 +106,16 @@ static uintptr_t jl_get_pc_from_ctx(const void *_ctx); void jl_show_sigill(void *_ctx); static size_t jl_safe_read_mem(const volatile char *ptr, char *out, size_t len) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_jmp_buf *old_buf = ptls->safe_restore; + jl_jmp_buf *old_buf = jl_get_safe_restore(); jl_jmp_buf buf; - ptls->safe_restore = &buf; + jl_set_safe_restore(&buf); volatile size_t i = 0; if (!jl_setjmp(buf, 0)) { - for (;i < len;i++) { + for (; i < len; i++) { out[i] = ptr[i]; } } - ptls->safe_restore = old_buf; + jl_set_safe_restore(old_buf); return i; } @@ -235,18 +234,16 @@ void jl_show_sigill(void *_ctx) void jl_critical_error(int sig, bt_context_t *context) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_bt_element_t *bt_data = ptls->bt_data; - size_t *bt_size = &ptls->bt_size; + jl_task_t *ct = jl_current_task; + jl_bt_element_t *bt_data = ct->ptls->bt_data; + size_t *bt_size = &ct->ptls->bt_size; size_t i, n = *bt_size; if (sig) { // kill this task, so that we cannot get back to it accidentally (via an untimely ^C or jlbacktrace in jl_exit) - ptls->pgcstack = NULL; - ptls->safe_restore = NULL; - if (ptls->current_task) { - ptls->current_task->eh = NULL; - ptls->current_task->excstack = NULL; - } + jl_set_safe_restore(NULL); + ct->gcstack = NULL; + ct->eh = NULL; + ct->excstack = NULL; #ifndef _OS_WINDOWS_ sigset_t sset; sigemptyset(&sset); diff --git a/src/signals-mach.c b/src/signals-mach.c index d7f8fcfacc944d..3f133c3189b105 100644 --- a/src/signals-mach.c +++ b/src/signals-mach.c @@ -84,7 +84,6 @@ extern boolean_t exc_server(mach_msg_header_t *, mach_msg_header_t *); void *mach_segv_listener(void *arg) { (void)arg; - (void)jl_get_ptls_states(); while (1) { int ret = 
mach_msg_server(exc_server, 2048, segv_port, MACH_MSG_TIMEOUT_NONE); jl_safe_printf("mach_msg_server: %s\n", mach_error_string(ret)); @@ -167,7 +166,7 @@ static void jl_call_in_state(jl_ptls_t ptls2, host_thread_state_t *state, #else #error "julia: throw-in-context not supported on this platform" #endif - if (ptls2->signal_stack == NULL || is_addr_on_sigstack(ptls2, (void*)rsp)) { + if (ptls2 == NULL || ptls2->signal_stack == NULL || is_addr_on_sigstack(ptls2, (void*)rsp)) { rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment } else { @@ -210,10 +209,11 @@ static void jl_throw_in_thread(int tid, mach_port_t thread, jl_value_t *exceptio kern_return_t ret = thread_get_state(thread, THREAD_STATE, (thread_state_t)&state, &count); HANDLE_MACH_ERROR("thread_get_state", ret); jl_ptls_t ptls2 = jl_all_tls_states[tid]; - if (!ptls2->safe_restore) { + if (!jl_get_safe_restore()) { assert(exception); - ptls2->bt_size = rec_backtrace_ctx(ptls2->bt_data, JL_MAX_BT_SIZE, - (bt_context_t*)&state, ptls2->pgcstack); + ptls2->bt_size = + rec_backtrace_ctx(ptls2->bt_data, JL_MAX_BT_SIZE, (bt_context_t *)&state, + NULL /*current_task?*/); ptls2->sig_exception = exception; } jl_call_in_state(ptls2, &state, &jl_sig_throw); @@ -223,9 +223,10 @@ static void jl_throw_in_thread(int tid, mach_port_t thread, jl_value_t *exceptio static void segv_handler(int sig, siginfo_t *info, void *context) { - jl_ptls_t ptls = jl_get_ptls_states(); assert(sig == SIGSEGV || sig == SIGBUS); - if (ptls->safe_restore) { // restarting jl_ or jl_unwind_stepn + if (jl_get_safe_restore()) { // restarting jl_ or jl_unwind_stepn + jl_task_t *ct = jl_get_current_task(); + jl_ptls_t ptls = ct == NULL ? 
NULL : ct->ptls; jl_call_in_state(ptls, (host_thread_state_t*)jl_to_bt_context(context), &jl_sig_throw); } else { @@ -291,7 +292,7 @@ kern_return_t catch_exception_raise(mach_port_t exception_port, } return KERN_SUCCESS; } - if (ptls2->safe_restore) { + if (jl_get_safe_restore()) { jl_throw_in_thread(tid, thread, jl_stackovf_exception); return KERN_SUCCESS; } @@ -301,7 +302,7 @@ kern_return_t catch_exception_raise(mach_port_t exception_port, if (msync((void*)(fault_addr & ~(jl_page_size - 1)), 1, MS_ASYNC) == 0) { // check if this was a valid address #endif jl_value_t *excpt; - if (is_addr_on_stack(ptls2, (void*)fault_addr)) { + if (is_addr_on_stack(ptls2->current_task, (void*)fault_addr)) { excpt = jl_stackovf_exception; } #ifdef SEGV_EXCEPTION diff --git a/src/signals-unix.c b/src/signals-unix.c index 98aa9264eb3842..bb19e2bd65d785 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -109,7 +109,7 @@ static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_c // checks that the syscall is made in the signal handler and that // the ucontext address is valid. Hopefully the value of the ucontext // will not be part of the validation... 
- if (!ptls->signal_stack) { + if (!ptls || !ptls->signal_stack) { sigset_t sset; sigemptyset(&sset); sigaddset(&sset, sig); @@ -196,26 +196,29 @@ static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_c #endif } -static void jl_throw_in_ctx(jl_ptls_t ptls, jl_value_t *e, int sig, void *sigctx) +static void jl_throw_in_ctx(jl_task_t *ct, jl_value_t *e, int sig, void *sigctx) { - if (!ptls->safe_restore) - ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, - jl_to_bt_context(sigctx), ptls->pgcstack); - ptls->sig_exception = e; + jl_ptls_t ptls = ct->ptls; + if (!jl_get_safe_restore()) { + ptls->bt_size = + rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, jl_to_bt_context(sigctx), + ct->gcstack); + ptls->sig_exception = e; + } jl_call_in_ctx(ptls, &jl_sig_throw, sig, sigctx); } static pthread_t signals_thread; -static int is_addr_on_stack(jl_ptls_t ptls, void *addr) +static int is_addr_on_stack(jl_task_t *ct, void *addr) { - jl_task_t *t = ptls->current_task; - if (t->copy_stack) + if (ct->copy_stack) { + jl_ptls_t ptls = ct->ptls; return ((char*)addr > (char*)ptls->stackbase - ptls->stacksize && (char*)addr < (char*)ptls->stackbase); - else - return ((char*)addr > (char*)t->stkbuf && - (char*)addr < (char*)t->stkbuf + t->bufsz); + } + return ((char*)addr > (char*)ct->stkbuf && + (char*)addr < (char*)ct->stkbuf + ct->bufsz); } static void sigdie_handler(int sig, siginfo_t *info, void *context) @@ -305,26 +308,34 @@ static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context) static void segv_handler(int sig, siginfo_t *info, void *context) { - jl_ptls_t ptls = jl_get_ptls_states(); + if (jl_get_safe_restore()) { // restarting jl_ or profile + jl_call_in_ctx(NULL, &jl_sig_throw, sig, context); + return; + } + jl_task_t *ct = jl_get_current_task(); + if (ct == NULL) { + sigdie_handler(sig, info, context); + return; + } assert(sig == SIGSEGV || sig == SIGBUS); if (jl_addr_is_safepoint((uintptr_t)info->si_addr)) { 
jl_set_gc_and_wait(); // Do not raise sigint on worker thread - if (ptls->tid != 0) + if (ct->tid != 0) return; - if (ptls->defer_signal) { + if (ct->ptls->defer_signal) { jl_safepoint_defer_sigint(); } else if (jl_safepoint_consume_sigint()) { jl_clear_force_sigint(); - jl_throw_in_ctx(ptls, jl_interrupt_exception, sig, context); + jl_throw_in_ctx(ct, jl_interrupt_exception, sig, context); } return; } - if (ptls->safe_restore || is_addr_on_stack(ptls, info->si_addr)) { // stack overflow, or restarting jl_ - jl_throw_in_ctx(ptls, jl_stackovf_exception, sig, context); + if (is_addr_on_stack(ct, info->si_addr)) { // stack overflow + jl_throw_in_ctx(ct, jl_stackovf_exception, sig, context); } - else if (jl_is_on_sigstack(ptls, info->si_addr, context)) { + else if (jl_is_on_sigstack(ct->ptls, info->si_addr, context)) { // This mainly happens when one of the finalizers during final cleanup // on the signal stack has a deep/infinite recursion. // There isn't anything more we can do @@ -334,11 +345,11 @@ static void segv_handler(int sig, siginfo_t *info, void *context) _exit(sig + 128); } else if (sig == SIGSEGV && info->si_code == SEGV_ACCERR && is_write_fault(context)) { // writing to read-only memory (e.g., mmap) - jl_throw_in_ctx(ptls, jl_readonlymemory_exception, sig, context); + jl_throw_in_ctx(ct, jl_readonlymemory_exception, sig, context); } else { #ifdef SEGV_EXCEPTION - jl_throw_in_ctx(ptls, jl_segv_exception, sig, context); + jl_throw_in_ctx(ct, jl_segv_exception, sig, context); #else sigdie_handler(sig, info, context); #endif @@ -433,7 +444,10 @@ static void jl_exit_thread0(int state, jl_bt_element_t *bt_data, size_t bt_size) // 3: exit with `thread0_exit_state` void usr2_handler(int sig, siginfo_t *info, void *ctx) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_get_current_task(); + if (ct == NULL) + return; + jl_ptls_t ptls = ct->ptls; int errno_save = errno; sig_atomic_t request = jl_atomic_exchange(&ptls->signal_request, 0); #if 
!defined(JL_DISABLE_LIBUNWIND) @@ -457,11 +471,11 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx) jl_safe_printf("WARNING: Force throwing a SIGINT\n"); // Force a throw jl_clear_force_sigint(); - jl_throw_in_ctx(ptls, jl_interrupt_exception, sig, ctx); + jl_throw_in_ctx(ct, jl_interrupt_exception, sig, ctx); } } else if (request == 3) { - jl_call_in_ctx(ptls, jl_exit_thread0_cb, sig, ctx); + jl_call_in_ctx(ct->ptls, jl_exit_thread0_cb, sig, ctx); } errno = errno_save; } @@ -753,11 +767,10 @@ static void *signal_listener(void *arg) else { // unwinding can fail, so keep track of the current state // and restore from the SEGV handler if anything happens. - jl_ptls_t ptls = jl_get_ptls_states(); - jl_jmp_buf *old_buf = ptls->safe_restore; + jl_jmp_buf *old_buf = jl_get_safe_restore(); jl_jmp_buf buf; - ptls->safe_restore = &buf; + jl_set_safe_restore(&buf); if (jl_setjmp(buf, 0)) { jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n"); } else { @@ -765,7 +778,7 @@ static void *signal_listener(void *arg) bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, signal_context, NULL); } - ptls->safe_restore = old_buf; + jl_set_safe_restore(old_buf); // Mark the end of this block with 0 bt_data_prof[bt_size_cur++].uintptr = 0; @@ -832,8 +845,15 @@ void restore_signals(void) static void fpe_handler(int sig, siginfo_t *info, void *context) { (void)info; - jl_ptls_t ptls = jl_get_ptls_states(); - jl_throw_in_ctx(ptls, jl_diverror_exception, sig, context); + if (jl_get_safe_restore()) { // restarting jl_ or profile + jl_call_in_ctx(NULL, &jl_sig_throw, sig, context); + return; + } + jl_task_t *ct = jl_get_current_task(); + if (ct == NULL) // exception on foreign thread is fatal + sigdie_handler(sig, info, context); + else + jl_throw_in_ctx(ct, jl_diverror_exception, sig, context); } static void sigint_handler(int sig) diff --git a/src/signals-win.c b/src/signals-win.c index 
ace5a178d483a0..984330dc434dc1 100644 --- a/src/signals-win.c +++ b/src/signals-win.c @@ -43,11 +43,11 @@ static char *strsignal(int sig) static void jl_try_throw_sigint(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; jl_safepoint_enable_sigint(); jl_wake_libuv(); int force = jl_check_force_sigint(); - if (force || (!ptls->defer_signal && ptls->io_wait)) { + if (force || (!ct->ptls->defer_signal && ct->ptls->io_wait)) { jl_safepoint_consume_sigint(); if (force) jl_safe_printf("WARNING: Force throwing a SIGINT\n"); @@ -59,7 +59,7 @@ static void jl_try_throw_sigint(void) void __cdecl crt_sig_handler(int sig, int num) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; CONTEXT Context; switch (sig) { case SIGFPE: @@ -86,7 +86,7 @@ void __cdecl crt_sig_handler(int sig, int num) } break; default: // SIGSEGV, (SSIGTERM, IGILL) - if (ptls->safe_restore) + if (jl_get_safe_restore()) jl_rethrow(); memset(&Context, 0, sizeof(Context)); RtlCaptureContext(&Context); @@ -108,7 +108,9 @@ static int have_backtrace_fiber; static void JL_NORETURN start_backtrace_fiber(void) { // collect the backtrace - stkerror_ptls->bt_size = rec_backtrace_ctx(stkerror_ptls->bt_data, JL_MAX_BT_SIZE, stkerror_ctx, stkerror_ptls->pgcstack); + stkerror_ptls->bt_size = + rec_backtrace_ctx(stkerror_ptls->bt_data, JL_MAX_BT_SIZE, stkerror_ctx, + NULL /*current_task?*/); // switch back to the execution fiber jl_setcontext(&error_return_fiber); abort(); @@ -122,7 +124,8 @@ void restore_signals(void) void jl_throw_in_ctx(jl_value_t *excpt, PCONTEXT ctxThread) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; #if defined(_CPU_X86_64_) DWORD64 Rsp = (ctxThread->Rsp & (DWORD64)-16) - 8; #elif defined(_CPU_X86_) @@ -130,11 +133,12 @@ void jl_throw_in_ctx(jl_value_t *excpt, PCONTEXT ctxThread) #else #error WIN16 not supported :P #endif - if (!ptls->safe_restore) { + if (!jl_get_safe_restore()) 
{ assert(excpt != NULL); ptls->bt_size = 0; if (excpt != jl_stackovf_exception) { - ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread, ptls->pgcstack); + ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread, + ct->gcstack); } else if (have_backtrace_fiber) { JL_LOCK(&backtrace_lock); @@ -222,7 +226,7 @@ static BOOL WINAPI sigint_handler(DWORD wsig) //This needs winapi types to guara LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; if (ExceptionInfo->ExceptionRecord->ExceptionFlags == 0) { switch (ExceptionInfo->ExceptionRecord->ExceptionCode) { case EXCEPTION_INT_DIVIDE_BY_ZERO: @@ -248,7 +252,7 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo) } return EXCEPTION_CONTINUE_EXECUTION; } - if (ptls->safe_restore) { + if (jl_get_safe_restore()) { jl_throw_in_ctx(NULL, ExceptionInfo->ContextRecord); return EXCEPTION_CONTINUE_EXECUTION; } diff --git a/src/simplevector.c b/src/simplevector.c index 41b1be14da7f4c..2b87eb92c41d13 100644 --- a/src/simplevector.c +++ b/src/simplevector.c @@ -34,8 +34,8 @@ jl_svec_t *(jl_perm_symsvec)(size_t n, ...) 
JL_DLLEXPORT jl_svec_t *jl_svec1(void *a) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ptls, sizeof(void*) * 2, + jl_task_t *ct = jl_current_task; + jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ct->ptls, sizeof(void*) * 2, jl_simplevector_type); jl_svec_set_len_unsafe(v, 1); jl_svecset(v, 0, a); @@ -44,8 +44,8 @@ JL_DLLEXPORT jl_svec_t *jl_svec1(void *a) JL_DLLEXPORT jl_svec_t *jl_svec2(void *a, void *b) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ptls, sizeof(void*) * 3, + jl_task_t *ct = jl_current_task; + jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ct->ptls, sizeof(void*) * 3, jl_simplevector_type); jl_svec_set_len_unsafe(v, 2); jl_svecset(v, 0, a); @@ -55,9 +55,9 @@ JL_DLLEXPORT jl_svec_t *jl_svec2(void *a, void *b) JL_DLLEXPORT jl_svec_t *jl_alloc_svec_uninit(size_t n) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; if (n == 0) return jl_emptysvec; - jl_svec_t *jv = (jl_svec_t*)jl_gc_alloc(ptls, (n + 1) * sizeof(void*), + jl_svec_t *jv = (jl_svec_t*)jl_gc_alloc(ct->ptls, (n + 1) * sizeof(void*), jl_simplevector_type); jl_svec_set_len_unsafe(jv, n); return jv; diff --git a/src/stackwalk.c b/src/stackwalk.c index 790b7583cc5870..b686bbf5be847d 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -83,11 +83,10 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b } #endif #if !defined(_OS_WINDOWS_) - jl_ptls_t ptls = jl_get_ptls_states(); - jl_jmp_buf *old_buf = ptls->safe_restore; + jl_jmp_buf *old_buf = jl_get_safe_restore(); jl_jmp_buf buf; + jl_set_safe_restore(&buf); if (!jl_setjmp(buf, 0)) { - ptls->safe_restore = &buf; #endif int have_more_frames = 1; while (have_more_frames) { @@ -175,7 +174,7 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b // reader happy. 
if (n > 0) n -= 1; } - ptls->safe_restore = old_buf; + jl_set_safe_restore(old_buf); #endif #if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_) JL_UNLOCK_NOGC(&jl_in_stackwalk); @@ -314,7 +313,7 @@ static void decode_backtrace(jl_bt_element_t *bt_data, size_t bt_size, JL_DLLEXPORT jl_value_t *jl_get_backtrace(void) { - jl_excstack_t *s = jl_get_ptls_states()->current_task->excstack; + jl_excstack_t *s = jl_current_task->excstack; jl_bt_element_t *bt_data = NULL; size_t bt_size = 0; if (s && s->top) { @@ -336,8 +335,8 @@ JL_DLLEXPORT jl_value_t *jl_get_backtrace(void) JL_DLLEXPORT jl_value_t *jl_get_excstack(jl_task_t* task, int include_bt, int max_entries) { JL_TYPECHK(current_exceptions, task, (jl_value_t*)task); - jl_ptls_t ptls = jl_get_ptls_states(); - if (task != ptls->current_task && task->_state == JL_TASK_STATE_RUNNABLE) { + jl_task_t *ct = jl_current_task; + if (task != ct && task->_state == JL_TASK_STATE_RUNNABLE) { jl_error("Inspecting the exception stack of a task which might " "be running concurrently isn't allowed."); } @@ -574,11 +573,11 @@ static int jl_unw_step(bt_cursor_t *cursor, int from_signal_handler, uintptr_t * JL_DLLEXPORT jl_value_t *jl_lookup_code_address(void *ip, int skipC) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; jl_frame_t *frames = NULL; - int8_t gc_state = jl_gc_safe_enter(ptls); + int8_t gc_state = jl_gc_safe_enter(ct->ptls); int n = jl_getFunctionInfo(&frames, (uintptr_t)ip, skipC, 0); - jl_gc_safe_leave(ptls, gc_state); + jl_gc_safe_leave(ct->ptls, gc_state); jl_value_t *rs = (jl_value_t*)jl_alloc_svec(n); JL_GC_PUSH1(&rs); for (int i = 0; i < n; i++) { @@ -694,9 +693,10 @@ extern bt_context_t *jl_to_bt_context(void *sigctx); void jl_rec_backtrace(jl_task_t *t) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; ptls->bt_size = 0; - if (t == ptls->current_task) { + if (t == ct) { ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 
0); return; } @@ -751,10 +751,10 @@ JL_DLLEXPORT void jl_gdblookup(void* ip) // Print backtrace for current exception in catch block JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT { - jl_ptls_t ptls = jl_get_ptls_states(); - if (ptls->current_task == NULL) + jl_task_t *ct = jl_current_task; + if (ct->ptls == NULL) return; - jl_excstack_t *s = ptls->current_task->excstack; + jl_excstack_t *s = ct->excstack; if (!s) return; size_t i, bt_size = jl_excstack_bt_size(s, s->top); @@ -765,7 +765,8 @@ JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT } JL_DLLEXPORT void jlbacktracet(jl_task_t *t) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; jl_rec_backtrace(t); size_t i, bt_size = ptls->bt_size; jl_bt_element_t *bt_data = ptls->bt_data; diff --git a/src/staticdata.c b/src/staticdata.c index 1fc1797f35fddc..e2d3116b258d70 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -159,7 +159,6 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_emptytuple); INSERT_TAG(jl_false); INSERT_TAG(jl_true); - INSERT_TAG(jl_nothing); INSERT_TAG(jl_an_empty_string); INSERT_TAG(jl_an_empty_vec_any); INSERT_TAG(jl_module_init_order); @@ -338,9 +337,13 @@ static void jl_load_sysimg_so(void) jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_offsets", (void **)&sysimg_gvars_offsets, 1); sysimg_gvars_offsets += 1; assert(sysimg_fptrs.base); - uintptr_t *tls_getter_slot; - jl_dlsym(jl_sysimg_handle, "jl_get_ptls_states_slot", (void **)&tls_getter_slot, 1); - *tls_getter_slot = (uintptr_t)jl_get_ptls_states_getter(); + + void *pgcstack_func_slot; + jl_dlsym(jl_sysimg_handle, "jl_pgcstack_func_slot", &pgcstack_func_slot, 1); + void *pgcstack_key_slot; + jl_dlsym(jl_sysimg_handle, "jl_pgcstack_key_slot", &pgcstack_key_slot, 1); + jl_pgcstack_getkey((jl_get_pgcstack_func**)pgcstack_func_slot, (jl_pgcstack_key_t*)pgcstack_key_slot); + size_t *tls_offset_idx; jl_dlsym(jl_sysimg_handle, "jl_tls_offset", (void **)&tls_offset_idx, 1); 
*tls_offset_idx = (uintptr_t)(jl_tls_offset == -1 ? 0 : jl_tls_offset); @@ -411,7 +414,7 @@ static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m) static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int recursive) { // ignore items that are given a special representation - if (v == NULL || jl_is_symbol(v)) { + if (v == NULL || jl_is_symbol(v) || v == jl_nothing) { return; } else if (jl_typeis(v, jl_task_type)) { @@ -572,19 +575,22 @@ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v) JL_NOTSAFEPO else if (v == (jl_value_t*)s->ptls->root_task) { return (uintptr_t)TagRef << RELOC_TAG_OFFSET; } + else if (v == jl_nothing) { + return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + 1; + } else if (jl_typeis(v, jl_int64_type)) { int64_t i64 = *(int64_t*)v + NBOX_C / 2; if ((uint64_t)i64 < NBOX_C) - return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + i64 + 1; + return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + i64 + 2; } else if (jl_typeis(v, jl_int32_type)) { int32_t i32 = *(int32_t*)v + NBOX_C / 2; if ((uint32_t)i32 < NBOX_C) - return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + i32 + 1 + NBOX_C; + return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + i32 + 2 + NBOX_C; } else if (jl_typeis(v, jl_uint8_type)) { uint8_t u8 = *(uint8_t*)v; - return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + u8 + 1 + NBOX_C + NBOX_C; + return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + u8 + 2 + NBOX_C + NBOX_C; } if (idx == HT_NOTFOUND) { idx = ptrhash_get(&backref_table, v); @@ -1041,7 +1047,7 @@ static void jl_write_gv_syms(jl_serializer_state *s, jl_sym_t *v) jl_write_gv_syms(s, v->right); } -static void jl_write_gv_int(jl_serializer_state *s, jl_value_t *v) +static void jl_write_gv_tagref(jl_serializer_state *s, jl_value_t *v) { int32_t gv = jl_get_llvm_gv(native_functions, (jl_value_t*)v); if (gv != 0) { @@ -1050,7 +1056,7 @@ static void jl_write_gv_int(jl_serializer_state *s, jl_value_t *v) record_gvar(s, gv, item); } } -static void 
jl_write_gv_ints(jl_serializer_state *s) +static void jl_write_gv_tagrefs(jl_serializer_state *s) { // this also ensures all objects referenced in the code have // references in the system image to their global variable @@ -1058,12 +1064,15 @@ static void jl_write_gv_ints(jl_serializer_state *s) // they might not have had a reference anywhere in the code // image other than here size_t i; + jl_write_gv_tagref(s, (jl_value_t*)s->ptls->root_task); + jl_write_gv_tagref(s, s->ptls->root_task->tls); + jl_write_gv_tagref(s, jl_nothing); for (i = 0; i < NBOX_C; i++) { - jl_write_gv_int(s, jl_box_int32((int32_t)i - NBOX_C / 2)); - jl_write_gv_int(s, jl_box_int64((int64_t)i - NBOX_C / 2)); + jl_write_gv_tagref(s, jl_box_int32((int32_t)i - NBOX_C / 2)); + jl_write_gv_tagref(s, jl_box_int64((int64_t)i - NBOX_C / 2)); } for (i = 0; i < 256; i++) { - jl_write_gv_int(s, jl_box_uint8(i)); + jl_write_gv_tagref(s, jl_box_uint8(i)); } } @@ -1114,7 +1123,7 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset) assert(offset < nsym_tag && "corrupt relocation item id"); break; case TagRef: - assert(offset < 2 * NBOX_C + 257 && "corrupt relocation item id"); + assert(offset < 2 * NBOX_C + 258 && "corrupt relocation item id"); break; case BindingRef: assert(offset == 0 && "corrupt relocation offset"); @@ -1154,7 +1163,9 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas case TagRef: if (offset == 0) return (uintptr_t)s->ptls->root_task; - offset -= 1; + if (offset == 1) + return (uintptr_t)jl_nothing; + offset -= 2; if (offset < NBOX_C) return (uintptr_t)jl_box_int64((int64_t)offset - NBOX_C / 2); offset -= NBOX_C; @@ -1510,7 +1521,7 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED s.relocs = &relocs; s.gvar_record = &gvar_record; s.fptr_record = &fptr_record; - s.ptls = jl_get_ptls_states(); + s.ptls = jl_current_task->ptls; arraylist_new(&s.relocs_list, 0); arraylist_new(&s.gctags_list, 0); jl_value_t 
**const*const tags = get_tags(); @@ -1566,7 +1577,7 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED jl_write_values(&s); jl_write_relocations(&s); jl_write_gv_syms(&s, jl_get_root_symbol()); - jl_write_gv_ints(&s); + jl_write_gv_tagrefs(&s); } if (sysimg.size > ((uintptr_t)1 << RELOC_TAG_OFFSET) || @@ -1695,7 +1706,7 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED s.relocs = &relocs; s.gvar_record = &gvar_record; s.fptr_record = &fptr_record; - s.ptls = jl_get_ptls_states(); + s.ptls = jl_current_task->ptls; arraylist_new(&s.relocs_list, 0); arraylist_new(&s.gctags_list, 0); jl_value_t **const*const tags = get_tags(); @@ -1738,9 +1749,11 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED jl_value_t **tag = tags[i]; *tag = jl_read_value(&s); } - s.ptls->root_task = (jl_task_t*)jl_gc_alloc(s.ptls, sizeof(jl_task_t), jl_task_type); - memset(s.ptls->root_task, 0, sizeof(jl_task_t)); + // set typeof extra-special values now that we have the type set by tags above + jl_astaggedvalue(jl_current_task)->header = (uintptr_t)jl_task_type | jl_astaggedvalue(jl_current_task)->header; + jl_astaggedvalue(jl_nothing)->header = (uintptr_t)jl_nothing_type | jl_astaggedvalue(jl_nothing)->header; s.ptls->root_task->tls = jl_read_value(&s); + jl_gc_wb(s.ptls->root_task, s.ptls->root_task->tls); jl_init_int32_int64_cache(); jl_init_box_caches(); @@ -1794,7 +1807,6 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED } s.s = &sysimg; - jl_init_codegen(); jl_update_all_fptrs(&s); // fptr relocs and registration // reinit ccallables, which require codegen to be initialized s.s = f; diff --git a/src/task.c b/src/task.c index 54aaf5fdc530f0..bd77de0898f01d 100644 --- a/src/task.c +++ b/src/task.c @@ -191,7 +191,7 @@ static jl_function_t *task_done_hook_func JL_GLOBALLY_ROOTED = NULL; void JL_NORETURN jl_finish_task(jl_task_t *t) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = 
jl_current_task; JL_SIGATOMIC_BEGIN(); if (t->_isexception) jl_atomic_store_release(&t->_state, JL_TASK_STATE_FAILED); @@ -200,9 +200,9 @@ void JL_NORETURN jl_finish_task(jl_task_t *t) if (t->copy_stack) // early free of stkbuf t->stkbuf = NULL; // ensure that state is cleared - ptls->in_finalizer = 0; - ptls->in_pure_callback = 0; - jl_get_ptls_states()->world_age = jl_world_counter; + ct->ptls->in_finalizer = 0; + ct->ptls->in_pure_callback = 0; + ct->world_age = jl_world_counter; // let the runtime know this task is dead and find a new task to run jl_function_t *done = jl_atomic_load_relaxed(&task_done_hook_func); if (done == NULL) { @@ -223,7 +223,7 @@ void JL_NORETURN jl_finish_task(jl_task_t *t) abort(); } -JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *tid) +JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *ptid) { size_t off = 0; #ifndef _OS_WINDOWS_ @@ -235,19 +235,16 @@ JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *tid) off = ROOT_TASK_STACK_ADJUSTMENT; } #endif - *tid = -1; - for (int i = 0; i < jl_n_threads; i++) { - jl_ptls_t ptls = jl_all_tls_states[i]; - if (ptls->current_task == task) { - *tid = i; + jl_ptls_t ptls2 = task->ptls; + *ptid = -1; + if (ptls2) { + *ptid = task->tid; #ifdef COPY_STACKS - if (task->copy_stack) { - *size = ptls->stacksize; - return (char *)ptls->stackbase - *size; - } -#endif - break; // continue with normal return + if (task->copy_stack) { + *size = ptls2->stacksize; + return (char *)ptls2->stackbase - *size; } +#endif } *size = task->bufsz - off; return (void *)((char *)task->stkbuf + off); @@ -263,10 +260,8 @@ JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task, return; } - int16_t tid = task->tid; - jl_ptls_t ptls2 = (tid != -1) ? 
jl_all_tls_states[tid] : 0; - - if (task->copy_stack && ptls2 && task == ptls2->current_task) { + jl_ptls_t ptls2 = task->ptls; + if (task->copy_stack && ptls2) { *total_start = *active_start = (char*)ptls2->stackbase - ptls2->stacksize; *total_end = *active_end = (char*)ptls2->stackbase; } @@ -314,27 +309,27 @@ NOINLINE static void record_backtrace(jl_ptls_t ptls, int skip) JL_NOTSAFEPOINT JL_DLLEXPORT void jl_set_next_task(jl_task_t *task) JL_NOTSAFEPOINT { - jl_get_ptls_states()->next_task = task; + jl_current_task->ptls->next_task = task; } JL_DLLEXPORT jl_task_t *jl_get_next_task(void) JL_NOTSAFEPOINT { - jl_ptls_t ptls = jl_get_ptls_states(); - if (ptls->next_task) - return ptls->next_task; - return ptls->current_task; + jl_task_t *ct = jl_current_task; + if (ct->ptls->next_task) + return ct->ptls->next_task; + return ct; } #ifdef JL_TSAN_ENABLED const char tsan_state_corruption[] = "TSAN state corrupted. Exiting HARD!\n"; #endif -static void ctx_switch(jl_ptls_t ptls) +static void ctx_switch(jl_task_t *lastt) { + jl_ptls_t ptls = lastt->ptls; jl_task_t **pt = &ptls->next_task; jl_task_t *t = *pt; - assert(t != ptls->current_task); - jl_task_t *lastt = ptls->current_task; + assert(t != lastt); // none of these locks should be held across a task switch assert(ptls->locks.len == 0); @@ -391,17 +386,18 @@ static void ctx_switch(jl_ptls_t ptls) else #endif *pt = NULL; // can't fail after here: clear the gc-root for the target task now - lastt->gcstack = ptls->pgcstack; + lastt->ptls = NULL; } - // set up global state for new task - ptls->pgcstack = t->gcstack; - ptls->world_age = 0; - t->gcstack = NULL; + // set up global state for new task and clear global state for old task + t->ptls = ptls; + ptls->current_task = t; + JL_GC_PROMISE_ROOTED(t); + lastt->ptls = NULL; #ifdef MIGRATE_TASKS ptls->previous_task = lastt; #endif - ptls->current_task = t; + jl_set_pgcstack(&t->gcstack); #if defined(JL_TSAN_ENABLED) tsan_switch_to_ctx(&t->tsan_state); @@ -470,16 +466,11 
@@ static void ctx_switch(jl_ptls_t ptls) sanitizer_finish_switch_fiber(); } -static jl_ptls_t NOINLINE refetch_ptls(void) -{ - return jl_get_ptls_states(); -} - JL_DLLEXPORT void jl_switch(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; jl_task_t *t = ptls->next_task; - jl_task_t *ct = ptls->current_task; if (t == ct) { return; } @@ -493,21 +484,13 @@ JL_DLLEXPORT void jl_switch(void) jl_error("task switch not allowed from inside gc finalizer"); if (ptls->in_pure_callback) jl_error("task switch not allowed from inside staged nor pure functions"); - if (t->sticky && jl_atomic_load_acquire(&t->tid) == -1) { - // manually yielding to a task - if (jl_atomic_compare_exchange(&t->tid, -1, ptls->tid) != -1) - jl_error("cannot switch to task running on another thread"); - } - else if (t->tid != ptls->tid) { + if (!jl_set_task_tid(t, ptls->tid)) // manually yielding to a task jl_error("cannot switch to task running on another thread"); - } // Store old values on the stack and reset sig_atomic_t defer_signal = ptls->defer_signal; int8_t gc_state = jl_gc_unsafe_enter(ptls); - size_t world_age = ptls->world_age; int finalizers_inhibited = ptls->finalizers_inhibited; - ptls->world_age = 0; ptls->finalizers_inhibited = 0; #ifdef ENABLE_TIMINGS @@ -517,25 +500,24 @@ JL_DLLEXPORT void jl_switch(void) ptls->timing_stack = NULL; #endif - ctx_switch(ptls); + ctx_switch(ct); #ifdef MIGRATE_TASKS - ptls = refetch_ptls(); + ptls = ct->ptls; t = ptls->previous_task; + ptls->previous_task = NULL; + assert(t != ct); assert(t->tid == ptls->tid); if (!t->sticky && !t->copy_stack) - t->tid = -1; -#elif defined(NDEBUG) - (void)refetch_ptls(); + jl_atomic_store_release(&t->tid, -1); #else - assert(ptls == refetch_ptls()); + assert(ptls == ct->ptls); #endif // Pop old values back off the stack - assert(ct == ptls->current_task && - 0 == ptls->world_age && + assert(ct == jl_current_task && + 0 != ct->ptls && 0 == 
ptls->finalizers_inhibited); - ptls->world_age = world_age; ptls->finalizers_inhibited = finalizers_inhibited; #ifdef ENABLE_TIMINGS @@ -575,31 +557,26 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e) } // yield to exception handler -static void JL_NORETURN throw_internal(jl_value_t *exception JL_MAYBE_UNROOTED) +static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_MAYBE_UNROOTED) { - jl_ptls_t ptls = jl_get_ptls_states(); + assert(!jl_get_safe_restore()); + jl_ptls_t ptls = ct->ptls; ptls->io_wait = 0; // @time needs its compile timer disabled on error, // and cannot use a try-finally as it would break scope for assignments jl_measure_compile_time[ptls->tid] = 0; - if (ptls->safe_restore) - jl_longjmp(*ptls->safe_restore, 1); - // During startup - if (!ptls->current_task) - jl_no_exc_handler(exception); JL_GC_PUSH1(&exception); jl_gc_unsafe_enter(ptls); if (exception) { // The temporary ptls->bt_data is rooted by special purpose code in the // GC. This exists only for the purpose of preserving bt_data until we // set ptls->bt_size=0 below. 
- assert(ptls->current_task); - jl_push_excstack(&ptls->current_task->excstack, exception, + jl_push_excstack(&ct->excstack, exception, ptls->bt_data, ptls->bt_size); ptls->bt_size = 0; } - assert(ptls->current_task->excstack && ptls->current_task->excstack->top); - jl_handler_t *eh = ptls->current_task->eh; + assert(ct->excstack && ct->excstack->top); + jl_handler_t *eh = ct->eh; if (eh != NULL) { #ifdef ENABLE_TIMINGS jl_timing_block_t *cur_block = ptls->timing_stack; @@ -619,21 +596,26 @@ static void JL_NORETURN throw_internal(jl_value_t *exception JL_MAYBE_UNROOTED) // record backtrace and raise an error JL_DLLEXPORT void jl_throw(jl_value_t *e JL_MAYBE_UNROOTED) { - jl_ptls_t ptls = jl_get_ptls_states(); assert(e != NULL); - if (ptls->safe_restore) - throw_internal(NULL); - record_backtrace(ptls, 1); - throw_internal(e); + jl_jmp_buf *safe_restore = jl_get_safe_restore(); + if (safe_restore) + jl_longjmp(*safe_restore, 1); + jl_task_t *ct = jl_get_current_task(); + if (ct == NULL) // During startup + jl_no_exc_handler(e); + JL_GC_PROMISE_ROOTED(ct); + record_backtrace(ct->ptls, 1); + throw_internal(ct, e); } // rethrow with current excstack state JL_DLLEXPORT void jl_rethrow(void) { - jl_excstack_t *excstack = jl_get_ptls_states()->current_task->excstack; + jl_task_t *ct = jl_current_task; + jl_excstack_t *excstack = ct->excstack; if (!excstack || excstack->top == 0) jl_error("rethrow() not allowed outside a catch block"); - throw_internal(NULL); + throw_internal(ct, NULL); } // Special case throw for errors detected inside signal handlers. 
This is not @@ -642,29 +624,34 @@ JL_DLLEXPORT void jl_rethrow(void) JL_DLLEXPORT void JL_NORETURN jl_sig_throw(void) { CFI_NORETURN - jl_ptls_t ptls = jl_get_ptls_states(); + jl_jmp_buf *safe_restore = jl_get_safe_restore(); + if (safe_restore) + jl_longjmp(*safe_restore, 1); + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; jl_value_t *e = ptls->sig_exception; ptls->sig_exception = NULL; - throw_internal(e); + throw_internal(ct, e); } JL_DLLEXPORT void jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED) { // TODO: Should uses of `rethrow(exc)` be replaced with a normal throw, now // that exception stacks allow root cause analysis? - jl_excstack_t *excstack = jl_get_ptls_states()->current_task->excstack; + jl_task_t *ct = jl_current_task; + jl_excstack_t *excstack = ct->excstack; if (!excstack || excstack->top == 0) jl_error("rethrow(exc) not allowed outside a catch block"); // overwrite exception on top of stack. see jl_excstack_exception jl_excstack_raw(excstack)[excstack->top-1].jlvalue = e; JL_GC_PROMISE_ROOTED(e); - throw_internal(NULL); + throw_internal(ct, NULL); } JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion_future, size_t ssize) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_task_t *t = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type); + jl_task_t *ct = jl_current_task; + jl_task_t *t = (jl_task_t*)jl_gc_alloc(ct->ptls, sizeof(jl_task_t), jl_task_type); t->copy_stack = 0; if (ssize == 0) { // stack size unspecified; use default @@ -695,7 +682,7 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->donenotify = completion_future; t->_isexception = 0; // Inherit logger state from parent task - t->logstate = ptls->current_task->logstate; + t->logstate = ct->logstate; // there is no active exception handler available on this stack yet t->eh = NULL; t->sticky = 1; @@ -703,17 +690,22 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion 
t->excstack = NULL; t->started = 0; t->prio = -1; - t->tid = -1; + t->tid = t->copy_stack ? ct->tid : -1; // copy_stacks are always pinned since they can't be moved + t->ptls = NULL; + t->world_age = 0; +#ifdef COPY_STACKS + if (!t->copy_stack) { #if defined(JL_DEBUG_BUILD) - if (!t->copy_stack) memset(&t->ctx, 0, sizeof(t->ctx)); #endif -#ifdef COPY_STACKS - if (always_copy_stacks) - memcpy(&t->copy_stack_ctx, &ptls->copy_stack_ctx, sizeof(t->copy_stack_ctx)); - else if (t->copy_stack) - memcpy(&t->ctx, &ptls->base_ctx, sizeof(t->ctx)); + } + else { + if (always_copy_stacks) + memcpy(&t->copy_stack_ctx, &ct->ptls->copy_stack_ctx, sizeof(t->copy_stack_ctx)); + else + memcpy(&t->ctx, &ct->ptls->base_ctx, sizeof(t->ctx)); + } #endif #ifdef JL_TSAN_ENABLED t->tsan_state = __tsan_create_fiber(0); @@ -721,22 +713,11 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion return t; } -JL_DLLEXPORT jl_value_t *jl_get_current_task(void) -{ - jl_ptls_t ptls = jl_get_ptls_states(); - return (jl_value_t*)ptls->current_task; -} - -JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void) -{ - jl_ptls_t ptls = jl_get_ptls_states(); - return ptls->safe_restore; -} - -JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr) +// a version of jl_current_task safe for unmanaged threads +JL_DLLEXPORT jl_task_t *jl_get_current_task(void) { - jl_ptls_t ptls = jl_get_ptls_states(); - ptls->safe_restore = sr; + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + return pgcstack == NULL ? 
NULL : container_of(pgcstack, jl_task_t, gcstack); } #ifdef JL_HAVE_ASYNCIFY @@ -747,8 +728,8 @@ JL_DLLEXPORT jl_ucontext_t *task_ctx_ptr(jl_task_t *t) JL_DLLEXPORT jl_value_t *jl_get_root_task(void) { - jl_ptls_t ptls = jl_get_ptls_states(); - return (jl_value_t*)ptls->root_task; + jl_task_t *ct = jl_current_task; + return (jl_value_t*)ct->ptls->root_task; } JL_DLLEXPORT void jl_task_wait() @@ -757,10 +738,11 @@ JL_DLLEXPORT void jl_task_wait() if (!wait_func) { wait_func = (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("wait")); } - size_t last_age = jl_get_ptls_states()->world_age; - jl_get_ptls_states()->world_age = jl_get_world_counter(); + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_get_world_counter(); jl_apply(&wait_func, 1); - jl_get_ptls_states()->world_age = last_age; + ct->world_age = last_age; } JL_DLLEXPORT void jl_schedule_task(jl_task_t *task) @@ -769,11 +751,12 @@ JL_DLLEXPORT void jl_schedule_task(jl_task_t *task) if (!sched_func) { sched_func = (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("schedule")); } - size_t last_age = jl_get_ptls_states()->world_age; - jl_get_ptls_states()->world_age = jl_get_world_counter(); + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_get_world_counter(); jl_value_t *args[] = {(jl_value_t*)sched_func, (jl_value_t*)task}; jl_apply(args, 2); - jl_get_ptls_states()->world_age = last_age; + ct->world_age = last_age; } #endif @@ -804,23 +787,29 @@ STATIC_OR_JS void NOINLINE JL_NORETURN start_task(void) CFI_NORETURN // this runs the first time we switch to a task sanitizer_finish_switch_fiber(); - jl_ptls_t ptls = jl_get_ptls_states(); - jl_task_t *t = ptls->current_task; +#ifdef __clang_analyzer__ + jl_task_t *ct = jl_get_current_task(); + JL_GC_PROMISE_ROOTED(ct); +#else + jl_task_t *ct = jl_current_task; +#endif + jl_ptls_t ptls = ct->ptls; jl_value_t *res; assert(ptls->finalizers_inhibited == 0); #ifdef 
MIGRATE_TASKS jl_task_t *pt = ptls->previous_task; + ptls->previous_task = NULL; if (!pt->sticky && !pt->copy_stack) - pt->tid = -1; + jl_atomic_store_release(&pt->tid, -1); #endif - t->started = 1; - if (t->_isexception) { + ct->started = 1; + if (ct->_isexception) { record_backtrace(ptls, 0); - jl_push_excstack(&t->excstack, t->result, + jl_push_excstack(&ct->excstack, ct->result, ptls->bt_data, ptls->bt_size); - res = t->result; + res = ct->result; } else { JL_TRY { @@ -829,19 +818,19 @@ CFI_NORETURN jl_sigint_safepoint(ptls); } JL_TIMING(ROOT); - ptls->world_age = jl_world_counter; - res = jl_apply(&t->start, 1); + ct->world_age = jl_world_counter; + res = jl_apply(&ct->start, 1); } JL_CATCH { res = jl_current_exception(); - t->_isexception = 1; + ct->_isexception = 1; goto skip_pop_exception; } skip_pop_exception:; } - t->result = res; - jl_gc_wb(t, t->result); - jl_finish_task(t); + ct->result = res; + jl_gc_wb(ct, ct->result); + jl_finish_task(ct); gc_debug_critical_error(); abort(); } @@ -1106,7 +1095,7 @@ static void jl_start_fiber_set(jl_ucontext_t *t) static void start_basefiber(int sig) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; if (jl_setjmp(ptls->base_ctx.uc_mcontext, 0)) start_task(); // sanitizer_finish_switch_fiber is part of start_task } @@ -1119,7 +1108,7 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) if (stk == NULL) return NULL; // setup - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; jl_ucontext_t base_ctx; memcpy(&base_ctx, &ptls->base_ctx, sizeof(ptls->base_ctx)); sigfillset(&set); @@ -1207,15 +1196,24 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) J #endif // Initialize a root task using the given stack. 
-void jl_init_root_task(void *stack_lo, void *stack_hi) +void jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) { - jl_ptls_t ptls = jl_get_ptls_states(); - if (ptls->root_task == NULL) { - ptls->root_task = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type); - memset(ptls->root_task, 0, sizeof(jl_task_t)); - ptls->root_task->tls = jl_nothing; - } - ptls->current_task = ptls->root_task; + assert(ptls->root_task == NULL); + // We need `gcstack` in `Task` to allocate Julia objects; *including* the `Task` type. + // However, to allocate a `Task` via `jl_gc_alloc` as done in `jl_init_root_task`, + // we need the `Task` type itself. We use stack-allocated "raw" `jl_task_t` struct to + // workaround this chicken-and-egg problem. Note that this relies on GC to be turned + // off as GC fails because we don't/can't allocate the type tag. + struct { + jl_value_t *type; + jl_task_t value; + } bootstrap_task = {0}; + jl_set_pgcstack(&bootstrap_task.value.gcstack); + bootstrap_task.value.ptls = ptls; + if (jl_nothing == NULL) // make a placeholder + jl_nothing = jl_gc_permobj(0, jl_nothing_type); + jl_task_t *ct = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type); + memset(ct, 0, sizeof(jl_task_t)); void *stack = stack_lo; size_t ssize = (char*)stack_hi - (char*)stack_lo; #ifndef _OS_WINDOWS_ @@ -1225,32 +1223,40 @@ void jl_init_root_task(void *stack_lo, void *stack_hi) } #endif if (always_copy_stacks) { - ptls->current_task->copy_stack = 1; - ptls->current_task->stkbuf = NULL; - ptls->current_task->bufsz = 0; + ct->copy_stack = 1; + ct->stkbuf = NULL; + ct->bufsz = 0; } else { - ptls->current_task->copy_stack = 0; - ptls->current_task->stkbuf = stack; - ptls->current_task->bufsz = ssize; + ct->copy_stack = 0; + ct->stkbuf = stack; + ct->bufsz = ssize; } - ptls->current_task->started = 1; - ptls->current_task->next = jl_nothing; - ptls->current_task->queue = jl_nothing; - ptls->current_task->_state = JL_TASK_STATE_RUNNABLE; - 
ptls->current_task->start = NULL; - ptls->current_task->result = jl_nothing; - ptls->current_task->donenotify = jl_nothing; - ptls->current_task->_isexception = 0; - ptls->current_task->logstate = jl_nothing; - ptls->current_task->eh = NULL; - ptls->current_task->gcstack = NULL; - ptls->current_task->excstack = NULL; - ptls->current_task->tid = ptls->tid; - ptls->current_task->sticky = 1; + ct->started = 1; + ct->next = jl_nothing; + ct->queue = jl_nothing; + ct->tls = jl_nothing; + ct->_state = JL_TASK_STATE_RUNNABLE; + ct->start = NULL; + ct->result = jl_nothing; + ct->donenotify = jl_nothing; + ct->_isexception = 0; + ct->logstate = jl_nothing; + ct->eh = NULL; + ct->gcstack = NULL; + ct->excstack = NULL; + ct->tid = ptls->tid; + ct->sticky = 1; + ct->ptls = ptls; + ct->world_age = 1; // OK to run Julia code on this task + ptls->root_task = ct; + ptls->current_task = ct; + JL_GC_PROMISE_ROOTED(ct); + jl_set_pgcstack(&ct->gcstack); + assert(jl_current_task == ct); #ifdef JL_TSAN_ENABLED - ptls->current_task->tsan_state = __tsan_get_current_fiber(); + ct->tsan_state = __tsan_get_current_fiber(); #endif #ifdef COPY_STACKS diff --git a/src/threading.c b/src/threading.c index 85855ed5b7e09a..235bb9f870ba19 100644 --- a/src/threading.c +++ b/src/threading.c @@ -36,6 +36,32 @@ extern "C" { #include "threading.h" +JL_DLLEXPORT void *jl_get_ptls_states(void) +{ + // mostly deprecated: use current_task instead + return jl_current_task->ptls; +} + +#if !defined(_OS_WINDOWS_) +static pthread_key_t jl_safe_restore_key; + +__attribute__((constructor)) void _jl_init_safe_restore(void) +{ + pthread_key_create(&jl_safe_restore_key, NULL); +} + +JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void) +{ + return (jl_jmp_buf*)pthread_getspecific(jl_safe_restore_key); +} + +JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr) +{ + pthread_setspecific(jl_safe_restore_key, (void*)sr); +} +#endif + + // The tls_states buffer: // // On platforms that do not use ELF (i.e. 
where `__thread` is emulated with @@ -53,41 +79,42 @@ extern "C" { // Mac doesn't seem to have static TLS model so the runtime TLS getter // registration will only add overhead to TLS access. The `__thread` variables // are emulated with `pthread_key_t` so it is actually faster to use it directly. -static pthread_key_t jl_tls_key; +static pthread_key_t jl_pgcstack_key; -__attribute__((constructor)) void jl_mac_init_tls(void) +__attribute__((constructor)) void jl_init_tls(void) { - pthread_key_create(&jl_tls_key, NULL); + pthread_key_create(&jl_pgcstack_key, NULL); } -JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t (jl_get_ptls_states)(void) JL_GLOBALLY_ROOTED +JL_CONST_FUNC jl_gcframe_t **jl_get_pgcstack(void) JL_NOTSAFEPOINT { - void *ptls = pthread_getspecific(jl_tls_key); - if (__unlikely(!ptls)) { - ptls = calloc(1, sizeof(jl_tls_states_t)); - pthread_setspecific(jl_tls_key, ptls); - } - return (jl_ptls_t)ptls; + return pthread_getspecific(jl_pgcstack_key); } -// This is only used after the tls is already initialized on the thread -static JL_CONST_FUNC jl_ptls_t jl_get_ptls_states_fast(void) JL_NOTSAFEPOINT +void jl_set_pgcstack(jl_gcframe_t **pgcstack) JL_NOTSAFEPOINT { - return (jl_ptls_t)pthread_getspecific(jl_tls_key); + pthread_setspecific(jl_pgcstack_key, (void*)pgcstack); } -jl_get_ptls_states_func jl_get_ptls_states_getter(void) +void jl_pgcstack_getkey(jl_get_pgcstack_func **f, pthread_key_t *k) { // for codegen - return &jl_get_ptls_states_fast; + *f = pthread_getspecific; + *k = jl_pgcstack_key; +} + + +JL_DLLEXPORT void jl_pgcstack_setkey(jl_get_pgcstack_func *f, pthread_key_t k) +{ + jl_safe_printf("ERROR: Attempt to change TLS address.\n"); } -JL_DLLEXPORT void jl_set_ptls_states_getter(jl_get_ptls_states_func f) { } #elif defined(_OS_WINDOWS_) // Apparently windows doesn't have a static TLS model (or one that can be // reliably used from a shared library) either..... Use `TLSAlloc` instead. 
-static DWORD jl_tls_key; +static DWORD jl_pgcstack_key; +static DWORD jl_safe_restore_key; // Put this here for now. We can move this out later if we find more use for it. BOOLEAN WINAPI DllMain(IN HINSTANCE hDllHandle, IN DWORD nReason, @@ -95,53 +122,84 @@ BOOLEAN WINAPI DllMain(IN HINSTANCE hDllHandle, IN DWORD nReason, { switch (nReason) { case DLL_PROCESS_ATTACH: - jl_tls_key = TlsAlloc(); - assert(jl_tls_key != TLS_OUT_OF_INDEXES); + jl_pgcstack_key = TlsAlloc(); + assert(jl_pgcstack_key != TLS_OUT_OF_INDEXES); + jl_safe_restore_key = TlsAlloc(); + assert(jl_safe_restore_key != TLS_OUT_OF_INDEXES); // Fall through case DLL_THREAD_ATTACH: - TlsSetValue(jl_tls_key, calloc(1, sizeof(jl_tls_states_t))); break; case DLL_THREAD_DETACH: - free(TlsGetValue(jl_tls_key)); - TlsSetValue(jl_tls_key, NULL); break; case DLL_PROCESS_DETACH: - free(TlsGetValue(jl_tls_key)); - TlsFree(jl_tls_key); + TlsFree(jl_pgcstack_key); + TlsFree(jl_safe_restore_key); break; } return 1; // success } -JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t (jl_get_ptls_states)(void) JL_GLOBALLY_ROOTED -{ -#if defined(_CPU_X86_64_) - DWORD *plast_error = (DWORD*)(__readgsqword(0x30) + 0x68); - DWORD last_error = *plast_error; -#elif defined(_CPU_X86_) - DWORD *plast_error = (DWORD*)(__readfsdword(0x18) + 0x34); - DWORD last_error = *plast_error; -#else - DWORD last_error = GetLastError(); -#endif - jl_ptls_t state = (jl_ptls_t)TlsGetValue(jl_tls_key); #if defined(_CPU_X86_64_) - *plast_error = last_error; +#define SAVE_ERRNO \ + DWORD *plast_error = (DWORD*)(__readgsqword(0x30) + 0x68); \ + DWORD last_error = *plast_error +#define LOAD_ERRNO \ + *plast_error = last_error #elif defined(_CPU_X86_) - *plast_error = last_error; +#define SAVE_ERRNO \ + DWORD *plast_error = (DWORD*)(__readfsdword(0x18) + 0x34); \ + DWORD last_error = *plast_error +#define LOAD_ERRNO \ + *plast_error = last_error #else - SetLastError(last_error); +#define SAVE_ERRNO \ + DWORD last_error = GetLastError() +#define LOAD_ERRNO \ + 
SetLastError(last_error) #endif - return state; + +JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void) +{ + SAVE_ERRNO; + jl_jmp_buf *sr = (jl_jmp_buf*)TlsGetValue(jl_safe_restore_key); + LOAD_ERRNO; + return sr; +} + +JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr) +{ + SAVE_ERRNO; + TlsSetValue(jl_safe_restore_key, (void*)sr); + LOAD_ERRNO; +} + +JL_CONST_FUNC jl_gcframe_t **jl_get_pgcstack(void) JL_NOTSAFEPOINT +{ + SAVE_ERRNO; + jl_gcframe_t **pgcstack = (jl_ptls_t)TlsGetValue(jl_pgcstack_key); + LOAD_ERRNO; + return pgcstack; } -jl_get_ptls_states_func jl_get_ptls_states_getter(void) +void jl_set_pgcstack(jl_gcframe_t **pgcstack) JL_NOTSAFEPOINT +{ + // n.b.: this smashes GetLastError + TlsSetValue(jl_pgcstack_key, (void*)pgcstack); +} + +void jl_pgcstack_getkey(jl_get_pgcstack_func **f, DWORD *k) { // for codegen - return &jl_get_ptls_states; + *f = jl_get_pgcstack; + *k = jl_pgcstack_key; +} + +JL_DLLEXPORT void jl_pgcstack_setkey(jl_get_pgcstack_func *f, DWORD k) +{ + jl_safe_printf("ERROR: Attempt to change TLS address.\n"); } -JL_DLLEXPORT void jl_set_ptls_states_getter(jl_get_ptls_states_func f) { } + #else // We use the faster static version in the main executable to replace // the slower version in the shared object. The code in different libraries @@ -149,70 +207,82 @@ JL_DLLEXPORT void jl_set_ptls_states_getter(jl_get_ptls_states_func f) { } // The general solution is to add one more indirection in the C entry point. // // When `ifunc` is available, we can use it to trick the linker to use the -// real address (`jl_get_ptls_states_static`) directly as the symbol address. +// real address (`jl_get_pgcstack_static`) directly as the symbol address. // // However, since the detection of the static version in `ifunc` // is not guaranteed to be reliable, we still need to fallback to the wrapper // version as the symbol address if we didn't find the static version in `ifunc`. 
// fallback provided for embedding -static JL_CONST_FUNC jl_ptls_t jl_get_ptls_states_fallback(void) +static jl_pgcstack_key_t jl_pgcstack_key; +static __thread jl_gcframe_t **pgcstack_; +static jl_gcframe_t **jl_get_pgcstack_fallback(void) JL_NOTSAFEPOINT +{ + return pgcstack_; +} +static jl_gcframe_t ***jl_pgcstack_addr_fallback(void) JL_NOTSAFEPOINT { - static __thread jl_tls_states_t tls_states; - return &tls_states; + return &pgcstack_; +} +void jl_set_pgcstack(jl_gcframe_t **pgcstack) JL_NOTSAFEPOINT +{ + *jl_pgcstack_key() = pgcstack; } # if JL_USE_IFUNC -JL_DLLEXPORT JL_CONST_FUNC __attribute__((weak)) -jl_ptls_t jl_get_ptls_states_static(void); +JL_DLLEXPORT __attribute__((weak)) +void jl_register_pgcstack_getter(void); # endif -static jl_ptls_t jl_get_ptls_states_init(void); -static jl_get_ptls_states_func jl_tls_states_cb = jl_get_ptls_states_init; -static jl_ptls_t jl_get_ptls_states_init(void) +static jl_gcframe_t **jl_get_pgcstack_init(void); +static jl_get_pgcstack_func *jl_get_pgcstack_cb = jl_get_pgcstack_init; +static jl_gcframe_t **jl_get_pgcstack_init(void) { // This 2-step initialization is used to detect calling - // `jl_set_ptls_states_getter` after the address of the TLS variables + // `jl_pgcstack_getkey` after the address of the TLS variables // are used. Since the address of TLS variables should be constant, // changing the getter address can result in weird crashes. 
// This is clearly not thread safe but should be fine since we // make sure the tls states callback is finalized before adding // multiple threads - jl_get_ptls_states_func cb = jl_get_ptls_states_fallback; # if JL_USE_IFUNC - if (jl_get_ptls_states_static) - cb = jl_get_ptls_states_static; + if (jl_register_pgcstack_getter) + jl_register_pgcstack_getter(); + else # endif - jl_tls_states_cb = cb; - return cb(); + { + jl_get_pgcstack_cb = jl_get_pgcstack_fallback; + jl_pgcstack_key = &jl_pgcstack_addr_fallback; + } + return jl_get_pgcstack_cb(); } -JL_DLLEXPORT void jl_set_ptls_states_getter(jl_get_ptls_states_func f) +JL_DLLEXPORT void jl_pgcstack_setkey(jl_get_pgcstack_func *f, jl_pgcstack_key_t k) { - if (f == jl_tls_states_cb || !f) + if (f == jl_get_pgcstack_cb || !f) return; // only allow setting this once - if (jl_tls_states_cb == jl_get_ptls_states_init) { - jl_tls_states_cb = f; - } - else { + if (jl_get_pgcstack_cb != jl_get_pgcstack_init) { jl_safe_printf("ERROR: Attempt to change TLS address.\n"); exit(1); } + jl_get_pgcstack_cb = f; + jl_pgcstack_key = k; } -JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t (jl_get_ptls_states)(void) JL_GLOBALLY_ROOTED +JL_DLLEXPORT jl_gcframe_t **jl_get_pgcstack(void) JL_GLOBALLY_ROOTED { #ifndef __clang_analyzer__ - return (*jl_tls_states_cb)(); + return jl_get_pgcstack_cb(); #endif } -jl_get_ptls_states_func jl_get_ptls_states_getter(void) +void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k) { - if (jl_tls_states_cb == jl_get_ptls_states_init) - jl_get_ptls_states_init(); + if (jl_get_pgcstack_cb == jl_get_pgcstack_init) + jl_get_pgcstack_init(); // for codegen - return jl_tls_states_cb; + *f = jl_get_pgcstack_cb; + *k = jl_pgcstack_key; } #endif @@ -225,13 +295,12 @@ uint64_t *jl_cumulative_compile_time = NULL; // type of the thread id. 
JL_DLLEXPORT int16_t jl_threadid(void) { - jl_ptls_t ptls = jl_get_ptls_states(); - return ptls->tid; + return jl_current_task->tid; } -void jl_init_threadtls(int16_t tid) +jl_ptls_t jl_init_threadtls(int16_t tid) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = (jl_ptls_t)calloc(1, sizeof(jl_tls_states_t)); ptls->system_id = jl_thread_self(); seed_cong(&ptls->rngseed); #ifdef _OS_WINDOWS_ @@ -244,10 +313,7 @@ void jl_init_threadtls(int16_t tid) } } #endif - assert(ptls->world_age == 0); - ptls->world_age = 1; // OK to run Julia code on this thread ptls->tid = tid; - ptls->pgcstack = NULL; ptls->gc_state = 0; // GC unsafe // Conditionally initialize the safepoint address. See comment in // `safepoint.c` @@ -258,22 +324,16 @@ void jl_init_threadtls(int16_t tid) ptls->safepoint = (size_t*)(jl_safepoint_pages + jl_page_size * 2 + sizeof(size_t)); } - ptls->defer_signal = 0; jl_bt_element_t *bt_data = (jl_bt_element_t*) malloc_s(sizeof(jl_bt_element_t) * (JL_MAX_BT_SIZE + 1)); memset(bt_data, 0, sizeof(jl_bt_element_t) * (JL_MAX_BT_SIZE + 1)); ptls->bt_data = bt_data; - ptls->sig_exception = NULL; - ptls->previous_exception = NULL; - ptls->next_task = NULL; -#ifdef _OS_WINDOWS_ - ptls->needs_resetstkoflw = 0; -#endif small_arraylist_new(&ptls->locks, 0); jl_init_thread_heap(ptls); - jl_install_thread_signal_handler(ptls); jl_all_tls_states[tid] = ptls; + + return ptls; } // lock for code generation @@ -298,11 +358,11 @@ static inline size_t jl_add_tls_size(size_t orig_size, size_t size, size_t align { return LLT_ALIGN(orig_size, align) + size; } -static inline ssize_t jl_check_tls_bound(void *tp, void *ptls, size_t tls_size) +static inline ssize_t jl_check_tls_bound(void *tp, jl_gcframe_t ***k0, size_t tls_size) { - ssize_t offset = (char*)ptls - (char*)tp; + ssize_t offset = (char*)k0 - (char*)tp; if (offset < JL_ELF_TLS_INIT_SIZE || - (size_t)offset + sizeof(jl_tls_states_t) > tls_size) + (size_t)offset + sizeof(*k0) > tls_size) return -1; return offset; 
} @@ -313,10 +373,10 @@ static inline size_t jl_add_tls_size(size_t orig_size, size_t size, size_t align { return LLT_ALIGN(orig_size + size, align); } -static inline ssize_t jl_check_tls_bound(void *tp, void *ptls, size_t tls_size) +static inline ssize_t jl_check_tls_bound(void *tp, jl_gcframe_t ***k0, size_t tls_size) { - ssize_t offset = (char*)tp - (char*)ptls; - if (offset < sizeof(jl_tls_states_t) || offset > tls_size) + ssize_t offset = (char*)tp - (char*)k0; + if (offset < sizeof(*k0) || offset > tls_size) return -1; return -offset; } @@ -351,7 +411,12 @@ static int check_tls_cb(struct dl_phdr_info *info, size_t size, void *_data) static void jl_check_tls(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_get_pgcstack_func *f; + jl_gcframe_t ***(*k)(void); + jl_pgcstack_getkey(&f, &k); + jl_gcframe_t ***k0 = k(); + if (k0 == NULL) + return; check_tls_cb_t data = {0}; dl_iterate_phdr(check_tls_cb, &data); if (data.total_size == 0) @@ -368,7 +433,7 @@ static void jl_check_tls(void) #else # error "Cannot emit thread pointer for this architecture." #endif - ssize_t offset = jl_check_tls_bound(tp, ptls, data.total_size); + ssize_t offset = jl_check_tls_bound(tp, k0, data.total_size); if (offset == -1) return; jl_tls_offset = offset; @@ -407,11 +472,6 @@ void jl_init_threading(void) #ifndef __clang_analyzer__ jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*)); #endif - // initialize this thread (set tid, create heap, etc.) 
- jl_init_threadtls(0); - - // initialize threading infrastructure - jl_init_threadinginfra(); } static uv_barrier_t thread_init_done; diff --git a/src/threading.h b/src/threading.h index 43516ccceebb1e..4c6f1e19881f5c 100644 --- a/src/threading.h +++ b/src/threading.h @@ -21,7 +21,7 @@ typedef struct _jl_threadarg_t { } jl_threadarg_t; // each thread must initialize its TLS -void jl_init_threadtls(int16_t tid); +jl_ptls_t jl_init_threadtls(int16_t tid); // provided by a threading infrastructure void jl_init_threadinginfra(void); diff --git a/src/timing.c b/src/timing.c index 12f47bbfa00865..12093d2e142c09 100644 --- a/src/timing.c +++ b/src/timing.c @@ -48,7 +48,7 @@ void jl_init_timing(void) void jl_destroy_timing(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_ptls_t ptls = jl_current_task->ptls; jl_timing_block_t *stack = ptls->timing_stack; while (stack) { _jl_timing_block_destroy(stack); diff --git a/src/timing.h b/src/timing.h index 9a3307709a38fd..fd84707ad5d2c2 100644 --- a/src/timing.h +++ b/src/timing.h @@ -116,8 +116,8 @@ STATIC_INLINE uint64_t _jl_timing_block_init(jl_timing_block_t *block, int owner STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner) { uint64_t t = _jl_timing_block_init(block, owner); - jl_ptls_t ptls = jl_get_ptls_states(); - jl_timing_block_t **prevp = &ptls->timing_stack; + jl_task_t *ct = jl_current_task; + jl_timing_block_t **prevp = &ct->ptls->timing_stack; block->prev = *prevp; if (block->prev) _jl_timing_block_stop(block->prev, t); @@ -126,10 +126,10 @@ STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner) { STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) { uint64_t t = cycleclock(); - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; _jl_timing_block_stop(block, t); jl_timing_data[block->owner] += block->total; - jl_timing_block_t **pcur = &ptls->timing_stack; + jl_timing_block_t **pcur = &ct->ptls->timing_stack; assert(*pcur 
== block); *pcur = block->prev; if (block->prev) diff --git a/src/tls.h b/src/tls.h deleted file mode 100644 index 7f14d4acd7dc55..00000000000000 --- a/src/tls.h +++ /dev/null @@ -1,30 +0,0 @@ -// This file is a part of Julia. License is MIT: https://julialang.org/license - -#ifndef JL_TLS_H -#define JL_TLS_H - -// Thread-local storage access - -typedef struct _jl_tls_states_t jl_tls_states_t; - -typedef jl_tls_states_t *jl_ptls_t; - -#ifdef __cplusplus -extern "C" { -#endif - -JL_DLLEXPORT int16_t jl_threadid(void); -JL_DLLEXPORT void jl_threading_profile(void); - -JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t (jl_get_ptls_states)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT; - -typedef jl_ptls_t (*jl_get_ptls_states_func)(void); -#if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_) -JL_DLLEXPORT void jl_set_ptls_states_getter(jl_get_ptls_states_func f); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/toplevel.c b/src/toplevel.c index 88ace36d193a88..33054299b49894 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -66,11 +66,12 @@ void jl_module_run_initializer(jl_module_t *m) jl_function_t *f = jl_module_get_initializer(m); if (f == NULL) return; - size_t last_age = jl_get_ptls_states()->world_age; + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; JL_TRY { - jl_get_ptls_states()->world_age = jl_world_counter; + ct->world_age = jl_world_counter; jl_apply(&f, 1); - jl_get_ptls_states()->world_age = last_age; + ct->world_age = last_age; } JL_CATCH { if (jl_initerror_type == NULL) { @@ -115,7 +116,7 @@ static int jl_is__toplevel__mod(jl_module_t *mod) // TODO: add locks around global state mutation operations static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; assert(ex->head == module_sym); if (jl_array_len(ex->args) != 3 || !jl_is_expr(jl_exprarg(ex, 2))) { jl_error("syntax: malformed module expression"); @@ -173,7 +174,7 @@ static 
jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex jl_base_module = newm; } - size_t last_age = ptls->world_age; + size_t last_age = ct->world_age; // add standard imports unless baremodule if (std_imports) { @@ -189,13 +190,13 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex jl_array_t *exprs = ((jl_expr_t*)jl_exprarg(ex, 2))->args; for (int i = 0; i < jl_array_len(exprs); i++) { // process toplevel form - ptls->world_age = jl_world_counter; + ct->world_age = jl_world_counter; form = jl_expand_stmt_with_loc(jl_array_ptr_ref(exprs, i), newm, jl_filename, jl_lineno); - ptls->world_age = jl_world_counter; + ct->world_age = jl_world_counter; (void)jl_toplevel_eval_flex(newm, form, 1, 1); } newm->primary_world = jl_world_counter; - ptls->world_age = last_age; + ct->world_age = last_age; #if 0 // some optional post-processing steps @@ -267,7 +268,7 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex static jl_value_t *jl_eval_dot_expr(jl_module_t *m, jl_value_t *x, jl_value_t *f, int fast) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; jl_value_t **args; JL_GC_PUSHARGS(args, 3); args[1] = jl_toplevel_eval_flex(m, x, fast, 0); @@ -278,10 +279,10 @@ static jl_value_t *jl_eval_dot_expr(jl_module_t *m, jl_value_t *x, jl_value_t *f } else { args[0] = jl_eval_global_var(jl_base_relative_to(m), jl_symbol("getproperty")); - size_t last_age = ptls->world_age; - ptls->world_age = jl_world_counter; + size_t last_age = ct->world_age; + ct->world_age = jl_world_counter; args[0] = jl_apply(args, 3); - ptls->world_age = last_age; + ct->world_age = last_age; } JL_GC_POP(); return args[0]; @@ -411,19 +412,19 @@ static jl_module_t *call_require(jl_module_t *mod, jl_sym_t *var) JL_GLOBALLY_RO static jl_value_t *require_func = NULL; int build_mode = jl_generating_output(); jl_module_t *m = NULL; - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; 
if (require_func == NULL && jl_base_module != NULL) { require_func = jl_get_global(jl_base_module, jl_symbol("require")); } if (require_func != NULL) { - size_t last_age = ptls->world_age; - ptls->world_age = (build_mode ? jl_base_module->primary_world : jl_world_counter); + size_t last_age = ct->world_age; + ct->world_age = (build_mode ? jl_base_module->primary_world : jl_world_counter); jl_value_t *reqargs[3]; reqargs[0] = require_func; reqargs[1] = (jl_value_t*)mod; reqargs[2] = (jl_value_t*)var; m = (jl_module_t*)jl_apply(reqargs, 3); - ptls->world_age = last_age; + ct->world_age = last_age; } if (m == NULL || !jl_is_module(m)) { jl_errorf("failed to load module %s", jl_symbol_name(var)); @@ -619,7 +620,7 @@ static void jl_eval_errorf(jl_module_t *m, const char* fmt, ...) jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int fast, int expanded) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; if (!jl_is_expr(e)) { if (jl_is_linenode(e)) { jl_lineno = jl_linenode_line(e); @@ -652,7 +653,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int } } - if (ptls->in_pure_callback) { + if (ct->ptls->in_pure_callback) { jl_error("eval cannot be used in a generated function"); } @@ -660,11 +661,11 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int jl_code_info_t *thk = NULL; JL_GC_PUSH3(&mfunc, &thk, &ex); - size_t last_age = ptls->world_age; + size_t last_age = ct->world_age; if (!expanded && jl_needs_lowering(e)) { - ptls->world_age = jl_world_counter; + ct->world_age = jl_world_counter; ex = (jl_expr_t*)jl_expand_with_loc_warn(e, m, jl_filename, jl_lineno); - ptls->world_age = last_age; + ct->world_age = last_age; } jl_sym_t *head = jl_is_expr(ex) ? 
ex->head : NULL; @@ -868,12 +869,12 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int // TODO: This is still not correct since an `eval` can happen elsewhere, but it // helps in common cases. size_t world = jl_world_counter; - ptls->world_age = world; + ct->world_age = world; if (!has_defs && jl_get_module_infer(m) != 0) { (void)jl_type_infer(mfunc, world, 0); } result = jl_invoke(/*func*/NULL, /*args*/NULL, /*nargs*/0, mfunc); - ptls->world_age = last_age; + ct->world_age = last_age; } else { // use interpreter @@ -923,8 +924,8 @@ static void jl_check_open_for(jl_module_t *m, const char* funcname) JL_DLLEXPORT jl_value_t *jl_toplevel_eval_in(jl_module_t *m, jl_value_t *ex) { - jl_ptls_t ptls = jl_get_ptls_states(); - if (ptls->in_pure_callback) + jl_task_t *ct = jl_current_task; + if (ct->ptls->in_pure_callback) jl_error("eval cannot be used in a generated function"); jl_check_open_for(m, "eval"); jl_value_t *v = NULL; @@ -951,7 +952,8 @@ JL_DLLEXPORT jl_value_t *jl_infer_thunk(jl_code_info_t *thk, jl_module_t *m) jl_method_instance_t *li = method_instance_for_thunk(thk, m); JL_GC_PUSH1(&li); jl_resolve_globals_in_ir((jl_array_t*)thk->code, m, NULL, 0); - jl_code_info_t *src = jl_type_infer(li, jl_get_ptls_states()->world_age, 0); + jl_task_t *ct = jl_current_task; + jl_code_info_t *src = jl_type_infer(li, ct->world_age, 0); JL_GC_POP(); if (src) return src->rettype; @@ -971,8 +973,8 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text, if (!jl_is_string(text) || !jl_is_string(filename)) { jl_errorf("Expected `String`s for `text` and `filename`"); } - jl_ptls_t ptls = jl_get_ptls_states(); - if (ptls->in_pure_callback) + jl_task_t *ct = jl_current_task; + if (ct->ptls->in_pure_callback) jl_error("cannot use include inside a generated function"); jl_check_open_for(module, "include"); @@ -989,7 +991,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text, int last_lineno = jl_lineno; 
const char *last_filename = jl_filename; - size_t last_age = jl_get_ptls_states()->world_age; + size_t last_age = ct->world_age; int lineno = 0; jl_lineno = 0; jl_filename = jl_string_data(filename); @@ -1006,7 +1008,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text, } expression = jl_expand_with_loc_warn(expression, module, jl_string_data(filename), lineno); - jl_get_ptls_states()->world_age = jl_world_counter; + ct->world_age = jl_world_counter; result = jl_toplevel_eval_flex(module, expression, 1, 1); } } @@ -1016,7 +1018,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text, goto finally; // skip jl_restore_excstack } finally: - jl_get_ptls_states()->world_age = last_age; + ct->world_age = last_age; jl_lineno = last_lineno; jl_filename = last_filename; if (err) { diff --git a/src/typemap.c b/src/typemap.c index b546aadee77acb..58dd2b8b13069f 100644 --- a/src/typemap.c +++ b/src/typemap.c @@ -1091,9 +1091,9 @@ static void jl_typemap_level_insert_(jl_typemap_t *map, jl_typemap_level_t *cach static jl_typemap_level_t *jl_new_typemap_level(void) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; jl_typemap_level_t *cache = - (jl_typemap_level_t*)jl_gc_alloc(ptls, sizeof(jl_typemap_level_t), + (jl_typemap_level_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typemap_level_t), jl_typemap_level_type); cache->arg1 = (jl_array_t*)jl_an_empty_vec_any; cache->targ = (jl_array_t*)jl_an_empty_vec_any; @@ -1244,7 +1244,7 @@ jl_typemap_entry_t *jl_typemap_alloc( jl_tupletype_t *type, jl_tupletype_t *simpletype, jl_svec_t *guardsigs, jl_value_t *newvalue, size_t min_world, size_t max_world) { - jl_ptls_t ptls = jl_get_ptls_states(); + jl_task_t *ct = jl_current_task; assert(min_world > 0 && max_world > 0); if (!simpletype) simpletype = (jl_tupletype_t*)jl_nothing; @@ -1270,7 +1270,7 @@ jl_typemap_entry_t *jl_typemap_alloc( } jl_typemap_entry_t *newrec = - (jl_typemap_entry_t*)jl_gc_alloc(ptls, 
sizeof(jl_typemap_entry_t), + (jl_typemap_entry_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typemap_entry_t), jl_typemap_entry_type); newrec->sig = type; newrec->simplesig = simpletype; diff --git a/test/clangsa/MissingRoots.c b/test/clangsa/MissingRoots.c index 1c9f7c8e4ad705..78dcc195d59ced 100644 --- a/test/clangsa/MissingRoots.c +++ b/test/clangsa/MissingRoots.c @@ -409,14 +409,6 @@ void stack_rooted(jl_value_t *lb JL_MAYBE_UNROOTED, jl_value_t *ub JL_MAYBE_UNRO JL_GC_POP(); } -void JL_NORETURN throw_internal(jl_value_t *e JL_MAYBE_UNROOTED) -{ - jl_ptls_t ptls = jl_get_ptls_states(); - ptls->sig_exception = e; - jl_gc_unsafe_enter(ptls); - look_at_value(e); -} - JL_DLLEXPORT jl_value_t *jl_totally_used_function(int i) { jl_value_t *v = jl_box_int32(i); // expected-note{{Started tracking value here}} diff --git a/test/llvmpasses/alloc-opt-gcframe.jl b/test/llvmpasses/alloc-opt-gcframe.jl index e48a85641257b7..227569a545adb6 100644 --- a/test/llvmpasses/alloc-opt-gcframe.jl +++ b/test/llvmpasses/alloc-opt-gcframe.jl @@ -16,6 +16,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" # CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !0 println(""" define {} addrspace(10)* @return_obj() { + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag) @@ -33,6 +34,7 @@ define {} addrspace(10)* @return_obj() { # CHECK-NOT: @llvm.lifetime.end println(""" define i64 @return_load(i64 %i) { + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag) @@ -47,12 +49,14 @@ define i64 @return_load(i64 %i) { # CHECK-LABEL: }{{$}} # CHECK-LABEL: @ccall_obj +# CHECK: call {}*** 
@julia.get_pgcstack() # CHECK: call {}*** @julia.ptls_states() # CHECK-NOT: @julia.gc_alloc_obj # CHECK: @jl_gc_pool_alloc # CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !0 println(""" define void @ccall_obj(i8* %fptr) { + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag) @@ -65,6 +69,7 @@ define void @ccall_obj(i8* %fptr) { # CHECK-LABEL: @ccall_ptr # CHECK: alloca i64 +# CHECK: call {}*** @julia.get_pgcstack() # CHECK: call {}*** @julia.ptls_states() # CHECK-NOT: @julia.gc_alloc_obj # CHECK-NOT: @jl_gc_pool_alloc @@ -75,6 +80,7 @@ define void @ccall_obj(i8* %fptr) { # CHECK-NEXT: ret void println(""" define void @ccall_ptr(i8* %fptr) { + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag) @@ -89,12 +95,14 @@ define void @ccall_ptr(i8* %fptr) { # CHECK-LABEL: }{{$}} # CHECK-LABEL: @ccall_unknown_bundle +# CHECK: call {}*** @julia.get_pgcstack() # CHECK: call {}*** @julia.ptls_states() # CHECK-NOT: @julia.gc_alloc_obj # CHECK: @jl_gc_pool_alloc # CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !0 println(""" define void @ccall_unknown_bundle(i8* %fptr) { + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag) @@ -110,6 +118,7 @@ define void @ccall_unknown_bundle(i8* %fptr) { # CHECK-LABEL: @lifetime_branches # CHECK: alloca i64 +# CHECK: call {}*** @julia.get_pgcstack() # CHECK: call {}*** @julia.ptls_states() # CHECK: L1: # 
CHECK-NEXT: call void @llvm.lifetime.start{{.*}}(i64 8, @@ -126,6 +135,7 @@ define void @ccall_unknown_bundle(i8* %fptr) { # CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8, println(""" define void @lifetime_branches(i8* %fptr, i1 %b, i1 %b2) { + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* br i1 %b, label %L1, label %L3 @@ -151,12 +161,14 @@ L3: # CHECK-LABEL: }{{$}} # CHECK-LABEL: @object_field +# CHECK: call {}*** @julia.get_pgcstack() # CHECK: call {}*** @julia.ptls_states() # CHECK-NOT: @julia.gc_alloc_obj # CHECK-NOT: @jl_gc_pool_alloc # CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !0 println(""" define void @object_field({} addrspace(10)* %field) { + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag) @@ -170,6 +182,7 @@ define void @object_field({} addrspace(10)* %field) { # CHECK-LABEL: @memcpy_opt # CHECK: alloca [16 x i8], align 16 +# CHECK: call {}*** @julia.get_pgcstack() # CHECK: call {}*** @julia.ptls_states() # CHECK-NOT: @julia.gc_alloc_obj # CHECK-NOT: @jl_gc_pool_alloc @@ -177,6 +190,7 @@ define void @object_field({} addrspace(10)* %field) { println(""" define void @memcpy_opt(i8* %v22) { top: + %pgcstack = call {}*** @julia.get_pgcstack() %v6 = call {}*** @julia.ptls_states() %v18 = bitcast {}*** %v6 to i8* %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %v18, $isz 16, {} addrspace(10)* @tag) @@ -189,6 +203,7 @@ top: # CHECK-LABEL: }{{$}} # CHECK-LABEL: @preserve_opt +# CHECK: call {}*** @julia.get_pgcstack() # CHECK: call {}*** @julia.ptls_states() # CHECK-NOT: @julia.gc_alloc_obj # CHECK-NOT: @jl_gc_pool_alloc @@ -197,6 +212,7 @@ top: println(""" define void @preserve_opt(i8* %v22) { top: + %pgcstack = call {}*** 
@julia.get_pgcstack() %v6 = call {}*** @julia.ptls_states() %v18 = bitcast {}*** %v6 to i8* %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %v18, $isz 16, {} addrspace(10)* @tag) @@ -212,6 +228,7 @@ top: # CHECK-LABEL: }{{$}} # CHECK-LABEL: @preserve_branches +# CHECK: call {}*** @julia.get_pgcstack() # CHECK: call {}*** @julia.ptls_states() # CHECK: L1: # CHECK-NEXT: @external_function() @@ -224,6 +241,7 @@ top: # CHECK: L3: println(""" define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) { + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* br i1 %b, label %L1, label %L3 @@ -249,6 +267,7 @@ L3: println(""" declare void @external_function() declare {}*** @julia.ptls_states() +declare {}*** @julia.get_pgcstack() declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8*, $isz, {} addrspace(10)*) declare {}* @julia.pointer_from_objref({} addrspace(11)*) declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) diff --git a/test/llvmpasses/alloc-opt-pass.jl b/test/llvmpasses/alloc-opt-pass.jl index 9bde40036ff735..8fbc9d2c7b7c47 100644 --- a/test/llvmpasses/alloc-opt-pass.jl +++ b/test/llvmpasses/alloc-opt-pass.jl @@ -24,6 +24,7 @@ println(""" # CHECK: L3: println(""" define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) { + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* br i1 %b, label %L1, label %L3 @@ -58,6 +59,7 @@ L3: # CHECK: L3: println(""" define void @preserve_branches2(i8* %fptr, i1 %b, i1 %b2) { + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* %v2 = call {} addrspace(10)* @external_function2() @@ -85,6 +87,7 @@ L3: # CHECK: ret void println(""" define void @legal_int_types() { + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** 
@julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* %var1 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 12, {} addrspace(10)* @tag) @@ -101,6 +104,7 @@ println(""" declare void @external_function() declare {} addrspace(10)* @external_function2() declare {}*** @julia.ptls_states() +declare {}*** @julia.get_pgcstack() declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, $isz, {} addrspace(10)*) declare {}* @julia.pointer_from_objref({} addrspace(11)*) declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) @@ -119,6 +123,7 @@ declare void @llvm.julia.gc_preserve_end(token) # CHECK: load i println(""" define void @memref_collision($isz %x) { + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag) diff --git a/test/llvmpasses/final-lower-gc.ll b/test/llvmpasses/final-lower-gc.ll index 04376f7f814964..e29ada14a0d00c 100644 --- a/test/llvmpasses/final-lower-gc.ll +++ b/test/llvmpasses/final-lower-gc.ll @@ -5,6 +5,7 @@ declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*) declare {} addrspace(10)* @jl_box_int64(i64) declare {}*** @julia.ptls_states() +declare {}*** @julia.get_pgcstack() declare void @jl_safepoint() declare {} addrspace(10)* @jl_apply_generic({} addrspace(10)*, {} addrspace(10)**, i32) @@ -21,12 +22,11 @@ top: ; CHECK-LABEL: @gc_frame_lowering ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) -; CHECK: %ptls = call {}*** @julia.ptls_states() - %ptls = call {}*** @julia.ptls_states() +; CHECK: [[GCFRAME_SLOT:%.*]] = call {}*** @julia.get_pgcstack() + %pgcstack = call {}*** @julia.get_pgcstack() ; CHECK-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 0 ; CHECK-DAG: 
[[GCFRAME_SIZE_PTR2:%.*]] = bitcast {} addrspace(10)** [[GCFRAME_SIZE_PTR]] to i64* ; CHECK-DAG: store i64 8, i64* [[GCFRAME_SIZE_PTR2]], align 8, !tbaa !0 -; CHECK-DAG: [[GCFRAME_SLOT:%.*]] = getelementptr inbounds {}**, {}*** %ptls, i32 0 ; CHECK-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 ; CHECK-DAG: [[PREV_GCFRAME_PTR2:%.*]] = bitcast {} addrspace(10)** [[PREV_GCFRAME_PTR]] to {}*** ; CHECK-DAG: [[PREV_GCFRAME:%.*]] = load {}**, {}*** [[GCFRAME_SLOT]], align 8 @@ -46,8 +46,7 @@ top: call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) ; CHECK-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 ; CHECK-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load {} addrspace(10)*, {} addrspace(10)** [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0 -; CHECK-NEXT: [[GCFRAME_SLOT3:%.*]] = getelementptr inbounds {}**, {}*** %ptls, i32 0 -; CHECK-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast {}*** [[GCFRAME_SLOT3]] to {} addrspace(10)** +; CHECK-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)** ; CHECK-NEXT: store {} addrspace(10)* [[PREV_GCFRAME_PTR4]], {} addrspace(10)** [[GCFRAME_SLOT4]], align 8, !tbaa !0 call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) ; CHECK-NEXT: ret void @@ -57,6 +56,7 @@ top: define {} addrspace(10)* @gc_alloc_lowering() { top: ; CHECK-LABEL: @gc_alloc_lowering + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* ; CHECK: %v = call noalias nonnull {} addrspace(10)* @jl_gc_pool_alloc diff --git a/test/llvmpasses/gcroots.ll b/test/llvmpasses/gcroots.ll index c11bb7ae4fe56b..00ea20e504bee3 100644 --- a/test/llvmpasses/gcroots.ll +++ b/test/llvmpasses/gcroots.ll @@ -4,12 +4,14 @@ declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*) declare {} addrspace(10)* @jl_box_int64(i64) declare {}*** @julia.ptls_states() 
+declare {}*** @julia.get_pgcstack() declare void @jl_safepoint() declare {} addrspace(10)* @jl_apply_generic({} addrspace(10)*, {} addrspace(10)**, i32) define void @simple(i64 %a, i64 %b) { top: ; CHECK-LABEL: @simple + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 ; CHECK: call {} addrspace(10)* @jl_box_int64 @@ -33,6 +35,7 @@ define void @leftover_alloca({} addrspace(10)* %a) { ; relying on mem2reg to catch simple cases such as this earlier ; CHECK-LABEL: @leftover_alloca ; CHECK: %var = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %var = alloca {} addrspace(10)* store {} addrspace(10)* %a, {} addrspace(10)** %var @@ -47,6 +50,7 @@ declare void @union_arg({{} addrspace(10)*, i8}) define void @simple_union() { ; CHECK-LABEL: @simple_union + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() ; CHECK: %a = call { {} addrspace(10)*, i8 } @union_ret() %a = call { {} addrspace(10)*, i8 } @union_ret() @@ -61,6 +65,7 @@ declare void @one_arg_boxed({} addrspace(10)*) define void @select_simple(i64 %a, i64 %b) { ; CHECK-LABEL: @select_simple + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) @@ -74,6 +79,7 @@ define void @phi_simple(i64 %a, i64 %b) { top: ; CHECK-LABEL: @phi_simple ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %cmp = icmp eq i64 %a, %b br i1 %cmp, label %alabel, label %blabel @@ -96,6 +102,7 @@ declare void @one_arg_decayed(i64 addrspace(12)*) define void @select_lift(i64 %a, i64 %b) { ; CHECK-LABEL: @select_lift ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 + 
%pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)* @@ -112,6 +119,7 @@ define void @phi_lift(i64 %a, i64 %b) { top: ; CHECK-LABEL: @phi_lift ; CHECK: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ] + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %cmp = icmp eq i64 %a, %b br i1 %cmp, label %alabel, label %blabel @@ -133,6 +141,7 @@ common: define void @phi_lift_union(i64 %a, i64 %b) { top: ; CHECK-LABEL: @phi_lift_union + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %cmp = icmp eq i64 %a, %b br i1 %cmp, label %alabel, label %blabel @@ -158,6 +167,7 @@ define void @live_if_live_out(i64 %a, i64 %b) { ; CHECK-LABEL: @live_if_live_out top: ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() ; The failure case is failing to realize that `aboxed` is live across the first ; one_arg_boxed safepoint and putting bboxed in the same root slot @@ -175,6 +185,7 @@ succ: define {} addrspace(10)* @ret_use(i64 %a, i64 %b) { ; CHECK-LABEL: @ret_use ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) ; CHECK: store {} addrspace(10)* %aboxed @@ -185,6 +196,7 @@ define {} addrspace(10)* @ret_use(i64 %a, i64 %b) { define {{} addrspace(10)*, i8} @ret_use_struct() { ; CHECK-LABEL: @ret_use_struct ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() ; CHECK: %aunion = call { {} addrspace(10)*, i8 } @union_ret() %aunion = call { {} addrspace(10)*, i8 } @union_ret() @@ -201,6 +213,7 
@@ define i8 @nosafepoint({} addrspace(10)* dereferenceable(16)) { ; CHECK-LABEL: @nosafepoint ; CHECK-NOT: %gcframe top: + %pgcstack = call {}*** @julia.get_pgcstack() %1 = call {}*** @julia.ptls_states() %2 = bitcast {}*** %1 to {} addrspace(10)** %3 = getelementptr {} addrspace(10)*, {} addrspace(10)** %2, i64 3 @@ -219,6 +232,7 @@ top: define void @global_ref() { ; CHECK-LABEL: @global_ref ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load {} addrspace(10)*, {} addrspace(10)** getelementptr ({} addrspace(10)*, {} addrspace(10)** inttoptr (i64 140540744325952 to {} addrspace(10)**), i64 1) ; CHECK: store {} addrspace(10)* %loaded, {} addrspace(10)** @@ -230,6 +244,7 @@ define {} addrspace(10)* @no_redundant_rerooting(i64 %a, i1 %cond) { ; CHECK-LABEL: @no_redundant_rerooting ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) ; CHECK: store {} addrspace(10)* %aboxed @@ -254,6 +269,7 @@ define void @memcpy_use(i64 %a, i64 *%aptr) { ; CHECK-LABEL: @memcpy_use ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) ; CHECK: store {} addrspace(10)* %aboxed @@ -270,6 +286,7 @@ define void @gc_preserve(i64 %a) { ; CHECK-LABEL: @gc_preserve ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) ; CHECK: store {} addrspace(10)* %aboxed @@ -291,6 +308,7 @@ define void @gc_preserve_vec([2 x <2 x {} addrspace(10)*>] addrspace(11)* nocapt ; CHECK-LABEL: @gc_preserve_vec ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 6 
top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %v = load [2 x <2 x {} addrspace(10)*>], [2 x <2 x {} addrspace(10)*>] addrspace(11)* %0, align 8 ; CHECK-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0 @@ -318,6 +336,7 @@ define {} addrspace(10)* @gv_const() { ; CHECK-LABEL: @gv_const ; CHECK-NOT: %gcframe top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %v10 = load {}*, {}** @gv1, !tbaa !2 %v1 = addrspacecast {}* %v10 to {} addrspace(10)* @@ -331,6 +350,7 @@ top: define {} addrspace(10)* @vec_jlcallarg({} addrspace(10)*, {} addrspace(10)**, i32) { ; CHECK-LABEL: @vec_jlcallarg ; CHECK-NOT: %gcframe + %pgcstack = call {}*** @julia.get_pgcstack() %v4 = call {}*** @julia.ptls_states() %v5 = bitcast {} addrspace(10)** %1 to <2 x {} addrspace(10)*>* %v6 = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*>* %v5, align 8 @@ -343,6 +363,7 @@ declare {} addrspace(10) *@alloc() define {} addrspace(10)* @vec_loadobj() { ; CHECK-LABEL: @vec_loadobj ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 + %pgcstack = call {}*** @julia.get_pgcstack() %v4 = call {}*** @julia.ptls_states() %obj = call {} addrspace(10) *@alloc() %v1 = bitcast {} addrspace(10) * %obj to {} addrspace(10)* addrspace(10)* @@ -356,6 +377,7 @@ define {} addrspace(10)* @vec_loadobj() { define {} addrspace(10)* @vec_gep() { ; CHECK-LABEL: @vec_gep ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 + %pgcstack = call {}*** @julia.get_pgcstack() %v4 = call {}*** @julia.ptls_states() %obj = call {} addrspace(10) *@alloc() %obj1 = bitcast {} addrspace(10) * %obj to {} addrspace(10)* addrspace(10)* @@ -371,6 +393,7 @@ define void @loopyness(i1 %cond1, {} addrspace(10) *%arg) { ; CHECK-LABEL: @loopyness ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() br label %header @@ -402,6 +425,7 @@ define {} 
addrspace(10)* @phi_union(i1 %cond) { ; CHECK-LABEL: @phi_union ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() br i1 %cond, label %a, label %b @@ -426,6 +450,7 @@ define {} addrspace(10)* @select_union(i1 %cond) { ; CHECK-LABEL: @select_union ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %obj = call {} addrspace(10) *@alloc() %aobj = insertvalue {{} addrspace(10)*, i8} undef, {} addrspace(10)* %obj, 0 @@ -441,6 +466,7 @@ define i8 @simple_arrayptr() { ; CHECK-LABEL: @simple_arrayptr ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %obj1 = call {} addrspace(10) *@alloc() %obj2 = call {} addrspace(10) *@alloc() @@ -457,6 +483,7 @@ define {} addrspace(10)* @vecstoreload(<2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecstoreload ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg call void @jl_safepoint() @@ -470,6 +497,7 @@ define void @vecphi(i1 %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecphi ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() br i1 %cond, label %A, label %B @@ -495,6 +523,7 @@ define i8 @phi_arrayptr(i1 %cond) { ; CHECK-LABEL: @phi_arrayptr ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() br i1 %cond, label %A, label %B @@ -533,6 +562,7 @@ define void @vecselect(i1 %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecselect ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 top: + %pgcstack = call 
{}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg call void @jl_safepoint() @@ -548,6 +578,7 @@ top: define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecselect_lift ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*> @@ -565,6 +596,7 @@ define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) { define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecvecselect_lift ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*> @@ -582,6 +614,7 @@ define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) { define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) { ; CHECK-LABEL: @vecscalarselect_lift ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)* @@ -600,6 +633,7 @@ define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) { define void @scalarvecselect_lift(i1 %cond, i64 %a) { ; CHECK-LABEL: @scalarvecselect_lift ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 
addrspace(12)* @@ -619,6 +653,7 @@ define i8 @select_arrayptr(i1 %cond) { ; CHECK-LABEL: @select_arrayptr ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %obj1 = call {} addrspace(10) *@alloc() %obj2 = call {} addrspace(10) *@alloc() @@ -648,6 +683,7 @@ define i8 @vector_arrayptrs() { ; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] ; top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %obj1 = call {} addrspace(10) *@alloc() %decayed = addrspacecast {} addrspace(10) *%obj1 to {} addrspace(11) * @@ -669,6 +705,7 @@ define i8 @masked_arrayptrs() { ; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] ; top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %obj1 = call {} addrspace(10) *@alloc() %decayed = addrspacecast {} addrspace(10) *%obj1 to {} addrspace(11) * @@ -690,6 +727,7 @@ define i8 @gather_arrayptrs() { ; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] ; top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %obj1 = call {} addrspace(10) *@alloc() %decayed = addrspacecast {} addrspace(10) *%obj1 to {} addrspace(11)* @@ -710,6 +748,7 @@ define i8 @gather_arrayptrs_alltrue() { ; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] ; top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %obj1 = call {} addrspace(10) *@alloc() %decayed = addrspacecast {} addrspace(10) *%obj1 to {} addrspace(11)* @@ -728,6 +767,7 @@ define i8 @lost_select_decayed(i1 %arg1) { ; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 ; CHECK: store {} addrspace(10)* [[SOMETHING:%.*]], {} addrspace(10)** [[GEP0]] top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %obj1 = call {} 
addrspace(10) *@alloc() %decayed = addrspacecast {} addrspace(10) *%obj1 to {} addrspace(11)* diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll index 29f889031b629e..a7b8dc7caee38d 100644 --- a/test/llvmpasses/late-lower-gc.ll +++ b/test/llvmpasses/late-lower-gc.ll @@ -5,6 +5,7 @@ declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*) declare {} addrspace(10)* @jl_box_int64(i64) declare {}*** @julia.ptls_states() +declare {}*** @julia.get_pgcstack() declare void @jl_safepoint() declare {} addrspace(10)* @jl_apply_generic({} addrspace(10)*, {} addrspace(10)**, i32) declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*) @@ -14,8 +15,8 @@ define void @gc_frame_lowering(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_lowering ; CHECK: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) - %ptls = call {}*** @julia.ptls_states() -; CHECK: %ptls = call {}*** @julia.ptls_states() +; CHECK: %pgcstack = call {}*** @julia.get_pgcstack() + %pgcstack = call {}*** @julia.get_pgcstack() ; CHECK-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) ; CHECK-NEXT: call {} addrspace(10)* @jl_box_int64 %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) @@ -37,6 +38,7 @@ top: define {} addrspace(10)* @gc_alloc_lowering() { top: ; CHECK-LABEL: @gc_alloc_lowering + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* ; CHECK: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, [[SIZE_T:i.[0-9]+]] 8) @@ -56,6 +58,7 @@ top: define void @gc_drop_aliasing() { top: ; CHECK-LABEL: @gc_drop_aliasing + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* ; CHECK: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, [[SIZE_T:i.[0-9]+]] 8) @@ -79,7 +82,7 @@ define i32 @callee_root({} addrspace(10)* %v0, {} addrspace(10)* 
%v1) { top: ; CHECK-LABEL: @callee_root ; CHECK-NOT: @julia.new_gc_frame - %v2 = call {}*** @julia.ptls_states() + %v2 = call {}*** @julia.get_pgcstack() %v3 = bitcast {} addrspace(10)* %v0 to {} addrspace(10)* addrspace(10)* %v4 = addrspacecast {} addrspace(10)* addrspace(10)* %v3 to {} addrspace(10)* addrspace(11)* %v5 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %v4 unordered, align 8 diff --git a/test/llvmpasses/lower-handlers.ll b/test/llvmpasses/lower-handlers.ll index 42e768ee132a7a..d9d5ac087b7737 100644 --- a/test/llvmpasses/lower-handlers.ll +++ b/test/llvmpasses/lower-handlers.ll @@ -4,10 +4,11 @@ attributes #1 = { returns_twice } declare i32 @julia.except_enter() #1 declare void @jl_pop_handler(i32) declare i8**** @julia.ptls_states() +declare i8**** @julia.get_pgcstack() define void @simple() { top: - %ptls = call i8**** @julia.ptls_states() + %pgcstack = call i8**** @julia.get_pgcstack() ; CHECK: call void @llvm.lifetime.start ; CHECK: call void @jl_enter_handler ; CHECK: setjmp diff --git a/test/llvmpasses/refinements.ll b/test/llvmpasses/refinements.ll index 37212a512d68f8..b883a53554a0c9 100644 --- a/test/llvmpasses/refinements.ll +++ b/test/llvmpasses/refinements.ll @@ -2,6 +2,7 @@ declare {}*** @julia.ptls_states() +declare {}*** @julia.get_pgcstack() declare void @jl_safepoint() declare void @one_arg_boxed({} addrspace(10)*) declare {} addrspace(10)* @jl_box_int64(i64) @@ -9,6 +10,7 @@ declare {} addrspace(10)* @jl_box_int64(i64) define void @argument_refinement({} addrspace(10)* %a) { ; CHECK-LABEL: @argument_refinement ; CHECK-NOT: %gcframe + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %casted1 = bitcast {} addrspace(10)* %a to {} addrspace(10)* addrspace(10)* %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1 @@ -22,6 +24,7 @@ define void @argument_refinement({} addrspace(10)* %a) { define void @heap_refinement1(i64 %a) { ; CHECK-LABEL: 
@heap_refinement1 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)* @@ -38,6 +41,7 @@ define void @heap_refinement1(i64 %a) { define void @heap_refinement2(i64 %a) { ; CHECK-LABEL: @heap_refinement2 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)* @@ -55,6 +59,7 @@ declare {} addrspace(10)* @allocate_some_value() define void @issue22770() { ; CHECK-LABEL: @issue22770 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %y = call {} addrspace(10)* @allocate_some_value() %casted1 = bitcast {} addrspace(10)* %y to {} addrspace(10)* addrspace(10)* @@ -80,6 +85,7 @@ define void @refine_select_phi({} addrspace(10)* %x, {} addrspace(10)* %y, i1 %b ; CHECK-LABEL: @refine_select_phi ; CHECK-NOT: %gcframe top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %s = select i1 %b, {} addrspace(10)* %x, {} addrspace(10)* %y br i1 %b, label %L1, label %L2 @@ -101,6 +107,7 @@ define void @dont_refine_loop({} addrspace(10)* %x) { ; CHECK-LABEL: @dont_refine_loop ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() br label %L1 @@ -122,6 +129,7 @@ define void @refine_loop_const({} addrspace(10)* %x) { ; CHECK-LABEL: @refine_loop_const ; CHECK-NOT: %gcframe top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() br label %L1 @@ -142,6 +150,7 @@ define void @refine_loop_indirect({} 
addrspace(10)* %x) { ; CHECK-LABEL: @refine_loop_indirect ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %a = call {} addrspace(10)* @allocate_some_value() br label %L1 @@ -166,6 +175,7 @@ define void @refine_loop_indirect2({} addrspace(10)* %x) { ; CHECK-LABEL: @refine_loop_indirect2 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 top: + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %a = call {} addrspace(10)* @allocate_some_value() br label %L1 @@ -189,6 +199,7 @@ declare {} addrspace(10)* @julia.typeof({} addrspace(10)*) #0 define {} addrspace(10)* @typeof({} addrspace(10)* %x) { ; CHECK-LABEL: @typeof( ; CHECK-NOT: %gcframe + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %v = call {} addrspace(10)* @julia.typeof({} addrspace(10)* %x) call void @one_arg_boxed({} addrspace(10)* %v) @@ -201,6 +212,7 @@ define {} addrspace(10)* @setfield({} addrspace(10)* %p) { ; CHECK-LABEL: @setfield( ; CHECK-NOT: %gcframe ; CHECK: call void @jl_gc_queue_root + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %c = call {} addrspace(10)* @allocate_some_value() %fp = bitcast {} addrspace(10)* %p to {} addrspace(10)* addrspace(10)* diff --git a/test/llvmpasses/returnstwicegc.ll b/test/llvmpasses/returnstwicegc.ll index c542fd026ff81d..da281fe85fd57e 100644 --- a/test/llvmpasses/returnstwicegc.ll +++ b/test/llvmpasses/returnstwicegc.ll @@ -4,6 +4,7 @@ declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*) declare {} addrspace(10)* @jl_box_int64(i64) declare {}*** @julia.ptls_states() +declare {}*** @julia.get_pgcstack() declare i32 @sigsetjmp(i8*, i32) returns_twice declare void @one_arg_boxed({} addrspace(10)*) @@ -14,6 +15,7 @@ define void @try_catch(i64 %a, i64 %b) top: %sigframe = alloca [208 x i8], align 16 %sigframe.sub = getelementptr inbounds [208 x i8], 
[208 x i8]* %sigframe, i64 0, i64 0 + call {}*** @julia.get_pgcstack() call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 %a) %val = call i32 @sigsetjmp(i8 *%sigframe.sub, i32 0) returns_twice diff --git a/test/llvmpasses/safepoint_stress.jl b/test/llvmpasses/safepoint_stress.jl index 7ff96643e82c3c..c5345ad07e786e 100644 --- a/test/llvmpasses/safepoint_stress.jl +++ b/test/llvmpasses/safepoint_stress.jl @@ -6,8 +6,10 @@ println(""" declare {} addrspace(10)* @alloc() declare void @one_arg_boxed({} addrspace(10)*) declare {}*** @julia.ptls_states() +declare {}*** @julia.get_pgcstack() define void @stress(i64 %a, i64 %b) { + %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() """)