# Patch summary: move the ENABLE_TIMINGS block stack from the task to the
# per-thread TLS state, and derive the root timing from wall-clock cycles.
#
# What the hunks below do:
#   src/julia.h          - removes the `timing_stack` field from `jl_task_t`.
#   src/julia_threads.h  - adds `struct _jl_timing_block_t *timing_stack` to
#                          `struct _jl_tls_states_t`.
#   src/init.c           - drops the `jl_root_task->timing_stack = jl_root_timing`
#                          initialisation in `_julia_init`.
#   src/rtutils.c        - `jl_enter_handler` now records `ptls->timing_stack`
#                          in `eh->timing_stack`.
#   src/task.c           - `jl_switch` stops the block on top of
#                          `ptls->timing_stack`, sets `ptls->timing_stack` to
#                          NULL before `ctx_switch`, and afterwards asserts it
#                          is NULL, restores the saved block and restarts it.
#                          `throw_internal` unwinds from `ptls->timing_stack`.
#                          `jl_new_task` no longer initialises a task-level
#                          timing stack.
#   src/timing.c         - replaces the global `jl_root_timing` block with a
#                          file-static `t0`; `jl_init_timing` stores
#                          `cycleclock()` in `t0`; `jl_print_timings` computes
#                          `total_time = cycleclock() - t0`, subtracts every
#                          `jl_timing_data[i]` from it to obtain `root_time`,
#                          and writes that into `jl_timing_data[0]` before
#                          printing; `jl_destroy_timing` walks
#                          `ptls->timing_stack` and no longer frees a root
#                          block.
#   src/timing.h         - removes the `extern jl_root_timing` declaration;
#                          `_jl_timing_block_ctor` and
#                          `_jl_timing_block_destroy` push/pop through
#                          `&ptls->timing_stack` obtained from
#                          `jl_get_ptls_states()`.
#
# NOTE(review): in `jl_print_timings` the subtraction loop starts at i = 0 and
#   the result is then stored in `jl_timing_data[0]`, so a non-zero slot 0 on
#   entry (for example from an earlier call) changes the derived root value.
#   Confirm the function is only called once.
# NOTE(review): `root_time` is a `uint64_t`; if the summed entries exceed
#   `total_time` the subtraction wraps. Whether that can happen depends on how
#   `jl_timing_data` is accumulated across threads, which is not visible in
#   these hunks - TODO confirm.
# NOTE(review): `jl_destroy_timing` reads `stack->prev` after calling
#   `_jl_timing_block_destroy(stack)`; the visible part of that helper does not
#   release the block, but its tail lies outside the hunk - confirm.
# NOTE(review): `jl_switch` reuses the `ptls` obtained before `ctx_switch` when
#   restoring `timing_stack`; this presumably assumes the task resumes on the
#   same thread - verify against the scheduler.
#
diff --git a/src/init.c b/src/init.c index 43905933f391a..e69c43837787d 100644 --- a/src/init.c +++ b/src/init.c @@ -745,9 +745,6 @@ void _julia_init(JL_IMAGE_SEARCH rel) jl_init_tasks(); jl_init_root_task(stack_lo, stack_hi); -#ifdef ENABLE_TIMINGS - jl_root_task->timing_stack = jl_root_timing; -#endif jl_init_common_symbols(); jl_init_flisp(); jl_init_serializer(); diff --git a/src/julia.h b/src/julia.h index 326cf12c2cbb0..05358cdbf65ce 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1768,8 +1768,6 @@ typedef struct _jl_task_t { // saved gc stack top for context switches jl_gcframe_t *gcstack; - - jl_timing_block_t *timing_stack; } jl_task_t; #define JL_TASK_STATE_RUNNABLE 0 diff --git a/src/julia_threads.h b/src/julia_threads.h index 1e3b762ef385f..1eaf0e0035f52 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -207,6 +207,7 @@ struct _jl_tls_states_t { struct _jl_task_t *previous_task; #endif struct _jl_task_t *root_task; + struct _jl_timing_block_t *timing_stack; void *stackbase; size_t stacksize; jl_ucontext_t base_ctx; // base context of stack diff --git a/src/rtutils.c b/src/rtutils.c index 9e755784efbec..5ac68510ae392 100644 --- a/src/rtutils.c +++ b/src/rtutils.c @@ -221,7 +221,7 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh) eh->world_age = ptls->world_age; current_task->eh = eh; #ifdef ENABLE_TIMINGS - eh->timing_stack = current_task->timing_stack; + eh->timing_stack = ptls->timing_stack; #endif } diff --git a/src/task.c b/src/task.c index 1f6537e71038d..ce54b3882c284 100644 --- a/src/task.c +++ b/src/task.c @@ -519,9 +519,10 @@ JL_DLLEXPORT void jl_switch(void) ptls->finalizers_inhibited = 0; #ifdef ENABLE_TIMINGS - jl_timing_block_t *blk = ct->timing_stack; + jl_timing_block_t *blk = ptls->timing_stack; if (blk) jl_timing_block_stop(blk); + ptls->timing_stack = NULL; #endif ctx_switch(ptls); @@ -546,7 +547,8 @@ JL_DLLEXPORT void jl_switch(void) ptls->finalizers_inhibited = finalizers_inhibited; #ifdef ENABLE_TIMINGS - assert(blk 
== ct->timing_stack); + assert(ptls->timing_stack == NULL); + ptls->timing_stack = blk; if (blk) jl_timing_block_start(blk); #else @@ -600,7 +602,7 @@ static void JL_NORETURN throw_internal(jl_value_t *exception JL_MAYBE_UNROOTED) jl_handler_t *eh = ptls->current_task->eh; if (eh != NULL) { #ifdef ENABLE_TIMINGS - jl_timing_block_t *cur_block = ptls->current_task->timing_stack; + jl_timing_block_t *cur_block = ptls->timing_stack; while (cur_block && eh->timing_stack != cur_block) { cur_block = jl_pop_timing_block(cur_block); } @@ -701,9 +703,6 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->started = 0; t->prio = -1; t->tid = -1; -#ifdef ENABLE_TIMINGS - t->timing_stack = jl_root_timing; -#endif #if defined(JL_DEBUG_BUILD) if (!t->copy_stack) diff --git a/src/timing.c b/src/timing.c index 70fb8df213796..12f47bbfa0086 100644 --- a/src/timing.c +++ b/src/timing.c @@ -17,7 +17,7 @@ extern "C" { #error Timings are not supported on your compiler #endif -jl_timing_block_t *jl_root_timing; +static uint64_t t0; uint64_t jl_timing_data[(int)JL_TIMING_LAST] = {0}; const char *jl_timing_names[(int)JL_TIMING_LAST] = { @@ -28,32 +28,32 @@ const char *jl_timing_names[(int)JL_TIMING_LAST] = void jl_print_timings(void) { - uint64_t total_time = 0; + uint64_t total_time = cycleclock() - t0; + uint64_t root_time = total_time; for (int i = 0; i < JL_TIMING_LAST; i++) { - total_time += jl_timing_data[i]; + root_time -= jl_timing_data[i]; } + jl_timing_data[0] = root_time; for (int i = 0; i < JL_TIMING_LAST; i++) { if (jl_timing_data[i] != 0) - fprintf(stderr,"%-25s : %5.2f %% %" PRIu64 "\n", jl_timing_names[i], + fprintf(stderr, "%-25s : %5.2f %% %" PRIu64 "\n", jl_timing_names[i], 100 * (((double)jl_timing_data[i]) / total_time), jl_timing_data[i]); } } void jl_init_timing(void) { - jl_root_timing = (jl_timing_block_t*)malloc_s(sizeof(jl_timing_block_t)); - _jl_timing_block_init(jl_root_timing, JL_TIMING_ROOT); - jl_root_timing->prev = NULL; + 
t0 = cycleclock(); } void jl_destroy_timing(void) { - jl_timing_block_t *stack = jl_current_task ? jl_current_task->timing_stack : jl_root_timing; + jl_ptls_t ptls = jl_get_ptls_states(); + jl_timing_block_t *stack = ptls->timing_stack; while (stack) { _jl_timing_block_destroy(stack); stack = stack->prev; } - free(jl_root_timing); } jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block) diff --git a/src/timing.h b/src/timing.h index 2848f020a12c0..9a3307709a38f 100644 --- a/src/timing.h +++ b/src/timing.h @@ -23,7 +23,6 @@ extern "C" { #endif void jl_print_timings(void); jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block); -extern jl_timing_block_t *jl_root_timing; void jl_timing_block_start(jl_timing_block_t *cur_block); void jl_timing_block_stop(jl_timing_block_t *cur_block); #ifdef __cplusplus @@ -117,7 +116,8 @@ STATIC_INLINE uint64_t _jl_timing_block_init(jl_timing_block_t *block, int owner STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner) { uint64_t t = _jl_timing_block_init(block, owner); - jl_timing_block_t **prevp = jl_current_task ? &jl_current_task->timing_stack : &jl_root_timing; + jl_ptls_t ptls = jl_get_ptls_states(); + jl_timing_block_t **prevp = &ptls->timing_stack; block->prev = *prevp; if (block->prev) _jl_timing_block_stop(block->prev, t); @@ -126,9 +126,10 @@ STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner) { STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) { uint64_t t = cycleclock(); + jl_ptls_t ptls = jl_get_ptls_states(); _jl_timing_block_stop(block, t); jl_timing_data[block->owner] += block->total; - jl_timing_block_t **pcur = jl_current_task ? &jl_current_task->timing_stack : &jl_root_timing; + jl_timing_block_t **pcur = &ptls->timing_stack; assert(*pcur == block); *pcur = block->prev; if (block->prev)