Skip to content

Commit

Permalink
Merge pull request #41733 from NHDaly/nhd-allow-reentrant-compilation…
Browse files Browse the repository at this point in the history
…-time-measurement

Make `jl_cumulative_compile_time_ns` global (and reentrant).
  • Loading branch information
vchuravy authored Aug 25, 2021
2 parents cc4be25 + b4ca196 commit 272eaf9
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 42 deletions.
2 changes: 1 addition & 1 deletion base/timing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ function gc_alloc_count(diff::GC_Diff)
diff.malloc + diff.realloc + diff.poolalloc + diff.bigalloc
end

# cumulative total time spent on compilation
# cumulative total time spent on compilation, in nanoseconds
cumulative_compile_time_ns_before() = ccall(:jl_cumulative_compile_time_ns_before, UInt64, ())
cumulative_compile_time_ns_after() = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ())

Expand Down
16 changes: 8 additions & 8 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
JL_GC_PUSH1(&src);
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();

CompilationPolicy policy = (CompilationPolicy) _policy;
Expand Down Expand Up @@ -407,8 +407,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
}

data->M = std::move(clone);
if (jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (policy == CompilationPolicy::ImagingMode)
imaging_mode = 0;
JL_UNLOCK(&codegen_lock); // Might GC
Expand Down Expand Up @@ -908,8 +908,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
jl_llvm_functions_t decls;
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
std::tie(m, decls) = jl_emit_code(mi, src, jlrettype, output);

Expand All @@ -934,8 +934,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
m.release(); // the return object `llvmf` will be the owning pointer
}
JL_GC_POP();
if (jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&codegen_lock); // Might GC
if (F)
return F;
Expand Down
12 changes: 8 additions & 4 deletions src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -3161,19 +3161,23 @@ int jl_has_concrete_subtype(jl_value_t *typ)
#define typeinf_lock codegen_lock

static uint64_t inference_start_time = 0;
static uint8_t inference_is_measuring_compile_time = 0;

JL_DLLEXPORT void jl_typeinf_begin(void)
{
JL_LOCK(&typeinf_lock);
if (jl_measure_compile_time[jl_threadid()])
if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) {
inference_start_time = jl_hrtime();
inference_is_measuring_compile_time = 1;
}
}

JL_DLLEXPORT void jl_typeinf_end(void)
{
int tid = jl_threadid();
if (typeinf_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - inference_start_time);
if (typeinf_lock.count == 1 && inference_is_measuring_compile_time) {
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - inference_start_time));
inference_is_measuring_compile_time = 0;
}
JL_UNLOCK(&typeinf_lock);
}

Expand Down
44 changes: 22 additions & 22 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,16 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals)
extern "C" JL_DLLEXPORT
uint64_t jl_cumulative_compile_time_ns_before()
{
int tid = jl_threadid();
jl_measure_compile_time[tid] = 1;
return jl_cumulative_compile_time[tid];
// Increment the flag to allow reentrant callers to `@time`.
jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1);
return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
}
extern "C" JL_DLLEXPORT
uint64_t jl_cumulative_compile_time_ns_after()
{
int tid = jl_threadid();
jl_measure_compile_time[tid] = 0;
return jl_cumulative_compile_time[tid];
// Decrement the flag when done measuring, allowing other callers to continue measuring.
jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1);
return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
}

// this generates llvm code for the lambda info
Expand Down Expand Up @@ -231,8 +231,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
{
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
jl_codegen_params_t params;
jl_codegen_params_t *pparams = (jl_codegen_params_t*)p;
Expand All @@ -256,8 +256,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
if (success && llvmmod == NULL)
jl_add_to_ee(std::unique_ptr<Module>(into));
}
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (codegen_lock.count == 1 && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&codegen_lock);
return success;
}
Expand Down Expand Up @@ -313,8 +313,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
{
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
// if we don't have any decls already, try to generate it now
jl_code_info_t *src = NULL;
Expand Down Expand Up @@ -352,8 +352,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
else {
codeinst = NULL;
}
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (codegen_lock.count == 1 && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&codegen_lock);
JL_GC_POP();
return codeinst;
Expand All @@ -367,8 +367,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
}
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
if (unspec->invoke == NULL) {
jl_code_info_t *src = NULL;
Expand Down Expand Up @@ -396,8 +396,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
}
JL_GC_POP();
}
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (codegen_lock.count == 1 && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&codegen_lock); // Might GC
}

Expand All @@ -420,8 +420,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
// so create an exception here so we can print pretty our lies
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
specfptr = (uintptr_t)codeinst->specptr.fptr;
if (specfptr == 0) {
Expand All @@ -446,8 +446,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
}
JL_GC_POP();
}
if (jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&codegen_lock);
}
if (specfptr != 0)
Expand Down
5 changes: 3 additions & 2 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,9 @@ static inline uint64_t cycleclock(void)

#include "timing.h"

extern uint8_t *jl_measure_compile_time;
extern uint64_t *jl_cumulative_compile_time;
// Global *atomic* integers controlling *process-wide* measurement of compilation time.
extern uint8_t jl_measure_compile_time_enabled;
extern uint64_t jl_cumulative_compile_time;

#ifdef _COMPILER_MICROSOFT_
# define jl_return_address() ((uintptr_t)_ReturnAddress())
Expand Down
5 changes: 4 additions & 1 deletion src/task.c
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,10 @@ static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_M
ptls->io_wait = 0;
// @time needs its compile timer disabled on error,
// and cannot use a try-finally as it would break scope for assignments
jl_measure_compile_time[ptls->tid] = 0;
// We blindly disable compilation time tracking here, for all running Tasks, even though
// it may cause some incorrect measurements. This is a known bug, and is being tracked
// here: https://github.com/JuliaLang/julia/pull/39138
jl_atomic_store_relaxed(&jl_measure_compile_time_enabled, 0);
JL_GC_PUSH1(&exception);
jl_gc_unsafe_enter(ptls);
if (exception) {
Expand Down
6 changes: 2 additions & 4 deletions src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,8 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k)
#endif

jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED;
uint8_t *jl_measure_compile_time = NULL;
uint64_t *jl_cumulative_compile_time = NULL;
uint8_t jl_measure_compile_time_enabled = 0;
uint64_t jl_cumulative_compile_time = 0;

// return calling thread's ID
// Also update the suspended_threads list in signals-mach when changing the
Expand Down Expand Up @@ -467,8 +467,6 @@ void jl_init_threading(void)
}
if (jl_n_threads <= 0)
jl_n_threads = 1;
jl_measure_compile_time = (uint8_t*)calloc(jl_n_threads, sizeof(*jl_measure_compile_time));
jl_cumulative_compile_time = (uint64_t*)calloc(jl_n_threads, sizeof(*jl_cumulative_compile_time));
#ifndef __clang_analyzer__
jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*));
#endif
Expand Down
16 changes: 16 additions & 0 deletions test/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,22 @@ function timev_macro_scope()
end
@test timev_macro_scope() == 1

before = Base.cumulative_compile_time_ns_before();

# exercise concurrent calls to `@time` for reentrant compilation time measurement.
t1 = @async @time begin
sleep(2)
@eval module M ; f(x,y) = x+y ; end
@eval M.f(2,3)
end
t2 = @async begin
sleep(1)
@time 2 + 2
end

after = Base.cumulative_compile_time_ns_after();
@test after >= before;

# interactive utilities

struct ambigconvert; end # inject a problematic `convert` method to ensure it still works
Expand Down

0 comments on commit 272eaf9

Please sign in to comment.