From 7102da2f850afcc47e5ee2a7d4674706ec38d5ee Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Mon, 4 Apr 2022 17:45:12 -0400 Subject: [PATCH 1/2] Use pooled contexts --- doc/src/devdocs/locks.md | 2 + src/aotcompile.cpp | 20 ++++++--- src/codegen.cpp | 2 +- src/jitlayers.cpp | 26 ++++++----- src/jitlayers.h | 94 ++++++++++++++++++++++++++++++++++++++-- 5 files changed, 122 insertions(+), 22 deletions(-) diff --git a/doc/src/devdocs/locks.md b/doc/src/devdocs/locks.md index e9b557bcdd4f0..2cc8ceaaed968 100644 --- a/doc/src/devdocs/locks.md +++ b/doc/src/devdocs/locks.md @@ -29,8 +29,10 @@ The following are definitely leaf locks (level 1), and must not try to acquire a > * flisp > * jl_in_stackwalk (Win32) > * PM_mutex[i] +> * ContextPool::mutex > > > flisp itself is already threadsafe, this lock only protects the `jl_ast_context_list_t` pool +> > likewise, orc::ThreadSafeContexts carry their own lock, the ContextPool::mutex just protects the pool The following is a leaf lock (level 2), and only acquires level 1 locks (safepoint) internally: diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index adeef14cea013..977478107316c 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -253,17 +253,19 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm jl_native_code_desc_t *data = new jl_native_code_desc_t; CompilationPolicy policy = (CompilationPolicy) _policy; bool imaging = imaging_default() || policy == CompilationPolicy::ImagingMode; - orc::ThreadSafeModule backing; - if (!llvmmod) { - backing = jl_create_llvm_module("text", jl_ExecutionEngine->getContext(), imaging); - } - orc::ThreadSafeModule &clone = llvmmod ? 
*reinterpret_cast<orc::ThreadSafeModule*>(llvmmod) : backing; - auto ctxt = clone.getContext(); jl_workqueue_t emitted; jl_method_instance_t *mi = NULL; jl_code_info_t *src = NULL; JL_GC_PUSH1(&src); JL_LOCK(&jl_codegen_lock); + orc::ThreadSafeContext ctx; + orc::ThreadSafeModule backing; + if (!llvmmod) { + ctx = jl_ExecutionEngine->acquireContext(); + backing = jl_create_llvm_module("text", ctx, imaging); + } + orc::ThreadSafeModule &clone = llvmmod ? *reinterpret_cast<orc::ThreadSafeModule*>(llvmmod) : backing; + auto ctxt = clone.getContext(); jl_codegen_params_t params(ctxt); params.params = cgparams; uint64_t compiler_start_time = 0; @@ -402,6 +404,9 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm data->M = std::move(clone); if (measure_compile_time_enabled) jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); + if (ctx.getContext()) { + jl_ExecutionEngine->releaseContext(std::move(ctx)); + } JL_UNLOCK(&jl_codegen_lock); // Might GC return (void*)data; } @@ -1020,7 +1025,8 @@ void *jl_get_llvmf_defn_impl(jl_method_instance_t *mi, size_t world, char getwra // emit this function into a new llvm module if (src && jl_is_code_info(src)) { JL_LOCK(&jl_codegen_lock); - jl_codegen_params_t output(jl_ExecutionEngine->getContext()); + auto ctx = jl_ExecutionEngine->getContext(); + jl_codegen_params_t output(*ctx); output.world = world; output.params = &params; orc::ThreadSafeModule m = jl_create_llvm_module(name_from_method_instance(mi), output.tsctx, output.imaging); diff --git a/src/codegen.cpp b/src/codegen.cpp index 45d98d8b4b790..d9d6d285f5686 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -8371,7 +8371,7 @@ extern "C" void jl_init_llvm(void) if (clopt && clopt->getNumOccurrences() == 0) cl::ProvidePositionalOption(clopt, "4", 1); - jl_ExecutionEngine = new JuliaOJIT(new LLVMContext()); + jl_ExecutionEngine = new JuliaOJIT(); bool jl_using_gdb_jitevents = false; // Register GDB event listener diff --git a/src/jitlayers.cpp 
b/src/jitlayers.cpp index a040287217c3d..92a911273d2fa 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -225,16 +225,21 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); + orc::ThreadSafeContext ctx; auto into = reinterpret_cast<orc::ThreadSafeModule*>(llvmmod); jl_codegen_params_t *pparams = (jl_codegen_params_t*)p; orc::ThreadSafeModule backing; if (into == NULL) { - backing = jl_create_llvm_module("cextern", pparams ? pparams->tsctx : jl_ExecutionEngine->getContext(), pparams ? pparams->imaging : imaging_default()); + if (!pparams) { + ctx = jl_ExecutionEngine->acquireContext(); + } + backing = jl_create_llvm_module("cextern", pparams ? pparams->tsctx : ctx, pparams ? pparams->imaging : imaging_default()); into = &backing; } jl_codegen_params_t params(into->getContext()); if (pparams == NULL) pparams = &params; + assert(pparams->tsctx.getContext() == into->getContext().getContext()); const char *name = jl_generate_ccallable(reinterpret_cast<LLVMOrcThreadSafeModuleRef>(into), sysimg, declrt, sigt, *pparams); bool success = true; if (!sysimg) { @@ -252,6 +257,9 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * } if (jl_codegen_lock.count == 1 && measure_compile_time_enabled) jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); + if (ctx.getContext()) { + jl_ExecutionEngine->releaseContext(std::move(ctx)); + } JL_UNLOCK(&jl_codegen_lock); return success; } @@ -306,7 +314,8 @@ extern "C" JL_DLLEXPORT jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world) { JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion - auto &context = jl_ExecutionEngine->getContext(); + auto ctx = jl_ExecutionEngine->getContext(); + auto &context = *ctx; uint64_t compiler_start_time 
= 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) @@ -363,7 +372,8 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec) return; } JL_LOCK(&jl_codegen_lock); - auto &context = jl_ExecutionEngine->getContext(); + auto ctx = jl_ExecutionEngine->getContext(); + auto &context = *ctx; uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) @@ -417,7 +427,8 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world, // (using sentinel value `1` instead) // so create an exception here so we can print pretty our lies JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion - auto &context = jl_ExecutionEngine->getContext(); + auto ctx = jl_ExecutionEngine->getContext(); + auto &context = *ctx; uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) @@ -909,7 +920,7 @@ llvm::DataLayout jl_create_datalayout(TargetMachine &TM) { return jl_data_layout; } -JuliaOJIT::JuliaOJIT(LLVMContext *LLVMCtx) +JuliaOJIT::JuliaOJIT() : TM(createTargetMachine()), DL(jl_create_datalayout(*TM)), TMs{ @@ -918,7 +929,6 @@ JuliaOJIT::JuliaOJIT(LLVMContext *LLVMCtx) cantFail(createJTMBFromTM(*TM, 2).createTargetMachine()), cantFail(createJTMBFromTM(*TM, 3).createTargetMachine()) }, - TSCtx(std::unique_ptr(LLVMCtx)), #if JL_LLVM_VERSION >= 130000 ES(cantFail(orc::SelfExecutorProcessControl::Create())), #else @@ -1165,10 +1175,6 @@ void JuliaOJIT::RegisterJITEventListener(JITEventListener *L) } #endif -orc::ThreadSafeContext &JuliaOJIT::getContext() { - return TSCtx; -} - const DataLayout& JuliaOJIT::getDataLayout() const { return DL; diff --git a/src/jitlayers.h b/src/jitlayers.h index df453cb2c2c0b..56699c50d78ec 100644 --- 
a/src/jitlayers.h +++ b/src/jitlayers.h @@ -5,7 +5,7 @@ #include #include #include -#include "llvm/IR/LegacyPassManager.h" +#include #include #include @@ -194,6 +194,83 @@ class JuliaOJIT { typedef orc::IRTransformLayer OptimizeLayerT; typedef object::OwningBinary OwningObj; private: + template + struct ResourcePool { + public: + ResourcePool(function_ref creator) : creator(std::move(creator)) {} + class OwningResource { + public: + OwningResource(ResourcePool &pool, ResourceT resource) : pool(pool), resource(std::move(resource)) {} + OwningResource(const OwningResource &) = delete; + OwningResource &operator=(const OwningResource &) = delete; + OwningResource(OwningResource &&) = default; + OwningResource &operator=(OwningResource &&) = default; + ~OwningResource() { + if (resource) pool.release_(std::move(*resource)); + } + ResourceT release() { + ResourceT res(std::move(*resource)); + resource.reset(); + return res; + } + void reset(ResourceT res) { + *resource = std::move(res); + } + ResourceT &operator*() { + return *resource; + } + ResourceT *operator->() { + return get(); + } + ResourceT *get() { + return resource.getPointer(); + } + const ResourceT &operator*() const { + return *resource; + } + const ResourceT *operator->() const { + return get(); + } + const ResourceT *get() const { + return resource.getPointer(); + } + explicit operator bool() const { + return resource; + } + private: + ResourcePool &pool; + llvm::Optional resource; + }; + + OwningResource acquire() { + return OwningResource(*this, acquire_()); + } + + ResourceT acquire_() { + std::unique_lock lock(mutex); + if (!pool.empty()) { + return pool.pop_back_val(); + } + if (!max || created < max) { + created++; + return creator(); + } + empty.wait(lock, [&](){ return !pool.empty(); }); + assert(!pool.empty() && "Expected resource pool to have a value!"); + return pool.pop_back_val(); + } + void release_(ResourceT &&resource) { + std::lock_guard lock(mutex); + 
pool.push_back(std::move(resource)); + empty.notify_one(); + } + private: + llvm::function_ref creator; + size_t created = 0; + llvm::SmallVector pool; + std::mutex mutex; + std::condition_variable empty; + }; struct OptimizerT { OptimizerT(legacy::PassManager &PM, std::mutex &mutex, int optlevel) : optlevel(optlevel), PM(PM), mutex(mutex) {} @@ -223,7 +300,7 @@ class JuliaOJIT { public: - JuliaOJIT(LLVMContext *Ctx); + JuliaOJIT(); void enableJITDebuggingSupport(); #ifndef JL_USE_JITLINK @@ -239,7 +316,15 @@ class JuliaOJIT { uint64_t getGlobalValueAddress(StringRef Name); uint64_t getFunctionAddress(StringRef Name); StringRef getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst); - orc::ThreadSafeContext &getContext(); + auto getContext() { + return ContextPool.acquire(); + } + orc::ThreadSafeContext acquireContext() { + return ContextPool.acquire_(); + } + void releaseContext(orc::ThreadSafeContext &&ctx) { + ContextPool.release_(std::move(ctx)); + } const DataLayout& getDataLayout() const; TargetMachine &getTargetMachine(); const Triple& getTargetTriple() const; @@ -260,11 +345,12 @@ class JuliaOJIT { std::mutex PM_mutexes[4]; std::unique_ptr TMs[4]; - orc::ThreadSafeContext TSCtx; orc::ExecutionSession ES; orc::JITDylib &GlobalJD; orc::JITDylib &JD; + ResourcePool ContextPool{[](){ return orc::ThreadSafeContext(std::make_unique()); }}; + #ifndef JL_USE_JITLINK std::shared_ptr MemMgr; #endif From 6d143d812ea3b9acd9a91734ddccfd6aa2e03129 Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Thu, 7 Apr 2022 14:35:11 -0400 Subject: [PATCH 2/2] Allow move construction of the resource pool --- src/jitlayers.cpp | 1 + src/jitlayers.h | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 92a911273d2fa..4cc1f9a2aa830 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -936,6 +936,7 @@ JuliaOJIT::JuliaOJIT() #endif GlobalJD(ES.createBareJITDylib("JuliaGlobals")), 
JD(ES.createBareJITDylib("JuliaOJIT")), + ContextPool([](){ return orc::ThreadSafeContext(std::make_unique()); }), #ifdef JL_USE_JITLINK // TODO: Port our memory management optimisations to JITLink instead of using the // default InProcessMemoryManager. diff --git a/src/jitlayers.h b/src/jitlayers.h index 56699c50d78ec..0129662fbb8a3 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -197,7 +197,7 @@ class JuliaOJIT { template struct ResourcePool { public: - ResourcePool(function_ref creator) : creator(std::move(creator)) {} + ResourcePool(function_ref creator) : creator(std::move(creator)), mutex(std::make_unique()) {} class OwningResource { public: OwningResource(ResourcePool &pool, ResourceT resource) : pool(pool), resource(std::move(resource)) {} @@ -241,13 +241,13 @@ class JuliaOJIT { ResourcePool &pool; llvm::Optional resource; }; - + OwningResource acquire() { return OwningResource(*this, acquire_()); } ResourceT acquire_() { - std::unique_lock lock(mutex); + std::unique_lock lock(mutex->mutex); if (!pool.empty()) { return pool.pop_back_val(); } @@ -255,21 +255,25 @@ class JuliaOJIT { created++; return creator(); } - empty.wait(lock, [&](){ return !pool.empty(); }); + mutex->empty.wait(lock, [&](){ return !pool.empty(); }); assert(!pool.empty() && "Expected resource pool to have a value!"); return pool.pop_back_val(); } void release_(ResourceT &&resource) { - std::lock_guard lock(mutex); + std::lock_guard lock(mutex->mutex); pool.push_back(std::move(resource)); - empty.notify_one(); + mutex->empty.notify_one(); } private: llvm::function_ref creator; size_t created = 0; llvm::SmallVector pool; - std::mutex mutex; - std::condition_variable empty; + struct WNMutex { + std::mutex mutex; + std::condition_variable empty; + }; + + std::unique_ptr mutex; }; struct OptimizerT { OptimizerT(legacy::PassManager &PM, std::mutex &mutex, int optlevel) : optlevel(optlevel), PM(PM), mutex(mutex) {} @@ -349,7 +353,7 @@ class JuliaOJIT { orc::JITDylib &GlobalJD; 
orc::JITDylib &JD; - ResourcePool ContextPool{[](){ return orc::ThreadSafeContext(std::make_unique()); }}; + ResourcePool ContextPool; #ifndef JL_USE_JITLINK std::shared_ptr MemMgr;