From 8fe082cee965883989b382bc9dd3434649377e4c Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 1 Feb 2022 14:49:18 -0500 Subject: [PATCH] llvm-cpufeatures: get TargetMachine from the MachineModuleInfoWrapperPass pass This is usually only supposed to be accessible to a MachineModulePass, but we can trick LLVM into giving us access to the TargetMachine here too. Also hack in a sysimg check, so that the compile result is ensured to be compatible with the loaded image too (we disable loading of the sysimg when emitting a new compile). --- src/aotcompile.cpp | 2 ++ src/julia.expmap | 1 + src/llvm-cpufeatures.cpp | 66 ++++++++++++++++++++++++++++++------ src/llvm-multiversioning.cpp | 22 ++++++++++-- src/staticdata.c | 20 +++++++++-- test/llvmpasses/Makefile | 2 +- test/llvmpasses/havefma.ll | 24 +++++++++++++ test/llvmpasses/lit.cfg.py | 2 +- 8 files changed, 120 insertions(+), 19 deletions(-) create mode 100644 test/llvmpasses/havefma.ll diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 7284d0bf7f0a7..9e47e8ec2f948 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -592,6 +593,7 @@ void jl_dump_native_impl(void *native_code, void addTargetPasses(legacy::PassManagerBase *PM, TargetMachine *TM) { + PM->add(new MachineModuleInfoWrapperPass(static_cast(TM))); // do as llc does, not as it says PM->add(new TargetLibraryInfoWrapperPass(Triple(TM->getTargetTriple()))); PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); } diff --git a/src/julia.expmap b/src/julia.expmap index 558dfec6bd260..9a5b98507a49a 100644 --- a/src/julia.expmap +++ b/src/julia.expmap @@ -34,6 +34,7 @@ _Z22jl_coverage_alloc_lineN4llvm9StringRefEi; _Z22jl_malloc_data_pointerN4llvm9StringRefEi; LLVMExtra*; + llvmGetPassPluginInfo; /* freebsd */ environ; diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp index 8accd399371ae..37b70fb569b5c 100644 --- 
a/src/llvm-cpufeatures.cpp +++ b/src/llvm-cpufeatures.cpp @@ -15,6 +15,7 @@ #include "llvm-version.h" +#include #include #include #include @@ -22,8 +23,12 @@ #include #include #include +#include +#include + #include "julia.h" +extern "C" int32_t (*jl_sysimg_cpuflags[3])(void); #define DEBUG_TYPE "cpufeatures" @@ -36,6 +41,14 @@ Optional always_have_fma(Function &intr) { auto intr_name = intr.getName(); auto typ = intr_name.substr(strlen("julia.cpu.have_fma.")); + // if we are using a sysimage, return that constant + if (typ == "f16" && jl_sysimg_cpuflags[0] != NULL) + return jl_sysimg_cpuflags[0](); + if (typ == "f32" && jl_sysimg_cpuflags[1] != NULL) + return jl_sysimg_cpuflags[1](); + if (typ == "f64" && jl_sysimg_cpuflags[2] != NULL) + return jl_sysimg_cpuflags[2](); + #if defined(_CPU_AARCH64_) return typ == "f32" || typ == "f64"; #else @@ -44,7 +57,7 @@ Optional always_have_fma(Function &intr) { #endif } -bool have_fma(Function &intr, Function &caller) { +bool have_fma(const TargetMachine &TM, Function &intr, Function &caller) { auto unconditional = always_have_fma(intr); if (unconditional.hasValue()) return unconditional.getValue(); @@ -52,9 +65,10 @@ bool have_fma(Function &intr, Function &caller) { auto intr_name = intr.getName(); auto typ = intr_name.substr(strlen("julia.cpu.have_fma.")); + // otherwise, examine the target-features of the compile unit (JIT or AOT) Attribute FSAttr = caller.getFnAttribute("target-features"); StringRef FS = - FSAttr.isValid() ? FSAttr.getValueAsString() : jl_TargetMachine->getTargetFeatureString(); + FSAttr.isValid() ? 
FSAttr.getValueAsString() : TM.getTargetFeatureString(); SmallVector Features; FS.split(Features, ','); @@ -72,8 +86,8 @@ bool have_fma(Function &intr, Function &caller) { return false; } -void lowerHaveFMA(Function &intr, Function &caller, CallInst *I) { - if (have_fma(intr, caller)) +void lowerHaveFMA(const TargetMachine &TM, Function &intr, Function &caller, CallInst *I) { + if (have_fma(TM, intr, caller)) I->replaceAllUsesWith(ConstantInt::get(I->getType(), 1)); else I->replaceAllUsesWith(ConstantInt::get(I->getType(), 0)); @@ -81,7 +95,7 @@ void lowerHaveFMA(Function &intr, Function &caller, CallInst *I) { return; } -bool lowerCPUFeatures(Module &M) +bool lowerCPUFeatures(const TargetMachine &TM, Module &M) { SmallVector Materialized; @@ -92,7 +106,7 @@ bool lowerCPUFeatures(Module &M) for (Use &U: F.uses()) { User *RU = U.getUser(); CallInst *I = cast(RU); - lowerHaveFMA(F, *I->getParent()->getParent(), I); + lowerHaveFMA(TM, F, *I->getParent()->getParent(), I); Materialized.push_back(I); } } @@ -108,16 +122,38 @@ bool lowerCPUFeatures(Module &M) } } -struct CPUFeatures : PassInfoMixin { - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +struct CPUFeaturesPass : public PassInfoMixin { + static void registerCallbacks(PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &PM, + ArrayRef InnerPipeline) { + if (Name == "CPUFeatures") { + PM.addPass(CPUFeaturesPass()); + return true; + } + return false; + }); + } + + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; -PreservedAnalyses CPUFeatures::run(Module &M, ModuleAnalysisManager &AM) + +PreservedAnalyses CPUFeaturesPass::run(Module &M, ModuleAnalysisManager &AM) { - lowerCPUFeatures(M); + auto &MMI = AM.getResult(M); + auto &TM = MMI.getTarget(); + lowerCPUFeatures(TM, M); return PreservedAnalyses::all(); } +extern "C" JL_DLLEXPORT ::llvm::PassPluginLibraryInfo +llvmGetPassPluginInfo() { + return {LLVM_PLUGIN_API_VERSION, "CPUFeatures", "1", 
+ CPUFeaturesPass::registerCallbacks}; +} + + namespace { struct CPUFeaturesLegacy : public ModulePass { static char ID; @@ -125,7 +161,15 @@ struct CPUFeaturesLegacy : public ModulePass { bool runOnModule(Module &M) { - return lowerCPUFeatures(M); + auto &MMI = getAnalysis().getMMI(); + auto &TM = MMI.getTarget(); + return lowerCPUFeatures(TM, M); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesAll(); + ModulePass::getAnalysisUsage(AU); } }; diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp index 57e90a9aa8056..ff969440bdc13 100644 --- a/src/llvm-multiversioning.cpp +++ b/src/llvm-multiversioning.cpp @@ -44,7 +44,7 @@ extern Optional always_have_fma(Function&); namespace { constexpr uint32_t clone_mask = - JL_TARGET_CLONE_LOOP | JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU; + JL_TARGET_CLONE_LOOP | JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH; struct MultiVersioning; @@ -348,6 +348,22 @@ CloneCtx::CloneCtx(MultiVersioning *pass, Module &M) gvars(consume_gv(M, "jl_sysimg_gvars")), M(M) { + + // append cpu feature flags to the end of fvars + for (auto i = 0; i < 3; i++) { + const char *const sizes[] = { "16", "32", "64" }; + std::string Name("jl_sysimg_have_fma"); + Name += sizes[i]; + Function *F = Function::Create(FunctionType::get(T_int32, false), GlobalVariable::PrivateLinkage, Name, M); + BasicBlock *BB = BasicBlock::Create(ctx, "", F); + Name = "julia.cpu.have_fma.f"; + Name += sizes[i]; + FunctionCallee intr = M.getOrInsertFunction(Name, Type::getInt1Ty(ctx)); + Value *julia_cpu_flag = new ZExtInst(CallInst::Create(intr, "", BB), T_int32, "", BB); + ReturnInst::Create(ctx, julia_cpu_flag, BB); + fvars.push_back(F); + } + groups.emplace_back(0, specs[0]); uint32_t ntargets = specs.size(); for (uint32_t i = 1; i < ntargets; i++) { @@ -472,9 +488,9 @@ uint32_t CloneCtx::collect_func_info(Function &F) // for some platforms we know they always do (or don't) support // 
FMA. in those cases we don't need to clone the function. if (!always_have_fma(*callee).hasValue()) - flag |= JL_TARGET_CLONE_CPU; + flag |= JL_TARGET_CLONE_MATH; } else { - flag |= JL_TARGET_CLONE_CPU; + flag |= JL_TARGET_CLONE_MATH; } } } diff --git a/src/staticdata.c b/src/staticdata.c index 7427e23d391aa..1e3349c501dba 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -1378,6 +1378,7 @@ static jl_value_t *jl_read_value(jl_serializer_state *s) return (jl_value_t*)get_item_for_reloc(s, base, size, offset); } +JL_DLLEXPORT int32_t (*jl_sysimg_cpuflags[3])(void); static void jl_update_all_fptrs(jl_serializer_state *s) { @@ -1389,7 +1390,6 @@ static void jl_update_all_fptrs(jl_serializer_state *s) return; int sysimg_fvars_max = s->fptr_record->size / sizeof(void*); size_t i; - uintptr_t base = (uintptr_t)&s->s->buf[0]; jl_method_instance_t **linfos = (jl_method_instance_t**)&s->fptr_record->buf[0]; uint32_t clone_idx = 0; for (i = 0; i < sysimg_fvars_max; i++) { @@ -1403,8 +1403,8 @@ static void jl_update_all_fptrs(jl_serializer_state *s) specfunc = 0; offset = ~offset; } + uintptr_t base = (uintptr_t)&s->s->buf[0]; jl_code_instance_t *codeinst = (jl_code_instance_t*)(base + offset); - uintptr_t base = (uintptr_t)fvars.base; assert(jl_is_method(codeinst->def->def.method) && codeinst->invoke != jl_fptr_const_return); assert(specfunc ? 
codeinst->invoke != NULL : codeinst->invoke == NULL); linfos[i] = codeinst->def; @@ -1417,7 +1417,7 @@ static void jl_update_all_fptrs(jl_serializer_state *s) offset = fvars.clone_offsets[clone_idx]; break; } - void *fptr = (void*)(base + offset); + void *fptr = (void*)((uintptr_t)fvars.base + offset); if (specfunc) { codeinst->specptr.fptr = fptr; codeinst->isspecsig = 1; // TODO: set only if confirmed to be true @@ -1428,6 +1428,20 @@ static void jl_update_all_fptrs(jl_serializer_state *s) } } jl_register_fptrs(sysimage_base, &fvars, linfos, sysimg_fvars_max); + // now populate the feature flags accessors too + for (; i < sysimg_fvars_max + 3; i++) { + int32_t offset = fvars.offsets[i]; + for (; clone_idx < fvars.nclones; clone_idx++) { + uint32_t idx = fvars.clone_idxs[clone_idx] & jl_sysimg_val_mask; + if (idx < i) + continue; + if (idx == i) + offset = fvars.clone_offsets[clone_idx]; + break; + } + void *fptr = (void*)((uintptr_t)fvars.base + offset); + ((void**)jl_sysimg_cpuflags)[i - sysimg_fvars_max] = fptr; + } } diff --git a/test/llvmpasses/Makefile b/test/llvmpasses/Makefile index a0b9cf977ede8..67ba989cb943f 100644 --- a/test/llvmpasses/Makefile +++ b/test/llvmpasses/Makefile @@ -4,7 +4,7 @@ include $(JULIAHOME)/Make.inc check: . -TESTS = $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/*.jl $(SRCDIR)/*.ll)) +TESTS = $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/*.jl $(SRCDIR)/*.ll $(SRCDIR)/*.mir)) . 
$(TESTS): PATH=$(build_bindir):$(build_depsbindir):$$PATH \ diff --git a/test/llvmpasses/havefma.ll b/test/llvmpasses/havefma.ll new file mode 100644 index 0000000000000..250ad49f86958 --- /dev/null +++ b/test/llvmpasses/havefma.ll @@ -0,0 +1,24 @@ +; RUNx: opt --mtriple=`llvm-config --host-target` -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='require,CPUFeatures' -S %s | FileCheck %s --check-prefixes=CHECK,CHECK-any +; RUNx: opt --mtriple=x86_64-unknown-linux-gnu -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='require,CPUFeatures' -S %s | FileCheck %s --check-prefixes=CHECK,CHECK-generic +; RUNx: opt --mtriple=aarch64-unknown-linux-gnu -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='require,CPUFeatures' -S %s | FileCheck %s --check-prefixes=CHECK,CHECK-aarch64 +; RUNx: opt --mtriple=x86_64-unknown-linux-gnu --march=avx512 -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='require,CPUFeatures' -S %s | FileCheck %s --check-prefixes=CHECK,CHECK-avx512 +; RUN: true + +declare i1 @julia.cpu.have_fma.f32() +declare i1 @julia.cpu.have_fma.f64() + +; CHECK-LABEL: @havefma_test( +; CHECK-LABEL: top: +; CHECK-any-NEXT: %0 = and i1 +; CHECK-generic-NEXT: %0 = and i1 false, false +; CHECK-avx512-NEXT: %0 = and i1 false, false +; CHECK-aarch64-NEXT: %0 = and i1 true, true +; CHECK-NEXT: ret i1 %0 + +define i1 @havefma_test() { +top: + %0 = call i1 @julia.cpu.have_fma.f32() + %1 = call i1 @julia.cpu.have_fma.f64() + %2 = and i1 %0, %1 + ret i1 %2 +} diff --git a/test/llvmpasses/lit.cfg.py b/test/llvmpasses/lit.cfg.py index f53854faf2559..7a294c5b06f16 100644 --- a/test/llvmpasses/lit.cfg.py +++ b/test/llvmpasses/lit.cfg.py @@ -7,7 +7,7 @@ import lit.formats config.name = 'Julia' -config.suffixes = ['.ll','.jl'] +config.suffixes = ['.ll','.mir','.jl'] config.test_source_root = os.path.dirname(__file__) config.test_format = lit.formats.ShTest(True) config.substitutions.append(('%shlibext', 
'.dylib' if platform.system() == 'Darwin' else '.dll' if