diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp
index be9767bd9ff9ca..5ef396b0db9032 100644
--- a/src/llvm-cpufeatures.cpp
+++ b/src/llvm-cpufeatures.cpp
@@ -7,7 +7,8 @@
 // The following intrinsics are supported:
 // - julia.cpu.have_fma: returns 1 if the platform supports hardware-accelerated FMA
 //
-// XXX: can / do we want to make this a codegen pass to enable querying TargetPassConfig?
+// XXX: can / do we want to make this a codegen pass to enable querying TargetPassConfig
+// instead of using the global target machine?
 
 #include "llvm-version.h"
 
@@ -26,20 +27,45 @@ using namespace llvm;
 
 extern TargetMachine *jl_TargetMachine;
 
-namespace {
+// whether this platform unconditionally (i.e. without needing multiversioning) supports FMA;
+// an empty Optional means the answer depends on the target features
+Optional<bool> always_have_fma() {
+#ifdef _CPU_AARCH64_
+    return true;
+#else
+    return {};
+#endif
+}
 
-static void lowerHaveFMA(Function &F, Instruction *I) {
-    Triple TheTriple = Triple(jl_TargetMachine->getTargetTriple());
+// whether F can rely on hardware FMA: the unconditional platform answer when there is one,
+// otherwise a search of F's "target-features" attribute (or, if F has none, the feature
+// string of the global target machine) for an FMA feature flag
+bool have_fma(Function &F) {
+    auto unconditional = always_have_fma();
+    if (unconditional.hasValue())
+        return unconditional.getValue();
 
-    Attribute CPUAttr = F.getFnAttribute("target-cpu");
     Attribute FSAttr = F.getFnAttribute("target-features");
-
-    StringRef CPU =
-        CPUAttr.isValid() ? CPUAttr.getValueAsString() : jl_TargetMachine->getTargetCPU();
     StringRef FS =
         FSAttr.isValid() ? FSAttr.getValueAsString() : jl_TargetMachine->getTargetFeatureString();
 
-    if (TheTriple.getArch() == Triple::x86_64 && FS.find("+fma") != StringRef::npos)
+    SmallVector<StringRef, 6> Features;
+    FS.split(Features, ',');
+    for (StringRef Feature : Features)
+#if defined _CPU_ARM_
+      if (Feature == "+vfp4")
+        return true;
+#else
+      if (Feature == "+fma" || Feature == "+fma4")
+        return true;
+#endif
+
+    return false;
+}
+
+// replace every use of I with the constant 1 if have_fma(F) holds, and with 0 otherwise
+void lowerHaveFMA(Function &F, Instruction *I) {
+    if (have_fma(F))
         I->replaceAllUsesWith(ConstantInt::get(I->getType(), 1));
     else
         I->replaceAllUsesWith(ConstantInt::get(I->getType(), 0));
@@ -47,7 +73,7 @@ static void lowerHaveFMA(Function &F, Instruction *I) {
     return;
 }
 
-static bool lowerCPUFeatures(Module &M)
+bool lowerCPUFeatures(Module &M)
 {
     SmallVector<Instruction*,6> Materialized;
     if (auto have_fma = M.getFunction("julia.cpu.have_fma")) {
@@ -68,7 +94,6 @@ static bool lowerCPUFeatures(Module &M)
         return false;
     }
 }
-}
 
 struct CPUFeatures : PassInfoMixin<CPUFeatures> {
     PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp
index d888db2844270a..254db9930934a2 100644
--- a/src/llvm-multiversioning.cpp
+++ b/src/llvm-multiversioning.cpp
@@ -43,6 +43,8 @@ using namespace llvm;
 
 extern std::pair<MDNode*,MDNode*> tbaa_make_child(const char *name, MDNode *parent=nullptr, bool isConstant=false);
 
+extern Optional<bool> always_have_fma();
+
 namespace {
 
 // These are valid detail cloning conditions in the target flags.
@@ -470,7 +472,14 @@ uint32_t CloneCtx::collect_func_info(Function &F)
                         flag |= JL_TARGET_CLONE_MATH;
                     }
                     else if (name.startswith("julia.cpu.")) {
-                        flag |= JL_TARGET_CLONE_CPU;
+                        if (name == "julia.cpu.have_fma") {
+                            // for some platforms we know they always do (or don't) support
+                            // FMA. in those cases we don't need to clone the function.
+                            if (!always_have_fma().hasValue())
+                                flag |= JL_TARGET_CLONE_CPU;
+                        } else {
+                            flag |= JL_TARGET_CLONE_CPU;
+                        }
                     }
                 }
             }