Skip to content

Commit

Permalink
Improve FMA detection.
Browse files Browse the repository at this point in the history
Don't clone on platforms that always support it, and more accurately check CPU features.
  • Loading branch information
maleadt committed Nov 16, 2021
1 parent cdb2c3c commit 36a7f6f
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 12 deletions.
42 changes: 31 additions & 11 deletions src/llvm-cpufeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
// The following intrinsics are supported:
// - julia.cpu.have_fma: returns 1 if the platform supports hardware-accelerated FMA
//
// XXX: can / do we want to make this a codegen pass to enable querying TargetPassConfig?
// XXX: can / do we want to make this a codegen pass to enable querying TargetPassConfig
// instead of using the global target machine?

#include "llvm-version.h"

Expand All @@ -26,28 +27,48 @@ using namespace llvm;

extern TargetMachine *jl_TargetMachine;

namespace {
// Reports whether this platform supports FMA unconditionally, i.e. without
// needing multiversioning.
//
// Returns `true` when built with _CPU_AARCH64_ defined. Otherwise returns an
// empty Optional, meaning the answer is not known statically and callers have
// to determine it some other way (e.g. from the target features).
Optional<bool> always_have_fma() {
#ifdef _CPU_AARCH64_
return true;
#else
return {};
#endif
}

static void lowerHaveFMA(Function &F, Instruction *I) {
Triple TheTriple = Triple(jl_TargetMachine->getTargetTriple());
// Decides whether FMA should be reported as available for function F.
//
// A statically known answer from always_have_fma() takes priority. Otherwise
// the comma-separated target feature string is scanned for a feature that
// provides FMA: "+vfp4" when _CPU_ARM_ is defined, "+fma" or "+fma4" otherwise.
// The "target-cpu" / "target-features" attributes on F are used when present;
// the values of the global jl_TargetMachine are the fallback.
//
// Returns true if a matching feature is found (or FMA is unconditionally
// available), false otherwise.
bool have_fma(Function &F) {
auto unconditional = always_have_fma();
if (unconditional.hasValue())
return unconditional.getValue();

// Per-function overrides; these attributes may be absent (invalid).
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");

// Fall back to the global target machine when F carries no attribute.
// NOTE(review): CPU is computed but never read in the visible body -- confirm
// whether it is still needed.
StringRef CPU =
CPUAttr.isValid() ? CPUAttr.getValueAsString() : jl_TargetMachine->getTargetCPU();
StringRef FS =
FSAttr.isValid() ? FSAttr.getValueAsString() : jl_TargetMachine->getTargetFeatureString();

// NOTE(review): the next line refers to TheTriple, which is not declared in
// this function; it looks like a leftover of the previous substring-based
// check -- confirm against the actual file.
if (TheTriple.getArch() == Triple::x86_64 && FS.find("+fma") != StringRef::npos)
SmallVector<StringRef, 6> Features;
// Split into individual features so each one is compared as a whole token
// rather than matched as a substring of the full feature string.
FS.split(Features, ',');
for (StringRef Feature : Features)
#if defined _CPU_ARM_
if (Feature == "+vfp4")
return true;
#else
if (Feature == "+fma" || Feature == "+fma4")
return true;
#endif

// No FMA-providing feature was listed.
return false;
}

// Replaces every use of instruction I with an integer constant of I's type:
// 1 if have_fma(F) reports FMA support for F, 0 otherwise.
// I itself is not erased here.
void lowerHaveFMA(Function &F, Instruction *I) {
if (have_fma(F))
I->replaceAllUsesWith(ConstantInt::get(I->getType(), 1));
else
I->replaceAllUsesWith(ConstantInt::get(I->getType(), 0));

return;
}

static bool lowerCPUFeatures(Module &M)
bool lowerCPUFeatures(Module &M)
{
SmallVector<Instruction*,6> Materialized;
if (auto have_fma = M.getFunction("julia.cpu.have_fma")) {
Expand All @@ -68,7 +89,6 @@ static bool lowerCPUFeatures(Module &M)
return false;
}
}
}

struct CPUFeatures : PassInfoMixin<CPUFeatures> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
Expand Down
11 changes: 10 additions & 1 deletion src/llvm-multiversioning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ using namespace llvm;
extern std::pair<MDNode*,MDNode*> tbaa_make_child(const char *name, MDNode *parent=nullptr,
bool isConstant=false);

extern Optional<bool> always_have_fma();

namespace {

// These are valid detail cloning conditions in the target flags.
Expand Down Expand Up @@ -470,7 +472,14 @@ uint32_t CloneCtx::collect_func_info(Function &F)
flag |= JL_TARGET_CLONE_MATH;
}
else if (name.startswith("julia.cpu.")) {
flag |= JL_TARGET_CLONE_CPU;
if (name == "julia.cpu.have_fma") {
// On some platforms we statically know that FMA is always (or never)
// supported. In those cases we don't need to clone the function.
if (!always_have_fma().hasValue())
flag |= JL_TARGET_CLONE_CPU;
} else {
flag |= JL_TARGET_CLONE_CPU;
}
}
}
}
Expand Down

0 comments on commit 36a7f6f

Please sign in to comment.