Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

llvm-cpufeatures: get TargetMachine from the MachineModuleInfoWrapperPass pass #44005

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <llvm/Analysis/BasicAliasAnalysis.h>
#include <llvm/Analysis/TypeBasedAliasAnalysis.h>
#include <llvm/Analysis/ScopedNoAliasAA.h>
#include <llvm/CodeGen/MachineModuleInfo.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/Scalar.h>
Expand Down Expand Up @@ -592,6 +593,7 @@ void jl_dump_native_impl(void *native_code,

// Registers the target-dependent wrapper passes on PM, in this order:
//   1. machine module info, constructed from TM,
//   2. target library info for TM's target triple,
//   3. target transform info built from TM's target IR analysis.
// The machine module info wrapper is registered because the legacy
// CPUFeatures pass declares it as a required analysis and reads the
// TargetMachine back out of it.
// NOTE(review): the static_cast below assumes TM is really an
// LLVMTargetMachine -- confirm against callers.
void addTargetPasses(legacy::PassManagerBase *PM, TargetMachine *TM)
{
PM->add(new MachineModuleInfoWrapperPass(static_cast<const LLVMTargetMachine*>(TM))); // do as llc does, not as it says
PM->add(new TargetLibraryInfoWrapperPass(Triple(TM->getTargetTriple())));
PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
}
Expand Down
1 change: 1 addition & 0 deletions src/julia.expmap
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
_Z22jl_coverage_alloc_lineN4llvm9StringRefEi;
_Z22jl_malloc_data_pointerN4llvm9StringRefEi;
LLVMExtra*;
llvmGetPassPluginInfo;

/* freebsd */
environ;
Expand Down
66 changes: 55 additions & 11 deletions src/llvm-cpufeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,20 @@

#include "llvm-version.h"

#include <llvm/CodeGen/MachineModuleInfo.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Constants.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/PassManager.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/Support/Debug.h>
#include <llvm/Passes/PassBuilder.h>
#include <llvm/Passes/PassPlugin.h>


#include "julia.h"
extern "C" int32_t (*jl_sysimg_cpuflags[3])(void);

#define DEBUG_TYPE "cpufeatures"

Expand All @@ -36,6 +41,14 @@ Optional<bool> always_have_fma(Function &intr) {
auto intr_name = intr.getName();
auto typ = intr_name.substr(strlen("julia.cpu.have_fma."));

// if we are using a sysimage, return that constant
if (typ == "f16" && jl_sysimg_cpuflags[0] != NULL)
return jl_sysimg_cpuflags[0]();
if (typ == "f32" && jl_sysimg_cpuflags[1] != NULL)
return jl_sysimg_cpuflags[1]();
if (typ == "f64" && jl_sysimg_cpuflags[2] != NULL)
return jl_sysimg_cpuflags[2]();

#if defined(_CPU_AARCH64_)
return typ == "f32" || typ == "f64";
#else
Expand All @@ -44,17 +57,18 @@ Optional<bool> always_have_fma(Function &intr) {
#endif
}

bool have_fma(Function &intr, Function &caller) {
bool have_fma(const TargetMachine &TM, Function &intr, Function &caller) {
auto unconditional = always_have_fma(intr);
if (unconditional.hasValue())
return unconditional.getValue();

auto intr_name = intr.getName();
auto typ = intr_name.substr(strlen("julia.cpu.have_fma."));

// otherwise, examine the target-features of the compile unit (JIT or AOT)
Attribute FSAttr = caller.getFnAttribute("target-features");
StringRef FS =
FSAttr.isValid() ? FSAttr.getValueAsString() : jl_TargetMachine->getTargetFeatureString();
FSAttr.isValid() ? FSAttr.getValueAsString() : TM.getTargetFeatureString();

SmallVector<StringRef, 6> Features;
FS.split(Features, ',');
Expand All @@ -72,16 +86,16 @@ bool have_fma(Function &intr, Function &caller) {
return false;
}

void lowerHaveFMA(Function &intr, Function &caller, CallInst *I) {
if (have_fma(intr, caller))
void lowerHaveFMA(const TargetMachine &TM, Function &intr, Function &caller, CallInst *I) {
if (have_fma(TM, intr, caller))
I->replaceAllUsesWith(ConstantInt::get(I->getType(), 1));
else
I->replaceAllUsesWith(ConstantInt::get(I->getType(), 0));

return;
}

bool lowerCPUFeatures(Module &M)
bool lowerCPUFeatures(const TargetMachine &TM, Module &M)
{
SmallVector<Instruction*,6> Materialized;

Expand All @@ -92,7 +106,7 @@ bool lowerCPUFeatures(Module &M)
for (Use &U: F.uses()) {
User *RU = U.getUser();
CallInst *I = cast<CallInst>(RU);
lowerHaveFMA(F, *I->getParent()->getParent(), I);
lowerHaveFMA(TM, F, *I->getParent()->getParent(), I);
Materialized.push_back(I);
}
}
Expand All @@ -108,24 +122,54 @@ bool lowerCPUFeatures(Module &M)
}
}

struct CPUFeatures : PassInfoMixin<CPUFeatures> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
struct CPUFeaturesPass : public PassInfoMixin<CPUFeaturesPass> {
static void registerCallbacks(PassBuilder &PB) {
PB.registerPipelineParsingCallback(
[](StringRef Name, ModulePassManager &PM,
ArrayRef<PassBuilder::PipelineElement> InnerPipeline) {
if (Name == "CPUFeatures") {
PM.addPass(CPUFeaturesPass());
return true;
}
return false;
});
}

PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};

PreservedAnalyses CPUFeatures::run(Module &M, ModuleAnalysisManager &AM)

PreservedAnalyses CPUFeaturesPass::run(Module &M, ModuleAnalysisManager &AM)
{
lowerCPUFeatures(M);
auto &MMI = AM.getResult<MachineModuleAnalysis>(M);
auto &TM = MMI.getTarget();
lowerCPUFeatures(TM, M);
return PreservedAnalyses::all();
}

extern "C" JL_DLLEXPORT ::llvm::PassPluginLibraryInfo
llvmGetPassPluginInfo() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should move that out to a different file, since we will need to add all the other NewPM passes to this as well.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You have all the other NewPM changes, right? So I will leave this up to you to move.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are already other passes that support the NewPM, but sure I can do that.

return {LLVM_PLUGIN_API_VERSION, "CPUFeatures", "1",
CPUFeaturesPass::registerCallbacks};
}


namespace {
struct CPUFeaturesLegacy : public ModulePass {
static char ID;
CPUFeaturesLegacy() : ModulePass(ID) {};

bool runOnModule(Module &M)
{
return lowerCPUFeatures(M);
auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
auto &TM = MMI.getTarget();
return lowerCPUFeatures(TM, M);
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineModuleInfoWrapperPass>();
AU.setPreservesAll();
ModulePass::getAnalysisUsage(AU);
}
};

Expand Down
22 changes: 19 additions & 3 deletions src/llvm-multiversioning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ extern Optional<bool> always_have_fma(Function&);

namespace {
constexpr uint32_t clone_mask =
JL_TARGET_CLONE_LOOP | JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU;
JL_TARGET_CLONE_LOOP | JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH;

struct MultiVersioning;

Expand Down Expand Up @@ -348,6 +348,22 @@ CloneCtx::CloneCtx(MultiVersioning *pass, Module &M)
gvars(consume_gv<Constant>(M, "jl_sysimg_gvars")),
M(M)
{

// append cpu feature flags to the end of fvars
for (auto i = 0; i < 3; i++) {
const char *const sizes[] = { "16", "32", "64" };
std::string Name("jl_sysimg_have_fma");
Name += sizes[i];
Function *F = Function::Create(FunctionType::get(T_int32, false), GlobalVariable::PrivateLinkage, Name, M);
BasicBlock *BB = BasicBlock::Create(ctx, "", F);
Name = "julia.cpu.have_fma.f";
Name += sizes[i];
FunctionCallee intr = M.getOrInsertFunction(Name, Type::getInt1Ty(ctx));
Value *julia_cpu_flag = new ZExtInst(CallInst::Create(intr, "", BB), T_int32, "", BB);
ReturnInst::Create(ctx, julia_cpu_flag, BB);
fvars.push_back(F);
}

groups.emplace_back(0, specs[0]);
uint32_t ntargets = specs.size();
for (uint32_t i = 1; i < ntargets; i++) {
Expand Down Expand Up @@ -472,9 +488,9 @@ uint32_t CloneCtx::collect_func_info(Function &F)
// for some platforms we know they always do (or don't) support
// FMA. in those cases we don't need to clone the function.
if (!always_have_fma(*callee).hasValue())
flag |= JL_TARGET_CLONE_CPU;
flag |= JL_TARGET_CLONE_MATH;
} else {
flag |= JL_TARGET_CLONE_CPU;
flag |= JL_TARGET_CLONE_MATH;
}
}
}
Expand Down
20 changes: 17 additions & 3 deletions src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -1378,6 +1378,7 @@ static jl_value_t *jl_read_value(jl_serializer_state *s)
return (jl_value_t*)get_item_for_reloc(s, base, size, offset);
}

JL_DLLEXPORT int32_t (*jl_sysimg_cpuflags[3])(void);

static void jl_update_all_fptrs(jl_serializer_state *s)
{
Expand All @@ -1389,7 +1390,6 @@ static void jl_update_all_fptrs(jl_serializer_state *s)
return;
int sysimg_fvars_max = s->fptr_record->size / sizeof(void*);
size_t i;
uintptr_t base = (uintptr_t)&s->s->buf[0];
jl_method_instance_t **linfos = (jl_method_instance_t**)&s->fptr_record->buf[0];
uint32_t clone_idx = 0;
for (i = 0; i < sysimg_fvars_max; i++) {
Expand All @@ -1403,8 +1403,8 @@ static void jl_update_all_fptrs(jl_serializer_state *s)
specfunc = 0;
offset = ~offset;
}
uintptr_t base = (uintptr_t)&s->s->buf[0];
jl_code_instance_t *codeinst = (jl_code_instance_t*)(base + offset);
uintptr_t base = (uintptr_t)fvars.base;
assert(jl_is_method(codeinst->def->def.method) && codeinst->invoke != jl_fptr_const_return);
assert(specfunc ? codeinst->invoke != NULL : codeinst->invoke == NULL);
linfos[i] = codeinst->def;
Expand All @@ -1417,7 +1417,7 @@ static void jl_update_all_fptrs(jl_serializer_state *s)
offset = fvars.clone_offsets[clone_idx];
break;
}
void *fptr = (void*)(base + offset);
void *fptr = (void*)((uintptr_t)fvars.base + offset);
if (specfunc) {
codeinst->specptr.fptr = fptr;
codeinst->isspecsig = 1; // TODO: set only if confirmed to be true
Expand All @@ -1428,6 +1428,20 @@ static void jl_update_all_fptrs(jl_serializer_state *s)
}
}
jl_register_fptrs(sysimage_base, &fvars, linfos, sysimg_fvars_max);
// now populate the feature flags accessors too
for (; i < sysimg_fvars_max + 3; i++) {
int32_t offset = fvars.offsets[i];
for (; clone_idx < fvars.nclones; clone_idx++) {
uint32_t idx = fvars.clone_idxs[clone_idx] & jl_sysimg_val_mask;
if (idx < i)
continue;
if (idx == i)
offset = fvars.clone_offsets[clone_idx];
break;
}
void *fptr = (void*)((uintptr_t)fvars.base + offset);
((void**)jl_sysimg_cpuflags)[i - sysimg_fvars_max] = fptr;
}
}


Expand Down
2 changes: 1 addition & 1 deletion test/llvmpasses/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ include $(JULIAHOME)/Make.inc

check: .

TESTS = $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/*.jl $(SRCDIR)/*.ll))
TESTS = $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/*.jl $(SRCDIR)/*.ll $(SRCDIR)/*.mir))

. $(TESTS):
PATH=$(build_bindir):$(build_depsbindir):$$PATH \
Expand Down
24 changes: 24 additions & 0 deletions test/llvmpasses/havefma.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
; RUNx: opt --mtriple=`llvm-config --host-target` -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='require<machine-module>,CPUFeatures' -S %s | FileCheck %s --check-prefixes=CHECK,CHECK-any
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

require<machine-module> needs an LLVM patch, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, right now this is disabled, with the x (the RUNx: lines).

; RUNx: opt --mtriple=x86_64-unknown-linux-gnu -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='require<machine-module>,CPUFeatures' -S %s | FileCheck %s --check-prefixes=CHECK,CHECK-generic
; RUNx: opt --mtriple=aarch64-unknown-linux-gnu -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='require<machine-module>,CPUFeatures' -S %s | FileCheck %s --check-prefixes=CHECK,CHECK-aarch64
; RUNx: opt --mtriple=x86_64-unknown-linux-gnu --march=avx512 -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='require<machine-module>,CPUFeatures' -S %s | FileCheck %s --check-prefixes=CHECK,CHECK-avx512
; RUN: true

declare i1 @julia.cpu.have_fma.f32()
declare i1 @julia.cpu.have_fma.f64()

; CHECK-LABEL: @havefma_test(
; CHECK-LABEL: top:
; CHECK-any-NEXT: %0 = and i1
; CHECK-generic-NEXT: %0 = and i1 false, false
; CHECK-avx512-NEXT: %0 = and i1 false, false
; CHECK-aarch64-NEXT: %0 = and i1 true, true
; CHECK-NEXT: ret i1 %0

; Test input: calls the f32 and f64 have_fma intrinsics and returns the
; logical AND of the two results.
define i1 @havefma_test() {
top:
%0 = call i1 @julia.cpu.have_fma.f32()
%1 = call i1 @julia.cpu.have_fma.f64()
%2 = and i1 %0, %1
ret i1 %2
}
2 changes: 1 addition & 1 deletion test/llvmpasses/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import lit.formats

config.name = 'Julia'
config.suffixes = ['.ll','.jl']
config.suffixes = ['.ll','.mir','.jl']
config.test_source_root = os.path.dirname(__file__)
config.test_format = lit.formats.ShTest(True)
config.substitutions.append(('%shlibext', '.dylib' if platform.system() == 'Darwin' else '.dll' if
Expand Down