From b5f024fa83b6f1cfe5e83a459c9378b7c5bf096d Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Thu, 19 May 2022 20:10:53 +0300 Subject: [PATCH] Fix fundamental confusion about target/tune CPU (#6765) * Fix fundamental confusion about target/tune CPU Sooo. Uh, remember when in https://github.com/halide/Halide/pull/6655 we agreed that we want to add support to precisely specify the CPU for which the code should be *tuned*, but not *targeted*? Aka, similar to clang's `-mtune=` option, which does not affect the ISA set selection? So guess what, that's not what we did, apparently. `CodeGen_LLVM::mcpu()` / `halide_mcpu` actually do specify the *target* CPU. It was obvious in retrospect, because e.g. `CodeGen_X86::mattrs()` does not, in fact, ever specify `+avx2`, yet we get AVX2 :) So we've unintentionally added `-march=` support. Oops. While I'd like to add `-march=` support, that was not the goal here. Fixing this is complicated by the fact that `llvm::Target::createTargetMachine()` only takes a `CPU Target` string; you can't specify a `CPU Tune`. But this is actually a blessing in disguise, because it allows us to fix another bug at the same time: There is a problem with Halide's "compile to LLVM IR assembly": a lot of information from the Halide Target is not //really// lowered into the LLVM Module, but is embedded as metadata that is then extracted by Halide's `make_target_machine()`. While that is not a problem in itself, it makes it *impossible* to dump the LLVM IR and manually play with it, because e.g. the CPU [Target] and Attributes (ISA set) are not actually lowered into the form LLVM understands, but are in some Halide-specific metadata. So, to fix the first bug, we must lower the CPU Tune into a per-function `"tune-cpu"` attribute, and while there we might as well lower `"target-cpu"` and `"target-features"` similarly. 
* Address review notes * Hopefully silence bogus issue reported by ancient GCC * Call `set_function_attributes_from_halide_target_options()` when JIT compiling * Fix grammar --- src/CodeGen_ARM.cpp | 9 ++++-- src/CodeGen_Hexagon.cpp | 9 ++++-- src/CodeGen_Internal.cpp | 37 +++++++++++++++-------- src/CodeGen_Internal.h | 8 ++--- src/CodeGen_LLVM.cpp | 7 +++-- src/CodeGen_LLVM.h | 18 ++++++++--- src/CodeGen_MIPS.cpp | 9 ++++-- src/CodeGen_PTX_Dev.cpp | 15 ++++++---- src/CodeGen_PowerPC.cpp | 9 ++++-- src/CodeGen_RISCV.cpp | 9 ++++-- src/CodeGen_WebAssembly.cpp | 9 ++++-- src/CodeGen_X86.cpp | 59 +++++++++++++++++++++---------------- src/JITModule.cpp | 11 ++----- 13 files changed, 137 insertions(+), 72 deletions(-) diff --git a/src/CodeGen_ARM.cpp b/src/CodeGen_ARM.cpp index c9962d530d65..7fa4bd35b84f 100644 --- a/src/CodeGen_ARM.cpp +++ b/src/CodeGen_ARM.cpp @@ -72,7 +72,8 @@ class CodeGen_ARM : public CodeGen_Posix { }; vector casts, calls, averagings, negations; - string mcpu() const override; + string mcpu_target() const override; + string mcpu_tune() const override; string mattrs() const override; bool use_soft_float_abi() const override; int native_vector_bits() const override; @@ -1392,7 +1393,7 @@ Type CodeGen_ARM::upgrade_type_for_storage(const Type &t) const { return CodeGen_Posix::upgrade_type_for_storage(t); } -string CodeGen_ARM::mcpu() const { +string CodeGen_ARM::mcpu_target() const { if (target.bits == 32) { if (target.has_feature(Target::ARMv7s)) { return "swift"; @@ -1410,6 +1411,10 @@ string CodeGen_ARM::mcpu() const { } } +string CodeGen_ARM::mcpu_tune() const { + return mcpu_target(); +} + string CodeGen_ARM::mattrs() const { if (target.bits == 32) { if (target.has_feature(Target::ARMv7s)) { diff --git a/src/CodeGen_Hexagon.cpp b/src/CodeGen_Hexagon.cpp index a32bca98ff7d..9f7ce30de473 100644 --- a/src/CodeGen_Hexagon.cpp +++ b/src/CodeGen_Hexagon.cpp @@ -42,7 +42,8 @@ class CodeGen_Hexagon : public CodeGen_Posix { void init_module() override; - 
std::string mcpu() const override; + std::string mcpu_target() const override; + std::string mcpu_tune() const override; std::string mattrs() const override; int isa_version; bool use_soft_float_abi() const override; @@ -1788,7 +1789,7 @@ Value *CodeGen_Hexagon::call_intrin(llvm::Type *result_type, const string &name, fn, std::move(args)); } -string CodeGen_Hexagon::mcpu() const { +string CodeGen_Hexagon::mcpu_target() const { if (target.has_feature(Halide::Target::HVX_v66)) { return "hexagonv66"; } else if (target.has_feature(Halide::Target::HVX_v65)) { @@ -1798,6 +1799,10 @@ string CodeGen_Hexagon::mcpu() const { } } +string CodeGen_Hexagon::mcpu_tune() const { + return mcpu_target(); +} + string CodeGen_Hexagon::mattrs() const { std::stringstream attrs; attrs << "+hvx-length128b"; diff --git a/src/CodeGen_Internal.cpp b/src/CodeGen_Internal.cpp index cf2b25cc0a7e..f880fc86f1eb 100644 --- a/src/CodeGen_Internal.cpp +++ b/src/CodeGen_Internal.cpp @@ -590,16 +590,15 @@ bool get_md_string(llvm::Metadata *value, std::string &result) { return false; } -void get_target_options(const llvm::Module &module, llvm::TargetOptions &options, std::string &mcpu, std::string &mattrs) { +void get_target_options(const llvm::Module &module, llvm::TargetOptions &options) { bool use_soft_float_abi = false; get_md_bool(module.getModuleFlag("halide_use_soft_float_abi"), use_soft_float_abi); - get_md_string(module.getModuleFlag("halide_mcpu"), mcpu); - get_md_string(module.getModuleFlag("halide_mattrs"), mattrs); std::string mabi; get_md_string(module.getModuleFlag("halide_mabi"), mabi); bool use_pic = true; get_md_bool(module.getModuleFlag("halide_use_pic"), use_pic); + // FIXME: can this be migrated into `set_function_attributes_from_halide_target_options()`? 
bool per_instruction_fast_math_flags = false; get_md_bool(module.getModuleFlag("halide_per_instruction_fast_math_flags"), per_instruction_fast_math_flags); @@ -629,9 +628,14 @@ void clone_target_options(const llvm::Module &from, llvm::Module &to) { to.addModuleFlag(llvm::Module::Warning, "halide_use_soft_float_abi", use_soft_float_abi ? 1 : 0); } - std::string mcpu; - if (get_md_string(from.getModuleFlag("halide_mcpu"), mcpu)) { - to.addModuleFlag(llvm::Module::Warning, "halide_mcpu", llvm::MDString::get(context, mcpu)); + std::string mcpu_target; + if (get_md_string(from.getModuleFlag("halide_mcpu_target"), mcpu_target)) { + to.addModuleFlag(llvm::Module::Warning, "halide_mcpu_target", llvm::MDString::get(context, mcpu_target)); + } + + std::string mcpu_tune; + if (get_md_string(from.getModuleFlag("halide_mcpu_tune"), mcpu_tune)) { + to.addModuleFlag(llvm::Module::Warning, "halide_mcpu_tune", llvm::MDString::get(context, mcpu_tune)); } std::string mattrs; @@ -657,9 +661,7 @@ std::unique_ptr make_target_machine(const llvm::Module &mod internal_assert(llvm_target) << "Could not create LLVM target for " << triple.str() << "\n"; llvm::TargetOptions options; - std::string mcpu = ""; - std::string mattrs = ""; - get_target_options(module, options, mcpu, mattrs); + get_target_options(module, options); bool use_pic = true; get_md_bool(module.getModuleFlag("halide_use_pic"), use_pic); @@ -668,7 +670,7 @@ std::unique_ptr make_target_machine(const llvm::Module &mod get_md_bool(module.getModuleFlag("halide_use_large_code_model"), use_large_code_model); auto *tm = llvm_target->createTargetMachine(module.getTargetTriple(), - mcpu, mattrs, + /*CPU target=*/"", /*Features=*/"", options, use_pic ? llvm::Reloc::PIC_ : llvm::Reloc::Static, use_large_code_model ? 
llvm::CodeModel::Large : llvm::CodeModel::Small, @@ -676,10 +678,21 @@ std::unique_ptr make_target_machine(const llvm::Module &mod return std::unique_ptr(tm); } -void set_function_attributes_for_target(llvm::Function *fn, const Target &t) { +void set_function_attributes_from_halide_target_options(llvm::Function &fn) { + llvm::Module &module = *fn.getParent(); + + std::string mcpu_target, mcpu_tune, mattrs; + get_md_string(module.getModuleFlag("halide_mcpu_target"), mcpu_target); + get_md_string(module.getModuleFlag("halide_mcpu_tune"), mcpu_tune); + get_md_string(module.getModuleFlag("halide_mattrs"), mattrs); + + fn.addFnAttr("target-cpu", mcpu_target); + fn.addFnAttr("tune-cpu", mcpu_tune); + fn.addFnAttr("target-features", mattrs); + // Turn off approximate reciprocals for division. It's too // inaccurate even for us. - fn->addFnAttr("reciprocal-estimates", "none"); + fn.addFnAttr("reciprocal-estimates", "none"); } void embed_bitcode(llvm::Module *M, const string &halide_command) { diff --git a/src/CodeGen_Internal.h b/src/CodeGen_Internal.h index 3fe1b8b696f5..b48a630e11a7 100644 --- a/src/CodeGen_Internal.h +++ b/src/CodeGen_Internal.h @@ -92,8 +92,8 @@ Expr lower_signed_shift_right(const Expr &a, const Expr &b); /** Reduce a mux intrinsic to a select tree */ Expr lower_mux(const Call *mux); -/** Given an llvm::Module, set llvm:TargetOptions, cpu and attr information */ -void get_target_options(const llvm::Module &module, llvm::TargetOptions &options, std::string &mcpu, std::string &mattrs); +/** Given an llvm::Module, set llvm:TargetOptions information */ +void get_target_options(const llvm::Module &module, llvm::TargetOptions &options); /** Given two llvm::Modules, clone target options from one to the other */ void clone_target_options(const llvm::Module &from, llvm::Module &to); @@ -101,8 +101,8 @@ void clone_target_options(const llvm::Module &from, llvm::Module &to); /** Given an llvm::Module, get or create an llvm:TargetMachine */ std::unique_ptr 
make_target_machine(const llvm::Module &module); -/** Set the appropriate llvm Function attributes given a Target. */ -void set_function_attributes_for_target(llvm::Function *, const Target &); +/** Set the appropriate llvm Function attributes given the Halide Target. */ +void set_function_attributes_from_halide_target_options(llvm::Function &); /** Save a copy of the llvm IR currently represented by the module as * data in the __LLVM,__bitcode section. Emulates clang's diff --git a/src/CodeGen_LLVM.cpp b/src/CodeGen_LLVM.cpp index a0f84d6cb6f4..a1fa954412cc 100644 --- a/src/CodeGen_LLVM.cpp +++ b/src/CodeGen_LLVM.cpp @@ -455,7 +455,8 @@ void CodeGen_LLVM::init_codegen(const std::string &name, bool any_strict_float) // Add some target specific info to the module as metadata. module->addModuleFlag(llvm::Module::Warning, "halide_use_soft_float_abi", use_soft_float_abi() ? 1 : 0); - module->addModuleFlag(llvm::Module::Warning, "halide_mcpu", MDString::get(*context, mcpu())); + module->addModuleFlag(llvm::Module::Warning, "halide_mcpu_target", MDString::get(*context, mcpu_target())); + module->addModuleFlag(llvm::Module::Warning, "halide_mcpu_tune", MDString::get(*context, mcpu_tune())); module->addModuleFlag(llvm::Module::Warning, "halide_mattrs", MDString::get(*context, mattrs())); module->addModuleFlag(llvm::Module::Warning, "halide_mabi", MDString::get(*context, mabi())); module->addModuleFlag(llvm::Module::Warning, "halide_use_pic", use_pic() ? 
1 : 0); @@ -523,7 +524,7 @@ std::unique_ptr CodeGen_LLVM::compile(const Module &input) { } FunctionType *func_t = FunctionType::get(i32_t, arg_types, false); function = llvm::Function::Create(func_t, llvm_linkage(f.linkage), names.extern_name, module.get()); - set_function_attributes_for_target(function, target); + set_function_attributes_from_halide_target_options(*function); // Mark the buffer args as no alias and save indication for add_argv_wrapper if needed std::vector buffer_args(f.args.size()); @@ -564,6 +565,8 @@ std::unique_ptr CodeGen_LLVM::compile(const Module &input) { } std::unique_ptr CodeGen_LLVM::finish_codegen() { + llvm::for_each(*module, set_function_attributes_from_halide_target_options); + // Verify the module is ok internal_assert(!verifyModule(*module, &llvm::errs())); debug(2) << "Done generating llvm bitcode\n"; diff --git a/src/CodeGen_LLVM.h b/src/CodeGen_LLVM.h index dcdc6eee07e1..606840a679ec 100644 --- a/src/CodeGen_LLVM.h +++ b/src/CodeGen_LLVM.h @@ -106,11 +106,21 @@ class CodeGen_LLVM : public IRVisitor { virtual void end_func(const std::vector &args); // @} - /** What should be passed as -mcpu, -mattrs, and related for - * compilation. The architecture-specific code generator should - * define these. */ + /** What should be passed as -mcpu (warning: implies attrs!), -mattrs, + * and related for compilation. The architecture-specific code generator + * should define these. + * + * `mcpu_target()` - target this specific CPU, in the sense of the allowed + * ISA sets *and* the CPU-specific tuning/assembly instruction scheduling. + * + * `mcpu_tune()` - expect that we will be running on this specific CPU, + * so perform CPU-specific tuning/assembly instruction scheduling, *but* + * DON'T sacrifice the portability, support running on other CPUs, only + * make use of the ISAs that are enabled by `mcpu_target()`+`mattrs()`. 
+ */ // @{ - virtual std::string mcpu() const = 0; + virtual std::string mcpu_target() const = 0; + virtual std::string mcpu_tune() const = 0; virtual std::string mattrs() const = 0; virtual std::string mabi() const; virtual bool use_soft_float_abi() const = 0; diff --git a/src/CodeGen_MIPS.cpp b/src/CodeGen_MIPS.cpp index 4118a12b684f..26bd3a502146 100644 --- a/src/CodeGen_MIPS.cpp +++ b/src/CodeGen_MIPS.cpp @@ -19,7 +19,8 @@ class CodeGen_MIPS : public CodeGen_Posix { protected: using CodeGen_Posix::visit; - string mcpu() const override; + string mcpu_target() const override; + string mcpu_tune() const override; string mattrs() const override; bool use_soft_float_abi() const override; int native_vector_bits() const override; @@ -29,7 +30,7 @@ CodeGen_MIPS::CodeGen_MIPS(const Target &t) : CodeGen_Posix(t) { } -string CodeGen_MIPS::mcpu() const { +string CodeGen_MIPS::mcpu_target() const { if (target.bits == 32) { return ""; } else { @@ -37,6 +38,10 @@ string CodeGen_MIPS::mcpu() const { } } +string CodeGen_MIPS::mcpu_tune() const { + return mcpu_target(); +} + string CodeGen_MIPS::mattrs() const { if (target.bits == 32) { return ""; diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp index 779512dc7348..711040f54afd 100644 --- a/src/CodeGen_PTX_Dev.cpp +++ b/src/CodeGen_PTX_Dev.cpp @@ -91,7 +91,8 @@ class CodeGen_PTX_Dev : public CodeGen_LLVM, public CodeGen_GPU_Dev { // @} std::string march() const; - std::string mcpu() const override; + std::string mcpu_target() const override; + std::string mcpu_tune() const override; std::string mattrs() const override; bool use_soft_float_abi() const override; int native_vector_bits() const override; @@ -153,7 +154,7 @@ void CodeGen_PTX_Dev::add_kernel(Stmt stmt, // Make our function FunctionType *func_t = FunctionType::get(void_t, arg_types, false); function = llvm::Function::Create(func_t, llvm::Function::ExternalLinkage, name, module.get()); - set_function_attributes_for_target(function, target); + 
set_function_attributes_from_halide_target_options(*function); // Mark the buffer args as no alias for (size_t i = 0; i < args.size(); i++) { @@ -542,7 +543,7 @@ string CodeGen_PTX_Dev::march() const { return "nvptx64"; } -string CodeGen_PTX_Dev::mcpu() const { +string CodeGen_PTX_Dev::mcpu_target() const { if (target.has_feature(Target::CUDACapability86)) { return "sm_86"; } else if (target.has_feature(Target::CUDACapability80)) { @@ -566,6 +567,10 @@ string CodeGen_PTX_Dev::mcpu() const { } } +string CodeGen_PTX_Dev::mcpu_tune() const { + return mcpu_target(); +} + string CodeGen_PTX_Dev::mattrs() const { if (target.has_feature(Target::CUDACapability86)) { return "+ptx71"; @@ -617,7 +622,7 @@ vector CodeGen_PTX_Dev::compile_to_src() { std::unique_ptr target_machine(llvm_target->createTargetMachine(triple.str(), - mcpu(), mattrs(), options, + mcpu_target(), mattrs(), options, llvm::Reloc::PIC_, llvm::CodeModel::Small, CodeGenOpt::Aggressive)); @@ -758,7 +763,7 @@ vector CodeGen_PTX_Dev::compile_to_src() { f.write(buffer.data(), buffer.size()); f.close(); - string cmd = "ptxas --gpu-name " + mcpu() + " " + ptx.pathname() + " -o " + sass.pathname(); + string cmd = "ptxas --gpu-name " + mcpu_target() + " " + ptx.pathname() + " -o " + sass.pathname(); if (system(cmd.c_str()) == 0) { cmd = "nvdisasm " + sass.pathname(); int ret = system(cmd.c_str()); diff --git a/src/CodeGen_PowerPC.cpp b/src/CodeGen_PowerPC.cpp index 42dec77fd75d..7f1e7252e941 100644 --- a/src/CodeGen_PowerPC.cpp +++ b/src/CodeGen_PowerPC.cpp @@ -22,7 +22,8 @@ class CodeGen_PowerPC : public CodeGen_Posix { protected: void init_module() override; - string mcpu() const override; + string mcpu_target() const override; + string mcpu_tune() const override; string mattrs() const override; bool use_soft_float_abi() const override; int native_vector_bits() const override; @@ -141,7 +142,7 @@ void CodeGen_PowerPC::visit(const Max *op) { return CodeGen_Posix::visit(op); } -string CodeGen_PowerPC::mcpu() const { 
+string CodeGen_PowerPC::mcpu_target() const { if (target.bits == 32) { return "ppc32"; } else { @@ -155,6 +156,10 @@ string CodeGen_PowerPC::mcpu() const { } } +string CodeGen_PowerPC::mcpu_tune() const { + return mcpu_target(); +} + string CodeGen_PowerPC::mattrs() const { string features; string separator; diff --git a/src/CodeGen_RISCV.cpp b/src/CodeGen_RISCV.cpp index 01395f596b91..434105724c3a 100644 --- a/src/CodeGen_RISCV.cpp +++ b/src/CodeGen_RISCV.cpp @@ -19,7 +19,8 @@ class CodeGen_RISCV : public CodeGen_Posix { protected: using CodeGen_Posix::visit; - string mcpu() const override; + string mcpu_target() const override; + string mcpu_tune() const override; string mattrs() const override; string mabi() const override; bool use_soft_float_abi() const override; @@ -30,10 +31,14 @@ CodeGen_RISCV::CodeGen_RISCV(const Target &t) : CodeGen_Posix(t) { } -string CodeGen_RISCV::mcpu() const { +string CodeGen_RISCV::mcpu_target() const { return ""; } +string CodeGen_RISCV::mcpu_tune() const { + return mcpu_target(); +} + string CodeGen_RISCV::mattrs() const { // Note: the default march is "rv[32|64]imafdc", // which includes standard extensions: diff --git a/src/CodeGen_WebAssembly.cpp b/src/CodeGen_WebAssembly.cpp index 83dc6775fc5f..2a63b8df2f36 100644 --- a/src/CodeGen_WebAssembly.cpp +++ b/src/CodeGen_WebAssembly.cpp @@ -29,7 +29,8 @@ class CodeGen_WebAssembly : public CodeGen_Posix { void init_module() override; - string mcpu() const override; + string mcpu_target() const override; + string mcpu_tune() const override; string mattrs() const override; bool use_soft_float_abi() const override; int native_vector_bits() const override; @@ -256,10 +257,14 @@ void CodeGen_WebAssembly::codegen_vector_reduce(const VectorReduce *op, const Ex CodeGen_Posix::codegen_vector_reduce(op, init); } -string CodeGen_WebAssembly::mcpu() const { +string CodeGen_WebAssembly::mcpu_target() const { return ""; } +string CodeGen_WebAssembly::mcpu_tune() const { + return mcpu_target(); 
+} + string CodeGen_WebAssembly::mattrs() const { std::ostringstream s; string sep; diff --git a/src/CodeGen_X86.cpp b/src/CodeGen_X86.cpp index c14a6a0f0671..38fc2321d919 100644 --- a/src/CodeGen_X86.cpp +++ b/src/CodeGen_X86.cpp @@ -53,7 +53,8 @@ class CodeGen_X86 : public CodeGen_Posix { CodeGen_X86(Target); protected: - string mcpu() const override; + string mcpu_target() const override; + string mcpu_tune() const override; string mattrs() const override; bool use_soft_float_abi() const override; int native_vector_bits() const override; @@ -689,8 +690,33 @@ void CodeGen_X86::visit(const Store *op) { CodeGen_Posix::visit(op); } -string CodeGen_X86::mcpu() const { - // First, check if any explicit request for tuning exists. +string CodeGen_X86::mcpu_target() const { + // Perform an ad-hoc guess for the -mcpu given features. + // WARNING: this is used to drive -mcpu, *NOT* -mtune! + // The CPU choice here *WILL* affect -mattrs! + if (target.has_feature(Target::AVX512_SapphireRapids)) { + return "sapphirerapids"; + } else if (target.has_feature(Target::AVX512_Cannonlake)) { + return "cannonlake"; + } else if (target.has_feature(Target::AVX512_Skylake)) { + return "skylake-avx512"; + } else if (target.has_feature(Target::AVX512_KNL)) { + return "knl"; + } else if (target.has_feature(Target::AVX2)) { + return "haswell"; + } else if (target.has_feature(Target::AVX)) { + return "corei7-avx"; + } else if (target.has_feature(Target::SSE41)) { + // We want SSE4.1 but not SSE4.2, hence "penryn" rather than "corei7" + return "penryn"; + } else { + // Default should not include SSSE3, hence "k8" rather than "core2" + return "k8"; + } +} + +string CodeGen_X86::mcpu_tune() const { + // Check if any explicit request for tuning exists. switch (target.processor_tune) { // Please keep sorted. 
case Target::Processor::AMDFam10: return "amdfam10"; @@ -718,31 +744,14 @@ string CodeGen_X86::mcpu() const { return "znver3"; case Target::Processor::ProcessorGeneric: - break; // Detect "best" CPU from the enabled ISA's. - } - - // And only after that, perform an ad-hoc guess for the tune given features. - if (target.has_feature(Target::AVX512_SapphireRapids)) { - return "sapphirerapids"; - } else if (target.has_feature(Target::AVX512_Cannonlake)) { - return "cannonlake"; - } else if (target.has_feature(Target::AVX512_Skylake)) { - return "skylake-avx512"; - } else if (target.has_feature(Target::AVX512_KNL)) { - return "knl"; - } else if (target.has_feature(Target::AVX2)) { - return "haswell"; - } else if (target.has_feature(Target::AVX)) { - return "corei7-avx"; - } else if (target.has_feature(Target::SSE41)) { - // We want SSE4.1 but not SSE4.2, hence "penryn" rather than "corei7" - return "penryn"; - } else { - // Default should not include SSSE3, hence "k8" rather than "core2" - return "k8"; + break; } + internal_assert(target.processor_tune == Target::Processor::ProcessorGeneric && "The switch should be exhaustive."); + return mcpu_target(); // Detect "best" CPU from the enabled ISA's. } +// FIXME: we should lower everything here, instead of relying +// that -mcpu= (`mcpu_target()`) implies/sets features for us. 
string CodeGen_X86::mattrs() const { string features; string separator; diff --git a/src/JITModule.cpp b/src/JITModule.cpp index acb8be5da8c7..444b355ba039 100644 --- a/src/JITModule.cpp +++ b/src/JITModule.cpp @@ -253,10 +253,10 @@ void JITModule::compile_module(std::unique_ptr m, const string &fu debug(2) << "Target triple: " << m->getTargetTriple() << "\n"; string error_string; - string mcpu; - string mattrs; + llvm::for_each(*m, set_function_attributes_from_halide_target_options); + llvm::TargetOptions options; - get_target_options(*m, options, mcpu, mattrs); + get_target_options(*m, options); DataLayout initial_module_data_layout = m->getDataLayout(); string module_name = m->getModuleIdentifier(); @@ -269,11 +269,6 @@ void JITModule::compile_module(std::unique_ptr m, const string &fu engine_builder.setMCJITMemoryManager(std::unique_ptr(memory_manager)); engine_builder.setOptLevel(CodeGenOpt::Aggressive); - if (!mcpu.empty()) { - engine_builder.setMCPU(mcpu); - } - std::vector mattrs_array = {mattrs}; - engine_builder.setMAttrs(mattrs_array); TargetMachine *tm = engine_builder.selectTarget(); internal_assert(tm) << error_string << "\n";