diff --git a/src/Makefile b/src/Makefile index 90455d51e9345..f13f7f2005b4c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -56,7 +56,7 @@ CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop llvm llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering llvm-ptls \ llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \ llvm-multiversioning llvm-alloc-opt llvm-alloc-helpers cgmemmgr llvm-remove-addrspaces \ - llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures + llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures pipeline FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir) CG_LLVM_LIBS := all ifeq ($(USE_POLLY),1) @@ -289,7 +289,7 @@ $(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/iddict.c $(SRCDIR $(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\ intrinsics.cpp jitlayers.h debug-registry.h intrinsics.h codegen_shared.h cgutils.cpp ccall.cpp abi_*.cpp processor.h builtin_proto.h) $(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,debuginfo.h processor.h jitlayers.h debug-registry.h) -$(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h +$(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h $(SRCDIR)/jitlayers.h $(BUILDDIR)/dump.o $(BUILDDIR)/dump.dbg.obj: $(addprefix $(SRCDIR)/,common_symbols1.inc common_symbols2.inc builtin_proto.h serialize.h) $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h @@ -312,6 +312,7 @@ $(BUILDDIR)/llvm-pass-helpers.o $(BUILDDIR)/llvm-pass-helpers.dbg.obj: $(SRCDIR) $(BUILDDIR)/llvm-propagate-addrspaces.o $(BUILDDIR)/llvm-propagate-addrspaces.dbg.obj: $(SRCDIR)/codegen_shared.h $(BUILDDIR)/llvm-remove-addrspaces.o $(BUILDDIR)/llvm-remove-addrspaces.dbg.obj: $(SRCDIR)/codegen_shared.h $(BUILDDIR)/llvm-ptls.o $(BUILDDIR)/llvm-ptls.dbg.obj: $(SRCDIR)/codegen_shared.h +$(BUILDDIR)/pipeline.o $(BUILDDIR)/pipeline.dbg.obj: $(SRCDIR)/jitlayers.h $(BUILDDIR)/processor.o $(BUILDDIR)/processor.dbg.obj: $(addprefix $(SRCDIR)/,processor_*.cpp processor.h features_*.h) $(BUILDDIR)/signal-handling.o $(BUILDDIR)/signal-handling.dbg.obj: $(addprefix $(SRCDIR)/,signals-*.c) $(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 7a19b34bd6824..3dfdeea1e6fd9 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -45,6 +45,44 @@ #endif #endif +// NewPM needs to manually include all the pass headers +#include "llvm/Transforms/IPO/AlwaysInliner.h" +#include +#include "llvm/Transforms/InstCombine/InstCombine.h" +#include "llvm/Transforms/Instrumentation/AddressSanitizer.h" +#include "llvm/Transforms/Instrumentation/MemorySanitizer.h" +#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" +#include "llvm/Transforms/Scalar/ADCE.h" +#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" +#include "llvm/Transforms/Scalar/DCE.h" +#include "llvm/Transforms/Scalar/DeadStoreElimination.h" +#include "llvm/Transforms/Scalar/DivRemPairs.h" +#include "llvm/Transforms/Scalar/EarlyCSE.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/IndVarSimplify.h" +#include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h" +#include "llvm/Transforms/Scalar/InstSimplifyPass.h" +#include "llvm/Transforms/Scalar/JumpThreading.h" +#include "llvm/Transforms/Scalar/LICM.h" +#include "llvm/Transforms/Scalar/LoopDeletion.h" +#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" +#include "llvm/Transforms/Scalar/LoopInstSimplify.h" +#include "llvm/Transforms/Scalar/LoopLoadElimination.h" +#include "llvm/Transforms/Scalar/LoopRotation.h" +#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" +#include "llvm/Transforms/Scalar/LoopUnrollPass.h" +#include "llvm/Transforms/Scalar/MemCpyOptimizer.h" +#include "llvm/Transforms/Scalar/Reassociate.h" +#include "llvm/Transforms/Scalar/SCCP.h" +#include "llvm/Transforms/Scalar/SROA.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" +#include "llvm/Transforms/Scalar/SimplifyCFG.h" +#include "llvm/Transforms/Vectorize/LoopVectorize.h" +#include "llvm/Transforms/Vectorize/SLPVectorizer.h" +#include "llvm/Transforms/Vectorize/VectorCombine.h" + + + // for outputting code #include #include @@ -515,29 +553,39 @@ void jl_dump_native_impl(void *native_code, std::vector unopt_bc_Archive; std::vector outputs; +#ifndef JL_USE_NEW_PM legacy::PassManager preopt, postopt; +#else + PassBuilder PB; + AnalysisManagers AM{*TM, PB, getOptLevel(jl_options.opt_level)}; + ModulePassManager preopt, postopt; +#endif + legacy::PassManager emitter; - if (unopt_bc_fname) + if (unopt_bc_fname) { +#ifndef JL_USE_NEW_PM preopt.add(createBitcodeWriterPass(unopt_bc_OS)); +#else + preopt.addPass(BitcodeWriterPass(unopt_bc_OS)); +#endif + } - //Is this necessary for TM? - // addTargetPasses(&postopt, TM->getTargetTriple(), TM->getTargetIRAnalysis()); - if (bc_fname) + if (bc_fname) { +#ifndef JL_USE_NEW_PM postopt.add(createBitcodeWriterPass(bc_OS)); +#else + postopt.addPass(BitcodeWriterPass(bc_OS)); +#endif + } + //Is this necessary for TM? + addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis()); if (obj_fname) - if (TM->addPassesToEmitFile(postopt, obj_OS, nullptr, CGFT_ObjectFile, false)) + if (TM->addPassesToEmitFile(emitter, obj_OS, nullptr, CGFT_ObjectFile, false)) jl_safe_printf("ERROR: target does not support generation of object files\n"); if (asm_fname) - if (TM->addPassesToEmitFile(postopt, asm_OS, nullptr, CGFT_AssemblyFile, false)) + if (TM->addPassesToEmitFile(emitter, asm_OS, nullptr, CGFT_AssemblyFile, false)) jl_safe_printf("ERROR: target does not support generation of object files\n"); - legacy::PassManager optimizer; - if (bc_fname || obj_fname || asm_fname) { - addTargetPasses(&optimizer, TM->getTargetTriple(), TM->getTargetIRAnalysis()); - addOptimizationPasses(&optimizer, jl_options.opt_level, true, true); - addMachinePasses(&optimizer, jl_options.opt_level); - } - // Reset the target triple to make sure it matches the new target machine auto dataM = data->M.getModuleUnlocked(); dataM->setTargetTriple(TM->getTargetTriple().str()); @@ -549,6 +597,17 @@ void jl_dump_native_impl(void *native_code, T_size = Type::getInt32Ty(Context); Type *T_psize = T_size->getPointerTo(); +#ifndef JL_USE_NEW_PM + legacy::PassManager optimizer; + if (bc_fname || obj_fname || asm_fname) { + addTargetPasses(&optimizer, TM->getTargetTriple(), TM->getTargetIRAnalysis()); + addOptimizationPasses(&optimizer, jl_options.opt_level, true, true); + addMachinePasses(&optimizer, jl_options.opt_level); + } +#else + NewPM optimizer{std::move(TM), getOptLevel(jl_options.opt_level), {true, true}}; +#endif + // add metadata information if (imaging_default()) { emit_offset_table(*dataM, data->jl_sysimg_gvars, "jl_sysimg_gvars", T_psize); @@ -567,7 +626,11 @@ void jl_dump_native_impl(void *native_code, // do the actual work auto add_output = [&] (Module &M, StringRef unopt_bc_Name, StringRef bc_Name, StringRef obj_Name, StringRef asm_Name) { - preopt.run(M); + preopt.run(M +#ifdef JL_USE_NEW_PM + , AM.MAM +#endif + ); optimizer.run(M); // We would like to emit an alias or an weakref alias to redirect these symbols @@ -585,7 +648,12 @@ void jl_dump_native_impl(void *native_code, injectCRTAlias(M, "__truncdfhf2", "julia__truncdfhf2", FunctionType::get(Type::getHalfTy(Context), { Type::getDoubleTy(Context) }, false)); - postopt.run(M); + postopt.run(M +#ifdef JL_USE_NEW_PM + , AM.MAM +#endif + ); + emitter.run(M); if (unopt_bc_fname) emit_result(unopt_bc_Archive, unopt_bc_Buffer, unopt_bc_Name, outputs); @@ -937,6 +1005,9 @@ void jl_add_optimization_passes_impl(LLVMPassManagerRef PM, int opt_level, int l addOptimizationPasses(unwrap(PM), opt_level, lower_intrinsics); } +void buildBasicPipeline(ModulePassManager &MPM, PassBuilder &PB, OptimizationLevel O, OptimizationOptions options); +void buildFullPipeline(ModulePassManager &MPM, PassBuilder &PB, OptimizationLevel O, OptimizationOptions options); + // new pass manager plugin // NOTE: Instead of exporting all the constructors in passes.h we could @@ -946,79 +1017,39 @@ static void registerCallbacks(PassBuilder &PB) { PB.registerPipelineParsingCallback( [](StringRef Name, FunctionPassManager &PM, ArrayRef InnerPipeline) { - if (Name == "DemoteFloat16") { - PM.addPass(DemoteFloat16()); - return true; - } - if (Name == "CombineMulAdd") { - PM.addPass(CombineMulAdd()); - return true; - } - if (Name == "LateLowerGCFrame") { - PM.addPass(LateLowerGC()); - return true; - } - if (Name == "AllocOpt") { - PM.addPass(AllocOptPass()); - return true; - } - if (Name == "PropagateJuliaAddrspaces") { - PM.addPass(PropagateJuliaAddrspacesPass()); - return true; - } - if (Name == "LowerExcHandlers") { - PM.addPass(LowerExcHandlers()); - return true; - } - if (Name == "GCInvariantVerifier") { - // TODO: Parse option and allow users to set `Strong` - PM.addPass(GCInvariantVerifierPass()); - return true; +#define FUNCTION_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + PM.addPass(CREATE_PASS); \ + return true; \ } +#include "llvm-julia-passes.inc" +#undef FUNCTION_PASS return false; }); PB.registerPipelineParsingCallback( [](StringRef Name, ModulePassManager &PM, ArrayRef InnerPipeline) { - if (Name == "CPUFeatures") { - PM.addPass(CPUFeatures()); - return true; - } - if (Name == "RemoveNI") { - PM.addPass(RemoveNI()); - return true; - } - if (Name == "LowerSIMDLoop") { - PM.addPass(LowerSIMDLoop()); - return true; - } - if (Name == "FinalLowerGC") { - PM.addPass(FinalLowerGCPass()); - return true; - } - if (Name == "RemoveJuliaAddrspaces") { - PM.addPass(RemoveJuliaAddrspacesPass()); - return true; - } - if (Name == "MultiVersioning") { - PM.addPass(MultiVersioning()); - return true; - } - if (Name == "LowerPTLS") { - PM.addPass(LowerPTLSPass()); - return true; +#define MODULE_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + PM.addPass(CREATE_PASS); \ + return true; \ } +#include "llvm-julia-passes.inc" +#undef MODULE_PASS return false; }); PB.registerPipelineParsingCallback( [](StringRef Name, LoopPassManager &PM, ArrayRef InnerPipeline) { - if (Name == "JuliaLICM") { - PM.addPass(JuliaLICMPass()); - return true; +#define LOOP_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + PM.addPass(CREATE_PASS); \ + return true; \ } +#include "llvm-julia-passes.inc" +#undef LOOP_PASS return false; }); } diff --git a/src/codegen.cpp b/src/codegen.cpp index 8ac0cf6105601..2eb21d0de0bc4 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -8597,6 +8597,9 @@ extern "C" void jl_init_llvm(void) if (jl_using_gdb_jitevents) jl_ExecutionEngine->enableJITDebuggingSupport(); +#if defined(_COMPILER_ASAN_ENABLED_) && defined(JL_USE_NEW_PM) +#warning "JIT profiling support (JL_USE_*_JITEVENTS) not yet available for ASAN with NewPM (requires JITLink)" +#else #if defined(JL_USE_INTEL_JITEVENTS) || \ defined(JL_USE_OPROFILE_JITEVENTS) || \ defined(JL_USE_PERF_JITEVENTS) @@ -8638,6 +8641,7 @@ extern "C" void jl_init_llvm(void) jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createPerfJITEventListener()); #endif #endif +#endif #endif cl::PrintOptionValues(); diff --git a/src/disasm.cpp b/src/disasm.cpp index 838934a6c5893..64c88bda54b4a 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -482,9 +482,17 @@ void jl_strip_llvm_debug(Module *m) void jl_strip_llvm_addrspaces(Module *m) { +#ifndef JL_USE_NEW_PM legacy::PassManager PM; PM.add(createRemoveJuliaAddrspacesPass()); PM.run(*m); +#else + PassBuilder PB; + AnalysisManagers AM{PB}; + ModulePassManager PM; + PM.addPass(RemoveJuliaAddrspacesPass()); + PM.run(*m, AM.MAM); +#endif } // print an llvm IR acquired from jl_get_llvmf diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 009b969201164..bdd1f5c1f879c 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -874,7 +874,11 @@ namespace { namespace { +#ifndef JL_USE_NEW_PM typedef legacy::PassManager PassManager; +#else + typedef NewPM PassManager; +#endif orc::JITTargetMachineBuilder createJTMBFromTM(TargetMachine &TM, int optlevel) { return orc::JITTargetMachineBuilder(TM.getTargetTriple()) @@ -896,21 +900,24 @@ namespace { } }; +#ifndef JL_USE_NEW_PM struct PMCreator { std::unique_ptr TM; int optlevel; PMCreator(TargetMachine &TM, int optlevel) : TM(cantFail(createJTMBFromTM(TM, optlevel).createTargetMachine())), optlevel(optlevel) {} PMCreator(const PMCreator &other) : PMCreator(*other.TM, other.optlevel) {} - PMCreator(PMCreator &&other) : TM(std::move(other.TM)), optlevel(other.optlevel) {} + PMCreator &operator=(const PMCreator &other) { + TM = cantFail(createJTMBFromTM(*other.TM, other.optlevel).createTargetMachine()); + optlevel = other.optlevel; + return *this; + } + PMCreator(PMCreator &&other) = default; + PMCreator &operator=(PMCreator &&other) = default; friend void swap(PMCreator &self, PMCreator &other) { using std::swap; swap(self.TM, other.TM); swap(self.optlevel, other.optlevel); } - PMCreator &operator=(PMCreator other) { - swap(*this, other); - return *this; - } std::unique_ptr operator()() { auto PM = std::make_unique(); addTargetPasses(PM.get(), TM->getTargetTriple(), TM->getTargetIRAnalysis()); @@ -920,6 +927,18 @@ namespace { } }; +#else + + struct PMCreator { + orc::JITTargetMachineBuilder JTMB; + OptimizationLevel O; + PMCreator(TargetMachine &TM, int optlevel) : JTMB(createJTMBFromTM(TM, optlevel)), O(getOptLevel(optlevel)) {} + std::unique_ptr operator()() { + return std::make_unique(cantFail(JTMB.createTargetMachine()), O); + } + }; +#endif + struct OptimizerT { OptimizerT(TargetMachine &TM, int optlevel) : optlevel(optlevel), PMs(PMCreator(TM, optlevel)) {} diff --git a/src/jitlayers.h b/src/jitlayers.h index c4a89f882beaa..5191eb99cb697 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -13,9 +13,14 @@ #include #include +#include +#include +#include + #include #include "julia_assert.h" #include "debug-registry.h" +#include "platform.h" #include #include @@ -36,7 +41,10 @@ // and feature support (e.g. Windows, JITEventListeners for various profilers, // etc.). Thus, we currently only use JITLink where absolutely required, that is, // for Mac/aarch64. -#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) + +#define JL_USE_NEW_PM + +#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) || defined(_COMPILER_ASAN_ENABLED_) && defined(JL_USE_NEW_PM) && defined(_OS_LINUX_) # if JL_LLVM_VERSION < 130000 # pragma message("On aarch64-darwin, LLVM version >= 13 is required for JITLink; fallback suffers from occasional segfaults") # endif @@ -97,6 +105,42 @@ struct jl_locked_stream { } }; +struct OptimizationOptions { + bool lower_intrinsics; + bool dump_native; + bool external_use; + + static constexpr OptimizationOptions defaults() { + return {true, false, false}; + } +}; + +struct NewPM { + std::unique_ptr TM; + StandardInstrumentations SI; + std::unique_ptr PIC; + PassBuilder PB; + ModulePassManager MPM; + OptimizationLevel O; + + NewPM(std::unique_ptr TM, OptimizationLevel O, OptimizationOptions options = OptimizationOptions::defaults()); + + void run(Module &M); +}; + +struct AnalysisManagers { + LoopAnalysisManager LAM; + FunctionAnalysisManager FAM; + CGSCCAnalysisManager CGAM; + ModuleAnalysisManager MAM; + + AnalysisManagers(PassBuilder &PB); + AnalysisManagers(TargetMachine &TM, PassBuilder &PB, OptimizationLevel O); +}; + +OptimizationLevel getOptLevel(int optlevel); + + typedef struct _jl_llvm_functions_t { std::string functionObject; // jlcall llvm Function name std::string specFunctionObject; // specialized llvm Function name diff --git a/src/llvm-julia-passes.inc b/src/llvm-julia-passes.inc new file mode 100644 index 0000000000000..99dfc437d30e2 --- /dev/null +++ b/src/llvm-julia-passes.inc @@ -0,0 +1,29 @@ +//TODO clobber files when this changes in Makefile + +//Module passes +#ifdef MODULE_PASS +MODULE_PASS("CPUFeatures", CPUFeatures()) +MODULE_PASS("RemoveNI", RemoveNI()) +MODULE_PASS("LowerSIMDLoop", LowerSIMDLoop()) +MODULE_PASS("FinalLowerGC", FinalLowerGCPass()) +MODULE_PASS("JuliaMultiVersioning", MultiVersioning()) +MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass()) +MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass()) +MODULE_PASS("LowerPTLSPass", LowerPTLSPass()) +#endif + +//Function passes +#ifdef FUNCTION_PASS +FUNCTION_PASS("DemoteFloat16", DemoteFloat16()) +FUNCTION_PASS("CombineMulAdd", CombineMulAdd()) +FUNCTION_PASS("LateLowerGCFrame", LateLowerGC()) +FUNCTION_PASS("AllocOpt", AllocOptPass()) +FUNCTION_PASS("PropagateJuliaAddrspaces", PropagateJuliaAddrspacesPass()) +FUNCTION_PASS("LowerExcHandlers", LowerExcHandlers()) +FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass()) +#endif + +//Loop passes +#ifdef LOOP_PASS +LOOP_PASS("JuliaLICM", JuliaLICMPass()) +#endif diff --git a/src/pipeline.cpp b/src/pipeline.cpp new file mode 100644 index 0000000000000..3960981bf1404 --- /dev/null +++ b/src/pipeline.cpp @@ -0,0 +1,602 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#include +#include "platform.h" + +//We don't care about uninitialized variables in LLVM; that's LLVM's problem +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif + +// analysis passes +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// NewPM needs to manually include all the pass headers +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic pop +#endif + +#include "passes.h" + +#include + +#include "julia.h" +#include "julia_internal.h" +#include "jitlayers.h" +#include "julia_assert.h" + +using namespace llvm; + +namespace { + //Shamelessly stolen from Clang's approach to sanitizers + //TODO do we want to enable other sanitizers? + static void addSanitizerPasses(ModulePassManager &MPM, OptimizationLevel O) { + // Coverage sanitizer + // if (CodeGenOpts.hasSanitizeCoverage()) { + // auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); + // MPM.addPass(ModuleSanitizerCoveragePass( + // SancovOpts, CodeGenOpts.SanitizeCoverageAllowlistFiles, + // CodeGenOpts.SanitizeCoverageIgnorelistFiles)); + // } + + #ifdef _COMPILER_MSAN_ENABLED_ + auto MSanPass = [&](/*SanitizerMask Mask, */bool CompileKernel) { + // if (LangOpts.Sanitize.has(Mask)) { + // int TrackOrigins = CodeGenOpts.SanitizeMemoryTrackOrigins; + // bool Recover = CodeGenOpts.SanitizeRecover.has(Mask); + + // MemorySanitizerOptions options(TrackOrigins, Recover, CompileKernel, + // CodeGenOpts.SanitizeMemoryParamRetval); + MemorySanitizerOptions options; + MPM.addPass(ModuleMemorySanitizerPass(options)); + FunctionPassManager FPM; + FPM.addPass(MemorySanitizerPass(options)); + if (O != OptimizationLevel::O0) { + // MemorySanitizer inserts complex instrumentation that mostly + // follows the logic of the original code, but operates on + // "shadow" values. It can benefit from re-running some + // general purpose optimization passes. + FPM.addPass(EarlyCSEPass()); + // TODO: Consider add more passes like in + // addGeneralOptsForMemorySanitizer. EarlyCSEPass makes visible + // difference on size. It's not clear if the rest is still + // usefull. InstCombinePass breakes + // compiler-rt/test/msan/select_origin.cpp. + } + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + // } + }; + MSanPass(/*SanitizerKind::Memory, */false); + // MSanPass(SanitizerKind::KernelMemory, true); + #endif + + #ifdef _COMPILER_TSAN_ENABLED_ + // if (LangOpts.Sanitize.has(SanitizerKind::Thread)) { + MPM.addPass(ModuleThreadSanitizerPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass())); + // } + #endif + + + #ifdef _COMPILER_ASAN_ENABLED_ + auto ASanPass = [&](/*SanitizerMask Mask, */bool CompileKernel) { + // if (LangOpts.Sanitize.has(Mask)) { + // bool UseGlobalGC = asanUseGlobalsGC(TargetTriple, CodeGenOpts); + // bool UseOdrIndicator = CodeGenOpts.SanitizeAddressUseOdrIndicator; + // llvm::AsanDtorKind DestructorKind = + // CodeGenOpts.getSanitizeAddressDtor(); + // AddressSanitizerOptions Opts; + // Opts.CompileKernel = CompileKernel; + // Opts.Recover = CodeGenOpts.SanitizeRecover.has(Mask); + // Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope; + // Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn(); + MPM.addPass(RequireAnalysisPass()); + // MPM.addPass(ModuleAddressSanitizerPass( + // Opts, UseGlobalGC, UseOdrIndicator, DestructorKind)); + //Let's assume the defaults are actually fine for our purposes + MPM.addPass(ModuleAddressSanitizerPass(AddressSanitizerOptions())); + // } + }; + ASanPass(/*SanitizerKind::Address, */false); + // ASanPass(SanitizerKind::KernelAddress, true); + #endif + + // auto HWASanPass = [&](SanitizerMask Mask, bool CompileKernel) { + // if (LangOpts.Sanitize.has(Mask)) { + // bool Recover = CodeGenOpts.SanitizeRecover.has(Mask); + // MPM.addPass(HWAddressSanitizerPass( + // {CompileKernel, Recover, + // /*DisableOptimization=*/CodeGenOpts.OptimizationLevel == 0})); + // } + // }; + // HWASanPass(/*SanitizerKind::HWAddress, */false); + // // HWASanPass(SanitizerKind::KernelHWAddress, true); + + // if (LangOpts.Sanitize.has(SanitizerKind::DataFlow)) { + // MPM.addPass(DataFlowSanitizerPass(LangOpts.NoSanitizeFiles)); + // } + } + + void addVerificationPasses(ModulePassManager &MPM) { + FunctionPassManager FPM; + FPM.addPass(GCInvariantVerifierPass()); + FPM.addPass(VerifierPass()); + MPM.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(FPM))); + } + + auto basicSimplifyCFGOptions() { + return SimplifyCFGOptions() + .convertSwitchRangeToICmp(true) + .convertSwitchToLookupTable(true) + .forwardSwitchCondToPhi(true); + } + + auto aggressiveSimplifyCFGOptions() { + return SimplifyCFGOptions() + .convertSwitchRangeToICmp(true) + .convertSwitchToLookupTable(true) + .forwardSwitchCondToPhi(true) + //These mess with loop rotation, so only do them after that + .hoistCommonInsts(true) + // Causes an SRET assertion error in late-gc-lowering + // .sinkCommonInsts(true) + ; + } + + // TODO(vchuravy/maleadt): + // Since we are not using the PassBuilder fully and instead rolling our own, we are missing out on + // TargetMachine::registerPassBuilderCallbacks. We need to find a solution either in working with upstream + // or adapting PassBuilder (or subclassing it) to suite our needs. This is in particular important for + // BPF, NVPTX, and AMDGPU. + //TODO implement these once LLVM exposes + //the PassBuilder extension point callbacks + void invokePipelineStartCallbacks(ModulePassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokePeepholeEPCallbacks(FunctionPassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokeEarlySimplificationCallbacks(ModulePassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokeCGSCCCallbacks(CGSCCPassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokeOptimizerEarlyCallbacks(ModulePassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokeLateLoopOptimizationCallbacks(LoopPassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokeLoopOptimizerEndCallbacks(LoopPassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokeScalarOptimizerCallbacks(FunctionPassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokeVectorizerCallbacks(FunctionPassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokeOptimizerLastCallbacks(ModulePassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} +} + +//The actual pipelines +//TODO Things we might want to consider: +//? annotation2metadata pass +//? force function attributes pass +//? annotation remarks pass +//? infer function attributes pass +//? lower expect intrinsic pass +//? warn missed transformations pass +//* For vectorization +//? loop unroll/jam after loop vectorization +//? optimization remarks pass +//? cse/cvp/instcombine/bdce/sccp/licm/unswitch after loop vectorization ( +// cleanup as much as possible before trying to slp vectorize) +//? vectorcombine pass +//* For optimization +//? float2int pass +//? lower constant intrinsics pass +//? loop sink pass +//? hot-cold splitting pass + +//Use for O1 and below +void buildBasicPipeline(ModulePassManager &MPM, PassBuilder &PB, OptimizationLevel O, OptimizationOptions options) { +// #ifdef JL_DEBUG_BUILD + addVerificationPasses(MPM); +// #endif + invokePipelineStartCallbacks(MPM, PB, O); + MPM.addPass(ConstantMergePass()); + if (!options.dump_native) { + MPM.addPass(CPUFeatures()); + if (O.getSpeedupLevel() > 0) { + MPM.addPass(createModuleToFunctionPassAdaptor(InstSimplifyPass())); + } + } + { + FunctionPassManager FPM; + FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); + if (O.getSpeedupLevel() > 0) { + FPM.addPass(SROAPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(EarlyCSEPass()); + } + FPM.addPass(MemCpyOptPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + invokeEarlySimplificationCallbacks(MPM, PB, O); + MPM.addPass(AlwaysInlinerPass()); + { + CGSCCPassManager CGPM; + invokeCGSCCCallbacks(CGPM, PB, O); + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); + } + invokeOptimizerEarlyCallbacks(MPM, PB, O); + MPM.addPass(LowerSIMDLoop()); + { + FunctionPassManager FPM; + { + LoopPassManager LPM; + invokeLateLoopOptimizationCallbacks(LPM, PB, O); + invokeLoopOptimizerEndCallbacks(LPM, PB, O); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); + } + invokeScalarOptimizerCallbacks(FPM, PB, O); + invokeVectorizerCallbacks(FPM, PB, O); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + if (options.lower_intrinsics) { + //TODO no barrier pass? + { + FunctionPassManager FPM; + FPM.addPass(LowerExcHandlers()); + FPM.addPass(GCInvariantVerifierPass(false)); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + MPM.addPass(RemoveNI()); + MPM.addPass(createModuleToFunctionPassAdaptor(LateLowerGC())); + MPM.addPass(FinalLowerGCPass()); + MPM.addPass(LowerPTLSPass(options.dump_native)); + } else { + MPM.addPass(RemoveNI()); + } + MPM.addPass(LowerSIMDLoop()); // TODO why do we do this twice + if (options.dump_native) { + MPM.addPass(MultiVersioning(options.external_use)); + MPM.addPass(CPUFeatures()); + if (O.getSpeedupLevel() > 0) { + FunctionPassManager FPM; + FPM.addPass(InstSimplifyPass()); + FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + } + invokeOptimizerLastCallbacks(MPM, PB, O); + addSanitizerPasses(MPM, O); + MPM.addPass(createModuleToFunctionPassAdaptor(DemoteFloat16())); +} + +//Use for O2 and above +void buildFullPipeline(ModulePassManager &MPM, PassBuilder &PB, OptimizationLevel O, OptimizationOptions options) { +// #ifdef JL_DEBUG_BUILD + addVerificationPasses(MPM); +// #endif + invokePipelineStartCallbacks(MPM, PB, O); + MPM.addPass(ConstantMergePass()); + { + FunctionPassManager FPM; + FPM.addPass(PropagateJuliaAddrspacesPass()); + //TODO consider not using even basic simplification + //options here, and adding a run of CVP to take advantage + //of the unsimplified codegen information (e.g. known + //zeros or ones) + FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); + FPM.addPass(DCEPass()); + FPM.addPass(SROAPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + invokeEarlySimplificationCallbacks(MPM, PB, O); + MPM.addPass(AlwaysInlinerPass()); + invokeOptimizerEarlyCallbacks(MPM, PB, O); + { + CGSCCPassManager CGPM; + invokeCGSCCCallbacks(CGPM, PB, O); + { + FunctionPassManager FPM; + FPM.addPass(AllocOptPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); + CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); + } + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); + } + if (options.dump_native) { + MPM.addPass(MultiVersioning(options.external_use)); + } + MPM.addPass(CPUFeatures()); + { + FunctionPassManager FPM; + FPM.addPass(SROAPass()); + FPM.addPass(InstSimplifyPass()); + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(ReassociatePass()); + FPM.addPass(EarlyCSEPass()); + FPM.addPass(AllocOptPass()); + invokePeepholeEPCallbacks(FPM, PB, O); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + MPM.addPass(LowerSIMDLoop()); + { + FunctionPassManager FPM; + { + LoopPassManager LPM1, LPM2; + LPM1.addPass(LoopRotatePass()); + invokeLateLoopOptimizationCallbacks(LPM1, PB, O); + //We don't know if the loop callbacks support MSSA + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), /*UseMemorySSA = */false)); + LPM2.addPass(LICMPass()); + LPM2.addPass(JuliaLICMPass()); + LPM2.addPass(SimpleLoopUnswitchPass()); + LPM2.addPass(LICMPass()); + LPM2.addPass(JuliaLICMPass()); + //LICM needs MemorySSA now, so we must use it + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), /*UseMemorySSA = */true)); + } + FPM.addPass(IRCEPass()); + { + LoopPassManager LPM; + LPM.addPass(LoopInstSimplifyPass()); + LPM.addPass(LoopIdiomRecognizePass()); + LPM.addPass(IndVarSimplifyPass()); + LPM.addPass(LoopDeletionPass()); + invokeLoopOptimizerEndCallbacks(LPM, PB, O); + //We don't know if the loop end callbacks support MSSA + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); + } + FPM.addPass(LoopUnrollPass()); + FPM.addPass(AllocOptPass()); + FPM.addPass(SROAPass()); + FPM.addPass(InstSimplifyPass()); + FPM.addPass(GVNPass()); + FPM.addPass(MemCpyOptPass()); + FPM.addPass(SCCPPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(DCEPass()); + FPM.addPass(IRCEPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(JumpThreadingPass()); + if (O.getSpeedupLevel() >= 3) { + FPM.addPass(GVNPass()); + } + FPM.addPass(DSEPass()); + invokePeepholeEPCallbacks(FPM, PB, O); + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); + FPM.addPass(AllocOptPass()); + { + LoopPassManager LPM; + LPM.addPass(LoopDeletionPass()); + LPM.addPass(LoopInstSimplifyPass()); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); + } + invokeScalarOptimizerCallbacks(FPM, PB, O); + //TODO look into loop vectorize options + FPM.addPass(LoopVectorizePass()); + FPM.addPass(LoopLoadEliminationPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); + FPM.addPass(SLPVectorizerPass()); + invokeVectorizerCallbacks(FPM, PB, O); + FPM.addPass(ADCEPass()); + //TODO add BDCEPass here? + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + if (options.lower_intrinsics) { + //TODO barrier pass? + { + FunctionPassManager FPM; + FPM.addPass(LowerExcHandlers()); + FPM.addPass(GCInvariantVerifierPass(false)); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + // Needed **before** LateLowerGCFrame on LLVM < 12 + // due to bug in `CreateAlignmentAssumption`. + MPM.addPass(RemoveNI()); + MPM.addPass(createModuleToFunctionPassAdaptor(LateLowerGC())); + MPM.addPass(FinalLowerGCPass()); + { + FunctionPassManager FPM; + FPM.addPass(GVNPass()); + FPM.addPass(SCCPPass()); + FPM.addPass(DCEPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + MPM.addPass(LowerPTLSPass(options.dump_native)); + { + FunctionPassManager FPM; + FPM.addPass(InstCombinePass()); + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + } else { + MPM.addPass(RemoveNI()); + } + { + FunctionPassManager FPM; + FPM.addPass(CombineMulAdd()); + FPM.addPass(DivRemPairsPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + invokeOptimizerLastCallbacks(MPM, PB, O); + addSanitizerPasses(MPM, O); + { + FunctionPassManager FPM; + FPM.addPass(DemoteFloat16()); + FPM.addPass(GVNPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } +} + +namespace { + auto createPIC(StandardInstrumentations &SI) { + auto PIC = std::make_unique(); +//Borrowed from LLVM PassBuilder.cpp:386 +#define MODULE_PASS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +PIC->addClassToPassName(CLASS, NAME); +#define MODULE_ANALYSIS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define FUNCTION_PASS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +PIC->addClassToPassName(CLASS, NAME); +#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define LOOPNEST_PASS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define LOOP_PASS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +PIC->addClassToPassName(CLASS, NAME); +#define LOOP_ANALYSIS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define CGSCC_PASS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +PIC->addClassToPassName(CLASS, NAME); +#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); + +#include + +#undef MODULE_PASS +#undef MODULE_PASS_WITH_PARAMS +#undef MODULE_ANALYSIS +#undef FUNCTION_PASS +#undef FUNCTION_PASS_WITH_PARAMS +#undef FUNCTION_ANALYSIS +#undef LOOPNEST_PASS +#undef LOOP_PASS +#undef LOOP_PASS_WITH_PARAMS +#undef LOOP_ANALYSIS +#undef CGSCC_PASS +#undef CGSCC_PASS_WITH_PARAMS +#undef CGSCC_ANALYSIS + + SI.registerCallbacks(*PIC); + return PIC; + } + + FunctionAnalysisManager createFAM(OptimizationLevel O, TargetIRAnalysis analysis, const Triple &triple) { + + FunctionAnalysisManager FAM; + // Register the AA manager first so that our version is the one used. + FAM.registerPass([&] { + AAManager AA; + // TODO: Why are we only doing this for -O3? + if (O.getSpeedupLevel() >= 3) { + AA.registerFunctionAnalysis(); + } + if (O.getSpeedupLevel() >= 2) { + AA.registerFunctionAnalysis(); + AA.registerFunctionAnalysis(); + } + // TM->registerDefaultAliasAnalyses(AA); + return AA; + }); + // Register our TargetLibraryInfoImpl. + FAM.registerPass([&] { return llvm::TargetIRAnalysis(analysis); }); + FAM.registerPass([&] { return llvm::TargetLibraryAnalysis(llvm::TargetLibraryInfoImpl(triple)); }); + return FAM; + } + + ModulePassManager createMPM(PassBuilder &PB, OptimizationLevel O, OptimizationOptions options) { + ModulePassManager MPM; + if (O.getSpeedupLevel() < 2) + buildBasicPipeline(MPM, PB, O, options); + else + buildFullPipeline(MPM, PB, O, options); + return MPM; + } +} + +NewPM::NewPM(std::unique_ptr TM, OptimizationLevel O, OptimizationOptions options) : + TM(std::move(TM)), SI(false), PIC(createPIC(SI)), + PB(this->TM.get(), PipelineTuningOptions(), None, PIC.get()), + MPM(createMPM(PB, O, options)), O(O) {} + +AnalysisManagers::AnalysisManagers(TargetMachine &TM, PassBuilder &PB, OptimizationLevel O) : LAM(), FAM(createFAM(O, TM.getTargetIRAnalysis(), TM.getTargetTriple())), CGAM(), MAM() { + PB.registerLoopAnalyses(LAM); + PB.registerFunctionAnalyses(FAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerModuleAnalyses(MAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); +} + +AnalysisManagers::AnalysisManagers(PassBuilder &PB) : LAM(), FAM(), CGAM(), MAM() { + PB.registerLoopAnalyses(LAM); + PB.registerFunctionAnalyses(FAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerModuleAnalyses(MAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); +} + +void NewPM::run(Module &M) { + //We must recreate the analysis managers every time + //so that analyses from previous runs of the pass manager + //do not hang around for the next run + AnalysisManagers AM{*TM, PB, O}; + MPM.run(M, AM.MAM); +} + +OptimizationLevel getOptLevel(int optlevel) { + switch (std::min(std::max(optlevel, 0), 3)) { + case 0: + return OptimizationLevel::O0; + case 1: + return OptimizationLevel::O1; + case 2: + return OptimizationLevel::O2; + case 3: + return OptimizationLevel::O3; + } + llvm_unreachable("cannot get here!"); +}