From 2e0b75c3696cf82c5dfa0c034a5e0b110691a29f Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Fri, 29 Jul 2022 21:11:45 -0700 Subject: [PATCH] Create the new pass manager pipelines (#46175) * Create basic NewPM structures * Replace incidental uses of the legacy pass manager with the new pass manager * Run the MC emitter --- src/Makefile | 3 +- src/aotcompile.cpp | 92 ++---- src/disasm.cpp | 8 +- src/jitlayers.h | 40 +++ src/llvm-julia-passes.inc | 27 ++ src/pipeline.cpp | 604 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 700 insertions(+), 74 deletions(-) create mode 100644 src/llvm-julia-passes.inc create mode 100644 src/pipeline.cpp diff --git a/src/Makefile b/src/Makefile index 90455d51e9345..8b996f28aeee0 100644 --- a/src/Makefile +++ b/src/Makefile @@ -56,7 +56,7 @@ CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop llvm llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering llvm-ptls \ llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \ llvm-multiversioning llvm-alloc-opt llvm-alloc-helpers cgmemmgr llvm-remove-addrspaces \ - llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures + llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures pipeline FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir) CG_LLVM_LIBS := all ifeq ($(USE_POLLY),1) @@ -317,6 +317,7 @@ $(BUILDDIR)/signal-handling.o $(BUILDDIR)/signal-handling.dbg.obj: $(addprefix $ $(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h $(BUILDDIR)/toplevel.o $(BUILDDIR)/toplevel.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/ircode.o $(BUILDDIR)/ircode.dbg.obj: $(SRCDIR)/serialize.h +$(BUILDDIR)/pipeline.o $(BUILDDIR)/pipeline.dbg.obj: $(SRCDIR)/passes.h $(SRCDIR)/jitlayers.h $(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c init.dbg.obj task.o task.dbg.obj): $(addprefix 
$(SRCDIR)/,threading.h) $(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 7a19b34bd6824..1a43fc450db6f 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -515,20 +515,23 @@ void jl_dump_native_impl(void *native_code, std::vector unopt_bc_Archive; std::vector outputs; - legacy::PassManager preopt, postopt; + PassBuilder emptyPB; + AnalysisManagers empty(emptyPB); + ModulePassManager preopt, postopt; + legacy::PassManager emitter; // MC emission is only supported on legacy PM if (unopt_bc_fname) - preopt.add(createBitcodeWriterPass(unopt_bc_OS)); + preopt.addPass(BitcodeWriterPass(unopt_bc_OS)); - //Is this necessary for TM? - // addTargetPasses(&postopt, TM->getTargetTriple(), TM->getTargetIRAnalysis()); if (bc_fname) - postopt.add(createBitcodeWriterPass(bc_OS)); + postopt.addPass(BitcodeWriterPass(bc_OS)); + //Is this necessary for TM? + addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis()); if (obj_fname) - if (TM->addPassesToEmitFile(postopt, obj_OS, nullptr, CGFT_ObjectFile, false)) + if (TM->addPassesToEmitFile(emitter, obj_OS, nullptr, CGFT_ObjectFile, false)) jl_safe_printf("ERROR: target does not support generation of object files\n"); if (asm_fname) - if (TM->addPassesToEmitFile(postopt, asm_OS, nullptr, CGFT_AssemblyFile, false)) + if (TM->addPassesToEmitFile(emitter, asm_OS, nullptr, CGFT_AssemblyFile, false)) jl_safe_printf("ERROR: target does not support generation of object files\n"); legacy::PassManager optimizer; @@ -567,7 +570,7 @@ void jl_dump_native_impl(void *native_code, // do the actual work auto add_output = [&] (Module &M, StringRef unopt_bc_Name, StringRef bc_Name, StringRef obj_Name, StringRef asm_Name) { - preopt.run(M); + preopt.run(M, empty.MAM); optimizer.run(M); // We would like to emit an alias or an weakref alias to redirect these symbols @@ -585,7 +588,8 @@ void 
jl_dump_native_impl(void *native_code, injectCRTAlias(M, "__truncdfhf2", "julia__truncdfhf2", FunctionType::get(Type::getHalfTy(Context), { Type::getDoubleTy(Context) }, false)); - postopt.run(M); + postopt.run(M, empty.MAM); + emitter.run(M); if (unopt_bc_fname) emit_result(unopt_bc_Archive, unopt_bc_Buffer, unopt_bc_Name, outputs); @@ -946,79 +950,27 @@ static void registerCallbacks(PassBuilder &PB) { PB.registerPipelineParsingCallback( [](StringRef Name, FunctionPassManager &PM, ArrayRef InnerPipeline) { - if (Name == "DemoteFloat16") { - PM.addPass(DemoteFloat16()); - return true; - } - if (Name == "CombineMulAdd") { - PM.addPass(CombineMulAdd()); - return true; - } - if (Name == "LateLowerGCFrame") { - PM.addPass(LateLowerGC()); - return true; - } - if (Name == "AllocOpt") { - PM.addPass(AllocOptPass()); - return true; - } - if (Name == "PropagateJuliaAddrspaces") { - PM.addPass(PropagateJuliaAddrspacesPass()); - return true; - } - if (Name == "LowerExcHandlers") { - PM.addPass(LowerExcHandlers()); - return true; - } - if (Name == "GCInvariantVerifier") { - // TODO: Parse option and allow users to set `Strong` - PM.addPass(GCInvariantVerifierPass()); - return true; - } +#define FUNCTION_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#include "llvm-julia-passes.inc" +#undef FUNCTION_PASS return false; }); PB.registerPipelineParsingCallback( [](StringRef Name, ModulePassManager &PM, ArrayRef InnerPipeline) { - if (Name == "CPUFeatures") { - PM.addPass(CPUFeatures()); - return true; - } - if (Name == "RemoveNI") { - PM.addPass(RemoveNI()); - return true; - } - if (Name == "LowerSIMDLoop") { - PM.addPass(LowerSIMDLoop()); - return true; - } - if (Name == "FinalLowerGC") { - PM.addPass(FinalLowerGCPass()); - return true; - } - if (Name == "RemoveJuliaAddrspaces") { - PM.addPass(RemoveJuliaAddrspacesPass()); - return true; - } - if (Name == "MultiVersioning") { - PM.addPass(MultiVersioning()); - return true; - } - if (Name == 
"LowerPTLS") { - PM.addPass(LowerPTLSPass()); - return true; - } +#define MODULE_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#include "llvm-julia-passes.inc" +#undef MODULE_PASS return false; }); PB.registerPipelineParsingCallback( [](StringRef Name, LoopPassManager &PM, ArrayRef InnerPipeline) { - if (Name == "JuliaLICM") { - PM.addPass(JuliaLICMPass()); - return true; - } +#define LOOP_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#include "llvm-julia-passes.inc" +#undef LOOP_PASS return false; }); } diff --git a/src/disasm.cpp b/src/disasm.cpp index 838934a6c5893..69692da4c4b16 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -482,9 +482,11 @@ void jl_strip_llvm_debug(Module *m) void jl_strip_llvm_addrspaces(Module *m) { - legacy::PassManager PM; - PM.add(createRemoveJuliaAddrspacesPass()); - PM.run(*m); + PassBuilder PB; + AnalysisManagers AM(PB); + ModulePassManager MPM; + MPM.addPass(RemoveJuliaAddrspacesPass()); + MPM.run(*m, AM.MAM); } // print an llvm IR acquired from jl_get_llvmf diff --git a/src/jitlayers.h b/src/jitlayers.h index c4a89f882beaa..54a76630330f8 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -13,9 +13,14 @@ #include #include +#include +#include +#include + #include #include "julia_assert.h" #include "debug-registry.h" +#include "platform.h" #include #include @@ -69,6 +74,41 @@ static inline bool imaging_default() { return jl_options.image_codegen || (jl_generating_output() && !jl_options.incremental); } +struct OptimizationOptions { + bool lower_intrinsics; + bool dump_native; + bool external_use; + + static constexpr OptimizationOptions defaults() { + return {true, false, false}; + } +}; + +struct NewPM { + std::unique_ptr TM; + StandardInstrumentations SI; + std::unique_ptr PIC; + PassBuilder PB; + ModulePassManager MPM; + OptimizationLevel O; + + NewPM(std::unique_ptr TM, OptimizationLevel O, OptimizationOptions options = 
OptimizationOptions::defaults()); + + void run(Module &M); +}; + +struct AnalysisManagers { + LoopAnalysisManager LAM; + FunctionAnalysisManager FAM; + CGSCCAnalysisManager CGAM; + ModuleAnalysisManager MAM; + + AnalysisManagers(PassBuilder &PB); + AnalysisManagers(TargetMachine &TM, PassBuilder &PB, OptimizationLevel O); +}; + +OptimizationLevel getOptLevel(int optlevel); + struct jl_locked_stream { JL_STREAM *stream = nullptr; std::mutex mutex; diff --git a/src/llvm-julia-passes.inc b/src/llvm-julia-passes.inc new file mode 100644 index 0000000000000..18e0f645d5445 --- /dev/null +++ b/src/llvm-julia-passes.inc @@ -0,0 +1,27 @@ +//Module passes +#ifdef MODULE_PASS +MODULE_PASS("CPUFeatures", CPUFeatures()) +MODULE_PASS("RemoveNI", RemoveNI()) +MODULE_PASS("LowerSIMDLoop", LowerSIMDLoop()) +MODULE_PASS("FinalLowerGC", FinalLowerGCPass()) +MODULE_PASS("JuliaMultiVersioning", MultiVersioning()) +MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass()) +MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass()) +MODULE_PASS("LowerPTLSPass", LowerPTLSPass()) +#endif + +//Function passes +#ifdef FUNCTION_PASS +FUNCTION_PASS("DemoteFloat16", DemoteFloat16()) +FUNCTION_PASS("CombineMulAdd", CombineMulAdd()) +FUNCTION_PASS("LateLowerGCFrame", LateLowerGC()) +FUNCTION_PASS("AllocOpt", AllocOptPass()) +FUNCTION_PASS("PropagateJuliaAddrspaces", PropagateJuliaAddrspacesPass()) +FUNCTION_PASS("LowerExcHandlers", LowerExcHandlers()) +FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass()) +#endif + +//Loop passes +#ifdef LOOP_PASS +LOOP_PASS("JuliaLICM", JuliaLICMPass()) +#endif diff --git a/src/pipeline.cpp b/src/pipeline.cpp new file mode 100644 index 0000000000000..d9602ad7010d4 --- /dev/null +++ b/src/pipeline.cpp @@ -0,0 +1,604 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#include +#include "platform.h" + +//We don't care about uninitialized variables in LLVM; that's LLVM's problem +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif + +// analysis passes +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// NewPM needs to manually include all the pass headers +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic pop +#endif + +#include "passes.h" + +#include + +#include "julia.h" +#include "julia_internal.h" +#include "jitlayers.h" +#include "julia_assert.h" + +using namespace llvm; + +namespace { + //Shamelessly stolen from Clang's approach to sanitizers + //TODO do we want to enable other sanitizers? 
+ static void addSanitizerPasses(ModulePassManager &MPM, OptimizationLevel O) { + // Coverage sanitizer + // if (CodeGenOpts.hasSanitizeCoverage()) { + // auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); + // MPM.addPass(ModuleSanitizerCoveragePass( + // SancovOpts, CodeGenOpts.SanitizeCoverageAllowlistFiles, + // CodeGenOpts.SanitizeCoverageIgnorelistFiles)); + // } + + #ifdef _COMPILER_MSAN_ENABLED_ + auto MSanPass = [&](/*SanitizerMask Mask, */bool CompileKernel) { + // if (LangOpts.Sanitize.has(Mask)) { + // int TrackOrigins = CodeGenOpts.SanitizeMemoryTrackOrigins; + // bool Recover = CodeGenOpts.SanitizeRecover.has(Mask); + + // MemorySanitizerOptions options(TrackOrigins, Recover, CompileKernel, + // CodeGenOpts.SanitizeMemoryParamRetval); + MemorySanitizerOptions options; + MPM.addPass(ModuleMemorySanitizerPass(options)); + FunctionPassManager FPM; + FPM.addPass(MemorySanitizerPass(options)); + if (O != OptimizationLevel::O0) { + // MemorySanitizer inserts complex instrumentation that mostly + // follows the logic of the original code, but operates on + // "shadow" values. It can benefit from re-running some + // general purpose optimization passes. + FPM.addPass(EarlyCSEPass()); + // TODO: Consider adding more passes like in + // addGeneralOptsForMemorySanitizer. EarlyCSEPass makes visible + // difference on size. It's not clear if the rest is still + // useful. InstCombinePass breaks + // compiler-rt/test/msan/select_origin.cpp.
+ } + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + // } + }; + MSanPass(/*SanitizerKind::Memory, */false); + // MSanPass(SanitizerKind::KernelMemory, true); + #endif + + #ifdef _COMPILER_TSAN_ENABLED_ + // if (LangOpts.Sanitize.has(SanitizerKind::Thread)) { + MPM.addPass(ModuleThreadSanitizerPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass())); + // } + #endif + + + #ifdef _COMPILER_ASAN_ENABLED_ + auto ASanPass = [&](/*SanitizerMask Mask, */bool CompileKernel) { + // if (LangOpts.Sanitize.has(Mask)) { + // bool UseGlobalGC = asanUseGlobalsGC(TargetTriple, CodeGenOpts); + // bool UseOdrIndicator = CodeGenOpts.SanitizeAddressUseOdrIndicator; + // llvm::AsanDtorKind DestructorKind = + // CodeGenOpts.getSanitizeAddressDtor(); + // AddressSanitizerOptions Opts; + // Opts.CompileKernel = CompileKernel; + // Opts.Recover = CodeGenOpts.SanitizeRecover.has(Mask); + // Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope; + // Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn(); + MPM.addPass(RequireAnalysisPass()); + // MPM.addPass(ModuleAddressSanitizerPass( + // Opts, UseGlobalGC, UseOdrIndicator, DestructorKind)); + //Let's assume the defaults are actually fine for our purposes + MPM.addPass(ModuleAddressSanitizerPass(AddressSanitizerOptions())); + // } + }; + ASanPass(/*SanitizerKind::Address, */false); + // ASanPass(SanitizerKind::KernelAddress, true); + #endif + + // auto HWASanPass = [&](SanitizerMask Mask, bool CompileKernel) { + // if (LangOpts.Sanitize.has(Mask)) { + // bool Recover = CodeGenOpts.SanitizeRecover.has(Mask); + // MPM.addPass(HWAddressSanitizerPass( + // {CompileKernel, Recover, + // /*DisableOptimization=*/CodeGenOpts.OptimizationLevel == 0})); + // } + // }; + // HWASanPass(/*SanitizerKind::HWAddress, */false); + // // HWASanPass(SanitizerKind::KernelHWAddress, true); + + // if (LangOpts.Sanitize.has(SanitizerKind::DataFlow)) { + // 
MPM.addPass(DataFlowSanitizerPass(LangOpts.NoSanitizeFiles)); + // } + } + + void addVerificationPasses(ModulePassManager &MPM) { + FunctionPassManager FPM; + FPM.addPass(GCInvariantVerifierPass()); + FPM.addPass(VerifierPass()); + MPM.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(FPM))); + } + + auto basicSimplifyCFGOptions() { + return SimplifyCFGOptions() + .convertSwitchRangeToICmp(true) + .convertSwitchToLookupTable(true) + .forwardSwitchCondToPhi(true); + } + + auto aggressiveSimplifyCFGOptions() { + return SimplifyCFGOptions() + .convertSwitchRangeToICmp(true) + .convertSwitchToLookupTable(true) + .forwardSwitchCondToPhi(true) + //These mess with loop rotation, so only do them after that + .hoistCommonInsts(true) + // Causes an SRET assertion error in late-gc-lowering + // .sinkCommonInsts(true) + ; + } + + // TODO(vchuravy/maleadt): + // Since we are not using the PassBuilder fully and instead rolling our own, we are missing out on + // TargetMachine::registerPassBuilderCallbacks. We need to find a solution either in working with upstream + // or adapting PassBuilder (or subclassing it) to suit our needs. This is in particular important for + // BPF, NVPTX, and AMDGPU.
+ //TODO implement these once LLVM exposes + //the PassBuilder extension point callbacks + //For now we'll maintain the insertion points even though they don't do anything + //for the sake of documentation + void invokePipelineStartCallbacks(ModulePassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokePeepholeEPCallbacks(FunctionPassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokeEarlySimplificationCallbacks(ModulePassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokeCGSCCCallbacks(CGSCCPassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokeOptimizerEarlyCallbacks(ModulePassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokeLateLoopOptimizationCallbacks(LoopPassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokeLoopOptimizerEndCallbacks(LoopPassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokeScalarOptimizerCallbacks(FunctionPassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokeVectorizerCallbacks(FunctionPassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} + void invokeOptimizerLastCallbacks(ModulePassManager &MPM, PassBuilder &PB, OptimizationLevel O) {} +} + +//The actual pipelines +//TODO Things we might want to consider: +//? annotation2metadata pass +//? force function attributes pass +//? annotation remarks pass +//? infer function attributes pass +//? lower expect intrinsic pass +//? warn missed transformations pass +//* For vectorization +//? loop unroll/jam after loop vectorization +//? optimization remarks pass +//? cse/cvp/instcombine/bdce/sccp/licm/unswitch after loop vectorization ( +// cleanup as much as possible before trying to slp vectorize) +//? vectorcombine pass +//* For optimization +//? float2int pass +//? lower constant intrinsics pass +//? loop sink pass +//? 
hot-cold splitting pass + +//Use for O1 and below +void buildBasicPipeline(ModulePassManager &MPM, PassBuilder &PB, OptimizationLevel O, OptimizationOptions options) { +// #ifdef JL_DEBUG_BUILD + addVerificationPasses(MPM); +// #endif + invokePipelineStartCallbacks(MPM, PB, O); + MPM.addPass(ConstantMergePass()); + if (!options.dump_native) { + MPM.addPass(CPUFeatures()); + if (O.getSpeedupLevel() > 0) { + MPM.addPass(createModuleToFunctionPassAdaptor(InstSimplifyPass())); + } + } + { + FunctionPassManager FPM; + FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); + if (O.getSpeedupLevel() > 0) { + FPM.addPass(SROAPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(EarlyCSEPass()); + } + FPM.addPass(MemCpyOptPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + invokeEarlySimplificationCallbacks(MPM, PB, O); + MPM.addPass(AlwaysInlinerPass()); + { + CGSCCPassManager CGPM; + invokeCGSCCCallbacks(CGPM, PB, O); + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); + } + invokeOptimizerEarlyCallbacks(MPM, PB, O); + MPM.addPass(LowerSIMDLoop()); + { + FunctionPassManager FPM; + { + LoopPassManager LPM; + invokeLateLoopOptimizationCallbacks(LPM, PB, O); + invokeLoopOptimizerEndCallbacks(LPM, PB, O); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); + } + invokeScalarOptimizerCallbacks(FPM, PB, O); + invokeVectorizerCallbacks(FPM, PB, O); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + if (options.lower_intrinsics) { + //TODO no barrier pass? 
+ { + FunctionPassManager FPM; + FPM.addPass(LowerExcHandlers()); + FPM.addPass(GCInvariantVerifierPass(false)); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + MPM.addPass(RemoveNI()); + MPM.addPass(createModuleToFunctionPassAdaptor(LateLowerGC())); + MPM.addPass(FinalLowerGCPass()); + MPM.addPass(LowerPTLSPass(options.dump_native)); + } else { + MPM.addPass(RemoveNI()); + } + MPM.addPass(LowerSIMDLoop()); // TODO why do we do this twice + if (options.dump_native) { + MPM.addPass(MultiVersioning(options.external_use)); + MPM.addPass(CPUFeatures()); + if (O.getSpeedupLevel() > 0) { + FunctionPassManager FPM; + FPM.addPass(InstSimplifyPass()); + FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + } + invokeOptimizerLastCallbacks(MPM, PB, O); + addSanitizerPasses(MPM, O); + MPM.addPass(createModuleToFunctionPassAdaptor(DemoteFloat16())); +} + +//Use for O2 and above +void buildFullPipeline(ModulePassManager &MPM, PassBuilder &PB, OptimizationLevel O, OptimizationOptions options) { +// #ifdef JL_DEBUG_BUILD + addVerificationPasses(MPM); +// #endif + invokePipelineStartCallbacks(MPM, PB, O); + MPM.addPass(ConstantMergePass()); + { + FunctionPassManager FPM; + FPM.addPass(PropagateJuliaAddrspacesPass()); + //TODO consider not using even basic simplification + //options here, and adding a run of CVP to take advantage + //of the unsimplified codegen information (e.g. 
known + //zeros or ones) + FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); + FPM.addPass(DCEPass()); + FPM.addPass(SROAPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + invokeEarlySimplificationCallbacks(MPM, PB, O); + MPM.addPass(AlwaysInlinerPass()); + invokeOptimizerEarlyCallbacks(MPM, PB, O); + { + CGSCCPassManager CGPM; + invokeCGSCCCallbacks(CGPM, PB, O); + { + FunctionPassManager FPM; + FPM.addPass(AllocOptPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); + CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); + } + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); + } + if (options.dump_native) { + MPM.addPass(MultiVersioning(options.external_use)); + } + MPM.addPass(CPUFeatures()); + { + FunctionPassManager FPM; + FPM.addPass(SROAPass()); + FPM.addPass(InstSimplifyPass()); + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(ReassociatePass()); + FPM.addPass(EarlyCSEPass()); + FPM.addPass(AllocOptPass()); + invokePeepholeEPCallbacks(FPM, PB, O); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + MPM.addPass(LowerSIMDLoop()); + { + FunctionPassManager FPM; + { + LoopPassManager LPM1, LPM2; + LPM1.addPass(LoopRotatePass()); + invokeLateLoopOptimizationCallbacks(LPM1, PB, O); + //We don't know if the loop callbacks support MSSA + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), /*UseMemorySSA = */false)); + LPM2.addPass(LICMPass()); + LPM2.addPass(JuliaLICMPass()); + LPM2.addPass(SimpleLoopUnswitchPass()); + LPM2.addPass(LICMPass()); + LPM2.addPass(JuliaLICMPass()); + //LICM needs MemorySSA now, so we must use it + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), /*UseMemorySSA = */true)); + } + FPM.addPass(IRCEPass()); + { + LoopPassManager LPM; + LPM.addPass(LoopInstSimplifyPass()); + LPM.addPass(LoopIdiomRecognizePass()); + 
LPM.addPass(IndVarSimplifyPass()); + LPM.addPass(LoopDeletionPass()); + invokeLoopOptimizerEndCallbacks(LPM, PB, O); + //We don't know if the loop end callbacks support MSSA + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); + } + FPM.addPass(LoopUnrollPass()); + FPM.addPass(AllocOptPass()); + FPM.addPass(SROAPass()); + FPM.addPass(InstSimplifyPass()); + FPM.addPass(GVNPass()); + FPM.addPass(MemCpyOptPass()); + FPM.addPass(SCCPPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(DCEPass()); + FPM.addPass(IRCEPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(JumpThreadingPass()); + if (O.getSpeedupLevel() >= 3) { + FPM.addPass(GVNPass()); + } + FPM.addPass(DSEPass()); + invokePeepholeEPCallbacks(FPM, PB, O); + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); + FPM.addPass(AllocOptPass()); + { + LoopPassManager LPM; + LPM.addPass(LoopDeletionPass()); + LPM.addPass(LoopInstSimplifyPass()); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); + } + invokeScalarOptimizerCallbacks(FPM, PB, O); + //TODO look into loop vectorize options + FPM.addPass(LoopVectorizePass()); + FPM.addPass(LoopLoadEliminationPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); + FPM.addPass(SLPVectorizerPass()); + invokeVectorizerCallbacks(FPM, PB, O); + FPM.addPass(ADCEPass()); + //TODO add BDCEPass here? + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + if (options.lower_intrinsics) { + //TODO barrier pass? + { + FunctionPassManager FPM; + FPM.addPass(LowerExcHandlers()); + FPM.addPass(GCInvariantVerifierPass(false)); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + // Needed **before** LateLowerGCFrame on LLVM < 12 + // due to bug in `CreateAlignmentAssumption`. 
+ MPM.addPass(RemoveNI()); + MPM.addPass(createModuleToFunctionPassAdaptor(LateLowerGC())); + MPM.addPass(FinalLowerGCPass()); + { + FunctionPassManager FPM; + FPM.addPass(GVNPass()); + FPM.addPass(SCCPPass()); + FPM.addPass(DCEPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + MPM.addPass(LowerPTLSPass(options.dump_native)); + { + FunctionPassManager FPM; + FPM.addPass(InstCombinePass()); + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + } else { + MPM.addPass(RemoveNI()); + } + { + FunctionPassManager FPM; + FPM.addPass(CombineMulAdd()); + FPM.addPass(DivRemPairsPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + invokeOptimizerLastCallbacks(MPM, PB, O); + addSanitizerPasses(MPM, O); + { + FunctionPassManager FPM; + FPM.addPass(DemoteFloat16()); + FPM.addPass(GVNPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } +} + +namespace { + auto createPIC(StandardInstrumentations &SI) { + auto PIC = std::make_unique(); +//Borrowed from LLVM PassBuilder.cpp:386 +#define MODULE_PASS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +PIC->addClassToPassName(CLASS, NAME); +#define MODULE_ANALYSIS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define FUNCTION_PASS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +PIC->addClassToPassName(CLASS, NAME); +#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define LOOPNEST_PASS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define LOOP_PASS(NAME, CREATE_PASS) \ 
+PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +PIC->addClassToPassName(CLASS, NAME); +#define LOOP_ANALYSIS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define CGSCC_PASS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +PIC->addClassToPassName(CLASS, NAME); +#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); + +#include "llvm-julia-passes.inc" + +#undef MODULE_PASS +#undef MODULE_PASS_WITH_PARAMS +#undef MODULE_ANALYSIS +#undef FUNCTION_PASS +#undef FUNCTION_PASS_WITH_PARAMS +#undef FUNCTION_ANALYSIS +#undef LOOPNEST_PASS +#undef LOOP_PASS +#undef LOOP_PASS_WITH_PARAMS +#undef LOOP_ANALYSIS +#undef CGSCC_PASS +#undef CGSCC_PASS_WITH_PARAMS +#undef CGSCC_ANALYSIS + + SI.registerCallbacks(*PIC); + return PIC; + } + + FunctionAnalysisManager createFAM(OptimizationLevel O, TargetIRAnalysis analysis, const Triple &triple) { + + FunctionAnalysisManager FAM; + // Register the AA manager first so that our version is the one used. + FAM.registerPass([&] { + AAManager AA; + // TODO: Why are we only doing this for -O3? + if (O.getSpeedupLevel() >= 3) { + AA.registerFunctionAnalysis(); + } + if (O.getSpeedupLevel() >= 2) { + AA.registerFunctionAnalysis(); + AA.registerFunctionAnalysis(); + } + // TM->registerDefaultAliasAnalyses(AA); + return AA; + }); + // Register our TargetLibraryInfoImpl. 
+ FAM.registerPass([&] { return llvm::TargetIRAnalysis(analysis); }); + FAM.registerPass([&] { return llvm::TargetLibraryAnalysis(llvm::TargetLibraryInfoImpl(triple)); }); + return FAM; + } + + ModulePassManager createMPM(PassBuilder &PB, OptimizationLevel O, OptimizationOptions options) { + ModulePassManager MPM; + if (O.getSpeedupLevel() < 2) + buildBasicPipeline(MPM, PB, O, options); + else + buildFullPipeline(MPM, PB, O, options); + return MPM; + } +} + +NewPM::NewPM(std::unique_ptr TM, OptimizationLevel O, OptimizationOptions options) : + TM(std::move(TM)), SI(false), PIC(createPIC(SI)), + PB(this->TM.get(), PipelineTuningOptions(), None, PIC.get()), + MPM(createMPM(PB, O, options)), O(O) {} + +AnalysisManagers::AnalysisManagers(TargetMachine &TM, PassBuilder &PB, OptimizationLevel O) : LAM(), FAM(createFAM(O, TM.getTargetIRAnalysis(), TM.getTargetTriple())), CGAM(), MAM() { + PB.registerLoopAnalyses(LAM); + PB.registerFunctionAnalyses(FAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerModuleAnalyses(MAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); +} + +AnalysisManagers::AnalysisManagers(PassBuilder &PB) : LAM(), FAM(), CGAM(), MAM() { + PB.registerLoopAnalyses(LAM); + PB.registerFunctionAnalyses(FAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerModuleAnalyses(MAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); +} + +void NewPM::run(Module &M) { + //We must recreate the analysis managers every time + //so that analyses from previous runs of the pass manager + //do not hang around for the next run + AnalysisManagers AM{*TM, PB, O}; + MPM.run(M, AM.MAM); +} + +OptimizationLevel getOptLevel(int optlevel) { + switch (std::min(std::max(optlevel, 0), 3)) { + case 0: + return OptimizationLevel::O0; + case 1: + return OptimizationLevel::O1; + case 2: + return OptimizationLevel::O2; + case 3: + return OptimizationLevel::O3; + } + llvm_unreachable("cannot get here!"); +}