From 59f5c1ed9244afc321be45f2485d4f8a9d626e55 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Tue, 10 Dec 2024 19:24:50 +0000 Subject: [PATCH] Support (optionally) emitting debuginfo w.r.t. LLVM source This change adds two new environment variables: - `JULIA_DUMP_IR` - when provided, this is the path of a directory that all emitted LLVM IR (post-optimization, just before machine code generation) will be saved to - `JULIA_DEBUGINFO` - when set to "LLVM-IR" this will run an additional pass on any emitted functions to rewrite their debuginfo to refer to the LLVM source, rather than the Julia source it was generated from The `debugir` pass that rewrites the debuginfo is vendored from: https://github.com/vaivaswatha/debugir. For simplicity, this is just a copy of the one file that we need for the pass. Using both of these together allows `gdb` to open the dumped IR and means you can step through LLVM IR line-by-line, print SSA values, etc. This can be very useful for debugging segfaults, or issues in codegen. --- THIRDPARTY.md | 2 +- src/Makefile | 3 +- src/codegen.cpp | 18 ++ src/jitlayers.cpp | 72 +++++ src/jitlayers.h | 32 +++ src/llvm-codegen-shared.h | 9 + src/llvm-debugir.cpp | 544 ++++++++++++++++++++++++++++++++++++++ src/pipeline.cpp | 3 + 8 files changed, 681 insertions(+), 2 deletions(-) create mode 100644 src/llvm-debugir.cpp diff --git a/THIRDPARTY.md b/THIRDPARTY.md index 3a74afec4a283..716a146a660b9 100644 --- a/THIRDPARTY.md +++ b/THIRDPARTY.md @@ -5,7 +5,7 @@ for exceptions. - [crc32c.c](https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software) (CRC-32c checksum code by Mark Adler) [[ZLib](https://opensource.org/licenses/Zlib)]. - [LDC](https://github.com/ldc-developers/ldc/blob/master/LICENSE) (for ccall/cfunction ABI definitions) [BSD-3]. The portion of code that Julia uses from LDC is [BSD-3] licensed. 
-- [LLVM](https://releases.llvm.org/3.9.0/LICENSE.TXT) (for parts of src/disasm.cpp) [UIUC] +- [LLVM](https://releases.llvm.org/3.9.0/LICENSE.TXT) (for parts of src/disasm.cpp and src/llvm-debugir.cpp) [UIUC] - [NetBSD](https://www.netbsd.org/about/redistribution.html) (for setjmp, longjmp, and strptime implementations on Windows) [BSD-3] - [Python](https://docs.python.org/3/license.html) (for strtod implementation on Windows) [PSF] - [FEMTOLISP](https://github.com/JeffBezanson/femtolisp) [BSD-3] diff --git a/src/Makefile b/src/Makefile index 9355ca2c4c675..a5a861cfb0b99 100644 --- a/src/Makefile +++ b/src/Makefile @@ -52,7 +52,7 @@ RT_LLVMLINK := CG_LLVMLINK := ifeq ($(JULIACODEGEN),LLVM) -CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop \ +CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop llvm-debugir \ llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering llvm-ptls \ llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \ llvm-multiversioning llvm-alloc-opt llvm-alloc-helpers cgmemmgr llvm-remove-addrspaces \ @@ -327,6 +327,7 @@ $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/llvm-codegen-shared.h $(BUILDDIR)/llvm-alloc-helpers.o $(BUILDDIR)/llvm-alloc-helpers.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h $(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h $(BUILDDIR)/llvm-cpufeatures.o $(BUILDDIR)/llvm-cpufeatures.dbg.obj: $(SRCDIR)/jitlayers.h +$(BUILDDIR)/llvm-debugir.o $(BUILDDIR)/llvm-debugir.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(BUILDDIR)/llvm-demote-float16.o $(BUILDDIR)/llvm-demote-float16.dbg.obj: $(SRCDIR)/jitlayers.h $(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-gc-interface-passes.h $(BUILDDIR)/llvm-gc-invariant-verifier.o 
$(BUILDDIR)/llvm-gc-invariant-verifier.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h diff --git a/src/codegen.cpp b/src/codegen.cpp index 21591acedc632..d379c2b4663b2 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -10395,6 +10395,24 @@ extern "C" void jl_init_llvm(void) #endif #endif + const char *debuginfo_mode = getenv("JULIA_DEBUGINFO"); // JULIA_DEBUGINFO="LLVM-IR" JULIA_DUMP_IR="path/to/dir" + if (debuginfo_mode) { + if (strcasecmp(debuginfo_mode, "julia-source") == 0) + jl_ExecutionEngine->get_debuginfo_mode() = jl_debuginfo_emission_mode_t::julia_source; + else if (strcasecmp(debuginfo_mode, "llvm-ir") == 0) + jl_ExecutionEngine->get_debuginfo_mode() = jl_debuginfo_emission_mode_t::llvm_ir; + else if (strcmp(debuginfo_mode, "") != 0) + fprintf(stderr, "warning: unexpected argument to 'JULIA_DEBUGINFO' env var: \"%s\"\n", debuginfo_mode); + } + + const char *dump_debugir_directory = getenv("JULIA_DUMP_IR"); + if (dump_debugir_directory && strcmp(dump_debugir_directory, "") != 0) { + llvm::SmallString AbsoluteFileName{}; + llvm::sys::fs::expand_tilde(Twine(dump_debugir_directory), AbsoluteFileName); + llvm::sys::fs::make_absolute(AbsoluteFileName); + jl_ExecutionEngine->get_dump_debugir_directory() = AbsoluteFileName.str(); + } + cl::PrintOptionValues(); } diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 80867daade267..14e95fe7055ef 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -1314,6 +1314,39 @@ static void registerRTDyldJITObject(orc::MaterializationResponsibility &MR, #endif namespace { + + static std::string createDebugIRName(Module const &M) JL_NOTSAFEPOINT { + std::string path = jl_ExecutionEngine->get_dump_debugir_directory(); + if (!path.empty()) { + path += llvm::sys::path::get_separator(); + } + + std::string filename{}; + for (auto &F : M.functions()) { + if (F.isDeclaration() || F.getName().starts_with("jfptr_")) + continue; + + // Sanitize the filename by allowing only "[a-zA-Z0-9_\-\.]*" + filename = F.getName().str(); + 
std::replace_if(std::begin(filename), std::end(filename),[](const char &ch) { + return !(std::isalnum(static_cast<unsigned char>(ch)) || ch == '_' || ch == '-' || ch == '.'); /* cast: std::isalnum is undefined for negative char values (e.g. UTF-8 bytes in a function name) */ + }, '_'); + + break; + } + + // If we couldn't find a name to use, just use some unique integer + if (filename.empty()) { + static _Atomic(uint64_t) counter{1}; + filename = std::to_string(jl_atomic_fetch_add_relaxed(&counter, 1)); + } + + path += filename; + path += ".ll"; + + return path; + } + static std::unique_ptr createTargetMachine() JL_NOTSAFEPOINT { TargetOptions options = TargetOptions(); @@ -1509,6 +1542,44 @@ namespace { JL_TIMING(LLVM_JIT, JIT_Opt); //Run the optimization (****PMs[PoolIdx]).run(M); + + bool debug_ir = jl_ExecutionEngine->get_debuginfo_mode() == jl_debuginfo_emission_mode_t::llvm_ir; + bool dump_ir = !jl_ExecutionEngine->get_dump_debugir_directory().empty(); + if (!M.functions().empty() && (debug_ir || dump_ir)) { + + // Generate a debug filename for the emitted IR + std::string debug_name = createDebugIRName(M); + + // If requested, rewrite all debuginfo to reference the LLVM IR itself + std::unique_ptr displayM; + if (debug_ir) { + // displayM is the debug-stripped 'source' that the debuginfo now refers to + displayM = debugir::createDebugInfo(M, "", debug_name); + } + + // Emit the IR that was compiled + if (dump_ir) { + std::error_code EC; + raw_fd_ostream OS_dbg(debug_name, EC, sys::fs::OF_Text); + if (displayM) { + displayM->print(OS_dbg, nullptr); + } else { + M.print(OS_dbg, nullptr); + } + + // Emit the "instrumented" IR (unneeded unless you are debugging the debuginfo + // or running the instrumented IR in isolation) + if (0 && displayM) { + // Replace ".ll" suffix with ".dbg.ll" + debug_name.resize(debug_name.size() - 3); + debug_name += ".dbg.ll"; + std::error_code EC; + raw_fd_ostream OS_dbg(debug_name, EC, sys::fs::OF_Text); + M.print(OS_dbg, nullptr); + } + } + } + assert(!verifyLLVMIR(M)); } @@ -1898,6 +1969,7 @@ JuliaOJIT::JuliaOJIT() JD(ES.createBareJITDylib("JuliaOJIT")), 
ExternalJD(ES.createBareJITDylib("JuliaExternal")), DLSymOpt(std::make_unique(false)), + debuginfo_mode(jl_debuginfo_emission_mode_t::julia_source), #ifdef JL_USE_JITLINK MemMgr(createJITLinkMemoryManager()), ObjectLayer(ES, *MemMgr), diff --git a/src/jitlayers.h b/src/jitlayers.h index 6665be6a33faa..4c5ca9a6f5e98 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -367,6 +367,28 @@ using CompilerResultT = Expected>; using OptimizerResultT = Expected; using SharedBytesT = StringSet::MapEntryTy)>>; +enum class jl_debuginfo_emission_mode_t { + + // Source-referenced debuginfo (standard behavior) + // + // Preserves and emits any debuginfo in the IR from Julia source. + julia_source = 0, + + // Julia IR-referenced debuginfo + // + // Replaces all debuginfo with references to the Julia SSAIR itself (treating the IR code as + // the 'program source'). Emits a text copy of all emitted IR to 'dump_debugir_directory' so + // the IR source is available when using a debugger. + /* julia_ir, not supported (yet) */ + + // LLVM IR-referenced debuginfo + // + // Replaces all debuginfo with references to the LLVM IR itself (treating the LLVM IR as the + // 'program source'). Emits a text copy of all emitted IR to 'dump_debugir_directory' so the + // IR source is available when using a debugger. 
+ llvm_ir, +}; + class JuliaOJIT { private: // any verification the user wants to do when adding an OwningResource to the pool @@ -582,9 +604,16 @@ class JuliaOJIT { jl_locked_stream &get_dump_llvm_opt_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return dump_llvm_opt_stream; } + std::string &get_dump_debugir_directory() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { + return dump_debugir_directory; + } std::string getMangledName(StringRef Name) JL_NOTSAFEPOINT; std::string getMangledName(const GlobalValue *GV) JL_NOTSAFEPOINT; + jl_debuginfo_emission_mode_t &get_debuginfo_mode() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { + return debuginfo_mode; + } + // Note that this is a potential safepoint due to jl_get_library_ and jl_dlsym calls // but may be called from inside safe-regions due to jit compilation locks void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER; @@ -610,6 +639,9 @@ class JuliaOJIT { jl_locked_stream dump_emitted_mi_name_stream; jl_locked_stream dump_compiles_stream; jl_locked_stream dump_llvm_opt_stream; + std::string dump_debugir_directory; + + jl_debuginfo_emission_mode_t debuginfo_mode; std::mutex llvm_printing_mutex{}; SmallVector, 0> PrintLLVMTimers; diff --git a/src/llvm-codegen-shared.h b/src/llvm-codegen-shared.h index d9551e0552f9c..acd723ccebcee 100644 --- a/src/llvm-codegen-shared.h +++ b/src/llvm-codegen-shared.h @@ -527,3 +527,12 @@ void ConstantUses::forward() } } } + +namespace debugir { + +// Attaches debug info to M, assuming it is parsed from Directory/Filename. +// Returns a module for display in debugger devoid of any debug info. 
+std::unique_ptr +createDebugInfo(llvm::Module &M, std::string Directory, std::string Filename); + +} // namespace debugir diff --git a/src/llvm-debugir.cpp b/src/llvm-debugir.cpp new file mode 100644 index 0000000000000..0519dd12344a7 --- /dev/null +++ b/src/llvm-debugir.cpp @@ -0,0 +1,544 @@ +//===--- DebugIR.cpp - Transform debug metadata to allow debugging IR -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See THIRDPARTY.MD for details. +// +//===----------------------------------------------------------------------===// +// +// A Module transform pass that emits a succinct version of the IR and replaces +// the source file metadata to allow debuggers to step through the IR. +// +// FIXME: instead of replacing debug metadata, this pass should allow for +// additional metadata to be used to point capable debuggers to the IR file +// without destroying the mapping to the original source file. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/AssemblyAnnotationWriter.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueMap.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include + +#include "llvm-version.h" +#include "llvm-codegen-shared.h" + +using namespace llvm; + +#define DEBUG_TYPE "debug-ir" + +namespace { + +/// Builds a map of Value* to line numbers on which the Value appears in a +/// textual representation of the IR by plugging into the AssemblyWriter by +/// masquerading as an AssemblyAnnotationWriter. 
+class ValueToLineMap : public AssemblyAnnotationWriter { + ValueMap Lines; + typedef ValueMap::const_iterator LineIter; + + void addEntry(const Value *V, formatted_raw_ostream &Out) { + Out.flush(); + Lines.insert(std::make_pair(V, Out.getLine() + 1)); + } + +public: + /// Prints Module to a null buffer in order to build the map of Value pointers + /// to line numbers. + ValueToLineMap(const Module *M) { + raw_null_ostream ThrowAway; + M->print(ThrowAway, this); + } + + // This function is called after an Instruction, GlobalValue, or GlobalAlias + // is printed. + void printInfoComment(const Value &V, formatted_raw_ostream &Out) override { + addEntry(&V, Out); + } + + void emitBasicBlockStartAnnot(const BasicBlock *B, + formatted_raw_ostream &Out) override { + addEntry(B, Out); + } + + void emitFunctionAnnot(const Function *F, + formatted_raw_ostream &Out) override { + addEntry(F, Out); + } + + /// If V appears on a line in the textual IR representation, sets Line to the + /// line number and returns true, otherwise returns false. 
+ bool getLine(const Value *V, unsigned int &Line) const { + LineIter i = Lines.find(V); + if (i != Lines.end()) { + Line = i->second; + return true; + } + return false; + } +}; + +/// Updates debug metadata in a Module: +/// - changes Filename/Directory to values provided on construction +/// - adds/updates line number (DebugLoc) entries associated with each +/// instruction to reflect the instruction's location in an LLVM IR file +class DIUpdater : public InstVisitor { + /// Builder of debug information + DIBuilder Builder; + + /// Helper for type attributes/sizes/etc + DataLayout Layout; + + /// Map of Value* to line numbers + const ValueToLineMap LineTable; + + /// Map of Value* (in original Module) to Value* (in optional cloned Module) + const ValueToValueMapTy *VMap; + + /// Directory of debug metadata + DebugInfoFinder Finder; + + /// Source filename and directory + StringRef Filename; + StringRef Directory; + + // CU nodes needed when creating DI subprograms + DIFile *FileNode; + DILexicalBlockFile *LexicalBlockFileNode; + + Module &M; + int tempNameCounter; + + ValueMap SubprogramDescriptors; + ValueMap BlockDescriptors; + DenseMap TypeDescriptors; + +public: + DIUpdater(Module &M, StringRef Filename = StringRef(), + StringRef Directory = StringRef(), const Module *DisplayM = nullptr, + const ValueToValueMapTy *VMap = nullptr) + : Builder(M), Layout(&M), LineTable(DisplayM ? DisplayM : &M), VMap(VMap), + Finder(), Filename(Filename), Directory(Directory), FileNode(nullptr), + LexicalBlockFileNode(nullptr), M(M), tempNameCounter(0) { + + // Even without finder, this screws up. + Finder.processModule(M); + visit(&M); + } + + ~DIUpdater() { Builder.finalize(); } + + void visitModule(Module &M) { + (void)M; /* parameter is unused here; it shadows the member M */ + if (Finder.compile_unit_count() > 1) + report_fatal_error("DebugIR pass supports only a single compile unit per " + "Module."); + createCompileUnit(Finder.compile_unit_count() == 1 + ? 
(DICompileUnit *)*Finder.compile_units().begin() + : nullptr); + } + + void visitFunction(Function &F) { + if (F.isDeclaration() || findDISubprogram(&F)) + return; + + StringRef MangledName = F.getName(); + DISubroutineType *Sig = createFunctionSignature(&F); + + // find line of function declaration + unsigned Line = 0; + if (!findLine(&F, Line)) { + LLVM_DEBUG(dbgs() << "WARNING: No line for Function " << F.getName().str() + << "\n"); + return; + } + + Instruction *FirstInst = &*F.begin()->begin(); + unsigned ScopeLine = 0; + if (!findLine(FirstInst, ScopeLine)) { + LLVM_DEBUG(dbgs() << "WARNING: No line for 1st Instruction in Function " + << F.getName().str() << "\n"); + return; + } + + bool IsOptimized = false; + + DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition; + if (IsOptimized) + SPFlags |= DISubprogram::SPFlagOptimized; + if (F.hasPrivateLinkage() || F.hasInternalLinkage()) + SPFlags |= DISubprogram::SPFlagLocalToUnit; + if (F.isDeclaration()) + SPFlags |= DISubprogram::SPFlagDefinition; + DISubprogram *Sub = + Builder.createFunction(FileNode, F.getName(), MangledName, FileNode, + Line, Sig, ScopeLine, DINode::FlagZero, SPFlags); + F.setSubprogram(Sub); + LLVM_DEBUG(dbgs() << "create subprogram mdnode " << *Sub << ": " + << "\n"); + + SubprogramDescriptors.insert(std::make_pair(&F, Sub)); + + // Clang and the Kaleidoscope tutorial both copy function arguments to + // allocas and then insert debug locations on these allocas. 
+ IRBuilder<> ArgIrBuilder(&F.getEntryBlock(), + F.getEntryBlock().getFirstInsertionPt()); + for (size_t I = 0; I < F.arg_size(); I++) { + auto *Arg = F.getArg(I); + if (Arg->getName().empty()) + continue; + auto *Alloca = + ArgIrBuilder.CreateAlloca(Arg->getType(), nullptr, Arg->getName()); + ArgIrBuilder.CreateStore(Arg, Alloca); + + // Scope must be the function for gdb to recognize this as a function + // argument + auto DILV = Builder.createParameterVariable( + Sub, Arg->getName(), I + 1, FileNode, Line, + getOrCreateType(Arg->getType()), true); + auto Loc = DebugLoc(DILocation::get(M.getContext(), Line, 0, Sub)); + Builder.insertDeclare(Alloca, DILV, Builder.createExpression(), Loc.get(), + &F.getEntryBlock()); + } + } + + void visitInstruction(Instruction &I) { + DebugLoc Loc(I.getDebugLoc()); + + /// If a ValueToValueMap is provided, use it to get the real instruction as + /// the line table was generated on a clone of the module on which we are + /// operating. + Value *RealInst = nullptr; + if (VMap) + RealInst = VMap->lookup(&I); + + if (!RealInst) + RealInst = &I; + + unsigned Col = 0; // FIXME: support columns + unsigned Line; + if (!LineTable.getLine(RealInst, Line)) { + // Instruction has no line, it may have been removed (in the module that + // will be passed to the debugger) so there is nothing to do here. + LLVM_DEBUG(dbgs() << "WARNING: no LineTable entry for instruction " + << RealInst << "\n"); + return; + } + + DILocalScope *Scope; + DILocation *InlinedAt; + if (Loc) { + Scope = llvm::cast(Loc.getScope()); + InlinedAt = Loc.getInlinedAt(); + } else if ((Scope = dyn_cast_or_null<DILocalScope>(findScope(&I)))) { /* findScope() can return its null fallback scope, which plain dyn_cast must not be given */ + InlinedAt = nullptr; + } else { + LLVM_DEBUG(dbgs() << "WARNING: no valid scope for instruction " << &I + << ". no DebugLoc will be present." 
+ << "\n"); + return; + } + + if (isa(I)) + Scope = Scope->getSubprogram(); // See https://github.com/llvm/llvm-project/issues/118883 + + DebugLoc NewLoc = + DebugLoc(DILocation::get(M.getContext(), Line, Col, Scope, InlinedAt)); + addDebugLocation(I, NewLoc); + + if (!I.getType()->isVoidTy() && !I.getName().empty()) { + auto DILV = Builder.createAutoVariable(Scope, I.getName(), FileNode, Line, + getOrCreateType(I.getType())); + if (isa(I)) + Builder.insertDbgValueIntrinsic(&I, DILV, Builder.createExpression(), + NewLoc.get(), I.getParent()->getFirstNonPHI()); + else if (Instruction *NI = I.getNextNonDebugInstruction(/* SkipPseudoOp */ true)) + Builder.insertDbgValueIntrinsic(&I, DILV, Builder.createExpression(), + NewLoc.get(), NI); + else + Builder.insertDbgValueIntrinsic(&I, DILV, Builder.createExpression(), + NewLoc.get(), I.getParent()); + } + } + +private: + void createCompileUnit(DICompileUnit *CUToReplace) { + std::string Flags; + bool IsOptimized = false; + StringRef Producer; + unsigned RuntimeVersion(0); + StringRef SplitName; + + if (CUToReplace) { + // save fields from existing CU to re-use in the new CU + Producer = CUToReplace->getProducer(); + IsOptimized = CUToReplace->isOptimized(); + Flags = CUToReplace->getFlags().str(); + RuntimeVersion = CUToReplace->getRuntimeVersion(); + SplitName = CUToReplace->getSplitDebugFilename(); + } else { + Producer = + "LLVM Version " XSTR(LLVM_VERSION_MAJOR) "." 
XSTR(LLVM_VERSION_MINOR); + } + + FileNode = Builder.createFile(Filename, Directory); + DICompileUnit *CU = + Builder.createCompileUnit(dwarf::DW_LANG_C99, FileNode, Producer, + IsOptimized, Flags, RuntimeVersion); + + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu"); + NMD->clearOperands(); + NMD->addOperand(CU); + + for (DISubprogram *S : Finder.subprograms()) { + S->replaceUnit(CU); + S->replaceOperandWith(0, FileNode); // replace 'File' + } + } + + DIScope *getBlockScope(DIScope *ParentScope, const BasicBlock *B) { + auto BScope = BlockDescriptors.find(B); + if (BScope != BlockDescriptors.end()) { + return BScope->second; + } else { + // Let's build a scope for this block. + unsigned Line = 0; + if (!findLine(B, Line)) { + LLVM_DEBUG(dbgs() << "WARNING: No line for basic block " + << B->getName().str() << " in Function " + << B->getParent()->getName().str() << "\n"); + } + auto Scope = Builder.createLexicalBlock(ParentScope, FileNode, Line, 0); + BlockDescriptors[B] = Scope; + return Scope; + } + } + + /// Returns the MDNode* that represents the DI scope to associate with I + DIScope *findScope(const Instruction *I) { + + const BasicBlock *B = I->getParent(); + const Function *F = B->getParent(); + + auto returnFallback = [this, I]() { + (void)I; + LLVM_DEBUG(dbgs() << "WARNING: Using fallback lexical block file scope " + << LexicalBlockFileNode << " as scope for instruction " + << I << "\n"); + return LexicalBlockFileNode; + }; + + DISubprogram *SubprogramScope = findDISubprogram(F); + if (!SubprogramScope) + return returnFallback(); + + auto *EntryBlockScope = getBlockScope(SubprogramScope, B); + if (&F->getEntryBlock() == B) { + return EntryBlockScope; + } + return getBlockScope(EntryBlockScope, B); + } + + /// Returns the MDNode* that is the descriptor for F + DISubprogram *findDISubprogram(const Function *F) { + typedef ValueMap::const_iterator + FuncNodeIter; + FuncNodeIter i = SubprogramDescriptors.find(F); + if (i != 
SubprogramDescriptors.end()) + return i->second; + + LLVM_DEBUG(dbgs() << "searching for DI scope node for Function " << F + << " in a list of " << Finder.subprogram_count() + << " subprogram nodes" + << "\n"); + + // TODO: When would this even be reached? + for (DISubprogram *S : Finder.subprograms()) { + // TODO: Is describes correct? + if (S->describes(F)) { + LLVM_DEBUG(dbgs() << "Found DISubprogram " << S << " for function " + << F->getName() << "\n"); + return S; + } + } + LLVM_DEBUG(dbgs() << "unable to find DISubprogram node for function " + << F->getName().str() << "\n"); + return nullptr; + } + + /// Sets Line to the line number on which V appears and returns true. If a + /// line location for V is not found, returns false. + bool findLine(const Value *V, unsigned &Line) { + if (LineTable.getLine(V, Line)) + return true; + + if (VMap) { + Value *mapped = VMap->lookup(V); + if (mapped && LineTable.getLine(mapped, Line)) + return true; + } + return false; + } + + std::string getTypeName(Type *T) { + std::string TypeName; + raw_string_ostream TypeStream(TypeName); + if (T) + T->print(TypeStream); + else + TypeStream << "Printing Type"; + TypeStream.flush(); + return TypeName; + } + + /// Returns the MDNode that represents type T if it is already created, or 0 + /// if it is not. + DIType *getType(const Type *T) { + typedef DenseMap::const_iterator TypeNodeIter; + TypeNodeIter i = TypeDescriptors.find(T); + if (i != TypeDescriptors.end()) + return i->second; + return nullptr; + } + + /// Returns a DebugInfo type from an LLVM type T. + DIType *getOrCreateType(Type *T) { + DIType *N = getType(T); + if (N) + return N; + else if (T->isVoidTy()) + return Builder.createUnspecifiedType("void"); + else if (T->isStructTy()) { + // NOTE: where does DINodeArray come from? + StructType *ST = cast(T); + if (ST->isOpaque()) + N = Builder.createUnspecifiedType(ST->getName()); + else { + DICompositeType *S = Builder.createStructType( + LexicalBlockFileNode, + ST->hasName() ? 
T->getStructName() : "literal", FileNode, + /*LineNumber=*/0, Layout.getTypeSizeInBits(T), + Layout.getPrefTypeAlign(T).value() * CHAR_BIT, /*DIFlags=*/llvm::DINode::FlagZero, + /*DerivedFrom=*/nullptr, llvm::DINodeArray()); // filled in later + N = S; // the Node _is_ the struct type. + + // N is added to the map (early) so that element search below can find + // it, so as to avoid infinite recursion for structs that contain + // pointers to their own type. + TypeDescriptors[T] = N; + + SmallVector + Elements; // unfortunately, SmallVector does not decay to + // SmallVector + + auto *TLayout = Layout.getStructLayout(llvm::cast(T)); + for (unsigned I = 0; I < T->getStructNumElements(); ++I) { + Type *ElType = T->getStructElementType(I); + DIType *ElDIType = getOrCreateType(ElType); + DIType *MemType = Builder.createMemberType( + LexicalBlockFileNode, + (ST->hasName() ? T->getStructName().str() + "." + + std::to_string(tempNameCounter++) + : "literal"), + FileNode, 0, 0, 0, TLayout->getElementOffsetInBits(I), + DINode::DIFlags::FlagZero, ElDIType); + Elements.push_back(MemType); + } + + Builder.replaceArrays(S, Builder.getOrCreateArray(Elements)); + } + } else if (T->isPointerTy()) { + N = Builder.createPointerType( + nullptr, Layout.getPointerTypeSizeInBits(T), + Layout.getPrefTypeAlign(T).value() * CHAR_BIT, +#if LLVM_VERSION_MAJOR > 15 + /*DWARFAddressSpace=*/std::nullopt, +#else + /*DWARFAddressSpace=*/None, +#endif + getTypeName(T)); + } else if (T->isArrayTy()) { + SmallVector + Subscripts; // unfortunately, SmallVector does not decay to + // SmallVector + + Subscripts.push_back( + Builder.getOrCreateSubrange(/*Lo=*/0, /*Count=*/T->getArrayNumElements())); /* second argument is an element count, not an upper bound */ + + N = Builder.createArrayType(Layout.getTypeSizeInBits(T), + Layout.getPrefTypeAlign(T).value() * CHAR_BIT, + getOrCreateType(T->getArrayElementType()), + Builder.getOrCreateArray(Subscripts)); + } else { + int encoding = llvm::dwarf::DW_ATE_signed; + if (T->isIntegerTy()) + encoding = llvm::dwarf::DW_ATE_unsigned; + 
else if (T->isFloatingPointTy()) + encoding = llvm::dwarf::DW_ATE_float; + + N = Builder.createBasicType(getTypeName(T), T->getPrimitiveSizeInBits(), + encoding); + } + TypeDescriptors[T] = N; + return N; + } + + /// Returns a DebugInfo type that represents a function signature for Func. + DISubroutineType *createFunctionSignature(const Function *Func) { + SmallVector Params; // SmallVector does not + // auto-cast to SmallVector + DIType *ReturnType = getOrCreateType(Func->getReturnType()); + Params.push_back(ReturnType); + + for (const Argument &Arg : Func->args()) { + Type *T = Arg.getType(); + Params.push_back(getOrCreateType(T)); + } + + DITypeRefArray ParamArray = Builder.getOrCreateTypeArray(Params); + return Builder.createSubroutineType(ParamArray); + } + + /// Associates Instruction I with debug location Loc. + void addDebugLocation(Instruction &I, DebugLoc Loc) { I.setDebugLoc(Loc); } +}; + +} // anonymous namespace + +namespace debugir { + +std::unique_ptr createDebugInfo(Module &M, std::string Directory, + std::string Filename) { + + auto VMap = std::make_unique(); + auto DisplayM = CloneModule(M, *VMap); + StripDebugInfo(*(DisplayM.get())); + + { + // DIUpdater is in its own scope so that its destructor, and hence + // DIBuilder::finalize(), gets called. Without that there's dangling stuff. + DIUpdater R(M, Filename, Directory, DisplayM.get(), VMap.get()); + } + + auto DIVersionKey = "Debug Info Version"; + if (!M.getModuleFlag(DIVersionKey)) + // Add the current debug info version into the module. 
+ M.addModuleFlag(Module::Warning, DIVersionKey, DEBUG_METADATA_VERSION); + + assert(!verifyModule(M, &errs()) && "verifyModule found issues"); + + return DisplayM; +} + +} // namespace debugir diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 8c9054c0d65ff..a345d1cd7320e 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -567,6 +568,8 @@ static void buildCleanupPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimi MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } } + // TODO: This should be conditionally enabled depending on the emission mode for the Execution engine + MPM.addPass(createModuleToFunctionPassAdaptor(InstructionNamerPass())); MPM.addPass(AfterCleanupMarkerPass()); }