Skip to content

Commit

Permalink
[ELF] - Teach LLD to report line numbers for data symbols.
Browse files Browse the repository at this point in the history
This is PR34826.

Currently LLD is unable to report line number when reporting
duplicate declaration of some variable.

That happens because for extracting line information we always use
.debug_line section content which describes mapping from machine
instructions to source file locations, what does not help for
variables as does not describe them.

In this patch I am taking the approproate information about
variables locations from the .debug_info section.

Differential revision: https://reviews.llvm.org/D38721

llvm-svn: 317080
  • Loading branch information
George Rimar committed Nov 1, 2017
1 parent 688f0ca commit 82f0c42
Show file tree
Hide file tree
Showing 7 changed files with 246 additions and 28 deletions.
67 changes: 64 additions & 3 deletions lld/ELF/InputFiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ Optional<MemoryBufferRef> elf::readFile(StringRef Path) {
return MBRef;
}

template <class ELFT> void ObjFile<ELFT>::initializeDwarfLine() {
template <class ELFT> void ObjFile<ELFT>::initializeDwarf() {
DWARFContext Dwarf(make_unique<LLDDwarfObj<ELFT>>(this));
const DWARFObject &Obj = Dwarf.getDWARFObj();
DwarfLine.reset(new DWARFDebugLine);
Expand All @@ -77,15 +77,76 @@ template <class ELFT> void ObjFile<ELFT>::initializeDwarfLine() {
// The second parameter is offset in .debug_line section
// for compilation unit (CU) of interest. We have only one
// CU (object file), so offset is always 0.
DwarfLine->getOrParseLineTable(LineData, 0);
const DWARFDebugLine::LineTable *LT =
DwarfLine->getOrParseLineTable(LineData, 0);

// Return if there is no debug information about CU available.
if (!Dwarf.getNumCompileUnits())
return;

// Loop over variable records and insert them to VariableLoc.
DWARFCompileUnit *CU = Dwarf.getCompileUnitAtIndex(0);
for (const auto &Entry : CU->dies()) {
DWARFDie Die(CU, &Entry);
// Skip all tags that are not variables.
if (Die.getTag() != dwarf::DW_TAG_variable)
continue;

// Skip if a local variable because we don't need them for generating error
// messages. In general, only non-local symbols can fail to be linked.
if (!dwarf::toUnsigned(Die.find(dwarf::DW_AT_external), 0))
continue;

// Get the source filename index for the variable.
unsigned File = dwarf::toUnsigned(Die.find(dwarf::DW_AT_decl_file), 0);
if (!LT->hasFileAtIndex(File))
continue;

// Get the line number on which the variable is declared.
unsigned Line = dwarf::toUnsigned(Die.find(dwarf::DW_AT_decl_line), 0);

// Get the name of the variable and add the collected information to
// VariableLoc. Usually Name is non-empty, but it can be empty if the input
// object file lacks some debug info.
StringRef Name = dwarf::toString(Die.find(dwarf::DW_AT_name), "");
if (!Name.empty())
VariableLoc.insert({Name, {File, Line}});
}
}

// Returns the pair of file name and line number describing location of data
// object (variable, array, etc) definition.
template <class ELFT>
Optional<std::pair<std::string, unsigned>>
ObjFile<ELFT>::getVariableLoc(StringRef Name) {
llvm::call_once(InitDwarfLine, [this]() { initializeDwarf(); });

// There is always only one CU so it's offset is 0.
const DWARFDebugLine::LineTable *LT = DwarfLine->getLineTable(0);
if (!LT)
return None;

// Return if we have no debug information about data object.
auto It = VariableLoc.find(Name);
if (It == VariableLoc.end())
return None;

// Take file name string from line table.
std::string FileName;
if (!LT->getFileNameByIndex(
It->second.first /* File */, nullptr,
DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FileName))
return None;

return std::make_pair(FileName, It->second.second /*Line*/);
}

// Returns source line information for a given offset
// using DWARF debug info.
template <class ELFT>
Optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *S,
uint64_t Offset) {
llvm::call_once(InitDwarfLine, [this]() { initializeDwarfLine(); });
llvm::call_once(InitDwarfLine, [this]() { initializeDwarf(); });

// The offset to CU is 0.
const DWARFDebugLine::LineTable *Tbl = DwarfLine->getLineTable(0);
Expand Down
4 changes: 3 additions & 1 deletion lld/ELF/InputFiles.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ template <class ELFT> class ObjFile : public ELFFileBase<ELFT> {
// If no information is available, returns "".
std::string getLineInfo(InputSectionBase *S, uint64_t Offset);
llvm::Optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t);
llvm::Optional<std::pair<std::string, unsigned>> getVariableLoc(StringRef Name);

// MIPS GP0 value defined by this file. This value represents the gp value
// used to create the relocatable object and required to support
Expand All @@ -200,7 +201,7 @@ template <class ELFT> class ObjFile : public ELFFileBase<ELFT> {
void
initializeSections(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups);
void initializeSymbols();
void initializeDwarfLine();
void initializeDwarf();
InputSectionBase *getRelocTarget(const Elf_Shdr &Sec);
InputSectionBase *createInputSection(const Elf_Shdr &Sec);
StringRef getSectionName(const Elf_Shdr &Sec);
Expand All @@ -216,6 +217,7 @@ template <class ELFT> class ObjFile : public ELFFileBase<ELFT> {
// single object file, so we cache debugging information in order to
// parse it only once for each object file we link.
std::unique_ptr<llvm::DWARFDebugLine> DwarfLine;
llvm::DenseMap<StringRef, std::pair<unsigned, unsigned>> VariableLoc;
llvm::once_flag InitDwarfLine;
};

Expand Down
52 changes: 33 additions & 19 deletions lld/ELF/InputSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,31 +261,41 @@ std::string InputSectionBase::getLocation(uint64_t Offset) {
return (SrcFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")").str();
}

// Returns a source location string. This function is intended to be
// used for constructing an error message. The returned message looks
// like this:
// Concatenates arguments to construct a string representing an error location.
static std::string createFileLineMsg(StringRef Path, unsigned Line) {
std::string Filename = path::filename(Path);
std::string Lineno = ":" + std::to_string(Line);
if (Filename == Path)
return Filename + Lineno;
return Filename + Lineno + " (" + Path.str() + Lineno + ")";
}

// This function is intended to be used for constructing an error message.
// The returned message looks like this:
//
// foo.c:42 (/home/alice/possibly/very/long/path/foo.c:42)
//
// Returns an empty string if there's no way to get line info.
template <class ELFT> std::string InputSectionBase::getSrcMsg(uint64_t Offset) {
// Returns an empty string if there's no way to get line info.
template <class ELFT>
std::string InputSectionBase::getSrcMsg(const SymbolBody &Sym,
uint64_t Offset) {
// Synthetic sections don't have input files.
ObjFile<ELFT> *File = getFile<ELFT>();
if (!File)
return "";

Optional<DILineInfo> Info = File->getDILineInfo(this, Offset);
// In DWARF, functions and variables are stored to different places.
// First, lookup a function for a given offset.
if (Optional<DILineInfo> Info = File->getDILineInfo(this, Offset))
return createFileLineMsg(Info->FileName, Info->Line);

// File->SourceFile contains STT_FILE symbol, and that is a last resort.
if (!Info)
return File->SourceFile;
// If it failed, lookup again as a variable.
if (Optional<std::pair<std::string, unsigned>> FileLine =
File->getVariableLoc(Sym.getName()))
return createFileLineMsg(FileLine->first, FileLine->second);

std::string Path = Info->FileName;
std::string Filename = path::filename(Path);
std::string Lineno = ":" + std::to_string(Info->Line);
if (Filename == Path)
return Filename + Lineno;
return Filename + Lineno + " (" + Path + Lineno + ")";
// File->SourceFile contains STT_FILE symbol, and that is a last resort.
return File->SourceFile;
}

// Returns a filename string along with an optional section name. This
Expand Down Expand Up @@ -1004,10 +1014,14 @@ template std::string InputSectionBase::getLocation<ELF32BE>(uint64_t);
template std::string InputSectionBase::getLocation<ELF64LE>(uint64_t);
template std::string InputSectionBase::getLocation<ELF64BE>(uint64_t);

template std::string InputSectionBase::getSrcMsg<ELF32LE>(uint64_t);
template std::string InputSectionBase::getSrcMsg<ELF32BE>(uint64_t);
template std::string InputSectionBase::getSrcMsg<ELF64LE>(uint64_t);
template std::string InputSectionBase::getSrcMsg<ELF64BE>(uint64_t);
template std::string InputSectionBase::getSrcMsg<ELF32LE>(const SymbolBody &,
uint64_t);
template std::string InputSectionBase::getSrcMsg<ELF32BE>(const SymbolBody &,
uint64_t);
template std::string InputSectionBase::getSrcMsg<ELF64LE>(const SymbolBody &,
uint64_t);
template std::string InputSectionBase::getSrcMsg<ELF64BE>(const SymbolBody &,
uint64_t);

template void InputSection::writeTo<ELF32LE>(uint8_t *);
template void InputSection::writeTo<ELF32BE>(uint8_t *);
Expand Down
2 changes: 1 addition & 1 deletion lld/ELF/InputSection.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ class InputSectionBase : public SectionBase {

// Returns a source location string. Used to construct an error message.
template <class ELFT> std::string getLocation(uint64_t Offset);
template <class ELFT> std::string getSrcMsg(uint64_t Offset);
template <class ELFT> std::string getSrcMsg(const SymbolBody &Sym, uint64_t Offset);
std::string getObjMsg(uint64_t Offset);

// Each section knows how to relocate itself. These functions apply
Expand Down
4 changes: 2 additions & 2 deletions lld/ELF/Relocations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ static std::string getLocation(InputSectionBase &S, const SymbolBody &Sym,
uint64_t Off) {
std::string Msg =
"\n>>> defined in " + toString(Sym.getFile()) + "\n>>> referenced by ";
std::string Src = S.getSrcMsg<ELFT>(Off);
std::string Src = S.getSrcMsg<ELFT>(Sym, Off);
if (!Src.empty())
Msg += Src + "\n>>> ";
return Msg + S.getObjMsg(Off);
Expand Down Expand Up @@ -728,7 +728,7 @@ static bool maybeReportUndefined(SymbolBody &Sym, InputSectionBase &Sec,
std::string Msg =
"undefined symbol: " + toString(Sym) + "\n>>> referenced by ";

std::string Src = Sec.getSrcMsg<ELFT>(Offset);
std::string Src = Sec.getSrcMsg<ELFT>(Sym, Offset);
if (!Src.empty())
Msg += Src + "\n>>> ";
Msg += Sec.getObjMsg(Offset);
Expand Down
4 changes: 2 additions & 2 deletions lld/ELF/SymbolTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,9 +456,9 @@ static void reportDuplicate(SymbolBody *Sym, InputSectionBase *ErrSec,
// >>> defined at baz.c:563
// >>> baz.o in archive libbaz.a
auto *Sec1 = cast<InputSectionBase>(D->Section);
std::string Src1 = Sec1->getSrcMsg<ELFT>(D->Value);
std::string Src1 = Sec1->getSrcMsg<ELFT>(*Sym, D->Value);
std::string Obj1 = Sec1->getObjMsg(D->Value);
std::string Src2 = ErrSec->getSrcMsg<ELFT>(ErrOffset);
std::string Src2 = ErrSec->getSrcMsg<ELFT>(*Sym, ErrOffset);
std::string Obj2 = ErrSec->getObjMsg(ErrOffset);

std::string Msg = "duplicate symbol: " + toString(*Sym) + "\n>>> defined at ";
Expand Down
141 changes: 141 additions & 0 deletions lld/test/ELF/conflict-debug-variable.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
# RUN: llvm-dwarfdump %t.o | FileCheck -check-prefix=INPUT %s
# RUN: not ld.lld %t.o %t.o -o %t 2>&1 | FileCheck %s

# INPUT: .debug_info contents:
# INPUT: DW_TAG_variable
# INPUT-NEXT: DW_AT_name ("foo")
# INPUT-NEXT: DW_AT_decl_file ("1.c")
# INPUT-NEXT: DW_AT_decl_line (1)
# INPUT-NEXT: DW_AT_type (cu + 0x0032 "int")
# INPUT-NEXT: DW_AT_external (true)
# INPUT-NEXT: DW_AT_location (DW_OP_addr 0x0)
# INPUT: DW_TAG_variable
# INPUT-NEXT: DW_AT_name ("bar")
# INPUT-NEXT: DW_AT_decl_file ("1.c")
# INPUT-NEXT: DW_AT_decl_line (2)
# INPUT-NEXT: DW_AT_type (cu + 0x0032 "int")
# INPUT-NEXT: DW_AT_external (true)
# INPUT-NEXT: DW_AT_location (DW_OP_addr 0x0)

## Check we use information from .debug_info in messages.
# CHECK: duplicate symbol: bar
# CHECK-NEXT: >>> defined at 1.c:2
# CHECK-NEXT: >>> {{.*}}:(bar)
# CHECK-NEXT: >>> defined at 1.c:2
# CHECK-NEXT: >>> {{.*}}:(.data+0x0)
# CHECK: duplicate symbol: foo
# CHECK-NEXT: >>> defined at 1.c:1
# CHECK-NEXT: >>> {{.*}}:(foo)
# CHECK-NEXT: >>> defined at 1.c:1
# CHECK-NEXT: >>> {{.*}}:(.bss+0x0)

# Used reduced output from following code and gcc 7.1.0
# to produce this input file:
# Source (1.c):
# int foo = 0;
# int bar = 1;
# Invocation: g++ -g -S 1.c

.bss
.globl foo
.type foo, @object
.size foo, 4
foo:

.data
.globl bar
.type bar, @object
.size bar, 4
bar:

.text
.file 1 "1.c"

.section .debug_info,"",@progbits
.long 0x4b # Compile Unit: length = 0x0000004b)
.value 0x4 # version = 0x0004
.long 0 # abbr_offset = 0x0
.byte 0x8 # addr_size = 0x08

.uleb128 0x1 # DW_TAG_compile_unit [1] *
.long 0 # DW_AT_producer [DW_FORM_strp] ( .debug_str[0x00000000] = )
.byte 0x4 # DW_AT_language [DW_FORM_data1] (DW_LANG_C_plus_plus)
.string "1.c" # DW_AT_name [DW_FORM_string] ("1.c")
.long 0 # DW_AT_comp_dir [DW_FORM_strp] ( .debug_str[0x00000000] = )
.long 0 # DW_AT_stmt_list [DW_FORM_sec_offset] (0x00000000)

.uleb128 0x2 # DW_TAG_variable [2]
.string "foo" # DW_AT_name [DW_FORM_string] ("foo")
.byte 0x1 # DW_AT_decl_file [DW_FORM_data1] ("1.c")
.byte 0x1 # DW_AT_decl_line [DW_FORM_data1] (1)
.long 0x32 # DW_AT_type [DW_FORM_ref4] (cu + 0x0032 => {0x00000032})
.uleb128 0x9 # DW_AT_external [DW_FORM_flag_present] (true)
.byte 0x3
.quad foo # DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x0)

.uleb128 0x3 # DW_TAG_base_type [3]
.byte 0x4 # DW_AT_byte_size [DW_FORM_data1] (0x04)
.byte 0x5 # DW_AT_encoding [DW_FORM_data1] (DW_ATE_signed)
.string "int" # DW_AT_name [DW_FORM_string] ("int")

.uleb128 0x2 # DW_TAG_variable [2]
.string "bar" # DW_AT_name [DW_FORM_string] ("bar")
.byte 0x1 # DW_AT_decl_file [DW_FORM_data1] ("1.c")
.byte 0x2 # DW_AT_decl_line [DW_FORM_data1] (2)
.long 0x32 # DW_AT_type [DW_FORM_ref4] (cu + 0x0032 => {0x00000032})
.uleb128 0x9 # DW_AT_external [DW_FORM_flag_present] (true)
.byte 0x3
.quad bar # DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x0)
.byte 0 # END


.section .debug_abbrev,"",@progbits
.uleb128 0x1 # Abbreviation code.
.uleb128 0x11 # DW_TAG_compile_unit

.byte 0x1 # ID
.uleb128 0x25 # DW_AT_producer, DW_FORM_strp
.uleb128 0xe
.uleb128 0x13 # DW_AT_language, DW_FORM_data1
.uleb128 0xb
.uleb128 0x3 # DW_AT_name, DW_FORM_string
.uleb128 0x8
.uleb128 0x1b # DW_AT_comp_dir, DW_FORM_strp
.uleb128 0xe
.uleb128 0x10 # DW_AT_stmt_list, DW_FORM_sec_offset
.uleb128 0x17
.byte 0
.byte 0

.uleb128 0x2 # ID
.uleb128 0x34 # DW_TAG_variable, DW_CHILDREN_no
.byte 0
.uleb128 0x3 # DW_AT_name, DW_FORM_string
.uleb128 0x8
.uleb128 0x3a # DW_AT_decl_file, DW_FORM_data1
.uleb128 0xb
.uleb128 0x3b # DW_AT_decl_line, DW_FORM_data1
.uleb128 0xb
.uleb128 0x49 # DW_AT_type, DW_FORM_ref4
.uleb128 0x13
.uleb128 0x3f # DW_AT_external, DW_FORM_flag_present
.uleb128 0x19
.uleb128 0x2 # DW_AT_location, DW_FORM_exprloc
.uleb128 0x18
.byte 0
.byte 0

.uleb128 0x3 # ID
.uleb128 0x24 # DW_TAG_base_type, DW_CHILDREN_no
.byte 0
.uleb128 0xb # DW_AT_byte_size, DW_FORM_data1
.uleb128 0xb
.uleb128 0x3e # DW_AT_encoding, DW_FORM_data1
.uleb128 0xb
.uleb128 0x3 # DW_AT_name, DW_FORM_string
.uleb128 0x8
.byte 0
.byte 0
.byte 0

0 comments on commit 82f0c42

Please sign in to comment.