Skip to content

Commit

Permalink
[BOLT] Support input binaries that use R_X86_64_GOTPC64
Browse files Browse the repository at this point in the history
In the large code model, the address of the GOT is calculated by the
static linker via an R_X86_64_GOTPC64 relocation applied against a MOVABSQ
instruction. In the final binary, it can be disassembled as a regular
immediate, but because such an immediate is the result of PC-relative
pointer arithmetic, we need to parse this relocation and update this
calculation whenever we move code; otherwise we break the code trying
to read the GOT.

A test case showing how GOT is accessed was provided.

Reviewed By: #bolt, maksfb

Differential Revision: https://reviews.llvm.org/D158911
  • Loading branch information
rafaelauler committed Oct 3, 2023
1 parent a29e8ef commit 853e126
Show file tree
Hide file tree
Showing 11 changed files with 207 additions and 1 deletion.
9 changes: 9 additions & 0 deletions bolt/include/bolt/Core/BinaryContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,15 @@ class BinaryContext {
return nullptr;
}

/// Retrieves a reference to ELF's _GLOBAL_OFFSET_TABLE_ symbol, which points
/// at GOT, or null if it is not present in the input binary symtab.
BinaryData *getGOTSymbol();

/// Returns true when \p SymName is exactly the name of ELF's
/// _GLOBAL_OFFSET_TABLE_ symbol. Only the name is compared; no symbol lookup
/// is performed.
bool isGOTSymbol(StringRef SymName) const {
  const StringRef GOTName("_GLOBAL_OFFSET_TABLE_");
  return GOTName == SymName;
}

/// Return true if \p SymbolName was generated internally and was not present
/// in the input binary.
bool isInternalSymbolName(const StringRef Name) {
Expand Down
1 change: 1 addition & 0 deletions bolt/include/bolt/Core/Relocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ struct Relocation {

/// Special relocation type that allows the linker to modify the instruction.
static bool isX86GOTPCRELX(uint64_t Type);
/// Return true if \p Type is R_X86_64_GOTPC64 and the current architecture
/// is x86-64; false otherwise.
static bool isX86GOTPC64(uint64_t Type);

/// Return true if relocation type is NONE
static bool isNone(uint64_t Type);
Expand Down
25 changes: 25 additions & 0 deletions bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1026,6 +1026,31 @@ BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
return nullptr;
}

/// Look up the BinaryData describing ELF's _GLOBAL_OFFSET_TABLE_ symbol.
///
/// An entry registered under the plain name is preferred. Otherwise the
/// "/N"-suffixed entries ("_GLOBAL_OFFSET_TABLE_/1", "/2", ...) are consulted:
/// the "/1" entry is returned as long as every further consecutively numbered
/// entry refers to the same address.
///
/// \returns the symbol's BinaryData, or nullptr when no entry exists or when
/// the suffixed entries disagree on the address.
BinaryData *BinaryContext::getGOTSymbol() {
  // First tries to find a global symbol with that name
  if (BinaryData *GlobalBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_"))
    return GlobalBD;

  // This symbol might be hidden from run-time link, so fetch the local
  // definition if available.
  BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
  if (!GOTSymBD)
    return nullptr;

  // If the local symbol is not unique, fail
  for (unsigned Index = 2;; ++Index) {
    // Use a fresh buffer for every candidate name: Twine::toStringRef()
    // appends to the buffer it is given, so sharing one buffer across
    // iterations would turn the lookup that follows "/2" into
    // "_GLOBAL_OFFSET_TABLE_/2_GLOBAL_OFFSET_TABLE_/3" and silently end the
    // scan before all suffixed entries were compared.
    SmallString<30> Storage;
    const BinaryData *BD =
        getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
                                .concat(Twine(Index))
                                .toStringRef(Storage));
    if (!BD)
      break;
    if (BD->getAddress() != GOTSymBD->getAddress())
      return nullptr;
  }

  return GOTSymBD;
}

bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
auto NI = BinaryDataMap.find(Address);
assert(NI != BinaryDataMap.end());
Expand Down
9 changes: 9 additions & 0 deletions bolt/lib/Core/Relocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ static bool isSupportedX86(uint64_t Type) {
case ELF::R_X86_64_PC32:
case ELF::R_X86_64_PC64:
case ELF::R_X86_64_PLT32:
case ELF::R_X86_64_GOTPC64:
case ELF::R_X86_64_GOTPCREL:
case ELF::R_X86_64_GOTTPOFF:
case ELF::R_X86_64_TPOFF32:
Expand Down Expand Up @@ -136,6 +137,7 @@ static size_t getSizeForTypeX86(uint64_t Type) {
return 4;
case ELF::R_X86_64_PC64:
case ELF::R_X86_64_64:
case ELF::R_X86_64_GOTPC64:
return 8;
}
}
Expand Down Expand Up @@ -655,6 +657,7 @@ static bool isPCRelativeX86(uint64_t Type) {
case ELF::R_X86_64_PLT32:
case ELF::R_X86_64_GOTOFF64:
case ELF::R_X86_64_GOTPC32:
case ELF::R_X86_64_GOTPC64:
case ELF::R_X86_64_GOTTPOFF:
case ELF::R_X86_64_GOTPCRELX:
case ELF::R_X86_64_REX_GOTPCRELX:
Expand Down Expand Up @@ -797,6 +800,12 @@ bool Relocation::isX86GOTPCRELX(uint64_t Type) {
return Type == ELF::R_X86_64_GOTPCRELX || Type == ELF::R_X86_64_REX_GOTPCRELX;
}

/// Return true if \p Type is R_X86_64_GOTPC64. Always false when the current
/// architecture is not x86-64.
bool Relocation::isX86GOTPC64(uint64_t Type) {
  // The architecture test comes first, exactly as in the guard-clause form:
  // a non-x86-64 target answers false without looking at Type.
  return Arch == Triple::x86_64 && Type == ELF::R_X86_64_GOTPC64;
}

/// Return true if \p Type equals the relocation type reported by getNone().
bool Relocation::isNone(uint64_t Type) {
  const auto NoneType = getNone();
  return Type == NoneType;
}

bool Relocation::isRelative(uint64_t Type) {
Expand Down
13 changes: 13 additions & 0 deletions bolt/lib/Rewrite/JITLinkLinker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,19 @@ struct JITLinkLinker::Context : jitlink::JITLinkContext {
orc::ExecutorAddr(Address), JITSymbolFlags());
continue;
}

if (Linker.BC.isGOTSymbol(SymName)) {
if (const BinaryData *I = Linker.BC.getGOTSymbol()) {
uint64_t Address =
I->isMoved() ? I->getOutputAddress() : I->getAddress();
LLVM_DEBUG(dbgs() << "Resolved to address 0x"
<< Twine::utohexstr(Address) << "\n");
AllResults[Symbol.first] = orc::ExecutorSymbolDef(
orc::ExecutorAddr(Address), JITSymbolFlags());
continue;
}
}

LLVM_DEBUG(dbgs() << "Resolved to address 0x0\n");
AllResults[Symbol.first] =
orc::ExecutorSymbolDef(orc::ExecutorAddr(0), JITSymbolFlags());
Expand Down
6 changes: 5 additions & 1 deletion bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2394,9 +2394,13 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
}

MCSymbol *ReferencedSymbol = nullptr;
if (!IsSectionRelocation)
if (!IsSectionRelocation) {
if (BinaryData *BD = BC->getBinaryDataByName(SymbolName))
ReferencedSymbol = BD->getSymbol();
else if (BC->isGOTSymbol(SymbolName))
if (BinaryData *BD = BC->getGOTSymbol())
ReferencedSymbol = BD->getSymbol();
}

ErrorOr<BinarySection &> ReferencedSection{std::errc::bad_address};
symbol_iterator SymbolIter = Rel.getSymbol();
Expand Down
1 change: 1 addition & 0 deletions bolt/lib/Target/X86/X86MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ class X86MCPlusBuilder : public MCPlusBuilder {
case ELF::R_X86_64_PC8:
case ELF::R_X86_64_PC32:
case ELF::R_X86_64_PC64:
case ELF::R_X86_64_GOTPC64:
case ELF::R_X86_64_GOTPCRELX:
case ELF::R_X86_64_REX_GOTPCRELX:
return true;
Expand Down
29 changes: 29 additions & 0 deletions bolt/lib/Target/X86/X86MCSymbolizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,15 @@ bool X86MCSymbolizer::tryAddingSymbolicOperand(
if (!Relocation)
return processPCRelOperandNoRel();

// GOTPC64 is special because the X86 Assembler doesn't know how to emit
// a PC-relative 8-byte fixup, which is what we need to cover this. The
// only way to do this is to use the symbol name _GLOBAL_OFFSET_TABLE_.
if (Relocation::isX86GOTPC64(Relocation->Type)) {
auto [Sym, Addend] = handleGOTPC64(*Relocation, InstAddress);
addOperand(Sym, Addend);
return true;
}

uint64_t SymbolValue = Relocation->Value - Relocation->Addend;
if (Relocation->isPCRelative())
SymbolValue += InstAddress + ImmOffset;
Expand All @@ -149,6 +158,26 @@ bool X86MCSymbolizer::tryAddingSymbolicOperand(
return true;
}

std::pair<MCSymbol *, uint64_t>
X86MCSymbolizer::handleGOTPC64(const Relocation &R, uint64_t InstrAddr) {
BinaryContext &BC = Function.getBinaryContext();
const BinaryData *GOTSymBD = BC.getGOTSymbol();
if (!GOTSymBD || !GOTSymBD->getAddress()) {
errs() << "BOLT-ERROR: R_X86_GOTPC64 relocation is present but we did "
"not detect a valid _GLOBAL_OFFSET_TABLE_ in symbol table\n";
exit(1);
}
// R_X86_GOTPC64 are not relative to the Reloc nor end of instruction,
// but the start of the MOVABSQ instruction. So the Target Address is
// whatever is encoded in the original operand when we disassembled
// the binary (here, R.Value) plus MOVABSQ address (InstrAddr).
// Here we extract the intended Addend by subtracting the real
// GOT addr.
const int64_t Addend = R.Value + InstrAddr - GOTSymBD->getAddress();
return std::make_pair(BC.Ctx->getOrCreateSymbol("_GLOBAL_OFFSET_TABLE_"),
Addend);
}

void X86MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
int64_t Value,
uint64_t Address) {}
Expand Down
3 changes: 3 additions & 0 deletions bolt/lib/Target/X86/X86MCSymbolizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ class X86MCSymbolizer : public MCSymbolizer {
BinaryFunction &Function;
bool CreateNewSymbols{true};

/// Compute the symbolic operand for an instruction covered by an
/// R_X86_64_GOTPC64 relocation \p R. \p InstrAddr is the address of that
/// instruction. Returns the _GLOBAL_OFFSET_TABLE_ symbol together with the
/// addend to apply to it. Terminates the process with an error message if no
/// valid _GLOBAL_OFFSET_TABLE_ symbol is found.
std::pair<MCSymbol *, uint64_t> handleGOTPC64(const Relocation &R,
uint64_t InstrAddr);

public:
X86MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
: MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),
Expand Down
57 changes: 57 additions & 0 deletions bolt/test/runtime/X86/gotoff-large-code-model-2.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# A variation of gotoff-large-code-model.s that accesses GOT value
# with a slightly different code sequence.

# REQUIRES: system-linux

# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
# RUN: %s -o %t.o
# RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-q

# RUN: llvm-bolt %t.exe --funcs init_impls --lite \
# RUN: -o %t.bolted
# RUN: %t.bolted | FileCheck %s

.section .rodata.str1.1,"aMS",@progbits,1
.LC2:
.string "Hello, world\n"
.text
.p2align 4
# init_impls: materializes the GOT address in %rbx, then uses GOT-relative
# 64-bit offsets to locate the .LC2 string and puts, and calls puts on it.
# In this variant the local label sits on the movabsq itself.
.globl init_impls
.type init_impls, @function
init_impls:
.cfi_startproc
# Prologue: save the frame pointer and the two registers used below.
push %rbp
mov %rsp,%rbp
push %r15
push %rbx
sub $0x8,%rsp
# rbx = run-time address of label 1, i.e. of the movabsq that follows.
lea 1f(%rip),%rbx
# R_X86_64_GOTPC64 _GLOBAL_OFFSET_TABLE_+0x2
# (the +0x2 addend presumably reflects the immediate starting 2 bytes into
# the movabsq, so the stored value is GOT minus the address of label 1)
1: movabsq $_GLOBAL_OFFSET_TABLE_, %r11
# rbx = label address + (GOT - label address) = GOT address.
add %r11,%rbx
# R_X86_64_GOTOFF64 .LC2
# rax = offset of the string from the GOT; adding rbx gives its address.
movabs $.LC2@gotoff,%rax
lea (%rbx,%rax,1),%rax
# First argument for the call below.
mov %rax,%rdi
# Copy of the GOT address in r15 - presumably required by the large-model
# PLT calling convention; confirm against the psABI.
mov %rbx,%r15
# R_X86_64_PLTOFF64 puts
# rax = GOT-relative offset of the puts PLT entry; add the GOT address and
# call through the register.
movabs $puts@pltoff,%rax
add %rbx,%rax
call *%rax
# Epilogue: undo the prologue in reverse order.
add $0x8,%rsp
pop %rbx
pop %r15
pop %rbp
retq
.cfi_endproc
.size init_impls, .-init_impls

# main: calls init_impls once and returns 0.
.globl main
.type main, @function
.p2align 4
main:
callq init_impls
# Zero rax so the process exit status is 0.
xorq %rax, %rax
ret

# CHECK: Hello, world
55 changes: 55 additions & 0 deletions bolt/test/runtime/X86/gotoff-large-code-model.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# REQUIRES: system-linux

# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
# RUN: %s -o %t.o
# RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-q

# RUN: llvm-bolt %t.exe --funcs init_impls --lite \
# RUN: -o %t.bolted
# RUN: %t.bolted | FileCheck %s

.section .rodata.str1.1,"aMS",@progbits,1
.LC2:
.string "Hello, world\n"
.text
.p2align 4
# init_impls: materializes the GOT address in %rbx, then uses GOT-relative
# 64-bit offsets to locate the .LC2 string and puts, and calls puts on it.
# In this variant the local label sits on the lea that precedes the movabsq.
.globl init_impls
.type init_impls, @function
init_impls:
.cfi_startproc
# Prologue: save the frame pointer and the two registers used below.
push %rbp
mov %rsp,%rbp
push %r15
push %rbx
sub $0x8,%rsp
1:
# rbx = run-time address of label 1, i.e. of this lea itself.
lea 1b(%rip),%rbx
# R_X86_64_GOTPC64 _GLOBAL_OFFSET_TABLE_+0x9
# (the +0x9 addend presumably covers the distance from label 1 to the
# movabsq immediate, so the stored value is GOT minus the address of label 1)
movabsq $_GLOBAL_OFFSET_TABLE_-1b, %r11
# rbx = label address + (GOT - label address) = GOT address.
add %r11,%rbx
# R_X86_64_GOTOFF64 .LC2
# rax = offset of the string from the GOT; adding rbx gives its address.
movabs $.LC2@gotoff,%rax
lea (%rbx,%rax,1),%rax
# First argument for the call below.
mov %rax,%rdi
# Copy of the GOT address in r15 - presumably required by the large-model
# PLT calling convention; confirm against the psABI.
mov %rbx,%r15
# R_X86_64_PLTOFF64 puts
# rax = GOT-relative offset of the puts PLT entry; add the GOT address and
# call through the register.
movabs $puts@pltoff,%rax
add %rbx,%rax
call *%rax
# Epilogue: undo the prologue in reverse order.
add $0x8,%rsp
pop %rbx
pop %r15
pop %rbp
retq
.cfi_endproc
.size init_impls, .-init_impls

# main: calls init_impls once and returns 0.
.globl main
.type main, @function
.p2align 4
main:
callq init_impls
# Zero rax so the process exit status is 0.
xorq %rax, %rax
ret

# CHECK: Hello, world

0 comments on commit 853e126

Please sign in to comment.