Skip to content

Commit

Permalink
[BOLT] Add support for GOTPCRELX relocations
Browse files Browse the repository at this point in the history
The linker can convert instructions with GOTPCRELX relocations into a
form that uses absolute addressing with an immediate operand. BOLT needs
to recognize such conversions and symbolize the immediates.

Reviewed By: rafauler

Differential Revision: https://reviews.llvm.org/D126747
  • Loading branch information
maksfb committed Jun 9, 2022
1 parent ffe86e3 commit 1817642
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 68 deletions.
18 changes: 16 additions & 2 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -842,6 +842,20 @@ class BinaryFunction {
return (RI == Relocations.end()) ? nullptr : &RI->second;
}

/// Look up the lowest-offset relocation recorded for this function whose
/// offset falls inside the half-open interval [StartOffset, EndOffset).
/// Yields a pointer to that relocation, or nullptr when the interval holds
/// no relocation at all.
const Relocation *getRelocationInRange(uint64_t StartOffset,
                                       uint64_t EndOffset) const {
  assert(CurrentState == State::Empty &&
         "Relocations unavailable in the current function state.");
  // lower_bound() gives the first entry keyed at or after StartOffset; it is
  // a match only when it also lies strictly before EndOffset.
  const auto Candidate = Relocations.lower_bound(StartOffset);
  if (Candidate == Relocations.end())
    return nullptr;

  return Candidate->first < EndOffset ? &Candidate->second : nullptr;
}

/// Returns the raw binary encoding of this function.
ErrorOr<ArrayRef<uint8_t>> getData() const;

Expand Down Expand Up @@ -1314,11 +1328,11 @@ class BinaryFunction {
case ELF::R_X86_64_PC8:
case ELF::R_X86_64_PC32:
case ELF::R_X86_64_PC64:
case ELF::R_X86_64_GOTPCRELX:
case ELF::R_X86_64_REX_GOTPCRELX:
Relocations[Offset] = Relocation{Offset, Symbol, RelType, Addend, Value};
return;
case ELF::R_X86_64_PLT32:
case ELF::R_X86_64_GOTPCRELX:
case ELF::R_X86_64_REX_GOTPCRELX:
case ELF::R_X86_64_GOTPCREL:
case ELF::R_X86_64_TPOFF32:
case ELF::R_X86_64_GOTTPOFF:
Expand Down
3 changes: 3 additions & 0 deletions bolt/include/bolt/Core/Relocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ struct Relocation {
/// Return true if relocation type implies the creation of a GOT entry
static bool isGOT(uint64_t Type);

/// Return true if \p Type is R_X86_64_GOTPCRELX or R_X86_64_REX_GOTPCRELX
/// and the current architecture is x86-64; return false otherwise. These are
/// special relocation types that allow the linker to modify the instruction.
static bool isX86GOTPCRELX(uint64_t Type);

/// Return true if relocation type is NONE
static bool isNone(uint64_t Type);

Expand Down
6 changes: 6 additions & 0 deletions bolt/lib/Core/Relocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,12 @@ bool Relocation::isGOT(uint64_t Type) {
return isGOTX86(Type);
}

/// Report whether \p Type is one of the two x86-64 GOTPCRELX relocation
/// flavours. Always false when the current architecture is not x86-64.
bool Relocation::isX86GOTPCRELX(uint64_t Type) {
  const bool IsX86 = (Arch == Triple::x86_64);
  if (!IsX86)
    return false;

  switch (Type) {
  case ELF::R_X86_64_GOTPCRELX:
  case ELF::R_X86_64_REX_GOTPCRELX:
    return true;
  default:
    return false;
  }
}

/// Report whether \p Type equals the relocation type returned by getNone().
bool Relocation::isNone(uint64_t Type) {
  const bool MatchesNone = (Type == getNone());
  return MatchesNone;
}

bool Relocation::isRelative(uint64_t Type) {
Expand Down
106 changes: 70 additions & 36 deletions bolt/lib/Target/X86/X86MCSymbolizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,54 +47,88 @@ bool X86MCSymbolizer::tryAddingSymbolicOperand(
Inst.addOperand(MCOperand::createExpr(Expr));
};

// Check for relocations against the operand.
const uint64_t InstOffset = InstAddress - Function.getAddress();
if (const Relocation *Relocation =
Function.getRelocationAt(InstOffset + ImmOffset)) {
uint64_t SymbolValue = Relocation->Value - Relocation->Addend;
if (Relocation->isPCRelative())
SymbolValue += InstAddress + ImmOffset;
// Check if the operand being added is a displacement part of a compound
// memory operand that uses PC-relative addressing. If it is, try to symbolize
// it without relocations. Return true on success, false otherwise.
auto processPCRelOperandNoRel = [&]() {
const int MemOp = BC.MIB->getMemoryOperandNo(Inst);
if (MemOp == -1)
return false;

const unsigned DispOp = MemOp + X86::AddrDisp;
if (Inst.getNumOperands() != DispOp)
return false;

// Process reference to the symbol.
BC.handleAddressRef(SymbolValue, Function, Relocation->isPCRelative());
const MCOperand &Base = Inst.getOperand(MemOp + X86::AddrBaseReg);
if (Base.getReg() != BC.MRI->getProgramCounter())
return false;

uint64_t Addend = Relocation->Addend;
// Real addend for pc-relative targets is adjusted with a delta from
// the relocation placement to the next instruction.
if (Relocation->isPCRelative())
Addend += InstOffset + InstSize - Relocation->Offset;
const MCOperand &Scale = Inst.getOperand(MemOp + X86::AddrScaleAmt);
const MCOperand &Index = Inst.getOperand(MemOp + X86::AddrIndexReg);
if (Scale.getImm() != 0 && Index.getReg() != MCRegister::NoRegister)
return false;

const MCSymbol *TargetSymbol;
uint64_t TargetOffset;
std::tie(TargetSymbol, TargetOffset) =
BC.handleAddressRef(Value, Function, /*IsPCRel=*/true);

addOperand(TargetSymbol, TargetOffset);

return true;
};

addOperand(Relocation->Symbol, Addend);
// Check for GOTPCRELX relocations first. Because these relocations allow the
// linker to modify the instruction, we have to check the offset range
// corresponding to the instruction, not the offset of the operand.
// Note that if there is GOTPCRELX relocation against the instruction, there
// will be no other relocation in this range, since GOTPCRELX applies only to
// certain instruction types.
const uint64_t InstOffset = InstAddress - Function.getAddress();
const Relocation *Relocation =
Function.getRelocationInRange(InstOffset, InstOffset + InstSize);
if (Relocation && Relocation::isX86GOTPCRELX(Relocation->Type)) {
// If the operand is PC-relative, convert it without using the relocation
// information. For GOTPCRELX, it is safe to use the absolute address
// instead of extracting the addend from the relocation, as non-standard
// forms will be rejected by linker conversion process and the operand
// will always reference GOT which we don't rewrite.
if (processPCRelOperandNoRel())
return true;

// The linker converted the PC-relative address to an absolute one.
// Symbolize this address.
BC.handleAddressRef(Value, Function, /*IsPCRel=*/false);
const BinaryData *Target = BC.getBinaryDataAtAddress(Value);
assert(Target &&
"BinaryData should exist at converted GOTPCRELX destination");

addOperand(Target->getSymbol(), /*Addend=*/0);

return true;
}

// Check if the operand being added is a displacement part of a compound
// memory operand that uses PC-relative addressing. If it is, try to symbolize
// it without relocations.
const int MemOp = BC.MIB->getMemoryOperandNo(Inst);
if (MemOp == -1)
return false;
// Check for relocations against the operand.
if (!Relocation || Relocation->Offset != InstOffset + ImmOffset)
Relocation = Function.getRelocationAt(InstOffset + ImmOffset);

const unsigned DispOp = MemOp + X86::AddrDisp;
if (Inst.getNumOperands() != DispOp)
return false;
if (!Relocation)
return processPCRelOperandNoRel();

const MCOperand &Base = Inst.getOperand(MemOp + X86::AddrBaseReg);
if (Base.getReg() != BC.MRI->getProgramCounter())
return false;
uint64_t SymbolValue = Relocation->Value - Relocation->Addend;
if (Relocation->isPCRelative())
SymbolValue += InstAddress + ImmOffset;

const MCOperand &Scale = Inst.getOperand(MemOp + X86::AddrScaleAmt);
const MCOperand &Index = Inst.getOperand(MemOp + X86::AddrIndexReg);
if (Scale.getImm() != 0 && Index.getReg() != MCRegister::NoRegister)
return false;
// Process reference to the symbol.
BC.handleAddressRef(SymbolValue, Function, Relocation->isPCRelative());

const MCSymbol *TargetSymbol;
uint64_t TargetOffset;
std::tie(TargetSymbol, TargetOffset) =
BC.handleAddressRef(Value, Function, /*IsPCRel*/ true);
uint64_t Addend = Relocation->Addend;
// Real addend for pc-relative targets is adjusted with a delta from
// the relocation placement to the next instruction.
if (Relocation->isPCRelative())
Addend += InstOffset + InstSize - Relocation->Offset;

addOperand(TargetSymbol, TargetOffset);
addOperand(Relocation->Symbol, Addend);

return true;
}
Expand Down
83 changes: 53 additions & 30 deletions bolt/test/X86/gotpcrelx.s
Original file line number Diff line number Diff line change
@@ -1,46 +1,69 @@
# This reproduces a bug with misinterpreting the gotpcrelx reloc

# Here we use llvm-mc -relax-relocations to produce R_X86_64_REX_GOTPCRELX
# and ld.lld to consume it and optimize it, transforming a CMP <mem, reg>
# into CMP <imm, reg>.
# Then we check that BOLT correctly updates the imm operand that references
# a function address. Currently XFAIL as we do not support it.

# REQUIRES: system-linux
# XFAIL: *

## Check that BOLT correctly handles different types of instructions with
## R_X86_64_GOTPCRELX or R_X86_64_REX_GOTPCRELX relocations and different
## kinds of handling of the relocation by the linker (no relaxation, pic, and
## non-pic).

# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux \
# RUN: -relax-relocations %s -o %t.o
# RUN: llvm-strip --strip-unneeded %t.o
# RUN: ld.lld %t.o -o %t.exe -q
# RUN: llvm-readobj -r %t.exe | FileCheck --check-prefix=READOBJ %s
# RUN: llvm-bolt %t.exe -relocs -o %t.out -lite=0
# RUN: ld.lld %t.o -o %t.pie.exe -q -pie
# RUN: ld.lld %t.o -o %t.no-relax.exe -q --no-relax
# RUN: llvm-bolt %t.exe -relocs -o %t.out -print-cfg -print-only=_start \
# RUN: |& FileCheck --check-prefix=BOLT %s
# RUN: llvm-bolt %t.pie.exe -o /dev/null -print-cfg -print-only=_start \
# RUN: |& FileCheck --check-prefix=PIE-BOLT %s
# RUN: llvm-bolt %t.no-relax.exe -o /dev/null -print-cfg -print-only=_start \
# RUN: |& FileCheck --check-prefix=NO-RELAX-BOLT %s
# RUN: llvm-objdump -d --no-show-raw-insn --print-imm-hex \
# RUN: %t.out | FileCheck --check-prefix=DISASM %s

# Check that R_X86_64_REX_GOTPCRELX is present in the input binary
# READOBJ: 0x[[#%X,]] R_X86_64_REX_GOTPCRELX foo 0x[[#%X,]]

# DISASM: Disassembly of section .text:
# DISASM-EMPTY:
# DISASM-NEXT: <_start>:
# DISASM-NEXT: leaq 0x[[#%x,ADDR:]], %rax
# DISASM-NEXT: cmpq 0x[[#ADDR]], %rax

.text
.globl _start
.type _start, %function
_start:
.cfi_startproc
leaq foo, %rax
cmpq foo@GOTPCREL(%rip), %rax
je b
c:
mov $1, %rdi
callq foo
b:
xorq %rdi, %rdi
callq foo
# DISASM: Disassembly of section .text:
# DISASM-EMPTY:
# DISASM-NEXT: <_start>:

call *foo@GOTPCREL(%rip)
# NO-RELAX-BOLT: callq *{{.*}}(%rip)
# BOLT: callq foo
# PIE-BOLT: callq foo
# DISASM-NEXT: callq 0x[[#%x,ADDR:]]

movq foo@GOTPCREL(%rip), %rdi
# NO-RELAX-BOLT-NEXT: movq {{.*}}(%rip), %rdi
# BOLT-NEXT: leaq foo(%rip), %rdi
# PIE-BOLT-NEXT: leaq foo(%rip), %rdi
# DISASM-NEXT: leaq {{.*}}(%rip), %rdi # 0x[[#ADDR]]

movl foo@GOTPCREL+4(%rip), %edi
# NO-RELAX-BOLT-NEXT: movl {{.*}}(%rip), %edi
# BOLT-NEXT: movl {{.*}}(%rip), %edi
# PIE-BOLT-NEXT: movl {{.*}}(%rip), %edi
# DISASM-NEXT: movl {{.*}}(%rip), %edi

test %rdi, foo@GOTPCREL(%rip)
# NO-RELAX-BOLT-NEXT: testq %rdi, DATA{{.*}}(%rip)
# BOLT-NEXT: testq $foo, %rdi
# PIE-BOLT-NEXT: testq %rdi, DATA{{.*}}(%rip)
# DISASM-NEXT: testq $0x[[#ADDR]], %rdi

cmpq foo@GOTPCREL(%rip), %rax
# NO-RELAX-BOLT-NEXT: cmpq DATA{{.*}}(%rip), %rax
# BOLT-NEXT: cmpq $foo, %rax
# PIE-BOLT-NEXT: cmpq DATA{{.*}}(%rip), %rax
# DISASM-NEXT: cmpq $0x[[#ADDR]], %rax

jmp *foo@GOTPCREL(%rip)
# NO-RELAX-BOLT-NEXT: jmpq *DATA{{.*}}(%rip)
# BOLT-NEXT: jmp foo
# PIE-BOLT-NEXT: jmp foo
# DISASM-NEXT: jmp 0x[[#ADDR]]

ret
.cfi_endproc
.size _start, .-_start
Expand Down

0 comments on commit 1817642

Please sign in to comment.