Skip to content

Commit

Permalink
Revert "Revert "[lld][Arm] Big Endian - Byte invariant support.""
Browse files Browse the repository at this point in the history
This reverts commit d885138.

Reason: Applied the fix for the Asan buildbot failures.
  • Loading branch information
simpal01 committed Jun 22, 2023
1 parent 81ec494 commit f146763
Show file tree
Hide file tree
Showing 32 changed files with 435 additions and 68 deletions.
125 changes: 124 additions & 1 deletion lld/ELF/Arch/ARM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//

#include "InputFiles.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
Expand Down Expand Up @@ -44,8 +45,11 @@ class ARM final : public TargetInfo {
void relocate(uint8_t *loc, const Relocation &rel,
uint64_t val) const override;
};
// Kind of content that follows a mapping symbol. The numeric value of Thumb
// and Arm is also used as the instruction unit width in bytes: the BE8
// conversion code casts a CodeState to its width and back.
enum class CodeState { Data = 0, Thumb = 2, Arm = 4 };
} // namespace

// Mapping symbols ($a, $t, $d) recorded per executable input section. Filled
// by addArmInputSectionMappingSymbols and addArmSyntheticSectionMappingSymbol,
// ordered by sortArmMappingSymbols and read by convertArmInstructionstoBE8.
static DenseMap<InputSection *, SmallVector<const Defined *, 0>> sectionMap{};

ARM::ARM() {
copyRel = R_ARM_COPY;
relativeRel = R_ARM_RELATIVE;
Expand All @@ -68,16 +72,24 @@ uint32_t ARM::calcEFlags() const {
// The ABIFloatType is used by loaders to detect the floating point calling
// convention.
uint32_t abiFloatType = 0;

// Set the EF_ARM_BE8 flag in the ELF header, if ELF file is big-endian
// with BE-8 code.
uint32_t armBE8 = 0;

if (config->armVFPArgs == ARMVFPArgKind::Base ||
config->armVFPArgs == ARMVFPArgKind::Default)
abiFloatType = EF_ARM_ABI_FLOAT_SOFT;
else if (config->armVFPArgs == ARMVFPArgKind::VFP)
abiFloatType = EF_ARM_ABI_FLOAT_HARD;

if (!config->isLE && config->armBe8)
armBE8 = EF_ARM_BE8;

// We don't currently use any features incompatible with EF_ARM_EABI_VER5,
// but we don't have any firm guarantees of conformance. Linux AArch64
// kernels (as of 2016) require an EABI version to be set.
return EF_ARM_EABI_VER5 | abiFloatType;
return EF_ARM_EABI_VER5 | abiFloatType | armBE8;
}

RelExpr ARM::getRelExpr(RelType type, const Symbol &s,
Expand Down Expand Up @@ -910,6 +922,117 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
}
}

// Returns true if the symbol is named "$a" or its name begins with "$a.",
// i.e. it is an Arm-state mapping symbol.
static bool isArmMapSymbol(const Symbol *b) {
  const auto name = b->getName();
  if (name == "$a")
    return true;
  return name.startswith("$a.");
}

// Returns true if the symbol is named "$t" or its name begins with "$t.",
// i.e. it is a Thumb-state mapping symbol.
static bool isThumbMapSymbol(const Symbol *s) {
  const auto name = s->getName();
  if (name == "$t")
    return true;
  return name.startswith("$t.");
}

// Returns true if the symbol is named "$d" or its name begins with "$d.",
// i.e. it is a data mapping symbol.
static bool isDataMapSymbol(const Symbol *b) {
  const auto name = b->getName();
  if (name == "$d")
    return true;
  return name.startswith("$d.");
}

void elf::sortArmMappingSymbols() {
  // Put the mapping symbols recorded for each input section into ascending
  // order of their value. The sort is stable, so symbols that share a value
  // keep the order in which they were recorded.
  const auto byValue = [](const Defined *lhs, const Defined *rhs) {
    return lhs->value < rhs->value;
  };
  for (auto &entry : sectionMap)
    llvm::stable_sort(entry.second, byValue);
}

void elf::addArmInputSectionMappingSymbols() {
  // Record the mapping symbols of every executable input section. Mapping
  // symbols that the linker generates for synthetic sections are added to
  // sectionMap separately, through addArmSyntheticSectionMappingSymbol.
  for (ELFFileBase *file : ctx.objectFiles) {
    for (Symbol *sym : file->getLocalSymbols()) {
      // Skip local symbols that are not Defined.
      auto *def = dyn_cast<Defined>(sym);
      if (!def)
        continue;

      const bool isMapSym = isArmMapSymbol(def) || isDataMapSymbol(def) ||
                            isThumbMapSymbol(def);
      if (!isMapSym)
        continue;

      // Only symbols that sit in an executable section are recorded.
      auto *sec = cast_if_present<InputSection>(def->section);
      if (sec && (sec->flags & SHF_EXECINSTR))
        sectionMap[sec].push_back(def);
    }
  }
}

// A synthetic section has no backing ELF file whose symbol table could be
// read; mapping symbols added to it live in the synthetic symbol table
// instead. Stripping (--strip-all) means that table cannot be relied on to
// retain the mapping symbols, so they are recorded locally here.
void elf::addArmSyntheticSectionMappingSymbol(Defined *sym) {
  const bool isMapSym =
      isArmMapSymbol(sym) || isDataMapSymbol(sym) || isThumbMapSymbol(sym);
  if (!isMapSym)
    return;

  // Only symbols that sit in an executable section are recorded.
  auto *sec = cast_if_present<InputSection>(sym->section);
  if (sec && (sec->flags & SHF_EXECINSTR))
    sectionMap[sec].push_back(sym);
}

static void toLittleEndianInstructions(uint8_t *buf, uint64_t start,
uint64_t end, uint64_t width) {
CodeState curState = static_cast<CodeState>(width);
if (curState == CodeState::Arm)
for (uint64_t i = start; i < end; i += width)
write32le(buf + i, read32(buf + i));

if (curState == CodeState::Thumb)
for (uint64_t i = start; i < end; i += width)
write16le(buf + i, read16(buf + i));
}

// Arm BE8 big endian format requires instructions to be little endian, while
// the section contents start out big-endian. Convert the big-endian
// instructions to little endian, leaving literal data untouched. Mapping
// symbols identify half open intervals of Arm code [$a, non $a) and Thumb
// code [$t, non $t); these are converted a word or a half word at a time
// respectively.
//
//   sec  input section whose recorded mapping symbols drive the conversion;
//        sections with no recorded mapping symbols are left unchanged.
//   buf  the section's bytes; mapping symbol values are used as offsets into
//        it.
//
// The mapping symbols are walked in their stored order, so they are expected
// to have been sorted by value beforehand (see sortArmMappingSymbols).
void elf::convertArmInstructionstoBE8(InputSection *sec, uint8_t *buf) {
  // A single find() replaces the contains() + operator[] pair and can never
  // insert into the map.
  auto it = sectionMap.find(sec);
  if (it == sectionMap.end())
    return;

  const SmallVector<const Defined *, 0> &mapSyms = it->second;
  if (mapSyms.empty())
    return;

  const uint64_t size = sec->getSize();
  CodeState curState = CodeState::Data;
  uint64_t start = 0;
  for (const Defined *msym : mapSyms) {
    CodeState newState = CodeState::Data;
    if (isThumbMapSymbol(msym))
      newState = CodeState::Thumb;
    else if (isArmMapSymbol(msym))
      newState = CodeState::Arm;

    // A repeated mapping symbol of the same kind does not end the interval.
    if (newState == curState)
      continue;

    if (curState != CodeState::Data) {
      // Never convert beyond the end of the section, even if a mapping
      // symbol's value lies past it.
      const uint64_t end = msym->value < size ? msym->value : size;
      toLittleEndianInstructions(buf, start, end,
                                 static_cast<uint64_t>(curState));
    }
    start = msym->value;
    curState = newState;
  }

  // Past the last mapping symbol: a code interval that is still open extends
  // to the end of the section.
  if (curState != CodeState::Data)
    toLittleEndianInstructions(buf, start, size,
                               static_cast<uint64_t>(curState));
}

TargetInfo *elf::getARMTargetInfo() {
static ARM target;
return &target;
Expand Down
1 change: 1 addition & 0 deletions lld/ELF/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ struct Config {
bool armHasMovtMovw = false;
bool armJ1J2BranchEncoding = false;
bool asNeeded = false;
bool armBe8 = false;
BsymbolicKind bsymbolic = BsymbolicKind::None;
bool callGraphProfileSort;
bool checkSections;
Expand Down
6 changes: 5 additions & 1 deletion lld/ELF/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -351,9 +351,12 @@ static void checkOptions() {
if (config->fixCortexA8 && config->emachine != EM_ARM)
error("--fix-cortex-a8 is only supported on ARM targets");

if (config->armBe8 && config->emachine != EM_ARM)
error("--be8 is only supported on ARM targets");

if (config->fixCortexA8 && !config->isLE)
error("--fix-cortex-a8 is not supported on big endian targets");

if (config->tocOptimize && config->emachine != EM_PPC64)
error("--toc-optimize is only supported on PowerPC64 targets");

Expand Down Expand Up @@ -1115,6 +1118,7 @@ static void readConfigs(opt::InputArgList &args) {
OPT_no_android_memtag_stack, false);
config->androidMemtagMode = getMemtagMode(args);
config->auxiliaryList = args::getStrings(args, OPT_auxiliary);
config->armBe8 = args.hasArg(OPT_be8);
if (opt::Arg *arg =
args.getLastArg(OPT_Bno_symbolic, OPT_Bsymbolic_non_weak_functions,
OPT_Bsymbolic_functions, OPT_Bsymbolic)) {
Expand Down
2 changes: 2 additions & 0 deletions lld/ELF/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ multiclass B<string name, string help1, string help2> {

defm auxiliary: Eq<"auxiliary", "Set DT_AUXILIARY field to the specified name">;

def be8: F<"be8">, HelpText<"write a Big Endian ELF file using BE8 format (AArch32 only)">;

def Bno_symbolic: F<"Bno-symbolic">, HelpText<"Don't bind default visibility defined symbols locally for -shared (default)">;

def Bsymbolic: F<"Bsymbolic">, HelpText<"Bind default visibility defined symbols locally for -shared">;
Expand Down
6 changes: 6 additions & 0 deletions lld/ELF/OutputSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,12 @@ void OutputSection::writeTo(uint8_t *buf, parallel::TaskGroup &tg) {
else
isec->writeTo<ELFT>(buf + isec->outSecOff);

// When in Arm BE8 mode, the linker has to convert the big-endian
// instructions to little-endian, leaving the data big-endian.
if (config->emachine == EM_ARM && !config->isLE && config->armBe8 &&
(flags & SHF_EXECINSTR))
convertArmInstructionstoBE8(isec, buf + isec->outSecOff);

// Fill gaps between sections.
if (nonZeroFiller) {
uint8_t *start = buf + isec->outSecOff + isec->getSize();
Expand Down
7 changes: 7 additions & 0 deletions lld/ELF/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,13 @@ Defined *elf::addSyntheticLocal(StringRef name, uint8_t type, uint64_t value,
value, size, &section);
if (in.symTab)
in.symTab->addSymbol(s);

if (config->emachine == EM_ARM && !config->isLE && config->armBe8 &&
(section.flags & SHF_EXECINSTR))
// Adding Linker generated mapping symbols to the arm specific mapping
// symbols list.
addArmSyntheticSectionMappingSymbol(s);

return s;
}

Expand Down
4 changes: 4 additions & 0 deletions lld/ELF/Target.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,10 @@ uint64_t getPPC64TocBase();
uint64_t getAArch64Page(uint64_t expr);
void riscvFinalizeRelax(int passes);
void mergeRISCVAttributesSections();
// Arm BE8 support, implemented in Arch/ARM.cpp.
// Records the $a/$t/$d mapping symbols found among the local symbols of the
// object files, for symbols that sit in executable input sections.
void addArmInputSectionMappingSymbols();
// Records a linker-generated symbol if it is a mapping symbol in an
// executable section; other symbols are ignored.
void addArmSyntheticSectionMappingSymbol(Defined *);
// Sorts the recorded mapping symbols of each section by ascending value.
void sortArmMappingSymbols();
// Rewrites the Arm and Thumb instruction ranges of sec, whose bytes are at
// buf, as little-endian; ranges marked as data are left untouched.
void convertArmInstructionstoBE8(InputSection *sec, uint8_t *buf);

LLVM_LIBRARY_VISIBILITY extern const TargetInfo *target;
TargetInfo *getTarget();
Expand Down
30 changes: 20 additions & 10 deletions lld/ELF/Thunks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,8 @@ void ThumbV6MABSLongThunk::addSymbols(ThunkSection &isec) {
addSymbol(saver().save("__Thumbv6MABSLongThunk_" + destination.getName()),
STT_FUNC, 1, isec);
addSymbol("$t", STT_NOTYPE, 0, isec);
addSymbol("$d", STT_NOTYPE, 8, isec);
if (!getMayUseShortThunk())
addSymbol("$d", STT_NOTYPE, 8, isec);
}

void ThumbV6MPILongThunk::writeLong(uint8_t *buf) {
Expand All @@ -767,7 +768,8 @@ void ThumbV6MPILongThunk::addSymbols(ThunkSection &isec) {
addSymbol(saver().save("__Thumbv6MPILongThunk_" + destination.getName()),
STT_FUNC, 1, isec);
addSymbol("$t", STT_NOTYPE, 0, isec);
addSymbol("$d", STT_NOTYPE, 12, isec);
if (!getMayUseShortThunk())
addSymbol("$d", STT_NOTYPE, 12, isec);
}

void ARMV5LongLdrPcThunk::writeLong(uint8_t *buf) {
Expand All @@ -780,7 +782,8 @@ void ARMV5LongLdrPcThunk::addSymbols(ThunkSection &isec) {
addSymbol(saver().save("__ARMv5LongLdrPcThunk_" + destination.getName()),
STT_FUNC, 0, isec);
addSymbol("$a", STT_NOTYPE, 0, isec);
addSymbol("$d", STT_NOTYPE, 4, isec);
if (!getMayUseShortThunk())
addSymbol("$d", STT_NOTYPE, 4, isec);
}

void ARMV4ABSLongBXThunk::writeLong(uint8_t *buf) {
Expand All @@ -794,7 +797,8 @@ void ARMV4ABSLongBXThunk::addSymbols(ThunkSection &isec) {
addSymbol(saver().save("__ARMv4ABSLongBXThunk_" + destination.getName()),
STT_FUNC, 0, isec);
addSymbol("$a", STT_NOTYPE, 0, isec);
addSymbol("$d", STT_NOTYPE, 8, isec);
if (!getMayUseShortThunk())
addSymbol("$d", STT_NOTYPE, 8, isec);
}

void ThumbV4ABSLongBXThunk::writeLong(uint8_t *buf) {
Expand All @@ -810,7 +814,8 @@ void ThumbV4ABSLongBXThunk::addSymbols(ThunkSection &isec) {
STT_FUNC, 1, isec);
addSymbol("$t", STT_NOTYPE, 0, isec);
addSymbol("$a", STT_NOTYPE, 4, isec);
addSymbol("$d", STT_NOTYPE, 8, isec);
if (!getMayUseShortThunk())
addSymbol("$d", STT_NOTYPE, 8, isec);
}

void ThumbV4ABSLongThunk::writeLong(uint8_t *buf) {
Expand All @@ -827,7 +832,8 @@ void ThumbV4ABSLongThunk::addSymbols(ThunkSection &isec) {
STT_FUNC, 1, isec);
addSymbol("$t", STT_NOTYPE, 0, isec);
addSymbol("$a", STT_NOTYPE, 4, isec);
addSymbol("$d", STT_NOTYPE, 12, isec);
if (!getMayUseShortThunk())
addSymbol("$d", STT_NOTYPE, 12, isec);
}

void ARMV4PILongBXThunk::writeLong(uint8_t *buf) {
Expand All @@ -844,7 +850,8 @@ void ARMV4PILongBXThunk::addSymbols(ThunkSection &isec) {
addSymbol(saver().save("__ARMv4PILongBXThunk_" + destination.getName()),
STT_FUNC, 0, isec);
addSymbol("$a", STT_NOTYPE, 0, isec);
addSymbol("$d", STT_NOTYPE, 12, isec);
if (!getMayUseShortThunk())
addSymbol("$d", STT_NOTYPE, 12, isec);
}

void ARMV4PILongThunk::writeLong(uint8_t *buf) {
Expand All @@ -860,7 +867,8 @@ void ARMV4PILongThunk::addSymbols(ThunkSection &isec) {
addSymbol(saver().save("__ARMv4PILongThunk_" + destination.getName()),
STT_FUNC, 0, isec);
addSymbol("$a", STT_NOTYPE, 0, isec);
addSymbol("$d", STT_NOTYPE, 8, isec);
if (!getMayUseShortThunk())
addSymbol("$d", STT_NOTYPE, 8, isec);
}

void ThumbV4PILongBXThunk::writeLong(uint8_t *buf) {
Expand All @@ -879,7 +887,8 @@ void ThumbV4PILongBXThunk::addSymbols(ThunkSection &isec) {
STT_FUNC, 1, isec);
addSymbol("$t", STT_NOTYPE, 0, isec);
addSymbol("$a", STT_NOTYPE, 4, isec);
addSymbol("$d", STT_NOTYPE, 12, isec);
if (!getMayUseShortThunk())
addSymbol("$d", STT_NOTYPE, 12, isec);
}

void ThumbV4PILongThunk::writeLong(uint8_t *buf) {
Expand All @@ -899,7 +908,8 @@ void ThumbV4PILongThunk::addSymbols(ThunkSection &isec) {
STT_FUNC, 1, isec);
addSymbol("$t", STT_NOTYPE, 0, isec);
addSymbol("$a", STT_NOTYPE, 4, isec);
addSymbol("$d", STT_NOTYPE, 16, isec);
if (!getMayUseShortThunk())
addSymbol("$d", STT_NOTYPE, 16, isec);
}

// Use the long jump which covers a range up to 8MiB.
Expand Down
5 changes: 5 additions & 0 deletions lld/ELF/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2149,6 +2149,11 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
sec->finalize();

script->checkMemoryRegions();

if (config->emachine == EM_ARM && !config->isLE && config->armBe8) {
addArmInputSectionMappingSymbols();
sortArmMappingSymbols();
}
}

// Ensure data sections are not mixed with executable sections when
Expand Down
2 changes: 2 additions & 0 deletions lld/docs/ld.lld.1
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ Bind default visibility defined function symbols locally for
.It Fl Bsymbolic-non-weak-functions
Bind default visibility defined STB_GLOBAL function symbols locally for
.Fl shared.
.It Fl -be8
Write a Big Endian ELF file using BE8 format (AArch32 only).
.It Fl -build-id Ns = Ns Ar value
Generate a build ID note.
.Ar value
Expand Down
6 changes: 6 additions & 0 deletions lld/test/ELF/arm-bl-v6.s
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@
// RUN: llvm-objdump --no-print-imm-hex -d --triple=armv6eb-none-linux-gnueabi --start-address=0x22100c --stop-address=0x221014 %t2 | FileCheck --check-prefix=CHECK-ARM2-EB %s
// RUN: llvm-objdump --no-print-imm-hex -d --triple=thumbv6eb-none-linux-gnueabi %t2 --start-address=0x622000 --stop-address=0x622002 | FileCheck --check-prefix=CHECK-THUMB2 %s

// RUN: ld.lld --be8 %t -o %t2
// RUN: llvm-objdump --no-print-imm-hex -d --triple=armv6eb-none-linux-gnueabi --start-address=0x21000 --stop-address=0x21008 %t2 | FileCheck --check-prefix=CHECK-ARM1 %s
// RUN: llvm-objdump --no-print-imm-hex -d --triple=thumbv6eb-none-linux-gnueabi %t2 --start-address=0x21008 --stop-address=0x2100c | FileCheck --check-prefix=CHECK-THUMB1 %s
// RUN: llvm-objdump --no-print-imm-hex -d --triple=armv6eb-none-linux-gnueabi --start-address=0x22100c --stop-address=0x221014 %t2 | FileCheck --check-prefix=CHECK-ARM2-EB %s
// RUN: llvm-objdump --no-print-imm-hex -d --triple=thumbv6eb-none-linux-gnueabi %t2 --start-address=0x622000 --stop-address=0x622002 | FileCheck --check-prefix=CHECK-THUMB2 %s

/// On Arm v6 the range of a Thumb BL instruction is only 4 megabytes as the
/// extended range encoding is not supported. The following example has a Thumb
/// BL that is out of range on ARM v6 and requires a range extension thunk.
Expand Down
3 changes: 3 additions & 0 deletions lld/test/ELF/arm-data-relocs.s
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
// RUN: ld.lld %t.be.o %t256.be.o -o %t.be
// RUN: llvm-objdump -s %t.be | FileCheck %s --check-prefixes=CHECK,BE

// RUN: ld.lld --be8 %t.be.o %t256.be.o -o %t.be8
// RUN: llvm-objdump -s %t.be8 | FileCheck %s --check-prefixes=CHECK,BE

.globl _start
_start:
.section .R_ARM_ABS, "ax","progbits"
Expand Down
4 changes: 4 additions & 0 deletions lld/test/ELF/arm-exidx-emit-relocs.s
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
// RUN: llvm-objdump -s --triple=armv7aeb-none-linux-gnueabi %t2 | FileCheck -check-prefix=CHECK-EB %s
// RUN: llvm-readelf --relocs %t2 | FileCheck -check-prefix=CHECK-RELOCS %s

// RUN: ld.lld --be8 --emit-relocs %t -o %t2
// RUN: llvm-objdump -s --triple=armv7aeb-none-linux-gnueabi %t2 | FileCheck -check-prefix=CHECK-EB %s
// RUN: llvm-readelf --relocs %t2 | FileCheck -check-prefix=CHECK-RELOCS %s

/// LLD does not support --emit-relocs for .ARM.exidx sections as the relocations
/// from synthetic table entries won't be represented. Given the known use cases
/// of --emit-relocs, relocating kernels, and binary analysis, the former doesn't
Expand Down
Loading

0 comments on commit f146763

Please sign in to comment.