Skip to content

Commit 58f3c5e

Browse files
[lld-macho] Fix thunks for non-__text TEXT sections (#99052)
This supersedes #87818 and fixes #52767. When calculating arm64 thunks, we make a few assumptions that may not hold when considering code sections outside of `__text`: 1. That a section needs thunks only if its size is larger than the branch range. 2. That any calls into `__stubs` are necessarily forward jumps (that is, the section with the jump is ordered before `__stubs`). Sections that violate these assumptions exist in the wild, most prominently the `__lcxx_override` section introduced in #69498. This change: - Ensures that if one section in `__TEXT` gets thunks, all of them do. - Makes all code sections in `__TEXT` contiguous (and guaranteed to be placed before `__stubs`).
1 parent 81e2a57 commit 58f3c5e

File tree

10 files changed

+112
-22
lines changed

10 files changed

+112
-22
lines changed

lld/MachO/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ add_lld_library(lldMachO
2626
OutputSegment.cpp
2727
Relocations.cpp
2828
SectionPriorities.cpp
29+
Sections.cpp
2930
SymbolTable.cpp
3031
Symbols.cpp
3132
SyntheticSections.cpp

lld/MachO/ConcatOutputSection.cpp

+12-2
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,20 @@ bool TextOutputSection::needsThunks() const {
127127
uint64_t isecAddr = addr;
128128
for (ConcatInputSection *isec : inputs)
129129
isecAddr = alignToPowerOf2(isecAddr, isec->align) + isec->getSize();
130-
if (isecAddr - addr + in.stubs->getSize() <=
131-
std::min(target->backwardBranchRange, target->forwardBranchRange))
130+
// Other sections besides __text might be small enough to pass this
131+
// test but nevertheless need thunks for calling into other sections.
132+
// An imperfect heuristic to use in this case is that if a section
133+
// we've already processed in this segment needs thunks, so do the
134+
// rest.
135+
bool needsThunks = parent && parent->needsThunks;
136+
if (!needsThunks &&
137+
isecAddr - addr + in.stubs->getSize() <=
138+
std::min(target->backwardBranchRange, target->forwardBranchRange))
132139
return false;
133140
// Yes, this program is large enough to need thunks.
141+
if (parent) {
142+
parent->needsThunks = true;
143+
}
134144
for (ConcatInputSection *isec : inputs) {
135145
for (Reloc &r : isec->relocs) {
136146
if (!target->hasAttr(r.type, RelocAttrBits::BRANCH))

lld/MachO/InputSection.cpp

+3-14
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "Config.h"
1212
#include "InputFiles.h"
1313
#include "OutputSegment.h"
14+
#include "Sections.h"
1415
#include "Symbols.h"
1516
#include "SyntheticSections.h"
1617
#include "Target.h"
@@ -366,20 +367,8 @@ uint64_t WordLiteralInputSection::getOffset(uint64_t off) const {
366367
}
367368

368369
bool macho::isCodeSection(const InputSection *isec) {
369-
uint32_t type = sectionType(isec->getFlags());
370-
if (type != S_REGULAR && type != S_COALESCED)
371-
return false;
372-
373-
uint32_t attr = isec->getFlags() & SECTION_ATTRIBUTES_USR;
374-
if (attr == S_ATTR_PURE_INSTRUCTIONS)
375-
return true;
376-
377-
if (isec->getSegName() == segment_names::text)
378-
return StringSwitch<bool>(isec->getName())
379-
.Cases(section_names::textCoalNt, section_names::staticInit, true)
380-
.Default(false);
381-
382-
return false;
370+
return sections::isCodeSection(isec->getName(), isec->getSegName(),
371+
isec->getFlags());
383372
}
384373

385374
bool macho::isCfStringSection(const InputSection *isec) {

lld/MachO/OutputSegment.cpp

+14-2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "OutputSegment.h"
1010
#include "ConcatOutputSection.h"
1111
#include "InputSection.h"
12+
#include "Sections.h"
1213
#include "Symbols.h"
1314
#include "SyntheticSections.h"
1415

@@ -89,9 +90,20 @@ static int sectionOrder(OutputSection *osec) {
8990
StringRef segname = osec->parent->name;
9091
// Sections are uniquely identified by their segment + section name.
9192
if (segname == segment_names::text) {
93+
if (osec->name == section_names::header)
94+
return -7;
95+
// `__text` needs to precede the other code sections since it's
96+
// expected to be the largest. This means in effect that it will
97+
// be the section that determines whether we need thunks or not.
98+
if (osec->name == section_names::text)
99+
return -6;
100+
// Ensure all code sections are contiguous with `__text` for thunk
101+
// calculations.
102+
if (sections::isCodeSection(osec->name, segment_names::text, osec->flags) &&
103+
osec->name != section_names::stubHelper) {
104+
return -5;
105+
}
92106
return StringSwitch<int>(osec->name)
93-
.Case(section_names::header, -6)
94-
.Case(section_names::text, -5)
95107
.Case(section_names::stubs, -4)
96108
.Case(section_names::stubHelper, -3)
97109
.Case(section_names::objcStubs, -2)

lld/MachO/OutputSegment.h

+1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ class OutputSegment {
5757
uint32_t initProt = 0;
5858
uint32_t flags = 0;
5959
uint8_t index;
60+
bool needsThunks = false;
6061

6162
llvm::TinyPtrVector<Defined *> segmentStartSymbols;
6263
llvm::TinyPtrVector<Defined *> segmentEndSymbols;

lld/MachO/Sections.cpp

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
//===- Sections.cpp ---------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "Sections.h"
10+
#include "InputSection.h"
11+
#include "OutputSegment.h"
12+
13+
#include "llvm/ADT/StringSwitch.h"
14+
15+
using namespace llvm;
16+
using namespace llvm::MachO;
17+
18+
namespace lld::macho::sections {
19+
bool isCodeSection(StringRef name, StringRef segName, uint32_t flags) {
20+
uint32_t type = sectionType(flags);
21+
if (type != S_REGULAR && type != S_COALESCED)
22+
return false;
23+
24+
uint32_t attr = flags & SECTION_ATTRIBUTES_USR;
25+
if (attr == S_ATTR_PURE_INSTRUCTIONS)
26+
return true;
27+
28+
if (segName == segment_names::text)
29+
return StringSwitch<bool>(name)
30+
.Cases(section_names::textCoalNt, section_names::staticInit, true)
31+
.Default(false);
32+
33+
return false;
34+
}
35+
36+
} // namespace lld::macho::sections

lld/MachO/Sections.h

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
//===- Sections.h ------------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLD_MACHO_SECTIONS_H
10+
#define LLD_MACHO_SECTIONS_H
11+
12+
#include "llvm/ADT/StringRef.h"
13+
14+
namespace lld::macho::sections {
15+
bool isCodeSection(llvm::StringRef name, llvm::StringRef segName,
16+
uint32_t flags);
17+
} // namespace lld::macho::sections
18+
19+
#endif // #ifndef LLD_MACHO_SECTIONS_H

lld/test/MachO/arm64-thunks.s

+20-1
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@
77
## (3) a second thunk is created when the first one goes out of range
88
## (4) early calls to a dylib stub use a thunk, and later calls the stub
99
## directly
10+
## (5) Thunks are created for all sections in the text segment with branches.
1011
## Notes:
1112
## 0x4000000 = 64 Mi = half the magnitude of the forward-branch range
1213

1314
# RUN: rm -rf %t; mkdir %t
1415
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o
15-
# RUN: %lld -arch arm64 -dead_strip -lSystem -o %t/thunk %t/input.o
16+
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -o %t/thunk %t/input.o
1617
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
1718

1819
# CHECK: Disassembly of section __TEXT,__text:
@@ -164,6 +165,10 @@
164165
# CHECK: adrp x16, 0x[[#%x, F_PAGE]]
165166
# CHECK: add x16, x16, #[[#F_OFFSET]]
166167

168+
# CHECK: Disassembly of section __TEXT,__lcxx_override:
169+
# CHECK: <_z>:
170+
# CHECK: bl 0x[[#%x, A_THUNK_0]] <_a.thunk.0>
171+
167172
# CHECK: Disassembly of section __TEXT,__stubs:
168173

169174
# CHECK: [[#%x, NAN_PAGE + NAN_OFFSET]] <__stubs>:
@@ -300,3 +305,17 @@ _main:
300305
bl _h
301306
bl ___nan
302307
ret
308+
309+
.section __TEXT,__cstring
310+
.space 0x4000000
311+
312+
.section __TEXT,__lcxx_override,regular,pure_instructions
313+
314+
.globl _z
315+
.no_dead_strip _z
316+
.p2align 2
317+
_z:
318+
bl _a
319+
## Ensure calling into stubs works
320+
bl _extern_sym
321+
ret

lld/test/MachO/section-order.s

+5-3
Original file line numberDiff line numberDiff line change
@@ -23,18 +23,20 @@
2323
# CHECK-12-NEXT: __cstring
2424

2525
# CHECK-21: __text
26+
## `foo` always sorts next to `__text` since it's a code section
27+
## and needs to be adjacent for arm64 thunk calculations
28+
# CHECK-21-NEXT: foo
2629
# CHECK-21-NEXT: __cstring
2730
# CHECK-21-NEXT: bar
28-
# CHECK-21-NEXT: foo
2931

3032
# CHECK-SYNTHETIC-ORDER: __text
33+
# CHECK-SYNTHETIC-ORDER-NEXT: foo
3134
# CHECK-SYNTHETIC-ORDER-NEXT: __stubs
3235
# CHECK-SYNTHETIC-ORDER-NEXT: __stub_helper
3336
# CHECK-SYNTHETIC-ORDER-NEXT: __objc_stubs
3437
# CHECK-SYNTHETIC-ORDER-NEXT: __init_offsets
3538
# CHECK-SYNTHETIC-ORDER-NEXT: __cstring
3639
# CHECK-SYNTHETIC-ORDER-NEXT: bar
37-
# CHECK-SYNTHETIC-ORDER-NEXT: foo
3840
# CHECK-SYNTHETIC-ORDER-NEXT: __unwind_info
3941
# CHECK-SYNTHETIC-ORDER-NEXT: __eh_frame
4042
# CHECK-SYNTHETIC-ORDER-NEXT: __objc_selrefs
@@ -52,5 +54,5 @@
5254
.asciz ""
5355
.section __TEXT,bar
5456
.space 1
55-
.section __TEXT,foo
57+
.section __TEXT,foo,regular,pure_instructions
5658
.space 1

llvm/utils/gn/secondary/lld/MachO/BUILD.gn

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ static_library("MachO") {
4444
"OutputSegment.cpp",
4545
"Relocations.cpp",
4646
"SectionPriorities.cpp",
47+
"Sections.cpp",
4748
"SymbolTable.cpp",
4849
"Symbols.cpp",
4950
"SyntheticSections.cpp",

0 commit comments

Comments
 (0)