Skip to content

Commit 3c24fae

Browse files
committed
[lld-macho] Add support for objc_msgSend stubs
Apple Clang in Xcode 14 introduced a new feature for reducing the overhead of objc_msgSend calls by deduplicating the setup calls for each individual selector. This works by clang adding undefined symbols for each selector called in a translation unit, such as `_objc_msgSend$foo` for calling the `foo` method on any `NSObject`. There are 2 different modes for this behavior: the default directly does the setup for `_objc_msgSend` and calls it, and the smaller option does the selector setup and then calls the standard `_objc_msgSend` stub function.

The general overview of how this works is:
- Undefined symbols with the given prefix are collected
- The suffix of each matching undefined symbol is added as a string to `__objc_methname`
- A pointer is added for every method name in the `__objc_selrefs` section
- A `got` entry is emitted for `_objc_msgSend`
- Stubs are emitted pointing to the synthesized locations

Notes:
- Both `__objc_methname` and `__objc_selrefs` can also exist from object files, so their contents are merged with our synthesized contents
- The compiler emits method names for defined methods, but not for undefined symbols you call, but stubs are used for both
- This only implements the default "fast" mode currently, just to reduce the diff; I also doubt many folks will care to swap modes
- This only implements this for arm64 and x86_64; we don't need to implement this for 32 bit iOS archs, but we should implement it for watchOS archs in a later diff

Differential Revision: https://reviews.llvm.org/D128108
1 parent ad5f789 commit 3c24fae

19 files changed

+518
-14
lines changed

lld/MachO/Arch/ARM.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ struct ARM : TargetInfo {
3737
void writeStubHelperEntry(uint8_t *buf, const Symbol &,
3838
uint64_t entryAddr) const override;
3939

40+
void writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr,
41+
uint64_t stubOffset, uint64_t selrefsVA,
42+
uint64_t selectorIndex, uint64_t gotAddr,
43+
uint64_t msgSendIndex) const override;
44+
4045
void relaxGotLoad(uint8_t *loc, uint8_t type) const override;
4146
uint64_t getPageSize() const override { return 4 * 1024; }
4247

@@ -148,6 +153,13 @@ void ARM::writeStubHelperEntry(uint8_t *buf, const Symbol &sym,
148153
fatal("TODO: implement this");
149154
}
150155

156+
void ARM::writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr,
157+
uint64_t stubOffset, uint64_t selrefsVA,
158+
uint64_t selectorIndex, uint64_t gotAddr,
159+
uint64_t msgSendIndex) const {
160+
fatal("TODO: implement this");
161+
}
162+
151163
void ARM::relaxGotLoad(uint8_t *loc, uint8_t type) const {
152164
fatal("TODO: implement this");
153165
}

lld/MachO/Arch/ARM64.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ struct ARM64 : ARM64Common {
3434
void writeStubHelperHeader(uint8_t *buf) const override;
3535
void writeStubHelperEntry(uint8_t *buf, const Symbol &,
3636
uint64_t entryAddr) const override;
37+
38+
void writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr,
39+
uint64_t stubOffset, uint64_t selrefsVA,
40+
uint64_t selectorIndex, uint64_t gotAddr,
41+
uint64_t msgSendIndex) const override;
3742
void populateThunk(InputSection *thunk, Symbol *funcSym) override;
3843
void applyOptimizationHints(uint8_t *, const ConcatInputSection *,
3944
ArrayRef<uint64_t>) const override;
@@ -100,6 +105,26 @@ void ARM64::writeStubHelperEntry(uint8_t *buf8, const Symbol &sym,
100105
::writeStubHelperEntry(buf8, stubHelperEntryCode, sym, entryVA);
101106
}
102107

108+
// Instruction template for a single "fast" objc_msgSend stub: eight 32-bit
// words. The first four instructions carry zero address immediates in the
// template; the last three words are `brk #0x1` filler after the branch.
static constexpr uint32_t objcStubsFastCode[] = {
    // Load the selector reference into x1.
    0x90000001, // adrp x1, __objc_selrefs@page
    0xf9400021, // ldr x1, [x1, @selector("foo")@pageoff]
    // Load the _objc_msgSend GOT entry into x16 and jump to it.
    0x90000010, // adrp x16, _got@page
    0xf9400210, // ldr x16, [x16, _objc_msgSend@pageoff]
    0xd61f0200, // br x16
    // Filler.
    0xd4200020, // brk #0x1
    0xd4200020, // brk #0x1
    0xd4200020, // brk #0x1
};
118+
119+
void ARM64::writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr,
120+
uint64_t stubOffset, uint64_t selrefsVA,
121+
uint64_t selectorIndex, uint64_t gotAddr,
122+
uint64_t msgSendIndex) const {
123+
::writeObjCMsgSendStub<LP64>(buf, objcStubsFastCode, sym, stubsAddr,
124+
stubOffset, selrefsVA, selectorIndex, gotAddr,
125+
msgSendIndex);
126+
}
127+
103128
// A thunk is the relaxed variation of stubCode. We don't need the
104129
// extra indirection through a lazy pointer because the target address
105130
// is known at link time.
@@ -130,6 +155,9 @@ ARM64::ARM64() : ARM64Common(LP64()) {
130155
stubSize = sizeof(stubCode);
131156
thunkSize = sizeof(thunkCode);
132157

158+
objcStubsFastSize = sizeof(objcStubsFastCode);
159+
objcStubsAlignment = 32;
160+
133161
// Branch immediate is two's complement 26 bits, which is implicitly
134162
// multiplied by 4 (since all functions are 4-aligned: The branch range
135163
// is -4*(2**(26-1))..4*(2**(26-1) - 1).

lld/MachO/Arch/ARM64Common.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,32 @@ inline void writeStubHelperEntry(uint8_t *buf8,
147147
buf32[2] = sym.lazyBindOffset;
148148
}
149149

150+
template <class LP>
151+
inline void
152+
writeObjCMsgSendStub(uint8_t *buf, const uint32_t objcStubsFastCode[8],
153+
Symbol *sym, uint64_t stubsAddr, uint64_t stubOffset,
154+
uint64_t selrefsVA, uint64_t selectorIndex,
155+
uint64_t gotAddr, uint64_t msgSendIndex) {
156+
SymbolDiagnostic d = {sym, sym->getName()};
157+
auto *buf32 = reinterpret_cast<uint32_t *>(buf);
158+
159+
auto pcPageBits = [stubsAddr, stubOffset](int i) {
160+
return pageBits(stubsAddr + stubOffset + i * sizeof(uint32_t));
161+
};
162+
163+
uint64_t selectorOffset = selectorIndex * LP::wordSize;
164+
encodePage21(&buf32[0], d, objcStubsFastCode[0],
165+
pageBits(selrefsVA + selectorOffset) - pcPageBits(0));
166+
encodePageOff12(&buf32[1], objcStubsFastCode[1], selrefsVA + selectorOffset);
167+
encodePage21(&buf32[2], d, objcStubsFastCode[2],
168+
pageBits(gotAddr) - pcPageBits(2));
169+
encodePage21(&buf32[3], d, objcStubsFastCode[3], msgSendIndex * LP::wordSize);
170+
buf32[4] = objcStubsFastCode[4];
171+
buf32[5] = objcStubsFastCode[5];
172+
buf32[6] = objcStubsFastCode[6];
173+
buf32[7] = objcStubsFastCode[7];
174+
}
175+
150176
} // namespace lld::macho
151177

152178
#endif

lld/MachO/Arch/ARM64_32.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ struct ARM64_32 : ARM64Common {
3333
void writeStubHelperHeader(uint8_t *buf) const override;
3434
void writeStubHelperEntry(uint8_t *buf, const Symbol &,
3535
uint64_t entryAddr) const override;
36+
void writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr,
37+
uint64_t stubOffset, uint64_t selrefsVA,
38+
uint64_t selectorIndex, uint64_t gotAddr,
39+
uint64_t msgSendIndex) const override;
3640
};
3741

3842
} // namespace
@@ -94,6 +98,14 @@ void ARM64_32::writeStubHelperEntry(uint8_t *buf8, const Symbol &sym,
9498
::writeStubHelperEntry(buf8, stubHelperEntryCode, sym, entryVA);
9599
}
96100

101+
void ARM64_32::writeObjCMsgSendStub(uint8_t *buf, Symbol *sym,
102+
uint64_t stubsAddr, uint64_t stubOffset,
103+
uint64_t selrefsVA, uint64_t selectorIndex,
104+
uint64_t gotAddr,
105+
uint64_t msgSendIndex) const {
106+
fatal("TODO: implement this");
107+
}
108+
97109
ARM64_32::ARM64_32() : ARM64Common(ILP32()) {
98110
cpuType = CPU_TYPE_ARM64_32;
99111
cpuSubtype = CPU_SUBTYPE_ARM64_V8;

lld/MachO/Arch/X86_64.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ struct X86_64 : TargetInfo {
3636
void writeStubHelperEntry(uint8_t *buf, const Symbol &,
3737
uint64_t entryAddr) const override;
3838

39+
void writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr,
40+
uint64_t stubOffset, uint64_t selrefsVA,
41+
uint64_t selectorIndex, uint64_t gotAddr,
42+
uint64_t msgSendIndex) const override;
43+
3944
void relaxGotLoad(uint8_t *loc, uint8_t type) const override;
4045
uint64_t getPageSize() const override { return 4 * 1024; }
4146

@@ -170,6 +175,24 @@ void X86_64::writeStubHelperEntry(uint8_t *buf, const Symbol &sym,
170175
sizeof(stubHelperEntry), in.stubHelper->addr);
171176
}
172177

178+
// Byte template for a single "fast" objc_msgSend stub: a 7-byte movq followed
// by a 6-byte jmpq, 13 bytes in total. The four trailing bytes of each
// instruction are zero in the template.
static constexpr uint8_t objcStubsFastCode[] = {
    // 0x0: movq selrefs@selector(%rip), %rsi
    0x48, 0x8b, 0x35, 0x00, 0x00, 0x00, 0x00,
    // 0x7: jmpq *_objc_msgSend@GOT(%rip)
    0xff, 0x25, 0x00, 0x00, 0x00, 0x00,
};
182+
183+
void X86_64::writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr,
184+
uint64_t stubOffset, uint64_t selrefsVA,
185+
uint64_t selectorIndex, uint64_t gotAddr,
186+
uint64_t msgSendIndex) const {
187+
memcpy(buf, objcStubsFastCode, sizeof(objcStubsFastCode));
188+
SymbolDiagnostic d = {sym, sym->getName()};
189+
uint64_t stubAddr = stubsAddr + stubOffset;
190+
writeRipRelative(d, buf, stubAddr, 7,
191+
selrefsVA + selectorIndex * LP64::wordSize);
192+
writeRipRelative(d, buf, stubAddr, 0xd,
193+
gotAddr + msgSendIndex * LP64::wordSize);
194+
}
195+
173196
void X86_64::relaxGotLoad(uint8_t *loc, uint8_t type) const {
174197
// Convert MOVQ to LEAQ
175198
if (loc[-2] != 0x8b)
@@ -189,6 +212,9 @@ X86_64::X86_64() : TargetInfo(LP64()) {
189212
stubHelperHeaderSize = sizeof(stubHelperHeader);
190213
stubHelperEntrySize = sizeof(stubHelperEntry);
191214

215+
objcStubsFastSize = sizeof(objcStubsFastCode);
216+
objcStubsAlignment = 1;
217+
192218
relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()};
193219
}
194220

lld/MachO/Config.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ enum class ICFLevel {
6868
all,
6969
};
7070

71+
// Selects how objc_msgSend stubs are generated.
enum class ObjCStubsMode {
  // Each stub sets up the selector and jumps to _objc_msgSend directly.
  fast,
  // Each stub sets up the selector, then calls the regular _objc_msgSend stub.
  small,
};
75+
7176
struct SectionAlign {
7277
llvm::StringRef segName;
7378
llvm::StringRef sectName;
@@ -166,6 +171,7 @@ struct Configuration {
166171
UndefinedSymbolTreatment undefinedSymbolTreatment =
167172
UndefinedSymbolTreatment::error;
168173
ICFLevel icfLevel = ICFLevel::none;
174+
ObjCStubsMode objcStubsMode = ObjCStubsMode::fast;
169175
llvm::MachO::HeaderFileType outputType;
170176
std::vector<llvm::StringRef> systemLibraryRoots;
171177
std::vector<llvm::StringRef> librarySearchPaths;

lld/MachO/Driver.cpp

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,17 @@ static ICFLevel getICFLevel(const ArgList &args) {
781781
return icfLevel;
782782
}
783783

784+
// Resolves the objc stubs mode from the command line. Only the fast mode is
// implemented, so the result is always ObjCStubsMode::fast; when the last of
// -objc_stubs_fast / -objc_stubs_small given is the small variant, a warning
// is emitted first.
static ObjCStubsMode getObjCStubsMode(const ArgList &args) {
  if (const Arg *lastStubsArg =
          args.getLastArg(OPT_objc_stubs_fast, OPT_objc_stubs_small)) {
    const bool requestedSmall =
        lastStubsArg->getOption().getID() == OPT_objc_stubs_small;
    if (requestedSmall)
      warn("-objc_stubs_small is not yet implemented, defaulting to "
           "-objc_stubs_fast");
  }
  return ObjCStubsMode::fast;
}
794+
784795
static void warnIfDeprecatedOption(const Option &opt) {
785796
if (!opt.getGroup().isValid())
786797
return;
@@ -1099,9 +1110,15 @@ static void gatherInputSections() {
10991110
inputSections.push_back(isec);
11001111
} else if (auto *isec =
11011112
dyn_cast<CStringInputSection>(subsection.isec)) {
1102-
if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
1103-
in.cStringSection->inputOrder = inputOrder++;
1104-
in.cStringSection->addInput(isec);
1113+
if (isec->getName() == section_names::objcMethname) {
1114+
if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder)
1115+
in.objcMethnameSection->inputOrder = inputOrder++;
1116+
in.objcMethnameSection->addInput(isec);
1117+
} else {
1118+
if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
1119+
in.cStringSection->inputOrder = inputOrder++;
1120+
in.cStringSection->addInput(isec);
1121+
}
11051122
} else if (auto *isec =
11061123
dyn_cast<WordLiteralInputSection>(subsection.isec)) {
11071124
if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
@@ -1124,10 +1141,39 @@ static void foldIdenticalLiterals() {
11241141
// true. If it isn't, we simply create a non-deduplicating CStringSection.
11251142
// Either way, we must unconditionally finalize it here.
11261143
in.cStringSection->finalizeContents();
1144+
in.objcMethnameSection->finalizeContents();
11271145
if (in.wordLiteralSection)
11281146
in.wordLiteralSection->finalizeContents();
11291147
}
11301148

1149+
static void addSynthenticMethnames() {
1150+
std::string &data = *make<std::string>();
1151+
llvm::raw_string_ostream os(data);
1152+
const int prefixLength = ObjCStubsSection::symbolPrefix.size();
1153+
for (Symbol *sym : symtab->getSymbols())
1154+
if (const auto *undefined = dyn_cast<Undefined>(sym))
1155+
if (sym->getName().startswith(ObjCStubsSection::symbolPrefix))
1156+
os << sym->getName().drop_front(prefixLength) << '\0';
1157+
1158+
if (data.empty())
1159+
return;
1160+
1161+
const auto *buf = reinterpret_cast<const uint8_t *>(data.c_str());
1162+
Section &section = *make<Section>(/*file=*/nullptr, segment_names::text,
1163+
section_names::objcMethname,
1164+
S_CSTRING_LITERALS, /*addr=*/0);
1165+
1166+
auto *isec =
1167+
make<CStringInputSection>(section, ArrayRef<uint8_t>{buf, data.size()},
1168+
/*align=*/1, /*dedupLiterals=*/true);
1169+
isec->splitIntoPieces();
1170+
for (auto &piece : isec->pieces)
1171+
piece.live = true;
1172+
section.subsections.push_back({0, isec});
1173+
in.objcMethnameSection->addInput(isec);
1174+
in.objcMethnameSection->isec->markLive(0);
1175+
}
1176+
11311177
static void referenceStubBinder() {
11321178
bool needsStubHelper = config->outputType == MH_DYLIB ||
11331179
config->outputType == MH_EXECUTE ||
@@ -1398,6 +1444,7 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
13981444
config->printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order);
13991445
config->forceExactCpuSubtypeMatch =
14001446
getenv("LD_DYLIB_CPU_SUBTYPES_MUST_MATCH");
1447+
config->objcStubsMode = getObjCStubsMode(args);
14011448

14021449
for (const Arg *arg : args.filtered(OPT_alias)) {
14031450
config->aliasedSymbols.push_back(
@@ -1643,6 +1690,7 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
16431690

16441691
createSyntheticSections();
16451692
createSyntheticSymbols();
1693+
addSynthenticMethnames();
16461694

16471695
createAliases();
16481696
// If we are in "explicit exports" mode, hide everything that isn't

lld/MachO/InputFiles.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,10 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
342342

343343
InputSection *isec;
344344
if (sectionType(sec.flags) == S_CSTRING_LITERALS) {
345-
isec = make<CStringInputSection>(section, data, align);
345+
isec = make<CStringInputSection>(section, data, align,
346+
/*dedupLiterals=*/name ==
347+
section_names::objcMethname ||
348+
config->dedupLiterals);
346349
// FIXME: parallelize this?
347350
cast<CStringInputSection>(isec)->splitIntoPieces();
348351
} else {

lld/MachO/InputSection.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ void CStringInputSection::splitIntoPieces() {
251251
if (end == StringRef::npos)
252252
fatal(getLocation(off) + ": string is not null terminated");
253253
size_t size = end + 1;
254-
uint32_t hash = config->dedupLiterals ? xxHash64(s.substr(0, size)) : 0;
254+
uint32_t hash = deduplicateLiterals ? xxHash64(s.substr(0, size)) : 0;
255255
pieces.emplace_back(off, hash);
256256
s = s.substr(size);
257257
off += size;

lld/MachO/InputSection.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,8 +192,10 @@ static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!");
192192
class CStringInputSection final : public InputSection {
193193
public:
194194
CStringInputSection(const Section &section, ArrayRef<uint8_t> data,
195-
uint32_t align)
196-
: InputSection(CStringLiteralKind, section, data, align) {}
195+
uint32_t align, bool dedupLiterals)
196+
: InputSection(CStringLiteralKind, section, data, align),
197+
deduplicateLiterals(dedupLiterals) {}
198+
197199
uint64_t getOffset(uint64_t off) const override;
198200
bool isLive(uint64_t off) const override { return getStringPiece(off).live; }
199201
void markLive(uint64_t off) override { getStringPiece(off).live = true; }
@@ -215,14 +217,15 @@ class CStringInputSection final : public InputSection {
215217
// string merging is enabled, so we want to inline.
216218
LLVM_ATTRIBUTE_ALWAYS_INLINE
217219
llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const {
218-
assert(config->dedupLiterals);
220+
assert(deduplicateLiterals);
219221
return {getStringRef(i), pieces[i].hash};
220222
}
221223

222224
static bool classof(const InputSection *isec) {
223225
return isec->kind() == CStringLiteralKind;
224226
}
225227

228+
bool deduplicateLiterals = false;
226229
std::vector<StringPiece> pieces;
227230
};
228231

@@ -323,6 +326,9 @@ constexpr const char objcClassList[] = "__objc_classlist";
323326
constexpr const char objcClassRefs[] = "__objc_classrefs";
324327
constexpr const char objcConst[] = "__objc_const";
325328
constexpr const char objCImageInfo[] = "__objc_imageinfo";
329+
constexpr const char objcStubs[] = "__objc_stubs";
330+
constexpr const char objcSelrefs[] = "__objc_selrefs";
331+
constexpr const char objcMethname[] = "__objc_methname";
326332
constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist";
327333
constexpr const char objcNonLazyClassList[] = "__objc_nlclslist";
328334
constexpr const char objcProtoList[] = "__objc_protolist";

0 commit comments

Comments
 (0)