Skip to content

Commit bcc9b9d

Browse files
authored
[lld][WebAssembly] Match the ELF linker in transitioning away from archive indexes. (#78658)
The ELF linker transitioned away from archive indexes in https://reviews.llvm.org/D117284. This paves the way for supporting `--start-lib`/`--end-lib` (see #77960). The ELF linker unified library handling with `--start-lib`/`--end-lib` and removed the ArchiveFile class in https://reviews.llvm.org/D119074.
1 parent c71a5bf commit bcc9b9d

10 files changed

+99
-142
lines changed

lld/docs/ReleaseNotes.rst

+4
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,9 @@ MachO Improvements
5050
WebAssembly Improvements
5151
------------------------
5252

53+
* Indexes are no longer required on archive files. Instead, symbol information
54+
is read from object files within the archive. This matches the behaviour of
55+
the ELF linker.
56+
5357
Fixes
5458
#####

lld/test/wasm/archive-no-index.s

-14
This file was deleted.

lld/test/wasm/bad-archive-member.s

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux -o %t.dir/elf.o %s
66
# RUN: llvm-ar rcs %t.dir/libfoo.a %t.dir/elf.o
77
# RUN: not wasm-ld %t.dir/libfoo.a -o /dev/null 2>&1 | FileCheck %s
8-
# CHECK: error: unknown file type: {{.*}}libfoo.a(elf.o)
8+
# CHECK: warning: {{.*}}libfoo.a: archive member 'elf.o' is neither Wasm object file nor LLVM bitcode
99

1010
.globl _start
1111
_start:

lld/wasm/Driver.cpp

+14-15
Original file line numberDiff line numberDiff line change
@@ -272,9 +272,11 @@ void LinkerDriver::addFile(StringRef path) {
272272
if (fs::exists(importFile))
273273
readImportFile(importFile.str());
274274

275+
auto members = getArchiveMembers(mbref);
276+
275277
// Handle -whole-archive.
276278
if (inWholeArchive) {
277-
for (const auto &[m, offset] : getArchiveMembers(mbref)) {
279+
for (const auto &[m, offset] : members) {
278280
auto *object = createObjectFile(m, path, offset);
279281
// Mark object as live; object members are normally not
280282
// live by default but -whole-archive is designed to treat
@@ -289,12 +291,15 @@ void LinkerDriver::addFile(StringRef path) {
289291
std::unique_ptr<Archive> file =
290292
CHECK(Archive::create(mbref), path + ": failed to parse archive");
291293

292-
if (!file->isEmpty() && !file->hasSymbolTable()) {
293-
error(mbref.getBufferIdentifier() +
294-
": archive has no index; run ranlib to add one");
294+
for (const auto &[m, offset] : members) {
295+
auto magic = identify_magic(m.getBuffer());
296+
if (magic == file_magic::wasm_object || magic == file_magic::bitcode)
297+
files.push_back(createObjectFile(m, path, offset, true));
298+
else
299+
warn(path + ": archive member '" + m.getBufferIdentifier() +
300+
"' is neither Wasm object file nor LLVM bitcode");
295301
}
296302

297-
files.push_back(make<ArchiveFile>(mbref));
298303
return;
299304
}
300305
case file_magic::bitcode:
@@ -732,16 +737,10 @@ static Symbol *handleUndefined(StringRef name, const char *option) {
732737

733738
static void handleLibcall(StringRef name) {
734739
Symbol *sym = symtab->find(name);
735-
if (!sym)
736-
return;
737-
738-
if (auto *lazySym = dyn_cast<LazySymbol>(sym)) {
739-
MemoryBufferRef mb = lazySym->getMemberBuffer();
740-
if (isBitcode(mb)) {
741-
if (!config->whyExtract.empty())
742-
ctx.whyExtractRecords.emplace_back("<libcall>", sym->getFile(), *sym);
743-
lazySym->extract();
744-
}
740+
if (sym && sym->isLazy() && isa<BitcodeFile>(sym->getFile())) {
741+
if (!config->whyExtract.empty())
742+
ctx.whyExtractRecords.emplace_back("<libcall>", sym->getFile(), *sym);
743+
cast<LazySymbol>(sym)->extract();
745744
}
746745
}
747746

lld/wasm/InputFiles.cpp

+49-52
Original file line numberDiff line numberDiff line change
@@ -75,26 +75,19 @@ std::optional<MemoryBufferRef> readFile(StringRef path) {
7575
}
7676

7777
InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName,
78-
uint64_t offsetInArchive) {
78+
uint64_t offsetInArchive, bool lazy) {
7979
file_magic magic = identify_magic(mb.getBuffer());
8080
if (magic == file_magic::wasm_object) {
8181
std::unique_ptr<Binary> bin =
8282
CHECK(createBinary(mb), mb.getBufferIdentifier());
8383
auto *obj = cast<WasmObjectFile>(bin.get());
8484
if (obj->isSharedObject())
8585
return make<SharedFile>(mb);
86-
return make<ObjFile>(mb, archiveName);
86+
return make<ObjFile>(mb, archiveName, lazy);
8787
}
8888

89-
if (magic == file_magic::bitcode)
90-
return make<BitcodeFile>(mb, archiveName, offsetInArchive);
91-
92-
std::string name = mb.getBufferIdentifier().str();
93-
if (!archiveName.empty()) {
94-
name = archiveName.str() + "(" + name + ")";
95-
}
96-
97-
fatal("unknown file type: " + name);
89+
assert(magic == file_magic::bitcode);
90+
return make<BitcodeFile>(mb, archiveName, offsetInArchive, lazy);
9891
}
9992

10093
// Relocations contain either symbol or type indices. This function takes a
@@ -391,9 +384,30 @@ static bool shouldMerge(const WasmSegment &seg) {
391384
return true;
392385
}
393386

394-
void ObjFile::parse(bool ignoreComdats) {
395-
// Parse a memory buffer as a wasm file.
396-
LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
387+
void ObjFile::parseLazy() {
388+
LLVM_DEBUG(dbgs() << "ObjFile::parseLazy: " << toString(this) << "\n");
389+
for (const SymbolRef &sym : wasmObj->symbols()) {
390+
const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl());
391+
if (!wasmSym.isDefined())
392+
continue;
393+
symtab->addLazy(wasmSym.Info.Name, this);
394+
// addLazy() may trigger this->extract() if an existing symbol is an
395+
// undefined symbol. If that happens, this function has served its purpose,
396+
// and we can exit from the loop early.
397+
if (!lazy)
398+
break;
399+
}
400+
}
401+
402+
ObjFile::ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy)
403+
: InputFile(ObjectKind, m) {
404+
this->lazy = lazy;
405+
this->archiveName = std::string(archiveName);
406+
407+
// If this isn't part of an archive, it's eagerly linked, so mark it live.
408+
if (archiveName.empty())
409+
markLive();
410+
397411
std::unique_ptr<Binary> bin = CHECK(createBinary(mb), toString(this));
398412

399413
auto *obj = dyn_cast<WasmObjectFile>(bin.get());
@@ -406,6 +420,11 @@ void ObjFile::parse(bool ignoreComdats) {
406420
wasmObj.reset(obj);
407421

408422
checkArch(obj->getArch());
423+
}
424+
425+
void ObjFile::parse(bool ignoreComdats) {
426+
// Parse a memory buffer as a wasm file.
427+
LLVM_DEBUG(dbgs() << "ObjFile::parse: " << toString(this) << "\n");
409428

410429
// Build up a map of function indices to table indices for use when
411430
// verifying the existing table index relocations
@@ -717,43 +736,6 @@ void StubFile::parse() {
717736
}
718737
}
719738

720-
void ArchiveFile::parse() {
721-
// Parse a MemoryBufferRef as an archive file.
722-
LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
723-
file = CHECK(Archive::create(mb), toString(this));
724-
725-
// Read the symbol table to construct Lazy symbols.
726-
int count = 0;
727-
for (const Archive::Symbol &sym : file->symbols()) {
728-
symtab->addLazy(this, &sym);
729-
++count;
730-
}
731-
LLVM_DEBUG(dbgs() << "Read " << count << " symbols\n");
732-
(void) count;
733-
}
734-
735-
void ArchiveFile::addMember(const Archive::Symbol *sym) {
736-
const Archive::Child &c =
737-
CHECK(sym->getMember(),
738-
"could not get the member for symbol " + sym->getName());
739-
740-
// Don't try to load the same member twice (this can happen when members
741-
// mutually reference each other).
742-
if (!seen.insert(c.getChildOffset()).second)
743-
return;
744-
745-
LLVM_DEBUG(dbgs() << "loading lazy: " << sym->getName() << "\n");
746-
LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
747-
748-
MemoryBufferRef mb =
749-
CHECK(c.getMemoryBufferRef(),
750-
"could not get the buffer for the member defining symbol " +
751-
sym->getName());
752-
753-
InputFile *obj = createObjectFile(mb, getName(), c.getChildOffset());
754-
symtab->addFile(obj, sym->getName());
755-
}
756-
757739
static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
758740
switch (gvVisibility) {
759741
case GlobalValue::DefaultVisibility:
@@ -790,8 +772,9 @@ static Symbol *createBitcodeSymbol(const std::vector<bool> &keptComdats,
790772
}
791773

792774
BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName,
793-
uint64_t offsetInArchive)
775+
uint64_t offsetInArchive, bool lazy)
794776
: InputFile(BitcodeKind, m) {
777+
this->lazy = lazy;
795778
this->archiveName = std::string(archiveName);
796779

797780
std::string path = mb.getBufferIdentifier().str();
@@ -817,6 +800,20 @@ BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName,
817800

818801
bool BitcodeFile::doneLTO = false;
819802

803+
void BitcodeFile::parseLazy() {
804+
for (auto [i, irSym] : llvm::enumerate(obj->symbols())) {
805+
if (irSym.isUndefined())
806+
continue;
807+
StringRef name = saver().save(irSym.getName());
808+
symtab->addLazy(name, this);
809+
// addLazy() may trigger this->extract() if an existing symbol is an
810+
// undefined symbol. If that happens, this function has served its purpose,
811+
// and we can exit from the loop early.
812+
if (!lazy)
813+
break;
814+
}
815+
}
816+
820817
void BitcodeFile::parse(StringRef symName) {
821818
if (doneLTO) {
822819
error(toString(this) + ": attempt to add bitcode file after LTO (" + symName + ")");

lld/wasm/InputFiles.h

+10-31
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#include "llvm/ADT/DenseMap.h"
1515
#include "llvm/ADT/DenseSet.h"
1616
#include "llvm/LTO/LTO.h"
17-
#include "llvm/Object/Archive.h"
1817
#include "llvm/Object/Wasm.h"
1918
#include "llvm/Support/MemoryBuffer.h"
2019
#include "llvm/TargetParser/Triple.h"
@@ -45,7 +44,6 @@ class InputFile {
4544
enum Kind {
4645
ObjectKind,
4746
SharedKind,
48-
ArchiveKind,
4947
BitcodeKind,
5048
StubKind,
5149
};
@@ -69,6 +67,11 @@ class InputFile {
6967
void markLive() { live = true; }
7068
bool isLive() const { return live; }
7169

70+
// True if this file exists in an archive file and has not yet been
71+
// extracted.
72+
// TODO(sbc): Use this to implement --start-lib/--end-lib.
73+
bool lazy = false;
74+
7275
protected:
7376
InputFile(Kind k, MemoryBufferRef m)
7477
: mb(m), fileKind(k), live(!config->gcSections) {}
@@ -85,35 +88,14 @@ class InputFile {
8588
bool live;
8689
};
8790

88-
// .a file (ar archive)
89-
class ArchiveFile : public InputFile {
90-
public:
91-
explicit ArchiveFile(MemoryBufferRef m) : InputFile(ArchiveKind, m) {}
92-
static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
93-
94-
void addMember(const llvm::object::Archive::Symbol *sym);
95-
96-
void parse();
97-
98-
private:
99-
std::unique_ptr<llvm::object::Archive> file;
100-
llvm::DenseSet<uint64_t> seen;
101-
};
102-
10391
// .o file (wasm object file)
10492
class ObjFile : public InputFile {
10593
public:
106-
explicit ObjFile(MemoryBufferRef m, StringRef archiveName)
107-
: InputFile(ObjectKind, m) {
108-
this->archiveName = std::string(archiveName);
109-
110-
// If this isn't part of an archive, it's eagerly linked, so mark it live.
111-
if (archiveName.empty())
112-
markLive();
113-
}
94+
ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy = false);
11495
static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
11596

11697
void parse(bool ignoreComdats = false);
98+
void parseLazy();
11799

118100
// Returns the underlying wasm file.
119101
const WasmObjectFile *getWasmObj() const { return wasmObj.get(); }
@@ -173,10 +155,11 @@ class SharedFile : public InputFile {
173155
class BitcodeFile : public InputFile {
174156
public:
175157
BitcodeFile(MemoryBufferRef m, StringRef archiveName,
176-
uint64_t offsetInArchive);
158+
uint64_t offsetInArchive, bool lazy);
177159
static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
178160

179161
void parse(StringRef symName);
162+
void parseLazy();
180163
std::unique_ptr<llvm::lto::InputFile> obj;
181164

182165
// Set to true once LTO is complete in order to prevent further bitcode objects
@@ -196,14 +179,10 @@ class StubFile : public InputFile {
196179
llvm::DenseMap<StringRef, std::vector<StringRef>> symbolDependencies;
197180
};
198181

199-
inline bool isBitcode(MemoryBufferRef mb) {
200-
return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
201-
}
202-
203182
// Will report a fatal() error if the input buffer is not a valid bitcode
204183
// or wasm object file.
205184
InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "",
206-
uint64_t offsetInArchive = 0);
185+
uint64_t offsetInArchive = 0, bool lazy = false);
207186

208187
// Opens a given file.
209188
std::optional<MemoryBufferRef> readFile(StringRef path);

lld/wasm/SymbolTable.cpp

+13-10
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,13 @@ SymbolTable *symtab;
2626
void SymbolTable::addFile(InputFile *file, StringRef symName) {
2727
log("Processing: " + toString(file));
2828

29-
// .a file
30-
if (auto *f = dyn_cast<ArchiveFile>(file)) {
31-
f->parse();
29+
// Lazy object file
30+
if (file->lazy) {
31+
if (auto *f = dyn_cast<BitcodeFile>(file)) {
32+
f->parseLazy();
33+
} else {
34+
cast<ObjFile>(file)->parseLazy();
35+
}
3236
return;
3337
}
3438

@@ -737,16 +741,15 @@ TableSymbol *SymbolTable::resolveIndirectFunctionTable(bool required) {
737741
return nullptr;
738742
}
739743

740-
void SymbolTable::addLazy(ArchiveFile *file, const Archive::Symbol *sym) {
741-
LLVM_DEBUG(dbgs() << "addLazy: " << sym->getName() << "\n");
742-
StringRef name = sym->getName();
744+
void SymbolTable::addLazy(StringRef name, InputFile *file) {
745+
LLVM_DEBUG(dbgs() << "addLazy: " << name << "\n");
743746

744747
Symbol *s;
745748
bool wasInserted;
746749
std::tie(s, wasInserted) = insertName(name);
747750

748751
if (wasInserted) {
749-
replaceSymbol<LazySymbol>(s, name, 0, file, *sym);
752+
replaceSymbol<LazySymbol>(s, name, 0, file);
750753
return;
751754
}
752755

@@ -763,15 +766,15 @@ void SymbolTable::addLazy(ArchiveFile *file, const Archive::Symbol *sym) {
763766
if (auto *f = dyn_cast<UndefinedFunction>(s))
764767
oldSig = f->signature;
765768
LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n");
766-
auto newSym = replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK,
767-
file, *sym);
769+
auto newSym =
770+
replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK, file);
768771
newSym->signature = oldSig;
769772
return;
770773
}
771774

772775
LLVM_DEBUG(dbgs() << "replacing existing undefined\n");
773776
const InputFile *oldFile = s->getFile();
774-
file->addMember(sym);
777+
replaceSymbol<LazySymbol>(s, name, 0, file)->extract();
775778
if (!config->whyExtract.empty())
776779
ctx.whyExtractRecords.emplace_back(toString(oldFile), s->getFile(), *s);
777780
}

0 commit comments

Comments
 (0)