Skip to content

Commit

Permalink
Eager symbol tables (#1537)
Browse files Browse the repository at this point in the history
* Eagerly create symbol tables when writing relocatable binaries

Instead of adding entries to the symbol table as they are referenced,
when writing relocatable binaries, we're going to make symbols for all
functions.  This allows exported functions and globals to be written
with the proper exported / no_strip flags, so that resulting files will
link with wasm-ld.

* Symbol table names are export names, without the dollar.

In the previous commit, I wrongly assumed that the globally visible name
for linking was taken from the exports section, whereas actually it's
from the symbol table.  Therefore this patch changes to strip off the
dollar, and also to make all named bindings globally visible.  The
exported-to-the-host binding is mostly unrelated to the
visible-to-other-compilation-units binding.  Unnamed definitions aren't
added to the symbol table.

* Add symbol tables test

* Rename Intern helper to EnsureUnique
  • Loading branch information
wingo authored Sep 16, 2020
1 parent 2135099 commit cd0b3db
Show file tree
Hide file tree
Showing 4 changed files with 202 additions and 53 deletions.
176 changes: 131 additions & 45 deletions src/binary-writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <set>
#include <vector>

#include "config.h"
Expand Down Expand Up @@ -124,7 +125,7 @@ class Symbol {

private:
SymbolType type_;
std::string name_;
string_view name_;
uint8_t flags_;
union {
Function function_;
Expand All @@ -135,19 +136,19 @@ class Symbol {
};

public:
Symbol(const std::string& name, uint8_t flags, const Function& f)
Symbol(const string_view& name, uint8_t flags, const Function& f)
: type_(Function::type), name_(name), flags_(flags), function_(f) {}
Symbol(const std::string& name, uint8_t flags, const Data& d)
Symbol(const string_view& name, uint8_t flags, const Data& d)
: type_(Data::type), name_(name), flags_(flags), data_(d) {}
Symbol(const std::string& name, uint8_t flags, const Global& g)
Symbol(const string_view& name, uint8_t flags, const Global& g)
: type_(Global::type), name_(name), flags_(flags), global_(g) {}
Symbol(const std::string& name, uint8_t flags, const Section& s)
Symbol(const string_view& name, uint8_t flags, const Section& s)
: type_(Section::type), name_(name), flags_(flags), section_(s) {}
Symbol(const std::string& name, uint8_t flags, const Event& e)
Symbol(const string_view& name, uint8_t flags, const Event& e)
: type_(Event::type), name_(name), flags_(flags), event_(e) {}

SymbolType type() const { return type_; }
const std::string& name() const { return name_; }
const string_view& name() const { return name_; }
uint8_t flags() const { return flags_; }

SymbolVisibility visibility() const {
Expand Down Expand Up @@ -190,6 +191,117 @@ class Symbol {
}
};

// Symbol table emitted in the "linking" custom section of a relocatable
// binary.
//
// Populate() eagerly walks every function and global in a module and creates
// a symbol for each import and each named definition.  The table also keeps,
// per kind, a map from the module-level index to the index of the matching
// symbol so relocations can look their symbol up.
//
// Symbol names (and the entries of seen_names_) are string_views into the
// names stored on the module's functions and globals, so the Module passed to
// Populate() has to outlive this table.
class SymbolTable {
  WABT_DISALLOW_COPY_AND_ASSIGN(SymbolTable);

  // All symbols, in the order they were added.
  std::vector<Symbol> symbols_;

  // functions_[i] / globals_[i] is the index into symbols_ of the symbol for
  // module function / global i, or kInvalidIndex when that entity has no
  // symbol (an unnamed definition).
  std::vector<Index> functions_;
  std::vector<Index> globals_;

  // Names of the exported definitions added so far; used to reject
  // duplicates.
  std::set<string_view> seen_names_;

  // Records `name` as used.  Returns Result::Error (after reporting on
  // stderr) when the name was already recorded.
  Result EnsureUnique(const string_view& name) {
    if (!seen_names_.insert(name).second) {
      // A string_view is not guaranteed to be NUL-terminated, so print it
      // with an explicit length rather than with a bare %s.
      fprintf(stderr,
              "error: duplicate symbol when writing relocatable "
              "binary: %.*s\n",
              static_cast<int>(name.size()), name.data());
      return Result::Error;
    }
    return Result::Ok;
  }

  // Adds the symbol for the next function or global and appends its symbol
  // index to `map`.  Exactly one entry is appended to `map` per successful
  // call, so `map` stays indexable by module index.
  //
  //   map       per-kind index map to extend (functions_ or globals_).
  //   name      the "$name" of the entity, or empty when it is unnamed.
  //   imported  whether the entity is an import.
  //   exported  whether the entity is named by an export.
  //   sym       kind-specific payload (e.g. Symbol::Function{index}).
  //
  // Returns Result::Error when an exported definition reuses a name.
  template <typename T>
  Result AddSymbol(std::vector<Index>* map, string_view name,
                   bool imported, bool exported, T&& sym) {
    uint8_t flags = 0;
    if (imported) {
      flags |= WABT_SYMBOL_FLAG_UNDEFINED;
      // Wabt currently has no way for a user to explicitly specify the name of
      // an import, so never set the EXPLICIT_NAME flag, and ignore any display
      // name fabricated by wabt.
      name = string_view();
    } else {
      // Definitions in this module without a name don't go in the symbol
      // table.  Their slot in the index map is still filled, with
      // kInvalidIndex, so that later entries keep lining up with module
      // indices.
      if (name.empty()) {
        map->push_back(kInvalidIndex);
        return Result::Ok;
      }

      // Otherwise, strip the dollar off the name; a function $foo is available
      // for linking as "foo".
      assert(name[0] == '$');
      name.remove_prefix(1);

      if (exported) {
        CHECK_RESULT(EnsureUnique(name));
        flags |= uint8_t(SymbolVisibility::Hidden);
        flags |= WABT_SYMBOL_FLAG_NO_STRIP;
      }
    }
    if (exported) {
      flags |= WABT_SYMBOL_FLAG_EXPORTED;
    }

    map->push_back(Index(symbols_.size()));
    symbols_.emplace_back(name, flags, sym);
    return Result::Ok;
  }

 public:
  SymbolTable() {}

  // Builds the table from `module`: imports first, then definitions, for
  // functions and then for globals.  Returns Result::Error when two exported
  // definitions would share a symbol name.
  Result Populate(const Module* module) {
    std::set<Index> exported_funcs;
    std::set<Index> exported_globals;

    for (const Export* export_ : module->exports) {
      switch (export_->kind) {
        case ExternalKind::Func:
          exported_funcs.insert(module->GetFuncIndex(export_->var));
          break;
        case ExternalKind::Global:
          exported_globals.insert(module->GetGlobalIndex(export_->var));
          break;
        case ExternalKind::Table:
        case ExternalKind::Memory:
        case ExternalKind::Event:
          // No symbols are created for these kinds (see below), so their
          // export status is not needed.
          break;
      }
    }

    // We currently only create symbol table entries for function and global
    // symbols.
    for (size_t i = 0; i < module->funcs.size(); ++i) {
      const Func* func = module->funcs[i];
      const bool imported = i < module->num_func_imports;
      const bool exported = exported_funcs.count(Index(i)) != 0;
      CHECK_RESULT(AddSymbol(&functions_, func->name, imported, exported,
                             Symbol::Function{Index(i)}));
    }

    for (size_t i = 0; i < module->globals.size(); ++i) {
      const Global* global = module->globals[i];
      const bool imported = i < module->num_global_imports;
      const bool exported = exported_globals.count(Index(i)) != 0;
      CHECK_RESULT(AddSymbol(&globals_, global->name, imported, exported,
                             Symbol::Global{Index(i)}));
    }

    return Result::Ok;
  }

  const std::vector<Symbol>& symbols() const { return symbols_; }

  // Symbol index for module function `index`, or kInvalidIndex when the
  // function has no symbol or `index` is outside the populated range.
  Index FunctionSymbolIndex(Index index) const {
    return index < functions_.size() ? functions_[index] : kInvalidIndex;
  }

  // Symbol index for module global `index`, or kInvalidIndex when the global
  // has no symbol or `index` is outside the populated range.
  Index GlobalSymbolIndex(Index index) const {
    return index < globals_.size() ? globals_[index] : kInvalidIndex;
  }
};

class BinaryWriter {
WABT_DISALLOW_COPY_AND_ASSIGN(BinaryWriter);

Expand All @@ -212,8 +324,6 @@ class BinaryWriter {
void EndSection();
void BeginSubsection(const char* name);
void EndSubsection();
template <typename T>
Index InternSymbol(const std::string& name, uint8_t flags, const T& arg);
Index GetLabelVarDepth(const Var* var);
Index GetEventVarDepth(const Var* var);
Index GetLocalIndex(const Func* func, const Var& var);
Expand Down Expand Up @@ -244,8 +354,7 @@ class BinaryWriter {
const WriteBinaryOptions& options_;
const Module* module_;

std::unordered_map<std::string, Index> symtab_;
std::vector<Symbol> symbols_;
SymbolTable symtab_;
std::vector<RelocSection> reloc_sections_;
RelocSection* current_reloc_section_ = nullptr;

Expand Down Expand Up @@ -404,40 +513,12 @@ Index BinaryWriter::GetEventVarDepth(const Var* var) {
return var->index();
}

// Looks up the symbol called `name`, creating it on first use.
//
// A new symbol is appended to symbols_ with the given `flags` and payload
// `arg`, and its index is remembered in symtab_.  When the name is already
// known, the existing index is returned provided the stored symbol has the
// same type and flags; otherwise an error is printed to stderr and
// kInvalidIndex is returned.
template <typename T>
Index BinaryWriter::InternSymbol(const std::string& name, uint8_t flags,
                                 const T& arg) {
  const auto existing = symtab_.find(name);
  if (existing == symtab_.end()) {
    // First use of this name: the new symbol goes at the end of the table.
    const Index new_index = Index(symbols_.size());
    symtab_[name] = new_index;
    symbols_.emplace_back(name, flags, arg);
    return new_index;
  }

  const Index known_index = existing->second;
  const Symbol& known = symbols_[known_index];
  const bool same_symbol = known.type() == T::type && known.flags() == flags;
  if (!same_symbol) {
    fprintf(stderr, "error: duplicate symbol when writing relocatable "
            "binary: %s\n", name.c_str());
    return kInvalidIndex;
  }
  return known_index;
}

Index BinaryWriter::GetSymbolIndex(RelocType reloc_type, Index index) {
uint8_t flags = 0;
switch (reloc_type) {
case RelocType::FuncIndexLEB:
if (index < module_->num_func_imports) {
flags |= WABT_SYMBOL_FLAG_UNDEFINED;
}
return InternSymbol(module_->funcs[index]->name, flags, Symbol::Function{index});
return symtab_.FunctionSymbolIndex(index);
case RelocType::GlobalIndexLEB:
if (index < module_->num_global_imports) {
flags |= WABT_SYMBOL_FLAG_UNDEFINED;
}
return InternSymbol(module_->globals[index]->name, flags, Symbol::Global{index});
return symtab_.GlobalSymbolIndex(index);
case RelocType::TypeIndexLEB:
// Type indexes don't create entries in the symbol table; instead their
// index is used directly.
Expand Down Expand Up @@ -978,12 +1059,13 @@ void BinaryWriter::WriteRelocSection(const RelocSection* reloc_section) {
void BinaryWriter::WriteLinkingSection() {
BeginCustomSection(WABT_BINARY_SECTION_LINKING);
WriteU32Leb128(stream_, 2, "metadata version");
if (symbols_.size()) {
const std::vector<Symbol>& symbols = symtab_.symbols();
if (symbols.size()) {
stream_->WriteU8Enum(LinkingEntryType::SymbolTable, "symbol table");
BeginSubsection("symbol table");
WriteU32Leb128(stream_, symbols_.size(), "num symbols");
WriteU32Leb128(stream_, symbols.size(), "num symbols");

for (const Symbol& sym : symbols_) {
for (const Symbol& sym : symbols) {
stream_->WriteU8Enum(sym.type(), "symbol type");
WriteU32Leb128(stream_, sym.flags(), "symbol flags");
switch (sym.type()) {
Expand All @@ -1008,7 +1090,7 @@ void BinaryWriter::WriteLinkingSection() {
}
break;
case SymbolType::Section:
WriteU32Leb128(stream_, sym.AsSection().section, "event index");
WriteU32Leb128(stream_, sym.AsSection().section, "section index");
break;
case SymbolType::Event:
WriteU32Leb128(stream_, sym.AsEvent().index, "event index");
Expand All @@ -1027,6 +1109,10 @@ Result BinaryWriter::WriteModule() {
stream_->WriteU32(WABT_BINARY_MAGIC, "WASM_BINARY_MAGIC");
stream_->WriteU32(WABT_BINARY_VERSION, "WASM_BINARY_VERSION");

if (options_.relocatable) {
CHECK_RESULT(symtab_.Populate(module_));
}

if (module_->types.size()) {
BeginKnownSection(BinarySection::Type);
WriteU32Leb128(stream_, module_->types.size(), "num types");
Expand Down
2 changes: 2 additions & 0 deletions test/dump/relocations-block-types.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ Code[1]:
- func[0] size=20 <multivalue_block>
Custom:
- name: "linking"
- symbol table [count=1]
- 0: F <multivalue_block> func=0 exported no_strip binding=global vis=hidden
Custom:
- name: "reloc.Code"
- relocations for section: 3 (Code) [1]
Expand Down
16 changes: 8 additions & 8 deletions test/dump/relocations.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,18 @@ Sections:
Export start=0x00000050 end=0x00000055 (size=0x00000005) count: 1
Elem start=0x00000057 end=0x0000005e (size=0x00000007) count: 1
Code start=0x00000060 end=0x00000082 (size=0x00000022) count: 1
Custom start=0x00000084 end=0x0000009f (size=0x0000001b) "linking"
Custom start=0x000000a1 end=0x000000ba (size=0x00000019) "reloc.Code"
Custom start=0x00000084 end=0x000000a1 (size=0x0000001d) "linking"
Custom start=0x000000a3 end=0x000000bc (size=0x00000019) "reloc.Code"
Code Disassembly:
000062 func[2] <$f>:
000063: 23 80 80 80 80 00 | global.get 0 <$g>
000064: R_WASM_GLOBAL_INDEX_LEB 0 <$g>
000069: 10 82 80 80 80 00 | call 2 <$f>
00006a: R_WASM_FUNCTION_INDEX_LEB 1 <$f>
000062 func[2] <f>:
000063: 23 80 80 80 80 00 | global.get 0 <g>
000064: R_WASM_GLOBAL_INDEX_LEB 3 <g>
000069: 10 82 80 80 80 00 | call 2 <f>
00006a: R_WASM_FUNCTION_INDEX_LEB 2 <f>
00006f: 10 80 80 80 80 00 | call 0 <__extern.foo>
000070: R_WASM_FUNCTION_INDEX_LEB 2 <__extern.foo>
000070: R_WASM_FUNCTION_INDEX_LEB 0 <__extern.foo>
000075: 41 d2 09 | i32.const 1234
000078: 41 00 | i32.const 0
00007a: 11 82 80 80 80 00 00 | call_indirect 2 0
Expand Down
61 changes: 61 additions & 0 deletions test/dump/symbol-tables.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
;;; TOOL: run-objdump
;;; ARGS0: -r
;;; ARGS1: -x
(module
(type (;0;) (func))
(import "env" "b" (func (;0;) (type 0)))
(func $a (type 0)
call 0)
(func (type 0)
call 0)
(func $b (type 0)
call 0)
(export "a" (func $a)))
(;; STDOUT ;;;
symbol-tables.wasm: file format wasm 0x1
Section Details:
Type[1]:
- type[0] () -> nil
Import[1]:
- func[0] sig=0 <env.b> <- env.b
Function[3]:
- func[1] sig=0 <a>
- func[2] sig=0
- func[3] sig=0 <b>
Export[1]:
- func[1] <a> -> "a"
Code[3]:
- func[1] size=8 <a>
- func[2] size=8
- func[3] size=8 <b>
Custom:
- name: "linking"
- symbol table [count=3]
- 0: F <env.b> func=0 undefined binding=global vis=default
- 1: F <a> func=1 exported no_strip binding=global vis=hidden
- 2: F <b> func=3 binding=global vis=default
Custom:
- name: "reloc.Code"
- relocations for section: 4 (Code) [3]
- R_WASM_FUNCTION_INDEX_LEB offset=0x000004(file=0x00002c) symbol=0 <env.b>
- R_WASM_FUNCTION_INDEX_LEB offset=0x00000d(file=0x000035) symbol=0 <env.b>
- R_WASM_FUNCTION_INDEX_LEB offset=0x000016(file=0x00003e) symbol=0 <env.b>
Code Disassembly:
00002a func[1] <a>:
00002b: 10 80 80 80 80 00 | call 0 <env.b>
00002c: R_WASM_FUNCTION_INDEX_LEB 0 <env.b>
000031: 0b | end
000033 func[2]:
000034: 10 80 80 80 80 00 | call 0 <env.b>
000035: R_WASM_FUNCTION_INDEX_LEB 0 <env.b>
00003a: 0b | end
00003c func[3] <b>:
00003d: 10 80 80 80 80 00 | call 0 <env.b>
00003e: R_WASM_FUNCTION_INDEX_LEB 0 <env.b>
000043: 0b | end
;;; STDOUT ;;)

0 comments on commit cd0b3db

Please sign in to comment.