Skip to content

Commit

Permalink
wasm2c: Segue optimization for modules with a single unshared memory
Browse files Browse the repository at this point in the history
  • Loading branch information
shravanrn committed Feb 23, 2024
1 parent 1471dff commit e286acb
Show file tree
Hide file tree
Showing 12 changed files with 618 additions and 119 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,8 @@ jobs:
runs-on: ubuntu-latest
env:
USE_NINJA: "1"
WASM2C_CFLAGS: "-DWASM_RT_USE_MMAP=1 -DWASM_RT_SKIP_SIGNAL_RECOVERY=1 -DWASM_RT_NONCONFORMING_UNCHECKED_STACK_EXHAUSTION=1 -DWASM2C_TEST_EMBEDDER_SIGNAL_HANDLING"
WASM2C_CC: "clang"
WASM2C_CFLAGS: "-DWASM_RT_USE_MMAP=1 -DWASM_RT_SKIP_SIGNAL_RECOVERY=1 -DWASM_RT_NONCONFORMING_UNCHECKED_STACK_EXHAUSTION=1 -DWASM2C_TEST_EMBEDDER_SIGNAL_HANDLING -DWASM_RT_ALLOW_SEGUE=1 -mfsgsbase -Wno-pass-failed"
steps:
- uses: actions/setup-python@v1
with:
Expand Down
64 changes: 62 additions & 2 deletions src/c-writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ class CWriter {

void Indent(int size = INDENT_SIZE);
void Dedent(int size = INDENT_SIZE);
void NonIndented(std::function<void()> func);
void WriteIndent();
void WriteData(const char* src, size_t size);
void Writef(const char* format, ...);
Expand Down Expand Up @@ -402,6 +403,9 @@ class CWriter {
void WriteElemInitializerDecls();
void WriteElemInitializers();
void WriteElemTableInit(bool, const ElemSegment*, const Table*);
bool IsSingleUnsharedMemory();
void SwapSegueBase(Memory* memory, bool save_old_value);
void UndoSwapSegueBase();
void WriteExports(CWriterPhase);
void WriteTailCallExports(CWriterPhase);
void WriteInitDecl();
Expand Down Expand Up @@ -1021,6 +1025,13 @@ void CWriter::Dedent(int size) {
assert(indent_ >= 0);
}

// Invokes `func` with the current indentation level temporarily reset to
// zero, then puts the previous level back. The segue helpers in this file use
// it to emit preprocessor directives ("#if" / "#endif") without leading
// indentation.
void CWriter::NonIndented(std::function<void()> func) {
  const int saved_indent = indent_;

  indent_ = 0;  // Everything written by `func` starts at column 0.
  func();

  indent_ = saved_indent;
}

void CWriter::WriteIndent() {
static char s_indent[] =
" "
Expand Down Expand Up @@ -1471,6 +1482,11 @@ std::string CWriter::GenerateHeaderGuard() const {
void CWriter::WriteSourceTop() {
Write(s_source_includes);
Write(Newline(), "#include \"", header_name_, "\"", Newline());

if (IsSingleUnsharedMemory()) {
Write("#define WASM_RT_MODULE_IS_SINGLE_UNSHARED_MEMORY 1", Newline());
}

Write(s_source_declarations, Newline());

if (module_->features_used.simd) {
Expand Down Expand Up @@ -2417,6 +2433,28 @@ void CWriter::WriteElemTableInit(bool active_initialization,
Write(");", Newline());
}

// Returns true only when the module has exactly one memory and that memory's
// page limits are not marked shared. Callers in this file use the result to
// decide whether to emit the segue base swap code and the
// WASM_RT_MODULE_IS_SINGLE_UNSHARED_MEMORY define.
bool CWriter::IsSingleUnsharedMemory() {
  if (module_->memories.size() != 1) {
    return false;
  }
  // Exactly one memory: it qualifies unless it is shared.
  return !module_->memories[0]->page_limits.is_shared;
}

// Emits C code, wrapped in `#if WASM_RT_USE_SEGUE` / `#endif`, that writes
// the data pointer of `memory` to the segue base via
// WASM_RT_SEGUE_WRITE_BASE.
//
// memory:          the memory whose `.data` becomes the new segue base.
// save_old_value:  when true, the emitted code first stores the current base
//                  in a local named `segue_saved_base`. UndoSwapSegueBase()
//                  emits code that reads that local, so it must only follow
//                  a call made with save_old_value == true in the same
//                  generated scope.
void CWriter::SwapSegueBase(Memory* memory, bool save_old_value) {
  // Preprocessor directives are written without indentation.
  NonIndented([&] { Write("#if WASM_RT_USE_SEGUE", Newline()); });

  if (save_old_value) {
    Write("uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE();", Newline());
  }

  auto memory_ref = ExternalInstanceRef(ModuleFieldType::Memory, memory->name);
  Write("WASM_RT_SEGUE_WRITE_BASE(", memory_ref, ".data);", Newline());

  NonIndented([&] { Write("#endif", Newline()); });
}

// Emits C code, wrapped in `#if WASM_RT_USE_SEGUE` / `#endif`, that writes
// the local `segue_saved_base` back to the segue base. That local is only
// declared by the code SwapSegueBase() emits when called with
// save_old_value == true, so the two calls must be paired within the same
// generated scope.
void CWriter::UndoSwapSegueBase() {
  // Opening directive, written without indentation.
  NonIndented([&] { Write("#if WASM_RT_USE_SEGUE", Newline()); });

  Write("WASM_RT_SEGUE_WRITE_BASE(segue_saved_base);", Newline());

  // Closing directive, written without indentation.
  NonIndented([&] { Write("#endif", Newline()); });
}

void CWriter::WriteExports(CWriterPhase kind) {
if (module_->exports.empty())
return;
Expand Down Expand Up @@ -2492,8 +2530,14 @@ void CWriter::WriteExports(CWriterPhase kind) {
switch (export_->kind) {
case ExternalKind::Func: {
Write(OpenBrace());
if (func_->GetNumResults() > 0) {
Write("return ");
if (IsSingleUnsharedMemory()) {
SwapSegueBase(module_->memories[0], true /* save_old_value */);
}
auto num_results = func_->GetNumResults();
if (num_results > 1) {
Write(func_->decl.sig.result_types, " ret = ");
} else if (num_results == 1) {
Write(func_->GetResultType(0), " ret = ");
}
Write(ExternalRef(ModuleFieldType::Func, internal_name), "(");

Expand All @@ -2505,6 +2549,12 @@ void CWriter::WriteExports(CWriterPhase kind) {
Write("instance");
}
WriteParamSymbols(index_to_name);
if (IsSingleUnsharedMemory()) {
UndoSwapSegueBase();
}
if (num_results > 0) {
Write("return ret;", Newline());
}
Write(CloseBrace(), Newline());

local_sym_map_.clear();
Expand Down Expand Up @@ -2603,6 +2653,9 @@ void CWriter::WriteInit() {
}
if (!module_->memories.empty()) {
Write("init_memories(instance);", Newline());
if (IsSingleUnsharedMemory()) {
SwapSegueBase(module_->memories[0], true /* save_old_value */);
}
}
if (!module_->tables.empty() && !module_->elem_segments.empty()) {
Write("init_elem_instances(instance);", Newline());
Expand All @@ -2623,6 +2676,10 @@ void CWriter::WriteInit() {
}
Write(Newline());
}

if (IsSingleUnsharedMemory()) {
UndoSwapSegueBase();
}
Write(CloseBrace(), Newline());
}

Expand Down Expand Up @@ -3725,6 +3782,9 @@ void CWriter::Write(const ExprList& exprs) {
Write(StackVar(0), " = ", func, "(",
ExternalInstancePtr(ModuleFieldType::Memory, memory->name), ", ",
StackVar(0), ");", Newline());
if (IsSingleUnsharedMemory()) {
SwapSegueBase(module_->memories[0], false /* save_old_value */);
}
break;
}

Expand Down
94 changes: 82 additions & 12 deletions src/prebuilt/wasm2c_source_declarations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,72 @@ R"w2c_template(#define MEM_ADDR(mem, addr, n) &(mem)->data[addr]
R"w2c_template(#endif
)w2c_template"
R"w2c_template(
#ifndef WASM_RT_USE_SEGUE
)w2c_template"
R"w2c_template(// Memory functions can use the segue optimization if allowed. The segue
)w2c_template"
R"w2c_template(// optimization uses x86 segments to point to a linear memory. We use this
)w2c_template"
R"w2c_template(// optimization when:
)w2c_template"
R"w2c_template(//
)w2c_template"
R"w2c_template(// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE
)w2c_template"
R"w2c_template(// (2) on x86_64 without WABT_BIG_ENDIAN enabled
)w2c_template"
R"w2c_template(// (3) the Wasm module uses a single unshared imported or exported memory
)w2c_template"
R"w2c_template(// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces"
)w2c_template"
R"w2c_template(// for accessing pointers, and supports memcpy on pointers with custom
)w2c_template"
R"w2c_template(// "address namespaces". GCC does not support the memcpy requirement, so
)w2c_template"
R"w2c_template(// this leaves only clang for now.
)w2c_template"
R"w2c_template(// (5) The OS doesn't replace the segment register on context switch which
)w2c_template"
R"w2c_template(// eliminates windows for now
)w2c_template"
R"w2c_template(#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \
)w2c_template"
R"w2c_template( (defined(__x86_64__) || defined(_M_X64)) && \
)w2c_template"
R"w2c_template( WASM_RT_MODULE_IS_SINGLE_UNSHARED_MEMORY && __clang__ && \
)w2c_template"
R"w2c_template( __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32)
)w2c_template"
R"w2c_template(#define WASM_RT_USE_SEGUE 1
)w2c_template"
R"w2c_template(#else
)w2c_template"
R"w2c_template(#define WASM_RT_USE_SEGUE 0
)w2c_template"
R"w2c_template(#endif
)w2c_template"
R"w2c_template(#endif
)w2c_template"
R"w2c_template(
#if WASM_RT_USE_SEGUE
)w2c_template"
R"w2c_template(// POSIX uses FS for TLS, GS is free
)w2c_template"
R"w2c_template(#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64()
)w2c_template"
R"w2c_template(#define WASM_RT_SEGUE_WRITE_BASE(base) \
)w2c_template"
R"w2c_template( __builtin_ia32_wrgsbase64((uintptr_t)base)
)w2c_template"
R"w2c_template(#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr)
)w2c_template"
R"w2c_template(#else
)w2c_template"
R"w2c_template(#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n)
)w2c_template"
R"w2c_template(#endif
)w2c_template"
R"w2c_template(
#define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0)
)w2c_template"
R"w2c_template(
Expand Down Expand Up @@ -204,32 +270,36 @@ R"w2c_template( load_data(MEM_ADDR(&m, o, s), i, s); \
R"w2c_template( } while (0)
)w2c_template"
R"w2c_template(
#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
)w2c_template"
R"w2c_template( static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
R"w2c_template( static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
)w2c_template"
R"w2c_template( MEMCHECK(mem, addr, t1); \
R"w2c_template( MEMCHECK(mem, addr, t1); \
)w2c_template"
R"w2c_template( t1 result; \
R"w2c_template( t1 result; \
)w2c_template"
R"w2c_template( wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \
R"w2c_template( wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \
)w2c_template"
R"w2c_template( force_read(result); \
R"w2c_template( sizeof(t1)); \
)w2c_template"
R"w2c_template( return (t3)(t2)result; \
R"w2c_template( force_read(result); \
)w2c_template"
R"w2c_template( return (t3)(t2)result; \
)w2c_template"
R"w2c_template( }
)w2c_template"
R"w2c_template(
#define DEFINE_STORE(name, t1, t2) \
#define DEFINE_STORE(name, t1, t2) \
)w2c_template"
R"w2c_template( static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
)w2c_template"
R"w2c_template( static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
R"w2c_template( MEMCHECK(mem, addr, t1); \
)w2c_template"
R"w2c_template( MEMCHECK(mem, addr, t1); \
R"w2c_template( t1 wrapped = (t1)value; \
)w2c_template"
R"w2c_template( t1 wrapped = (t1)value; \
R"w2c_template( wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \
)w2c_template"
R"w2c_template( wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \
R"w2c_template( sizeof(t1)); \
)w2c_template"
R"w2c_template( }
)w2c_template"
Expand Down
64 changes: 50 additions & 14 deletions src/template/wasm2c.declarations.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,40 @@
#define MEM_ADDR(mem, addr, n) &(mem)->data[addr]
#endif

#ifndef WASM_RT_USE_SEGUE
// Memory functions can use the segue optimization if allowed. The segue
// optimization uses x86 segments to point to a linear memory. We use this
// optimization when:
//
// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE
// (2) on x86_64 without WABT_BIG_ENDIAN enabled
// (3) the Wasm module uses a single unshared imported or exported memory
// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces"
// for accessing pointers, and supports memcpy on pointers with custom
// "address namespaces". GCC does not support the memcpy requirement, so
// this leaves only clang for now.
// (5) The OS doesn't replace the segment register on context switch which
// eliminates windows for now
#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \
(defined(__x86_64__) || defined(_M_X64)) && \
WASM_RT_MODULE_IS_SINGLE_UNSHARED_MEMORY && __clang__ && \
__has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32)
#define WASM_RT_USE_SEGUE 1
#else
#define WASM_RT_USE_SEGUE 0
#endif
#endif

#if WASM_RT_USE_SEGUE
// POSIX uses FS for TLS, GS is free
#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64()
#define WASM_RT_SEGUE_WRITE_BASE(base) \
__builtin_ia32_wrgsbase64((uintptr_t)base)
#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr)
#else
#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n)
#endif

#define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0)

#if WASM_RT_STACK_DEPTH_COUNT
Expand Down Expand Up @@ -109,20 +143,22 @@ static inline void load_data(void* dest, const void* src, size_t n) {
load_data(MEM_ADDR(&m, o, s), i, s); \
} while (0)

#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
MEMCHECK(mem, addr, t1); \
t1 result; \
wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \
force_read(result); \
return (t3)(t2)result; \
}

#define DEFINE_STORE(name, t1, t2) \
static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
MEMCHECK(mem, addr, t1); \
t1 wrapped = (t1)value; \
wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \
#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
MEMCHECK(mem, addr, t1); \
t1 result; \
wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \
sizeof(t1)); \
force_read(result); \
return (t3)(t2)result; \
}

#define DEFINE_STORE(name, t1, t2) \
static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
MEMCHECK(mem, addr, t1); \
t1 wrapped = (t1)value; \
wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \
sizeof(t1)); \
}

DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT)
Expand Down
Loading

0 comments on commit e286acb

Please sign in to comment.