Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ if (NOT (WASI STREQUAL "p1"))
endif()
include(check-symbols)
include(clang-format)
include(wasm-tools)

# =============================================================================
# Generic top-level build flags/settings
Expand Down
5 changes: 3 additions & 2 deletions cmake/ba-download.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ function(ba_download target repo version)

if (target STREQUAL wasmtime)
set(fmt tar.xz)
elseif ((target STREQUAL wasm-component-ld) AND (os STREQUAL windows))
elseif ((os STREQUAL windows) AND
((target STREQUAL wasm-component-ld) OR (target STREQUAL wasm-tools)))
set(fmt zip)
else()
set(fmt tar.gz)
endif()

if (target STREQUAL wit-bindgen)
if (target STREQUAL wit-bindgen OR target STREQUAL wasm-tools)
set(tag v${version})
else()
set(tag ${version})
Expand Down
15 changes: 15 additions & 0 deletions cmake/wasm-tools.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Locate the `wasm-tools` executable, preferring a copy that is already
# installed on the system and otherwise obtaining one via `ba_download`.
#
# In both cases a `wasm-tools` target exists afterwards, so other rules can
# list it as a dependency, and `wasm_tools` holds the path of the executable
# to invoke.
find_program(WASM_TOOLS_EXECUTABLE NAMES wasm-tools)
if (WASM_TOOLS_EXECUTABLE)
  # Found on the system: use it as-is. The custom target is intentionally
  # empty; it only provides the `wasm-tools` name.
  set(wasm_tools ${WASM_TOOLS_EXECUTABLE})
  add_custom_target(wasm-tools)
else()
  # Not found on the system: download version 1.244.0 instead.
  include(ba-download)
  ba_download(
    wasm-tools
    "https://github.com/bytecodealliance/wasm-tools"
    "1.244.0"
  )
  # The executable is looked up directly inside the download's source dir.
  ExternalProject_Get_Property(wasm-tools SOURCE_DIR)
  set(wasm_tools "${SOURCE_DIR}/wasm-tools")
endif()
101 changes: 82 additions & 19 deletions libc-bottom-half/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -180,31 +180,94 @@ foreach(obj bottom-half-shared bottom-half-static)
)
endforeach()

add_custom_target(sysroot-startup-objects)
add_dependencies(sysroot sysroot-startup-objects)

# =============================================================================
# startup objects
#
foreach(file crt/crt1.c
crt/crt1-command.c
# This is the logic for building `crt1{,-reactor,-command}.o` objects which
# Clang will link by default based on compiler flags. These are compiled into
# "object libraries" with CMake and then they're copied into the final location
# with adjustments based on WASI versions.
foreach(file crt/crt1-command.c
crt/crt1-reactor.c)
# get the filename without the directory and extension
cmake_path(GET file STEM filename)
cmake_path(GET file STEM stem)
add_library(${stem} OBJECT ${file})
clang_format_target(${stem})
target_link_libraries(${stem} PRIVATE musl-top-half-interface)
set_pic(${stem})
target_compile_options(${stem} PRIVATE -fvisibility=default)
endforeach()

# create a custom target for each file
add_library(${filename}.o OBJECT ${file})
target_link_libraries(${filename}.o PUBLIC musl-top-half-interface)
set_pic(${filename}.o)
target_compile_options(${filename}.o PRIVATE -fvisibility=default)
# crt1-reactor.o is a straight copy of what CMake produces
add_custom_command(
OUTPUT ${SYSROOT_LIB}/crt1-reactor.o
COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_OBJECTS:crt1-reactor> ${SYSROOT_LIB}/crt1-reactor.o
DEPENDS crt1-reactor $<TARGET_OBJECTS:crt1-reactor>
)

# add a custom command to compile the file
set(dst ${SYSROOT_LIB}/${filename}.o)
if (WASI STREQUAL "p1")
# wasip1: crt1-command.o is a straight copy of what CMake produces
add_custom_command(
OUTPUT ${dst}
COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_OBJECTS:${filename}.o> ${dst}
DEPENDS ${filename}.o
OUTPUT ${SYSROOT_LIB}/crt1-command.o
COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_OBJECTS:crt1-command> ${SYSROOT_LIB}/crt1-command.o
DEPENDS crt1-command $<TARGET_OBJECTS:crt1-command>
)
add_custom_target(sysroot-startup-${filename}.o DEPENDS ${dst})
add_dependencies(sysroot-startup-objects sysroot-startup-${filename}.o)
endforeach()
elseif (WASI STREQUAL "p2")
# wasip2: crt1-command.o is modified from what CMake produces to
# additionally have a custom section representing the type information needed
# for its contained export. This is the `wasi:cli/run` interface, for example.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be hard to add an assembly syntax for this so that clang/llvm can produce this object file without needing external tools?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I poked at that a bit, but was unfruitful in my ability to get it working. That being said you probably know whether this is possible more than I, so question for you!

The need here is that a custom section needs to be located inside of crt1-command.o (wasm custom section, not linking custom section). This wasm-tools command will embed the data within the object with a particular section name. We can pretty easily generate the data to embed ahead-of-time (similar to how wit-bindgen-generated bindings are checked in to this repo). Effectively what I want is something like:

__asm__(
  ".custom_section.component_type:other:naming:information"
#embed <the_world.wasm>
);

or... something like that. I couldn't figure out either .custom_section for __asm__, #embed, nor how to emit a wasm.custom_section in Clang via a C global or something like that. Do you know if these are all possible to combine?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That looks like something that should be possible to support with the asm syntax, yes. Assuming that #embed plays nicely with __asm__ in general (i.e. on other platforms), I don't see why it shouldn't for us.

BTW, In this case are you actually embedding a wasm file as a custom section in another wasm file (i.e. the object file)?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh my problem is I can't actually figure out a syntax/incantation that works. I suspect something like this should work, but I'm not enough of a clang wizard to figure it out.

And yeah, the type information for components is itself a component (a wasm file) which is embedded in objects.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to push on this harder. I don't know how to get __asm__ to work with #embed. I tried wasm-ld --relocatable to link two objects together but that seems to lose __attribute__((export_name("..."))) directives on symbols. Another option is some sort of preprocessing similar-ish to xxd -i, but overall I'm not sure that anything else will be all that much better than running wasm-tools component embed

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we could leave a comment there suggesting that a simpler solution might be possible with inline asm.

In the long run I do like the idea of striving for "it's just clang" as much as possible in wasi-sdk, both for the users and for the developers. I know that was one of the original goals when we first started wasi-sdk.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed yeah, comment added!

#
# TODO: ideally this wouldn't need `wasm-tools` as that's an extra build tool
# needed here and it's best to slim down dependencies as much as possible.
# This additionally requires downloading/vendoring WITs which isn't great.
# Ideally the type information embedded here would be referenced directly in
# `crt1-command.c` itself in the source. That would likely require some
# combination of `__asm__` and `#embed` but I at least couldn't figure out how
# to get that working. I'm also not aware of a way to, in Clang, define a
# global in C that goes into a custom section in the output object. Another
# possible route would be to use the `--relocatable` option of `wasm-ld`, but
# it looks like that loses `__attribute__((export_name("...")))` information
# which this object file relies on. As a workaround `wasm-tools` is used for
# now. If you, dear reader, know of how to do this in the C source itself
# please feel free to open an issue or a PR and maintainers can work with you
# on getting that integrated.
add_custom_command(
OUTPUT ${SYSROOT_LIB}/crt1-command.o
COMMAND
${wasm_tools} component embed
${wasip2_wit_dir}
$<TARGET_OBJECTS:crt1-command>
--world wasi:cli/command@0.2.0
-o ${SYSROOT_LIB}/crt1-command.o
DEPENDS crt1-command wasip2-wits $<TARGET_OBJECTS:crt1-command> wasm-tools
)
elseif (WASI STREQUAL "p3")
add_custom_command(
OUTPUT ${SYSROOT_LIB}/crt1-command.o
COMMAND
${wasm_tools} component embed
${wasip3_wit_dir}
$<TARGET_OBJECTS:crt1-command>
--world wasi:cli/command@0.3.0-rc-2025-09-16
-o ${SYSROOT_LIB}/crt1-command.o
DEPENDS crt1-command wasip3-wits $<TARGET_OBJECTS:crt1-command> wasm-tools
)
else()
message(FATAL_ERROR "Unknown WASI version: ${WASI}")
endif()

# Provide a plain crt1.o for toolchain compatibility, identical to
# `crt1-command.o`
add_custom_command(
OUTPUT ${SYSROOT_LIB}/crt1.o
COMMAND ${CMAKE_COMMAND} -E copy ${SYSROOT_LIB}/crt1-command.o ${SYSROOT_LIB}/crt1.o
DEPENDS ${SYSROOT_LIB}/crt1-command.o
)

add_custom_target(sysroot-startup-objects
DEPENDS
${SYSROOT_LIB}/crt1-reactor.o
${SYSROOT_LIB}/crt1-command.o
${SYSROOT_LIB}/crt1.o
)
add_dependencies(sysroot sysroot-startup-objects)
86 changes: 48 additions & 38 deletions libc-bottom-half/crt/crt1-command.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,54 +9,64 @@ extern void __wasm_call_ctors(void);
extern int __main_void(void);
extern void __wasm_call_dtors(void);

__attribute__((export_name("_start")))
void _start(void) {
// Commands should only be called once per instance. This simple check
// ensures that the `_start` function isn't started more than once.
//
// We use `volatile` here to prevent the store to `started` from being
// sunk past any subsequent code, and to prevent any compiler from
// optimizing based on the knowledge that `_start` is the program
// entrypoint.
#if defined(__wasip1__)
__attribute__((export_name("_start"))) void _start(void)
#elif defined(__wasip2__)
// Note that this is manually doing what `wit-bindgen` might otherwise be
// doing. Given the special nature of this symbol this skips the typical
// `wit-bindgen` rigamarole and the signature of this function is simple enough
// that this shouldn't be too problematic (in theory).
__attribute__((export_name("wasi:cli/run@0.2.0#run"))) int _start(void)
#elif defined(__wasip3__)
__attribute__((export_name("wasi:cli/run@0.3.0-rc-2025-09-16#run"))) int
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't wasi:cli/run.run an async function in wit, and thus would need a different annotation and a callback companion entry point? I guess this can work because the runtime adapts this blocking implementation?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct yeah, this is intentionally a sync lift into an async function type which the runtime will handle with stack switching and such. While reactor-style programs will be able to work with callbacks I don't think there's any route for us to transform int main() { ... } in C/C++ into callback-y style.

_start(void)
#else
#error "Unsupported WASI version"
#endif
{
// Commands should only be called once per instance. This simple check
// ensures that the `_start` function isn't started more than once.
//
// We use `volatile` here to prevent the store to `started` from being
// sunk past any subsequent code, and to prevent any compiler from
// optimizing based on the knowledge that `_start` is the program
// entrypoint.
#ifdef _REENTRANT
static volatile _Atomic int started = 0;
int expected = 0;
if (!atomic_compare_exchange_strong(&started, &expected, 1)) {
__builtin_trap();
}
static volatile _Atomic int started = 0;
int expected = 0;
if (!atomic_compare_exchange_strong(&started, &expected, 1)) {
__builtin_trap();
}
#else
static volatile int started = 0;
if (started != 0) {
__builtin_trap();
}
started = 1;
static volatile int started = 0;
if (started != 0) {
__builtin_trap();
}
started = 1;
#endif

__wasi_init_tp();
__wasi_init_tp();

// The linker synthesizes this to call constructors.
__wasm_call_ctors();
// The linker synthesizes this to call constructors.
__wasm_call_ctors();

// Call `__main_void` which will either be the application's zero-argument
// `__main_void` function or a libc routine which obtains the command-line
// arguments and calls `__main_argv_argc`.
int r = __main_void();
// Call `__main_void` which will either be the application's zero-argument
// `__main_void` function or a libc routine which obtains the command-line
// arguments and calls `__main_argv_argc`.
int r = __main_void();

// Call atexit functions, destructors, stdio cleanup, etc.
__wasm_call_dtors();
// Call atexit functions, destructors, stdio cleanup, etc.
__wasm_call_dtors();

// If main exited successfully, just return, otherwise call
// `__wasi_proc_exit`.
// If main exited successfully, just return, otherwise call
// `__wasi_proc_exit`.
#if defined(__wasip1__)
if (r != 0) {
__wasi_proc_exit(r);
}
if (r != 0) {
__wasi_proc_exit(r);
}
#elif defined(__wasip2__) || defined(__wasip3__)
if (r != 0) {
exit_result_void_void_t status = { .is_err = true };
exit_exit(&status);
}
return r != 0;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the exit call no longer needed?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With a target-specific entrypoint signature I was able to update to directly return the result that the wasi:cli/run entrypoint returns rather than being required to go through the wasi:cli/exit route. A bit of a nice benefit as programs will now actually naturally return as opposed to prior where they would unconditionally raise a trap to exit.

#else
# error "Unsupported WASI version"
#error "Unsupported WASI version"
#endif
}
29 changes: 14 additions & 15 deletions libc-bottom-half/crt/crt1-reactor.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,22 @@
extern void __wasi_init_tp(void);
extern void __wasm_call_ctors(void);

__attribute__((export_name("_initialize")))
void _initialize(void) {
__attribute__((export_name("_initialize"))) void _initialize(void) {
#if defined(_REENTRANT)
static volatile atomic_int initialized = 0;
int expected = 0;
if (!atomic_compare_exchange_strong(&initialized, &expected, 1)) {
__builtin_trap();
}
static volatile atomic_int initialized = 0;
int expected = 0;
if (!atomic_compare_exchange_strong(&initialized, &expected, 1)) {
__builtin_trap();
}
#else
static volatile int initialized = 0;
if (initialized != 0) {
__builtin_trap();
}
initialized = 1;
static volatile int initialized = 0;
if (initialized != 0) {
__builtin_trap();
}
initialized = 1;
#endif
__wasi_init_tp();
__wasi_init_tp();

// The linker synthesizes this to call constructors.
__wasm_call_ctors();
// The linker synthesizes this to call constructors.
__wasm_call_ctors();
}
3 changes: 0 additions & 3 deletions libc-bottom-half/crt/crt1.c

This file was deleted.