From 0f3dcd10488276089a61a7bbaca69d08b98a5543 Mon Sep 17 00:00:00 2001 From: Michael McLoughlin Date: Mon, 24 Jun 2024 10:16:44 -0400 Subject: [PATCH 01/10] include extern const in error message (#8862) --- cranelift/isle/isle/src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/isle/isle/src/parser.rs b/cranelift/isle/isle/src/parser.rs index b613e65c5c75..c0be39b16ee7 100644 --- a/cranelift/isle/isle/src/parser.rs +++ b/cranelift/isle/isle/src/parser.rs @@ -356,7 +356,7 @@ impl<'a> Parser<'a> { } else { Err(self.error( pos, - "Invalid extern: must be (extern constructor ...) or (extern extractor ...)" + "Invalid extern: must be (extern constructor ...), (extern extractor ...) or (extern const ...)" .to_string(), )) } From 7112d93b481c4baa366c7e56d6cd5b8e68805cea Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Mon, 24 Jun 2024 07:18:55 -0700 Subject: [PATCH 02/10] Fix radix of decimal immediates (#8865) Decimal immediates incorrectly parsed as hex digits. --- cranelift/codegen/src/ir/immediates.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cranelift/codegen/src/ir/immediates.rs b/cranelift/codegen/src/ir/immediates.rs index 14af7667a55c..153f331b9e79 100644 --- a/cranelift/codegen/src/ir/immediates.rs +++ b/cranelift/codegen/src/ir/immediates.rs @@ -241,7 +241,7 @@ fn parse_u64(s: &str) -> Result { } else { // Decimal number, possibly negative. for ch in s.chars() { - match ch.to_digit(16) { + match ch.to_digit(10) { Some(digit) => { digits += 1; match value.checked_mul(10) { @@ -1270,6 +1270,8 @@ mod tests { parse_err::(" 0", "Invalid character in decimal number"); parse_err::("--", "Invalid character in decimal number"); parse_err::("-0x-", "Invalid character in hexadecimal number"); + parse_err::("abc", "Invalid character in decimal number"); + parse_err::("-abc", "Invalid character in decimal number"); // Hex count overflow. 
parse_err::("0x0_0000_0000_0000_0000", "Too many hexadecimal digits"); @@ -1310,6 +1312,8 @@ mod tests { parse_err::("-0x-", "Invalid character in hexadecimal number"); parse_err::("-0", "Invalid character in decimal number"); parse_err::("-1", "Invalid character in decimal number"); + parse_err::("abc", "Invalid character in decimal number"); + parse_err::("-abc", "Invalid character in decimal number"); // Hex count overflow. parse_err::("0x0_0000_0000_0000_0000", "Too many hexadecimal digits"); From f471b4dcc85a0c5ca964e8efac39f6e7a0072e90 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 24 Jun 2024 10:24:58 -0500 Subject: [PATCH 03/10] Refactor and document the wasmtime-wasi-http more (#8861) * Improve some documentation of the `wasmtime-wasi` crate Show a few examples of using `with` to point to upstream `wasmtime-wasi` for bindings. * Refactor and document the `wasmtime-wasi-http` more This commit primarily adds a complete example of using `wasmtime-wasi-http` to the documentation. Along the way I've done a number of other refactorings too: * `bindgen!`-generated `*Pre` structures now implement `Clone`. * `bindgen!`-generated `*Pre` structures now have an `engine` method. * `bindgen!`-generated `*Pre` structures now have an `instance_pre` method. * The structure of `wasmtime-wasi-http` now matches `wasmtime-wasi`, notably: * The `proxy` module is removed * `wasmtime_wasi_http::add_to_linker_{a,}sync` is the top level add-to-linker function. * The `bindings` module now contains `Proxy` and `ProxyPre` along with a `sync` submodule. * The `bindings` module contains all bindings for `wasi:http` things. * The `add_only_*` methods are un-hidden and documented. * Code processing `req` has been simplified by avoiding decomposing-and-reconstructing a request. * The `new_incoming_request` method is now generic to avoid callers having to do boxing/mapping themselves. 
* Update expanded macro expectations * Remove unused import --- crates/component-macro/tests/expanded/char.rs | 15 + .../tests/expanded/char_async.rs | 15 + .../tests/expanded/conventions.rs | 15 + .../tests/expanded/conventions_async.rs | 15 + .../tests/expanded/dead-code.rs | 13 + .../tests/expanded/dead-code_async.rs | 13 + .../tests/expanded/direct-import.rs | 13 + .../tests/expanded/direct-import_async.rs | 13 + .../component-macro/tests/expanded/empty.rs | 13 + .../tests/expanded/empty_async.rs | 13 + .../component-macro/tests/expanded/flags.rs | 15 + .../tests/expanded/flags_async.rs | 15 + .../component-macro/tests/expanded/floats.rs | 15 + .../tests/expanded/floats_async.rs | 15 + .../tests/expanded/function-new.rs | 14 + .../tests/expanded/function-new_async.rs | 14 + .../tests/expanded/integers.rs | 15 + .../tests/expanded/integers_async.rs | 15 + .../component-macro/tests/expanded/lists.rs | 15 + .../tests/expanded/lists_async.rs | 15 + .../tests/expanded/many-arguments.rs | 15 + .../tests/expanded/many-arguments_async.rs | 15 + .../tests/expanded/multi-return.rs | 15 + .../tests/expanded/multi-return_async.rs | 15 + .../tests/expanded/multiversion.rs | 17 + .../tests/expanded/multiversion_async.rs | 17 + .../component-macro/tests/expanded/records.rs | 15 + .../tests/expanded/records_async.rs | 15 + .../component-macro/tests/expanded/rename.rs | 13 + .../tests/expanded/rename_async.rs | 13 + .../tests/expanded/resources-export.rs | 21 + .../tests/expanded/resources-export_async.rs | 21 + .../tests/expanded/resources-import.rs | 18 +- .../tests/expanded/resources-import_async.rs | 18 +- .../tests/expanded/share-types.rs | 15 + .../tests/expanded/share-types_async.rs | 15 + .../tests/expanded/simple-functions.rs | 15 + .../tests/expanded/simple-functions_async.rs | 15 + .../tests/expanded/simple-lists.rs | 15 + .../tests/expanded/simple-lists_async.rs | 15 + .../tests/expanded/simple-wasi.rs | 13 + .../tests/expanded/simple-wasi_async.rs | 13 + 
.../tests/expanded/small-anonymous.rs | 15 + .../tests/expanded/small-anonymous_async.rs | 15 + .../tests/expanded/smoke-default.rs | 14 + .../tests/expanded/smoke-default_async.rs | 14 + .../tests/expanded/smoke-export.rs | 15 + .../tests/expanded/smoke-export_async.rs | 15 + .../component-macro/tests/expanded/smoke.rs | 13 + .../tests/expanded/smoke_async.rs | 13 + .../component-macro/tests/expanded/strings.rs | 15 + .../tests/expanded/strings_async.rs | 15 + .../tests/expanded/unversioned-foo.rs | 13 + .../tests/expanded/unversioned-foo_async.rs | 13 + .../tests/expanded/use-paths.rs | 13 + .../tests/expanded/use-paths_async.rs | 13 + .../tests/expanded/variants.rs | 15 + .../tests/expanded/variants_async.rs | 15 + crates/component-macro/tests/expanded/wat.rs | 15 + .../tests/expanded/wat_async.rs | 15 + .../tests/expanded/worlds-with-types.rs | 14 + .../tests/expanded/worlds-with-types_async.rs | 14 + crates/wasi-http/src/bindings.rs | 77 ++++ crates/wasi-http/src/lib.rs | 432 +++++++++++++++--- crates/wasi-http/src/proxy.rs | 236 ---------- crates/wasi-http/src/types.rs | 51 ++- crates/wasi-http/tests/all/async_.rs | 2 +- crates/wasi-http/tests/all/main.rs | 24 +- crates/wasi-http/tests/all/sync.rs | 2 +- crates/wasi/src/bindings.rs | 149 +++++- crates/wasi/src/lib.rs | 8 +- crates/wasmtime/src/lib.rs | 2 +- .../src/runtime/component/instance.rs | 7 +- crates/wit-bindgen/src/lib.rs | 23 + src/commands/run.rs | 2 +- src/commands/serve.rs | 46 +- 76 files changed, 1610 insertions(+), 365 deletions(-) create mode 100644 crates/wasi-http/src/bindings.rs delete mode 100644 crates/wasi-http/src/proxy.rs diff --git a/crates/component-macro/tests/expanded/char.rs b/crates/component-macro/tests/expanded/char.rs index b3aeffb60c7d..f5bae77cf586 100644 --- a/crates/component-macro/tests/expanded/char.rs +++ b/crates/component-macro/tests/expanded/char.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: 
exports::foo::foo::chars::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. /// @@ -53,6 +61,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -163,6 +177,7 @@ pub mod exports { take_char: wasmtime::component::Func, return_char: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { take_char: wasmtime::component::ComponentExportIndex, return_char: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/char_async.rs b/crates/component-macro/tests/expanded/char_async.rs index 250d62a62e0b..169eeb6e2113 100644 --- a/crates/component-macro/tests/expanded/char_async.rs +++ b/crates/component-macro/tests/expanded/char_async.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::chars::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -56,6 +64,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -176,6 +190,7 @@ pub mod exports { take_char: wasmtime::component::Func, return_char: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { take_char: wasmtime::component::ComponentExportIndex, return_char: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/conventions.rs b/crates/component-macro/tests/expanded/conventions.rs index b1a5eefef520..464ba2874953 100644 --- a/crates/component-macro/tests/expanded/conventions.rs +++ b/crates/component-macro/tests/expanded/conventions.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::conventions::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -53,6 +61,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -371,6 +385,7 @@ pub mod exports { explicit_kebab: wasmtime::component::Func, bool: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { kebab_case: wasmtime::component::ComponentExportIndex, foo: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/conventions_async.rs b/crates/component-macro/tests/expanded/conventions_async.rs index 5e5650905e09..3106702f8304 100644 --- a/crates/component-macro/tests/expanded/conventions_async.rs +++ b/crates/component-macro/tests/expanded/conventions_async.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::conventions::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -56,6 +64,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -384,6 +398,7 @@ pub mod exports { explicit_kebab: wasmtime::component::Func, bool: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { kebab_case: wasmtime::component::ComponentExportIndex, foo: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/dead-code.rs b/crates/component-macro/tests/expanded/dead-code.rs index abbda40f9cd8..483d6389922b 100644 --- a/crates/component-macro/tests/expanded/dead-code.rs +++ b/crates/component-macro/tests/expanded/dead-code.rs @@ -7,6 +7,13 @@ pub struct ImportsPre { instance_pre: wasmtime::component::InstancePre, } +impl Clone for ImportsPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `imports`. 
/// @@ -45,6 +52,12 @@ const _: () = { let _instance = self.instance_pre.instantiate(&mut store)?; Ok(Imports {}) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Imports { /// Convenience wrapper around [`ImportsPre::new`] and diff --git a/crates/component-macro/tests/expanded/dead-code_async.rs b/crates/component-macro/tests/expanded/dead-code_async.rs index fa221e759072..de469d12615b 100644 --- a/crates/component-macro/tests/expanded/dead-code_async.rs +++ b/crates/component-macro/tests/expanded/dead-code_async.rs @@ -7,6 +7,13 @@ pub struct ImportsPre { instance_pre: wasmtime::component::InstancePre, } +impl Clone for ImportsPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `imports`. /// @@ -48,6 +55,12 @@ const _: () = { let _instance = self.instance_pre.instantiate_async(&mut store).await?; Ok(Imports {}) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Imports { /// Convenience wrapper around [`ImportsPre::new`] and diff --git a/crates/component-macro/tests/expanded/direct-import.rs b/crates/component-macro/tests/expanded/direct-import.rs index 186626bcca61..5ffabde6aea3 100644 --- a/crates/component-macro/tests/expanded/direct-import.rs +++ b/crates/component-macro/tests/expanded/direct-import.rs @@ -7,6 +7,13 @@ pub struct FooPre { instance_pre: wasmtime::component::InstancePre, } +impl Clone for FooPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `foo`. 
/// @@ -65,6 +72,12 @@ const _: () = { let _instance = self.instance_pre.instantiate(&mut store)?; Ok(Foo {}) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Foo { /// Convenience wrapper around [`FooPre::new`] and diff --git a/crates/component-macro/tests/expanded/direct-import_async.rs b/crates/component-macro/tests/expanded/direct-import_async.rs index 67fd9aa9ce78..77abe5e4ce55 100644 --- a/crates/component-macro/tests/expanded/direct-import_async.rs +++ b/crates/component-macro/tests/expanded/direct-import_async.rs @@ -7,6 +7,13 @@ pub struct FooPre { instance_pre: wasmtime::component::InstancePre, } +impl Clone for FooPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `foo`. /// @@ -70,6 +77,12 @@ const _: () = { let _instance = self.instance_pre.instantiate_async(&mut store).await?; Ok(Foo {}) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Foo { /// Convenience wrapper around [`FooPre::new`] and diff --git a/crates/component-macro/tests/expanded/empty.rs b/crates/component-macro/tests/expanded/empty.rs index acb5468267cd..512c492bb9bf 100644 --- a/crates/component-macro/tests/expanded/empty.rs +++ b/crates/component-macro/tests/expanded/empty.rs @@ -7,6 +7,13 @@ pub struct EmptyPre { instance_pre: wasmtime::component::InstancePre, } +impl Clone for EmptyPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `empty`. 
/// @@ -45,6 +52,12 @@ const _: () = { let _instance = self.instance_pre.instantiate(&mut store)?; Ok(Empty {}) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Empty { /// Convenience wrapper around [`EmptyPre::new`] and diff --git a/crates/component-macro/tests/expanded/empty_async.rs b/crates/component-macro/tests/expanded/empty_async.rs index b2f4eae8db83..cc26b32a47fd 100644 --- a/crates/component-macro/tests/expanded/empty_async.rs +++ b/crates/component-macro/tests/expanded/empty_async.rs @@ -7,6 +7,13 @@ pub struct EmptyPre { instance_pre: wasmtime::component::InstancePre, } +impl Clone for EmptyPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `empty`. /// @@ -48,6 +55,12 @@ const _: () = { let _instance = self.instance_pre.instantiate_async(&mut store).await?; Ok(Empty {}) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Empty { /// Convenience wrapper around [`EmptyPre::new`] and diff --git a/crates/component-macro/tests/expanded/flags.rs b/crates/component-macro/tests/expanded/flags.rs index 7e705d40336c..82d4578490e6 100644 --- a/crates/component-macro/tests/expanded/flags.rs +++ b/crates/component-macro/tests/expanded/flags.rs @@ -8,6 +8,14 @@ pub struct TheFlagsPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::flegs::GuestPre, } +impl Clone for TheFlagsPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-flags`. 
/// @@ -53,6 +61,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheFlags { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheFlags { /// Convenience wrapper around [`TheFlagsPre::new`] and @@ -505,6 +519,7 @@ pub mod exports { roundtrip_flag32: wasmtime::component::Func, roundtrip_flag64: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { roundtrip_flag1: wasmtime::component::ComponentExportIndex, roundtrip_flag2: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/flags_async.rs b/crates/component-macro/tests/expanded/flags_async.rs index a3085cb6689c..b61c765c43d2 100644 --- a/crates/component-macro/tests/expanded/flags_async.rs +++ b/crates/component-macro/tests/expanded/flags_async.rs @@ -8,6 +8,14 @@ pub struct TheFlagsPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::flegs::GuestPre, } +impl Clone for TheFlagsPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-flags`. 
/// @@ -56,6 +64,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheFlags { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheFlags { /// Convenience wrapper around [`TheFlagsPre::new`] and @@ -518,6 +532,7 @@ pub mod exports { roundtrip_flag32: wasmtime::component::Func, roundtrip_flag64: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { roundtrip_flag1: wasmtime::component::ComponentExportIndex, roundtrip_flag2: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/floats.rs b/crates/component-macro/tests/expanded/floats.rs index 7c1b801b6f15..7f2439aa6a87 100644 --- a/crates/component-macro/tests/expanded/floats.rs +++ b/crates/component-macro/tests/expanded/floats.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::floats::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -53,6 +61,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -182,6 +196,7 @@ pub mod exports { float32_result: wasmtime::component::Func, float64_result: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { float32_param: wasmtime::component::ComponentExportIndex, float64_param: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/floats_async.rs b/crates/component-macro/tests/expanded/floats_async.rs index 6d13e8342416..49e359b3b817 100644 --- a/crates/component-macro/tests/expanded/floats_async.rs +++ b/crates/component-macro/tests/expanded/floats_async.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::floats::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -56,6 +64,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -195,6 +209,7 @@ pub mod exports { float32_result: wasmtime::component::Func, float64_result: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { float32_param: wasmtime::component::ComponentExportIndex, float64_param: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/function-new.rs b/crates/component-macro/tests/expanded/function-new.rs index bfc0477bc68e..4ac8dd1632e8 100644 --- a/crates/component-macro/tests/expanded/function-new.rs +++ b/crates/component-macro/tests/expanded/function-new.rs @@ -8,6 +8,14 @@ pub struct FooPre { instance_pre: wasmtime::component::InstancePre, new: wasmtime::component::ComponentExportIndex, } +impl Clone for FooPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + new: self.new.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `foo`. 
/// @@ -53,6 +61,12 @@ const _: () = { let new = *_instance.get_typed_func::<(), ()>(&mut store, &self.new)?.func(); Ok(Foo { new }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Foo { /// Convenience wrapper around [`FooPre::new`] and diff --git a/crates/component-macro/tests/expanded/function-new_async.rs b/crates/component-macro/tests/expanded/function-new_async.rs index 2722fbfa6318..ee824225f683 100644 --- a/crates/component-macro/tests/expanded/function-new_async.rs +++ b/crates/component-macro/tests/expanded/function-new_async.rs @@ -8,6 +8,14 @@ pub struct FooPre { instance_pre: wasmtime::component::InstancePre, new: wasmtime::component::ComponentExportIndex, } +impl Clone for FooPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + new: self.new.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `foo`. 
/// @@ -56,6 +64,12 @@ const _: () = { let new = *_instance.get_typed_func::<(), ()>(&mut store, &self.new)?.func(); Ok(Foo { new }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Foo { /// Convenience wrapper around [`FooPre::new`] and diff --git a/crates/component-macro/tests/expanded/integers.rs b/crates/component-macro/tests/expanded/integers.rs index fdcd49081a15..fff62d87bb30 100644 --- a/crates/component-macro/tests/expanded/integers.rs +++ b/crates/component-macro/tests/expanded/integers.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::integers::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -53,6 +61,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -406,6 +420,7 @@ pub mod exports { r8: wasmtime::component::Func, pair_ret: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { a1: wasmtime::component::ComponentExportIndex, a2: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/integers_async.rs b/crates/component-macro/tests/expanded/integers_async.rs index 9c1ad50cdbce..00f512a34e04 100644 --- a/crates/component-macro/tests/expanded/integers_async.rs +++ b/crates/component-macro/tests/expanded/integers_async.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::integers::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -56,6 +64,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -420,6 +434,7 @@ pub mod exports { r8: wasmtime::component::Func, pair_ret: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { a1: wasmtime::component::ComponentExportIndex, a2: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/lists.rs b/crates/component-macro/tests/expanded/lists.rs index b6736318a9b3..50d810de2061 100644 --- a/crates/component-macro/tests/expanded/lists.rs +++ b/crates/component-macro/tests/expanded/lists.rs @@ -8,6 +8,14 @@ pub struct TheListsPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::lists::GuestPre, } +impl Clone for TheListsPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-lists`. 
/// @@ -53,6 +61,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheLists { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheLists { /// Convenience wrapper around [`TheListsPre::new`] and @@ -1103,6 +1117,7 @@ pub mod exports { variant_list: wasmtime::component::Func, load_store_everything: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { list_u8_param: wasmtime::component::ComponentExportIndex, list_u16_param: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/lists_async.rs b/crates/component-macro/tests/expanded/lists_async.rs index 4d8c356165d8..41a7595e6704 100644 --- a/crates/component-macro/tests/expanded/lists_async.rs +++ b/crates/component-macro/tests/expanded/lists_async.rs @@ -8,6 +8,14 @@ pub struct TheListsPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::lists::GuestPre, } +impl Clone for TheListsPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-lists`. 
/// @@ -56,6 +64,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheLists { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheLists { /// Convenience wrapper around [`TheListsPre::new`] and @@ -1148,6 +1162,7 @@ pub mod exports { variant_list: wasmtime::component::Func, load_store_everything: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { list_u8_param: wasmtime::component::ComponentExportIndex, list_u16_param: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/many-arguments.rs b/crates/component-macro/tests/expanded/many-arguments.rs index 26a91fb07794..faf5f98a1f40 100644 --- a/crates/component-macro/tests/expanded/many-arguments.rs +++ b/crates/component-macro/tests/expanded/many-arguments.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::manyarg::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -53,6 +61,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -434,6 +448,7 @@ pub mod exports { many_args: wasmtime::component::Func, big_argument: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { many_args: wasmtime::component::ComponentExportIndex, big_argument: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/many-arguments_async.rs b/crates/component-macro/tests/expanded/many-arguments_async.rs index 376b3adcc813..24d9714915a6 100644 --- a/crates/component-macro/tests/expanded/many-arguments_async.rs +++ b/crates/component-macro/tests/expanded/many-arguments_async.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::manyarg::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -56,6 +64,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -449,6 +463,7 @@ pub mod exports { many_args: wasmtime::component::Func, big_argument: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { many_args: wasmtime::component::ComponentExportIndex, big_argument: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/multi-return.rs b/crates/component-macro/tests/expanded/multi-return.rs index 41326e71a1b3..1e01f9d278a1 100644 --- a/crates/component-macro/tests/expanded/multi-return.rs +++ b/crates/component-macro/tests/expanded/multi-return.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::multi_return::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -53,6 +61,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -195,6 +209,7 @@ pub mod exports { mrd: wasmtime::component::Func, mre: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { mra: wasmtime::component::ComponentExportIndex, mrb: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/multi-return_async.rs b/crates/component-macro/tests/expanded/multi-return_async.rs index 2582ccffb425..1d7d4e8882c0 100644 --- a/crates/component-macro/tests/expanded/multi-return_async.rs +++ b/crates/component-macro/tests/expanded/multi-return_async.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::multi_return::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -56,6 +64,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -208,6 +222,7 @@ pub mod exports { mrd: wasmtime::component::Func, mre: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { mra: wasmtime::component::ComponentExportIndex, mrb: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/multiversion.rs b/crates/component-macro/tests/expanded/multiversion.rs index f6c0b40d2dab..f8e209a6a844 100644 --- a/crates/component-macro/tests/expanded/multiversion.rs +++ b/crates/component-macro/tests/expanded/multiversion.rs @@ -9,6 +9,15 @@ pub struct FooPre { interface0: exports::my::dep0_1_0::a::GuestPre, interface1: exports::my::dep0_2_0::a::GuestPre, } +impl Clone for FooPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + interface1: self.interface1.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `foo`. 
/// @@ -58,6 +67,12 @@ const _: () = { let interface1 = self.interface1.load(&mut store, &_instance)?; Ok(Foo { interface0, interface1 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Foo { /// Convenience wrapper around [`FooPre::new`] and @@ -203,6 +218,7 @@ pub mod exports { pub struct Guest { x: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { x: wasmtime::component::ComponentExportIndex, } @@ -272,6 +288,7 @@ pub mod exports { pub struct Guest { x: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { x: wasmtime::component::ComponentExportIndex, } diff --git a/crates/component-macro/tests/expanded/multiversion_async.rs b/crates/component-macro/tests/expanded/multiversion_async.rs index 879fba83ce93..bd5b547461c2 100644 --- a/crates/component-macro/tests/expanded/multiversion_async.rs +++ b/crates/component-macro/tests/expanded/multiversion_async.rs @@ -9,6 +9,15 @@ pub struct FooPre { interface0: exports::my::dep0_1_0::a::GuestPre, interface1: exports::my::dep0_2_0::a::GuestPre, } +impl Clone for FooPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + interface1: self.interface1.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `foo`. 
/// @@ -61,6 +70,12 @@ const _: () = { let interface1 = self.interface1.load(&mut store, &_instance)?; Ok(Foo { interface0, interface1 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Foo { /// Convenience wrapper around [`FooPre::new`] and @@ -222,6 +237,7 @@ pub mod exports { pub struct Guest { x: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { x: wasmtime::component::ComponentExportIndex, } @@ -294,6 +310,7 @@ pub mod exports { pub struct Guest { x: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { x: wasmtime::component::ComponentExportIndex, } diff --git a/crates/component-macro/tests/expanded/records.rs b/crates/component-macro/tests/expanded/records.rs index 867de6b6e450..5c14c14c7852 100644 --- a/crates/component-macro/tests/expanded/records.rs +++ b/crates/component-macro/tests/expanded/records.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::records::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -53,6 +61,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -610,6 +624,7 @@ pub mod exports { aggregate_result: wasmtime::component::Func, typedef_inout: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { tuple_arg: wasmtime::component::ComponentExportIndex, tuple_result: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/records_async.rs b/crates/component-macro/tests/expanded/records_async.rs index faadf044affe..16a9151c88a2 100644 --- a/crates/component-macro/tests/expanded/records_async.rs +++ b/crates/component-macro/tests/expanded/records_async.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::records::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -56,6 +64,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -623,6 +637,7 @@ pub mod exports { aggregate_result: wasmtime::component::Func, typedef_inout: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { tuple_arg: wasmtime::component::ComponentExportIndex, tuple_result: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/rename.rs b/crates/component-macro/tests/expanded/rename.rs index 9477e91e993a..a6b804943933 100644 --- a/crates/component-macro/tests/expanded/rename.rs +++ b/crates/component-macro/tests/expanded/rename.rs @@ -7,6 +7,13 @@ pub struct NeptunePre { instance_pre: wasmtime::component::InstancePre, } +impl Clone for NeptunePre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `neptune`. 
/// @@ -45,6 +52,12 @@ const _: () = { let _instance = self.instance_pre.instantiate(&mut store)?; Ok(Neptune {}) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Neptune { /// Convenience wrapper around [`NeptunePre::new`] and diff --git a/crates/component-macro/tests/expanded/rename_async.rs b/crates/component-macro/tests/expanded/rename_async.rs index cbe8c257cd53..facb43d2064c 100644 --- a/crates/component-macro/tests/expanded/rename_async.rs +++ b/crates/component-macro/tests/expanded/rename_async.rs @@ -7,6 +7,13 @@ pub struct NeptunePre { instance_pre: wasmtime::component::InstancePre, } +impl Clone for NeptunePre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `neptune`. /// @@ -48,6 +55,12 @@ const _: () = { let _instance = self.instance_pre.instantiate_async(&mut store).await?; Ok(Neptune {}) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Neptune { /// Convenience wrapper around [`NeptunePre::new`] and diff --git a/crates/component-macro/tests/expanded/resources-export.rs b/crates/component-macro/tests/expanded/resources-export.rs index 0762f5bdb9a6..043aa5fa8b38 100644 --- a/crates/component-macro/tests/expanded/resources-export.rs +++ b/crates/component-macro/tests/expanded/resources-export.rs @@ -11,6 +11,17 @@ pub struct WPre { interface2: exports::foo::foo::export_using_export1::GuestPre, interface3: exports::foo::foo::export_using_export2::GuestPre, } +impl Clone for WPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + interface1: self.interface1.clone(), + interface2: 
self.interface2.clone(), + interface3: self.interface3.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `w`. /// @@ -81,6 +92,12 @@ const _: () = { interface3, }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl W { /// Convenience wrapper around [`WPre::new`] and @@ -203,6 +220,7 @@ pub mod exports { static_a_static_a: wasmtime::component::Func, method_a_method_a: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { constructor_a_constructor: wasmtime::component::ComponentExportIndex, static_a_static_a: wasmtime::component::ComponentExportIndex, @@ -338,6 +356,7 @@ pub mod exports { static_a_static_a: wasmtime::component::Func, method_a_method_a: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { constructor_a_constructor: wasmtime::component::ComponentExportIndex, static_a_static_a: wasmtime::component::ComponentExportIndex, @@ -478,6 +497,7 @@ pub mod exports { pub struct Guest { constructor_a_constructor: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { constructor_a_constructor: wasmtime::component::ComponentExportIndex, } @@ -560,6 +580,7 @@ pub mod exports { pub struct Guest { constructor_b_constructor: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { constructor_b_constructor: wasmtime::component::ComponentExportIndex, } diff --git a/crates/component-macro/tests/expanded/resources-export_async.rs b/crates/component-macro/tests/expanded/resources-export_async.rs index bb81319bdefb..138962f34f17 100644 --- a/crates/component-macro/tests/expanded/resources-export_async.rs +++ b/crates/component-macro/tests/expanded/resources-export_async.rs @@ -11,6 +11,17 @@ pub struct WPre { interface2: exports::foo::foo::export_using_export1::GuestPre, interface3: 
exports::foo::foo::export_using_export2::GuestPre, } +impl Clone for WPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + interface1: self.interface1.clone(), + interface2: self.interface2.clone(), + interface3: self.interface3.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `w`. /// @@ -84,6 +95,12 @@ const _: () = { interface3, }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl W { /// Convenience wrapper around [`WPre::new`] and @@ -218,6 +235,7 @@ pub mod exports { static_a_static_a: wasmtime::component::Func, method_a_method_a: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { constructor_a_constructor: wasmtime::component::ComponentExportIndex, static_a_static_a: wasmtime::component::ComponentExportIndex, @@ -368,6 +386,7 @@ pub mod exports { static_a_static_a: wasmtime::component::Func, method_a_method_a: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { constructor_a_constructor: wasmtime::component::ComponentExportIndex, static_a_static_a: wasmtime::component::ComponentExportIndex, @@ -523,6 +542,7 @@ pub mod exports { pub struct Guest { constructor_a_constructor: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { constructor_a_constructor: wasmtime::component::ComponentExportIndex, } @@ -610,6 +630,7 @@ pub mod exports { pub struct Guest { constructor_b_constructor: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { constructor_b_constructor: wasmtime::component::ComponentExportIndex, } diff --git a/crates/component-macro/tests/expanded/resources-import.rs b/crates/component-macro/tests/expanded/resources-import.rs index a5d5ecdc5c87..726c98182676 100644 --- 
a/crates/component-macro/tests/expanded/resources-import.rs +++ b/crates/component-macro/tests/expanded/resources-import.rs @@ -36,6 +36,15 @@ pub struct TheWorldPre { interface1: exports::foo::foo::uses_resource_transitively::GuestPre, some_world_func2: wasmtime::component::ComponentExportIndex, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface1: self.interface1.clone(), + some_world_func2: self.some_world_func2.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. /// @@ -120,6 +129,12 @@ const _: () = { some_world_func2, }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -1003,6 +1018,7 @@ pub mod exports { pub struct Guest { handle: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { handle: wasmtime::component::ComponentExportIndex, } @@ -1025,7 +1041,7 @@ pub mod exports { .ok_or_else(|| { anyhow::anyhow!( "instance export `foo:foo/uses-resource-transitively` does \ - not have export `{name}`" + not have export `{name}`" ) }) }; diff --git a/crates/component-macro/tests/expanded/resources-import_async.rs b/crates/component-macro/tests/expanded/resources-import_async.rs index 2d94d0468241..80fe5d48fdc6 100644 --- a/crates/component-macro/tests/expanded/resources-import_async.rs +++ b/crates/component-macro/tests/expanded/resources-import_async.rs @@ -38,6 +38,15 @@ pub struct TheWorldPre { interface1: exports::foo::foo::uses_resource_transitively::GuestPre, some_world_func2: wasmtime::component::ComponentExportIndex, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface1: self.interface1.clone(), + some_world_func2: 
self.some_world_func2.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. /// @@ -127,6 +136,12 @@ const _: () = { some_world_func2, }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -1076,6 +1091,7 @@ pub mod exports { pub struct Guest { handle: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { handle: wasmtime::component::ComponentExportIndex, } @@ -1098,7 +1114,7 @@ pub mod exports { .ok_or_else(|| { anyhow::anyhow!( "instance export `foo:foo/uses-resource-transitively` does \ - not have export `{name}`" + not have export `{name}`" ) }) }; diff --git a/crates/component-macro/tests/expanded/share-types.rs b/crates/component-macro/tests/expanded/share-types.rs index a7162e1e89ad..bb96f9779e09 100644 --- a/crates/component-macro/tests/expanded/share-types.rs +++ b/crates/component-macro/tests/expanded/share-types.rs @@ -8,6 +8,14 @@ pub struct HttpInterfacePre { instance_pre: wasmtime::component::InstancePre, interface0: exports::http_handler::GuestPre, } +impl Clone for HttpInterfacePre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `http-interface`. 
/// @@ -53,6 +61,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(HttpInterface { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl HttpInterface { /// Convenience wrapper around [`HttpInterfacePre::new`] and @@ -235,6 +249,7 @@ pub mod exports { pub struct Guest { handle_request: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { handle_request: wasmtime::component::ComponentExportIndex, } diff --git a/crates/component-macro/tests/expanded/share-types_async.rs b/crates/component-macro/tests/expanded/share-types_async.rs index 9a5fc6bca8cb..ce4e84280d38 100644 --- a/crates/component-macro/tests/expanded/share-types_async.rs +++ b/crates/component-macro/tests/expanded/share-types_async.rs @@ -8,6 +8,14 @@ pub struct HttpInterfacePre { instance_pre: wasmtime::component::InstancePre, interface0: exports::http_handler::GuestPre, } +impl Clone for HttpInterfacePre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `http-interface`. 
/// @@ -56,6 +64,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(HttpInterface { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl HttpInterface { /// Convenience wrapper around [`HttpInterfacePre::new`] and @@ -254,6 +268,7 @@ pub mod exports { pub struct Guest { handle_request: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { handle_request: wasmtime::component::ComponentExportIndex, } diff --git a/crates/component-macro/tests/expanded/simple-functions.rs b/crates/component-macro/tests/expanded/simple-functions.rs index e802b6a4e140..48c638b8941d 100644 --- a/crates/component-macro/tests/expanded/simple-functions.rs +++ b/crates/component-macro/tests/expanded/simple-functions.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::simple::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -53,6 +61,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -214,6 +228,7 @@ pub mod exports { f5: wasmtime::component::Func, f6: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { f1: wasmtime::component::ComponentExportIndex, f2: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/simple-functions_async.rs b/crates/component-macro/tests/expanded/simple-functions_async.rs index 121cc7dcc9c0..0cdedfd2b4d7 100644 --- a/crates/component-macro/tests/expanded/simple-functions_async.rs +++ b/crates/component-macro/tests/expanded/simple-functions_async.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::simple::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -56,6 +64,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -227,6 +241,7 @@ pub mod exports { f5: wasmtime::component::Func, f6: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { f1: wasmtime::component::ComponentExportIndex, f2: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/simple-lists.rs b/crates/component-macro/tests/expanded/simple-lists.rs index 68c002ea76d8..8d9d9515bfcb 100644 --- a/crates/component-macro/tests/expanded/simple-lists.rs +++ b/crates/component-macro/tests/expanded/simple-lists.rs @@ -8,6 +8,14 @@ pub struct MyWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::simple_lists::GuestPre, } +impl Clone for MyWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `my-world`. 
/// @@ -53,6 +61,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(MyWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl MyWorld { /// Convenience wrapper around [`MyWorldPre::new`] and @@ -237,6 +251,7 @@ pub mod exports { simple_list3: wasmtime::component::Func, simple_list4: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { simple_list1: wasmtime::component::ComponentExportIndex, simple_list2: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/simple-lists_async.rs b/crates/component-macro/tests/expanded/simple-lists_async.rs index 5bd879ef1cce..a71e897e6965 100644 --- a/crates/component-macro/tests/expanded/simple-lists_async.rs +++ b/crates/component-macro/tests/expanded/simple-lists_async.rs @@ -8,6 +8,14 @@ pub struct MyWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::simple_lists::GuestPre, } +impl Clone for MyWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `my-world`. 
/// @@ -56,6 +64,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(MyWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl MyWorld { /// Convenience wrapper around [`MyWorldPre::new`] and @@ -254,6 +268,7 @@ pub mod exports { simple_list3: wasmtime::component::Func, simple_list4: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { simple_list1: wasmtime::component::ComponentExportIndex, simple_list2: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/simple-wasi.rs b/crates/component-macro/tests/expanded/simple-wasi.rs index a036d30866fe..cd13ac943735 100644 --- a/crates/component-macro/tests/expanded/simple-wasi.rs +++ b/crates/component-macro/tests/expanded/simple-wasi.rs @@ -7,6 +7,13 @@ pub struct WasiPre { instance_pre: wasmtime::component::InstancePre, } +impl Clone for WasiPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `wasi`. 
/// @@ -45,6 +52,12 @@ const _: () = { let _instance = self.instance_pre.instantiate(&mut store)?; Ok(Wasi {}) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Wasi { /// Convenience wrapper around [`WasiPre::new`] and diff --git a/crates/component-macro/tests/expanded/simple-wasi_async.rs b/crates/component-macro/tests/expanded/simple-wasi_async.rs index 1144d303cfe2..39b6f7f5dd2a 100644 --- a/crates/component-macro/tests/expanded/simple-wasi_async.rs +++ b/crates/component-macro/tests/expanded/simple-wasi_async.rs @@ -7,6 +7,13 @@ pub struct WasiPre { instance_pre: wasmtime::component::InstancePre, } +impl Clone for WasiPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `wasi`. /// @@ -48,6 +55,12 @@ const _: () = { let _instance = self.instance_pre.instantiate_async(&mut store).await?; Ok(Wasi {}) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Wasi { /// Convenience wrapper around [`WasiPre::new`] and diff --git a/crates/component-macro/tests/expanded/small-anonymous.rs b/crates/component-macro/tests/expanded/small-anonymous.rs index e6061a2eac46..abd81339f550 100644 --- a/crates/component-macro/tests/expanded/small-anonymous.rs +++ b/crates/component-macro/tests/expanded/small-anonymous.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::anon::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the 
world `the-world`. /// @@ -53,6 +61,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -245,6 +259,7 @@ pub mod exports { pub struct Guest { option_test: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { option_test: wasmtime::component::ComponentExportIndex, } diff --git a/crates/component-macro/tests/expanded/small-anonymous_async.rs b/crates/component-macro/tests/expanded/small-anonymous_async.rs index 568ff9b1ac22..753e5d663ef9 100644 --- a/crates/component-macro/tests/expanded/small-anonymous_async.rs +++ b/crates/component-macro/tests/expanded/small-anonymous_async.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::anon::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -56,6 +64,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -258,6 +272,7 @@ pub mod exports { pub struct Guest { option_test: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { option_test: wasmtime::component::ComponentExportIndex, } diff --git a/crates/component-macro/tests/expanded/smoke-default.rs b/crates/component-macro/tests/expanded/smoke-default.rs index a1e7123d8af4..a2db160bcd33 100644 --- a/crates/component-macro/tests/expanded/smoke-default.rs +++ b/crates/component-macro/tests/expanded/smoke-default.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, y: wasmtime::component::ComponentExportIndex, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + y: self.y.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -53,6 +61,12 @@ const _: () = { let y = *_instance.get_typed_func::<(), ()>(&mut store, &self.y)?.func(); Ok(TheWorld { y }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and diff --git a/crates/component-macro/tests/expanded/smoke-default_async.rs b/crates/component-macro/tests/expanded/smoke-default_async.rs index e71e4ed38d46..e1f8d2640dcb 100644 --- a/crates/component-macro/tests/expanded/smoke-default_async.rs +++ b/crates/component-macro/tests/expanded/smoke-default_async.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, y: wasmtime::component::ComponentExportIndex, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + y: self.y.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -56,6 +64,12 @@ const _: () = { let y = *_instance.get_typed_func::<(), ()>(&mut store, &self.y)?.func(); Ok(TheWorld { y }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and diff --git a/crates/component-macro/tests/expanded/smoke-export.rs b/crates/component-macro/tests/expanded/smoke-export.rs index ff6ea8cf2309..0e292da5f0dc 100644 --- a/crates/component-macro/tests/expanded/smoke-export.rs +++ b/crates/component-macro/tests/expanded/smoke-export.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::the_name::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -53,6 +61,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -78,6 +92,7 @@ pub mod exports { pub struct Guest { y: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { y: wasmtime::component::ComponentExportIndex, } diff --git a/crates/component-macro/tests/expanded/smoke-export_async.rs b/crates/component-macro/tests/expanded/smoke-export_async.rs index 7821c7791013..b84ad0471574 100644 --- a/crates/component-macro/tests/expanded/smoke-export_async.rs +++ b/crates/component-macro/tests/expanded/smoke-export_async.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::the_name::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -56,6 +64,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -84,6 +98,7 @@ pub mod exports { pub struct Guest { y: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { y: wasmtime::component::ComponentExportIndex, } diff --git a/crates/component-macro/tests/expanded/smoke.rs b/crates/component-macro/tests/expanded/smoke.rs index c9e781d9a291..f05cf958413f 100644 --- a/crates/component-macro/tests/expanded/smoke.rs +++ b/crates/component-macro/tests/expanded/smoke.rs @@ -7,6 +7,13 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -45,6 +52,12 @@ const _: () = { let _instance = self.instance_pre.instantiate(&mut store)?; Ok(TheWorld {}) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and diff --git a/crates/component-macro/tests/expanded/smoke_async.rs b/crates/component-macro/tests/expanded/smoke_async.rs index 7f2b017e20b5..5faac73d966a 100644 --- a/crates/component-macro/tests/expanded/smoke_async.rs +++ b/crates/component-macro/tests/expanded/smoke_async.rs @@ -7,6 +7,13 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. /// @@ -48,6 +55,12 @@ const _: () = { let _instance = self.instance_pre.instantiate_async(&mut store).await?; Ok(TheWorld {}) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and diff --git a/crates/component-macro/tests/expanded/strings.rs b/crates/component-macro/tests/expanded/strings.rs index 7b8f91692308..3d69053d735d 100644 --- a/crates/component-macro/tests/expanded/strings.rs +++ b/crates/component-macro/tests/expanded/strings.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::strings::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world 
`the-world`. /// @@ -53,6 +61,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -189,6 +203,7 @@ pub mod exports { b: wasmtime::component::Func, c: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { a: wasmtime::component::ComponentExportIndex, b: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/strings_async.rs b/crates/component-macro/tests/expanded/strings_async.rs index 1341805f568d..41eef14bd487 100644 --- a/crates/component-macro/tests/expanded/strings_async.rs +++ b/crates/component-macro/tests/expanded/strings_async.rs @@ -8,6 +8,14 @@ pub struct TheWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::strings::GuestPre, } +impl Clone for TheWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `the-world`. 
/// @@ -56,6 +64,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(TheWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl TheWorld { /// Convenience wrapper around [`TheWorldPre::new`] and @@ -202,6 +216,7 @@ pub mod exports { b: wasmtime::component::Func, c: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { a: wasmtime::component::ComponentExportIndex, b: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/unversioned-foo.rs b/crates/component-macro/tests/expanded/unversioned-foo.rs index 449aa01819d4..ea3e2435d94a 100644 --- a/crates/component-macro/tests/expanded/unversioned-foo.rs +++ b/crates/component-macro/tests/expanded/unversioned-foo.rs @@ -7,6 +7,13 @@ pub struct NopePre { instance_pre: wasmtime::component::InstancePre, } +impl Clone for NopePre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `nope`. 
/// @@ -45,6 +52,12 @@ const _: () = { let _instance = self.instance_pre.instantiate(&mut store)?; Ok(Nope {}) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Nope { /// Convenience wrapper around [`NopePre::new`] and diff --git a/crates/component-macro/tests/expanded/unversioned-foo_async.rs b/crates/component-macro/tests/expanded/unversioned-foo_async.rs index f160af990974..37b181e425b6 100644 --- a/crates/component-macro/tests/expanded/unversioned-foo_async.rs +++ b/crates/component-macro/tests/expanded/unversioned-foo_async.rs @@ -7,6 +7,13 @@ pub struct NopePre { instance_pre: wasmtime::component::InstancePre, } +impl Clone for NopePre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `nope`. /// @@ -48,6 +55,12 @@ const _: () = { let _instance = self.instance_pre.instantiate_async(&mut store).await?; Ok(Nope {}) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Nope { /// Convenience wrapper around [`NopePre::new`] and diff --git a/crates/component-macro/tests/expanded/use-paths.rs b/crates/component-macro/tests/expanded/use-paths.rs index 52b1d0c03e97..6991bd6a4002 100644 --- a/crates/component-macro/tests/expanded/use-paths.rs +++ b/crates/component-macro/tests/expanded/use-paths.rs @@ -7,6 +7,13 @@ pub struct DPre { instance_pre: wasmtime::component::InstancePre, } +impl Clone for DPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `d`. 
/// @@ -45,6 +52,12 @@ const _: () = { let _instance = self.instance_pre.instantiate(&mut store)?; Ok(D {}) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl D { /// Convenience wrapper around [`DPre::new`] and diff --git a/crates/component-macro/tests/expanded/use-paths_async.rs b/crates/component-macro/tests/expanded/use-paths_async.rs index 3395ce3589c1..6ca65976093c 100644 --- a/crates/component-macro/tests/expanded/use-paths_async.rs +++ b/crates/component-macro/tests/expanded/use-paths_async.rs @@ -7,6 +7,13 @@ pub struct DPre { instance_pre: wasmtime::component::InstancePre, } +impl Clone for DPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `d`. /// @@ -48,6 +55,12 @@ const _: () = { let _instance = self.instance_pre.instantiate_async(&mut store).await?; Ok(D {}) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl D { /// Convenience wrapper around [`DPre::new`] and diff --git a/crates/component-macro/tests/expanded/variants.rs b/crates/component-macro/tests/expanded/variants.rs index c46e5dd1291e..b85acf569ef5 100644 --- a/crates/component-macro/tests/expanded/variants.rs +++ b/crates/component-macro/tests/expanded/variants.rs @@ -8,6 +8,14 @@ pub struct MyWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::variants::GuestPre, } +impl Clone for MyWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `my-world`. 
/// @@ -53,6 +61,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(MyWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl MyWorld { /// Convenience wrapper around [`MyWorldPre::new`] and @@ -1193,6 +1207,7 @@ pub mod exports { return_named_option: wasmtime::component::Func, return_named_result: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { e1_arg: wasmtime::component::ComponentExportIndex, e1_result: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/variants_async.rs b/crates/component-macro/tests/expanded/variants_async.rs index e6c27f29acc2..1295127ea571 100644 --- a/crates/component-macro/tests/expanded/variants_async.rs +++ b/crates/component-macro/tests/expanded/variants_async.rs @@ -8,6 +8,14 @@ pub struct MyWorldPre { instance_pre: wasmtime::component::InstancePre, interface0: exports::foo::foo::variants::GuestPre, } +impl Clone for MyWorldPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `my-world`. 
/// @@ -56,6 +64,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(MyWorld { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl MyWorld { /// Convenience wrapper around [`MyWorldPre::new`] and @@ -1209,6 +1223,7 @@ pub mod exports { return_named_option: wasmtime::component::Func, return_named_result: wasmtime::component::Func, } + #[derive(Clone)] pub struct GuestPre { e1_arg: wasmtime::component::ComponentExportIndex, e1_result: wasmtime::component::ComponentExportIndex, diff --git a/crates/component-macro/tests/expanded/wat.rs b/crates/component-macro/tests/expanded/wat.rs index 639efbeb799c..f9bdb92e3c64 100644 --- a/crates/component-macro/tests/expanded/wat.rs +++ b/crates/component-macro/tests/expanded/wat.rs @@ -8,6 +8,14 @@ pub struct ExamplePre { instance_pre: wasmtime::component::InstancePre, interface0: exports::same::name::this_name_is_duplicated::GuestPre, } +impl Clone for ExamplePre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `example`. 
/// @@ -55,6 +63,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(Example { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Example { /// Convenience wrapper around [`ExamplePre::new`] and @@ -86,6 +100,7 @@ pub mod exports { funcs: &'a Guest, } pub struct Guest {} + #[derive(Clone)] pub struct GuestPre {} impl GuestPre { pub fn new( diff --git a/crates/component-macro/tests/expanded/wat_async.rs b/crates/component-macro/tests/expanded/wat_async.rs index 4f2293299932..1c263394c388 100644 --- a/crates/component-macro/tests/expanded/wat_async.rs +++ b/crates/component-macro/tests/expanded/wat_async.rs @@ -8,6 +8,14 @@ pub struct ExamplePre { instance_pre: wasmtime::component::InstancePre, interface0: exports::same::name::this_name_is_duplicated::GuestPre, } +impl Clone for ExamplePre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + interface0: self.interface0.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `example`. 
/// @@ -58,6 +66,12 @@ const _: () = { let interface0 = self.interface0.load(&mut store, &_instance)?; Ok(Example { interface0 }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Example { /// Convenience wrapper around [`ExamplePre::new`] and @@ -92,6 +106,7 @@ pub mod exports { funcs: &'a Guest, } pub struct Guest {} + #[derive(Clone)] pub struct GuestPre {} impl GuestPre { pub fn new( diff --git a/crates/component-macro/tests/expanded/worlds-with-types.rs b/crates/component-macro/tests/expanded/worlds-with-types.rs index fa40a67a4000..cbe2972f6f99 100644 --- a/crates/component-macro/tests/expanded/worlds-with-types.rs +++ b/crates/component-macro/tests/expanded/worlds-with-types.rs @@ -33,6 +33,14 @@ pub struct FooPre { instance_pre: wasmtime::component::InstancePre, f: wasmtime::component::ComponentExportIndex, } +impl Clone for FooPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + f: self.f.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `foo`. 
/// @@ -80,6 +88,12 @@ const _: () = { .func(); Ok(Foo { f }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Foo { /// Convenience wrapper around [`FooPre::new`] and diff --git a/crates/component-macro/tests/expanded/worlds-with-types_async.rs b/crates/component-macro/tests/expanded/worlds-with-types_async.rs index 3c4dc3b44784..57cd78cae356 100644 --- a/crates/component-macro/tests/expanded/worlds-with-types_async.rs +++ b/crates/component-macro/tests/expanded/worlds-with-types_async.rs @@ -33,6 +33,14 @@ pub struct FooPre { instance_pre: wasmtime::component::InstancePre, f: wasmtime::component::ComponentExportIndex, } +impl Clone for FooPre { + fn clone(&self) -> Self { + Self { + instance_pre: self.instance_pre.clone(), + f: self.f.clone(), + } + } +} /// Auto-generated bindings for an instance a component which /// implements the world `foo`. /// @@ -83,6 +91,12 @@ const _: () = { .func(); Ok(Foo { f }) } + pub fn engine(&self) -> &wasmtime::Engine { + self.instance_pre.engine() + } + pub fn instance_pre(&self) -> &wasmtime::component::InstancePre<_T> { + &self.instance_pre + } } impl Foo { /// Convenience wrapper around [`FooPre::new`] and diff --git a/crates/wasi-http/src/bindings.rs b/crates/wasi-http/src/bindings.rs new file mode 100644 index 000000000000..d48cbd6b52b5 --- /dev/null +++ b/crates/wasi-http/src/bindings.rs @@ -0,0 +1,77 @@ +//! Raw bindings to the `wasi:http` package. 
+ +#[allow(missing_docs)] +mod generated { + use crate::body; + use crate::types; + + wasmtime::component::bindgen!({ + path: "wit", + world: "wasi:http/proxy", + tracing: true, + // Flag this as "possibly async" which will cause the exports to be + // generated as async, but none of the imports here are async since + // all the blocking-ness happens in wasi:io + async: { + only_imports: ["nonexistent"], + }, + trappable_imports: true, + require_store_data_send: true, + with: { + // Upstream package dependencies + "wasi:io": wasmtime_wasi::bindings::io, + + // Configure all WIT http resources to be defined types in this + // crate to use the `ResourceTable` helper methods. + "wasi:http/types/outgoing-body": body::HostOutgoingBody, + "wasi:http/types/future-incoming-response": types::HostFutureIncomingResponse, + "wasi:http/types/outgoing-response": types::HostOutgoingResponse, + "wasi:http/types/future-trailers": body::HostFutureTrailers, + "wasi:http/types/incoming-body": body::HostIncomingBody, + "wasi:http/types/incoming-response": types::HostIncomingResponse, + "wasi:http/types/response-outparam": types::HostResponseOutparam, + "wasi:http/types/outgoing-request": types::HostOutgoingRequest, + "wasi:http/types/incoming-request": types::HostIncomingRequest, + "wasi:http/types/fields": types::HostFields, + "wasi:http/types/request-options": types::HostRequestOptions, + }, + trappable_error_type: { + "wasi:http/types/error-code" => crate::HttpError, + }, + }); +} + +pub use self::generated::wasi::*; + +/// Raw bindings to the `wasi:http/proxy` exports. +pub use self::generated::exports; + +/// Bindings to the `wasi:http/proxy` world. +pub use self::generated::{Proxy, ProxyPre}; + +/// Sync implementation of the `wasi:http/proxy` world. 
+pub mod sync { + #[allow(missing_docs)] + mod generated { + #![allow(missing_docs)] + wasmtime::component::bindgen!({ + world: "wasi:http/proxy", + tracing: true, + async: false, + with: { + "wasi:http": crate::bindings::http, // http is in this crate + "wasi:io": wasmtime_wasi::bindings::sync::io, // io is sync + "wasi": wasmtime_wasi::bindings, // everything else + }, + require_store_data_send: true, + }); + } + + pub use self::generated::wasi::*; + + /// Raw bindings to the `wasi:http/proxy` exports. + pub use self::generated::exports; + + /// Bindings to the `wasi:http/proxy` world. + pub use self::generated::{Proxy, ProxyPre}; +} diff --git a/crates/wasi-http/src/lib.rs b/crates/wasi-http/src/lib.rs index fc0d98b4aee3..8c5c0f509f03 100644 --- a/crates/wasi-http/src/lib.rs +++ b/crates/wasi-http/src/lib.rs @@ -1,39 +1,220 @@ -//! Wasmtime's WASI HTTP Implementation +//! # Wasmtime's WASI HTTP Implementation //! -//! This crate's implementation is primarily built on top of [`hyper`]. +//! This crate is Wasmtime's host implementation of the `wasi:http` package as +//! part of WASIp2. This crate's implementation is primarily built on top of +//! [`hyper`] and [`tokio`]. //! //! # WASI HTTP Interfaces //! //! This crate contains implementations of the following interfaces: //! -//! * `wasi:http/incoming-handler` -//! * `wasi:http/outgoing-handler` -//! * `wasi:http/types` +//! * [`wasi:http/incoming-handler`] +//! * [`wasi:http/outgoing-handler`] +//! * [`wasi:http/types`] //! //! The crate also contains an implementation of the [`wasi:http/proxy`] world. //! -//! [`wasi:http/proxy`]: crate::proxy - -//! All traits are implemented in terms of a [`WasiHttpView`] trait which provides -//! basic access to [`WasiHttpCtx`], configuration for WASI HTTP, and a [`wasmtime_wasi::ResourceTable`], -//! the state for all host-defined component model resources. - +//! [`wasi:http/proxy`]: crate::bindings::Proxy +//! 
[`wasi:http/outgoing-handler`]: crate::bindings::http::outgoing_handler::Host +//! [`wasi:http/types`]: crate::bindings::http::types::Host +//! [`wasi:http/incoming-handler`]: crate::bindings::exports::wasi::http::incoming_handler::Guest +//! +//! This crate is very similar to [`wasmtime-wasi`] in that it uses the +//! `bindgen!` macro in Wasmtime to generate bindings to interfaces. Bindings +//! are located in the [`bindings`] module. +//! +//! # The `WasiHttpView` trait +//! +//! All `bindgen!`-generated `Host` traits are implemented in terms of a +//! [`WasiHttpView`] trait which provides basic access to [`WasiHttpCtx`], +//! configuration for WASI HTTP, and a [`wasmtime_wasi::ResourceTable`], the +//! state for all host-defined component model resources. +//! +//! The [`WasiHttpView`] trait additionally offers a few other configuration +//! methods such as [`WasiHttpView::send_request`] to customize how outgoing +//! HTTP requests are handled. +//! +//! # Async and Sync +//! +//! There are both asynchronous and synchronous bindings in this crate. For +//! example [`add_to_linker_async`] is for asynchronous embedders and +//! [`add_to_linker_sync`] is for synchronous embedders. Note that under the +//! hood both versions are implemented with `async` on top of [`tokio`]. +//! //! # Examples //! //! Usage of this crate is done through a few steps to get everything hooked up: //! //! 1. First implement [`WasiHttpView`] for your type which is the `T` in //! [`wasmtime::Store`]. -//! 2. Add WASI HTTP interfaces to a [`wasmtime::component::Linker`]. This is either -//! done through functions like [`proxy::add_to_linker`] (which bundles all interfaces -//! in the `wasi:http/proxy` world together) or through individual interfaces like the -//! [`bindings::http::outgoing_handler::add_to_linker_get_host`] function. -//! 3. Use the previous [`wasmtime::component::Linker::instantiate`] to instantiate -//! a [`wasmtime::component::Component`] within a [`wasmtime::Store`]. 
If you're -//! targeting the `wasi:http/proxy` world, you can instantiate the component with -//! [`proxy::Proxy::instantiate_async`] or [`proxy::sync::Proxy::instantiate`] functions. +//! 2. Add WASI HTTP interfaces to a [`wasmtime::component::Linker`]. There +//! are a few options of how to do this: +//! * Use [`add_to_linker_async`] to bundle all interfaces in +//! `wasi:http/proxy` together +//! * Use [`add_only_http_to_linker_async`] to add only HTTP interfaces but +//! no others. This is useful when working with +//! [`wasmtime_wasi::add_to_linker_async`] for example. +//! * Add individual interfaces such as with the +//! [`bindings::http::outgoing_handler::add_to_linker_get_host`] function. +//! 3. Use [`ProxyPre`](bindings::ProxyPre) to pre-instantiate a component +//! before serving requests. +//! 4. When serving requests use +//! [`ProxyPre::instantiate_async`](bindings::ProxyPre::instantiate_async) +//! to create instances and handle HTTP requests. +//! +//! A standalone example of doing all this looks like: +//! +//! ```no_run +//! use anyhow::bail; +//! use hyper::server::conn::http1; +//! use std::sync::Arc; +//! use tokio::net::TcpListener; +//! use wasmtime::component::{Component, Linker, ResourceTable}; +//! use wasmtime::{Config, Engine, Result, Store}; +//! use wasmtime_wasi::{WasiCtx, WasiCtxBuilder, WasiView}; +//! use wasmtime_wasi_http::bindings::ProxyPre; +//! use wasmtime_wasi_http::body::HyperOutgoingBody; +//! use wasmtime_wasi_http::io::TokioIo; +//! use wasmtime_wasi_http::{WasiHttpCtx, WasiHttpView}; +//! +//! #[tokio::main] +//! async fn main() -> Result<()> { +//! let component = std::env::args().nth(1).unwrap(); +//! +//! // Prepare the `Engine` for Wasmtime +//! let mut config = Config::new(); +//! config.async_support(true); +//! let engine = Engine::new(&config)?; +//! +//! // Compile the component on the command line to machine code +//! let component = Component::from_file(&engine, &component)?; +//! +//! 
// Prepare the `ProxyPre` which is a pre-instantiated version of the +//! // component that we have. This will make per-request instantiation +//! // much quicker. +//! let mut linker = Linker::new(&engine); +//! wasmtime_wasi_http::add_to_linker_async(&mut linker)?; +//! let pre = ProxyPre::new(linker.instantiate_pre(&component)?)?; +//! +//! // Prepare our server state and start listening for connections. +//! let server = Arc::new(MyServer { pre }); +//! let listener = TcpListener::bind("127.0.0.1:8000").await?; +//! println!("Listening on {}", listener.local_addr()?); +//! +//! loop { +//! // Accept a TCP connection and serve all of its requests in a separate +//! // tokio task. Note that for now this only works with HTTP/1.1. +//! let (client, addr) = listener.accept().await?; +//! println!("serving new client from {addr}"); +//! +//! let server = server.clone(); +//! tokio::task::spawn(async move { +//! if let Err(e) = http1::Builder::new() +//! .keep_alive(true) +//! .serve_connection( +//! TokioIo::new(client), +//! hyper::service::service_fn(move |req| { +//! let server = server.clone(); +//! async move { server.handle_request(req).await } +//! }), +//! ) +//! .await +//! { +//! eprintln!("error serving client[{addr}]: {e:?}"); +//! } +//! }); +//! } +//! } +//! +//! struct MyServer { +//! pre: ProxyPre, +//! } +//! +//! impl MyServer { +//! async fn handle_request( +//! &self, +//! req: hyper::Request, +//! ) -> Result> { +//! // Create per-http-request state within a `Store` and prepare the +//! // initial resources passed to the `handle` function. +//! let mut store = Store::new( +//! self.pre.engine(), +//! MyClientState { +//! table: ResourceTable::new(), +//! wasi: WasiCtxBuilder::new().inherit_stdio().build(), +//! http: WasiHttpCtx::new(), +//! }, +//! ); +//! let (sender, receiver) = tokio::sync::oneshot::channel(); +//! let req = store.data_mut().new_incoming_request(req)?; +//! let out = store.data_mut().new_response_outparam(sender)?; +//! 
let pre = self.pre.clone(); +//! +//! // Run the http request itself in a separate task so the task can +//! // optionally continue to execute beyond after the initial +//! // headers/response code are sent. +//! let task = tokio::task::spawn(async move { +//! let proxy = pre.instantiate_async(&mut store).await?; +//! +//! if let Err(e) = proxy +//! .wasi_http_incoming_handler() +//! .call_handle(store, req, out) +//! .await +//! { +//! return Err(e); +//! } +//! +//! Ok(()) +//! }); +//! +//! match receiver.await { +//! // If the client calls `response-outparam::set` then one of these +//! // methods will be called. +//! Ok(Ok(resp)) => Ok(resp), +//! Ok(Err(e)) => Err(e.into()), +//! +//! // Otherwise the `sender` will get dropped along with the `Store` +//! // meaning that the oneshot will get disconnected and here we can +//! // inspect the `task` result to see what happened +//! Err(_) => { +//! let e = match task.await { +//! Ok(r) => r.unwrap_err(), +//! Err(e) => e.into(), +//! }; +//! bail!("guest never invoked `response-outparam::set` method: {e:?}") +//! } +//! } +//! } +//! } +//! +//! struct MyClientState { +//! wasi: WasiCtx, +//! http: WasiHttpCtx, +//! table: ResourceTable, +//! } +//! +//! impl WasiView for MyClientState { +//! fn ctx(&mut self) -> &mut WasiCtx { +//! &mut self.wasi +//! } +//! fn table(&mut self) -> &mut ResourceTable { +//! &mut self.table +//! } +//! } +//! +//! impl WasiHttpView for MyClientState { +//! fn ctx(&mut self) -> &mut WasiHttpCtx { +//! &mut self.http +//! } +//! fn table(&mut self) -> &mut ResourceTable { +//! &mut self.table +//! } +//! } +//! ``` #![deny(missing_docs)] +#![doc(test(attr(deny(warnings))))] +#![doc(test(attr(allow(dead_code, unused_variables, unused_mut))))] mod error; mod http_impl; @@ -41,50 +222,185 @@ mod types_impl; pub mod body; pub mod io; -pub mod proxy; pub mod types; -/// Raw bindings to the `wasi:http` package. 
-pub mod bindings { - #![allow(missing_docs)] - wasmtime::component::bindgen!({ - path: "wit", - interfaces: " - import wasi:http/incoming-handler@0.2.0; - import wasi:http/outgoing-handler@0.2.0; - import wasi:http/types@0.2.0; - ", - tracing: true, - async: false, - trappable_imports: true, - with: { - // Upstream package dependencies - "wasi:io": wasmtime_wasi::bindings::io, - - // Configure all WIT http resources to be defined types in this - // crate to use the `ResourceTable` helper methods. - "wasi:http/types/outgoing-body": super::body::HostOutgoingBody, - "wasi:http/types/future-incoming-response": super::types::HostFutureIncomingResponse, - "wasi:http/types/outgoing-response": super::types::HostOutgoingResponse, - "wasi:http/types/future-trailers": super::body::HostFutureTrailers, - "wasi:http/types/incoming-body": super::body::HostIncomingBody, - "wasi:http/types/incoming-response": super::types::HostIncomingResponse, - "wasi:http/types/response-outparam": super::types::HostResponseOutparam, - "wasi:http/types/outgoing-request": super::types::HostOutgoingRequest, - "wasi:http/types/incoming-request": super::types::HostIncomingRequest, - "wasi:http/types/fields": super::types::HostFields, - "wasi:http/types/request-options": super::types::HostRequestOptions, - }, - trappable_error_type: { - "wasi:http/types/error-code" => crate::HttpError, - }, - }); - - pub use wasi::http; -} +pub mod bindings; pub use crate::error::{ http_request_error, hyper_request_error, hyper_response_error, HttpError, HttpResult, }; #[doc(inline)] pub use crate::types::{WasiHttpCtx, WasiHttpImpl, WasiHttpView}; + +/// Add all of the `wasi:http/proxy` world's interfaces to a [`wasmtime::component::Linker`]. +/// +/// This function will add the `async` variant of all interfaces into the +/// `Linker` provided. By `async` this means that this function is only +/// compatible with [`Config::async_support(true)`][async]. 
For embeddings with +/// async support disabled see [`add_to_linker_sync`] instead. +/// +/// [async]: wasmtime::Config::async_support +/// +/// # Example +/// +/// ``` +/// use wasmtime::{Engine, Result, Config}; +/// use wasmtime::component::{ResourceTable, Linker}; +/// use wasmtime_wasi::{WasiCtx, WasiView}; +/// use wasmtime_wasi_http::{WasiHttpCtx, WasiHttpView}; +/// +/// fn main() -> Result<()> { +/// let mut config = Config::new(); +/// config.async_support(true); +/// let engine = Engine::new(&config)?; +/// +/// let mut linker = Linker::::new(&engine); +/// wasmtime_wasi_http::add_to_linker_async(&mut linker)?; +/// // ... add any further functionality to `linker` if desired ... +/// +/// Ok(()) +/// } +/// +/// struct MyState { +/// ctx: WasiCtx, +/// http_ctx: WasiHttpCtx, +/// table: ResourceTable, +/// } +/// +/// impl WasiHttpView for MyState { +/// fn ctx(&mut self) -> &mut WasiHttpCtx { &mut self.http_ctx } +/// fn table(&mut self) -> &mut ResourceTable { &mut self.table } +/// } +/// impl WasiView for MyState { +/// fn ctx(&mut self) -> &mut WasiCtx { &mut self.ctx } +/// fn table(&mut self) -> &mut ResourceTable { &mut self.table } +/// } +/// ``` +pub fn add_to_linker_async(l: &mut wasmtime::component::Linker) -> anyhow::Result<()> +where + T: WasiHttpView + wasmtime_wasi::WasiView, +{ + let closure = type_annotate_wasi::(|t| wasmtime_wasi::WasiImpl(t)); + wasmtime_wasi::bindings::clocks::wall_clock::add_to_linker_get_host(l, closure)?; + wasmtime_wasi::bindings::clocks::monotonic_clock::add_to_linker_get_host(l, closure)?; + wasmtime_wasi::bindings::io::poll::add_to_linker_get_host(l, closure)?; + wasmtime_wasi::bindings::io::error::add_to_linker_get_host(l, closure)?; + wasmtime_wasi::bindings::io::streams::add_to_linker_get_host(l, closure)?; + wasmtime_wasi::bindings::cli::stdin::add_to_linker_get_host(l, closure)?; + wasmtime_wasi::bindings::cli::stdout::add_to_linker_get_host(l, closure)?; + 
wasmtime_wasi::bindings::cli::stderr::add_to_linker_get_host(l, closure)?; + wasmtime_wasi::bindings::random::random::add_to_linker_get_host(l, closure)?; + + add_only_http_to_linker_async(l) +} + +// NB: workaround some rustc inference - a future refactoring may make this +// obsolete. +fn type_annotate_http(val: F) -> F +where + F: Fn(&mut T) -> WasiHttpImpl<&mut T>, +{ + val +} +fn type_annotate_wasi(val: F) -> F +where + F: Fn(&mut T) -> wasmtime_wasi::WasiImpl<&mut T>, +{ + val +} + +/// A slimmed down version of [`add_to_linker_async`] which only adds +/// `wasi:http` interfaces to the linker. +/// +/// This is useful when using [`wasmtime_wasi::add_to_linker_async`] for +/// example to avoid re-adding the same interfaces twice. +pub fn add_only_http_to_linker_async( + l: &mut wasmtime::component::Linker, +) -> anyhow::Result<()> +where + T: WasiHttpView, +{ + let closure = type_annotate_http::(|t| WasiHttpImpl(t)); + crate::bindings::http::outgoing_handler::add_to_linker_get_host(l, closure)?; + crate::bindings::http::types::add_to_linker_get_host(l, closure)?; + + Ok(()) +} + +/// Add all of the `wasi:http/proxy` world's interfaces to a [`wasmtime::component::Linker`]. +/// +/// This function will add the `sync` variant of all interfaces into the +/// `Linker` provided. For embeddings with async support see +/// [`add_to_linker_async`] instead. +/// +/// # Example +/// +/// ``` +/// use wasmtime::{Engine, Result, Config}; +/// use wasmtime::component::{ResourceTable, Linker}; +/// use wasmtime_wasi::{WasiCtx, WasiView}; +/// use wasmtime_wasi_http::{WasiHttpCtx, WasiHttpView}; +/// +/// fn main() -> Result<()> { +/// let config = Config::default(); +/// let engine = Engine::new(&config)?; +/// +/// let mut linker = Linker::::new(&engine); +/// wasmtime_wasi_http::add_to_linker_sync(&mut linker)?; +/// // ... add any further functionality to `linker` if desired ... 
+/// +/// Ok(()) +/// } +/// +/// struct MyState { +/// ctx: WasiCtx, +/// http_ctx: WasiHttpCtx, +/// table: ResourceTable, +/// } +/// +/// impl WasiHttpView for MyState { +/// fn ctx(&mut self) -> &mut WasiHttpCtx { &mut self.http_ctx } +/// fn table(&mut self) -> &mut ResourceTable { &mut self.table } +/// } +/// impl WasiView for MyState { +/// fn ctx(&mut self) -> &mut WasiCtx { &mut self.ctx } +/// fn table(&mut self) -> &mut ResourceTable { &mut self.table } +/// } +/// ``` +pub fn add_to_linker_sync(l: &mut wasmtime::component::Linker) -> anyhow::Result<()> +where + T: WasiHttpView + wasmtime_wasi::WasiView, +{ + let closure = type_annotate_wasi::(|t| wasmtime_wasi::WasiImpl(t)); + + wasmtime_wasi::bindings::clocks::wall_clock::add_to_linker_get_host(l, closure)?; + wasmtime_wasi::bindings::clocks::monotonic_clock::add_to_linker_get_host(l, closure)?; + wasmtime_wasi::bindings::sync::io::poll::add_to_linker_get_host(l, closure)?; + wasmtime_wasi::bindings::sync::io::streams::add_to_linker_get_host(l, closure)?; + wasmtime_wasi::bindings::io::error::add_to_linker_get_host(l, closure)?; + wasmtime_wasi::bindings::cli::stdin::add_to_linker_get_host(l, closure)?; + wasmtime_wasi::bindings::cli::stdout::add_to_linker_get_host(l, closure)?; + wasmtime_wasi::bindings::cli::stderr::add_to_linker_get_host(l, closure)?; + wasmtime_wasi::bindings::random::random::add_to_linker_get_host(l, closure)?; + + add_only_http_to_linker_sync(l)?; + + Ok(()) +} + +/// A slimmed down version of [`add_to_linker_sync`] which only adds +/// `wasi:http` interfaces to the linker. +/// +/// This is useful when using [`wasmtime_wasi::add_to_linker_sync`] for +/// example to avoid re-adding the same interfaces twice. 
+pub fn add_only_http_to_linker_sync(l: &mut wasmtime::component::Linker) -> anyhow::Result<()> +where + T: WasiHttpView, +{ + let closure = type_annotate_http::(|t| WasiHttpImpl(t)); + + crate::bindings::http::outgoing_handler::add_to_linker_get_host(l, closure)?; + crate::bindings::http::types::add_to_linker_get_host(l, closure)?; + + Ok(()) +} diff --git a/crates/wasi-http/src/proxy.rs b/crates/wasi-http/src/proxy.rs deleted file mode 100644 index cb8f1e818236..000000000000 --- a/crates/wasi-http/src/proxy.rs +++ /dev/null @@ -1,236 +0,0 @@ -//! Implementation of the `wasi:http/proxy` world. -//! -//! The implementation at the top of the module for use in async contexts, -//! while the `sync` module provides implementation for use in sync contexts. - -use crate::{WasiHttpImpl, WasiHttpView}; - -mod bindings { - #![allow(missing_docs)] - wasmtime::component::bindgen!({ - world: "wasi:http/proxy", - tracing: true, - async: true, - with: { - "wasi:http": crate::bindings::http, - "wasi": wasmtime_wasi::bindings, - }, - }); -} - -/// Raw bindings to the `wasi:http/proxy` exports. -pub use bindings::exports; - -/// Bindings to the `wasi:http/proxy` world. -pub use bindings::{Proxy, ProxyPre}; - -/// Add all of the `wasi:http/proxy` world's interfaces to a [`wasmtime::component::Linker`]. -/// -/// This function will add the `async` variant of all interfaces into the -/// `Linker` provided. By `async` this means that this function is only -/// compatible with [`Config::async_support(true)`][async]. For embeddings with -/// async support disabled see [`sync::add_to_linker`] instead. 
-/// -/// [async]: wasmtime::Config::async_support -/// -/// # Example -/// -/// ``` -/// use wasmtime::{Engine, Result, Store, Config}; -/// use wasmtime::component::{ResourceTable, Linker}; -/// use wasmtime_wasi::{WasiCtx, WasiView, WasiCtxBuilder}; -/// use wasmtime_wasi_http::{WasiHttpCtx, WasiHttpView}; -/// -/// fn main() -> Result<()> { -/// let mut config = Config::new(); -/// config.async_support(true); -/// let engine = Engine::new(&config)?; -/// -/// let mut linker = Linker::::new(&engine); -/// wasmtime_wasi_http::proxy::add_to_linker(&mut linker)?; -/// // ... add any further functionality to `linker` if desired ... -/// -/// let mut store = Store::new( -/// &engine, -/// MyState { -/// ctx: WasiCtxBuilder::new().build(), -/// http_ctx: WasiHttpCtx::new(), -/// table: ResourceTable::new(), -/// }, -/// ); -/// -/// // use `linker.instantiate_async` to instantiate within `store` -/// -/// Ok(()) -/// } -/// -/// struct MyState { -/// ctx: WasiCtx, -/// http_ctx: WasiHttpCtx, -/// table: ResourceTable, -/// } -/// -/// impl WasiHttpView for MyState { -/// fn ctx(&mut self) -> &mut WasiHttpCtx { &mut self.http_ctx } -/// fn table(&mut self) -> &mut ResourceTable { &mut self.table } -/// } -/// impl WasiView for MyState { -/// fn ctx(&mut self) -> &mut WasiCtx { &mut self.ctx } -/// fn table(&mut self) -> &mut ResourceTable { &mut self.table } -/// } -/// ``` -pub fn add_to_linker(l: &mut wasmtime::component::Linker) -> anyhow::Result<()> -where - T: WasiHttpView + wasmtime_wasi::WasiView, -{ - let closure = type_annotate_wasi::(|t| wasmtime_wasi::WasiImpl(t)); - wasmtime_wasi::bindings::clocks::wall_clock::add_to_linker_get_host(l, closure)?; - wasmtime_wasi::bindings::clocks::monotonic_clock::add_to_linker_get_host(l, closure)?; - wasmtime_wasi::bindings::io::poll::add_to_linker_get_host(l, closure)?; - wasmtime_wasi::bindings::io::error::add_to_linker_get_host(l, closure)?; - wasmtime_wasi::bindings::io::streams::add_to_linker_get_host(l, closure)?; - 
wasmtime_wasi::bindings::cli::stdin::add_to_linker_get_host(l, closure)?; - wasmtime_wasi::bindings::cli::stdout::add_to_linker_get_host(l, closure)?; - wasmtime_wasi::bindings::cli::stderr::add_to_linker_get_host(l, closure)?; - wasmtime_wasi::bindings::random::random::add_to_linker_get_host(l, closure)?; - - add_only_http_to_linker(l) -} - -// NB: workaround some rustc inference - a future refactoring may make this -// obsolete. -fn type_annotate_http(val: F) -> F -where - F: Fn(&mut T) -> WasiHttpImpl<&mut T>, -{ - val -} -fn type_annotate_wasi(val: F) -> F -where - F: Fn(&mut T) -> wasmtime_wasi::WasiImpl<&mut T>, -{ - val -} - -#[doc(hidden)] -pub fn add_only_http_to_linker(l: &mut wasmtime::component::Linker) -> anyhow::Result<()> -where - T: WasiHttpView, -{ - let closure = type_annotate_http::(|t| WasiHttpImpl(t)); - crate::bindings::http::outgoing_handler::add_to_linker_get_host(l, closure)?; - crate::bindings::http::types::add_to_linker_get_host(l, closure)?; - - Ok(()) -} - -/// Sync implementation of the `wasi:http/proxy` world. -pub mod sync { - use crate::{WasiHttpImpl, WasiHttpView}; - - mod bindings { - #![allow(missing_docs)] - wasmtime::component::bindgen!({ - world: "wasi:http/proxy", - tracing: true, - async: false, - with: { - "wasi:http": crate::bindings::http, // http is in this crate - "wasi:io": wasmtime_wasi::bindings::sync::io, // io is sync - "wasi": wasmtime_wasi::bindings, // everything else - }, - require_store_data_send: true, - }); - } - - /// Raw bindings to the `wasi:http/proxy` exports. - pub use bindings::exports; - - /// Bindings to the `wasi:http/proxy` world. - pub use bindings::{Proxy, ProxyPre}; - - /// Add all of the `wasi:http/proxy` world's interfaces to a [`wasmtime::component::Linker`]. - /// - /// This function will add the `sync` variant of all interfaces into the - /// `Linker` provided. For embeddings with async support see [`super::add_to_linker`] instead. 
- /// - /// # Example - /// - /// ``` - /// use wasmtime::{Engine, Result, Store, Config}; - /// use wasmtime::component::{ResourceTable, Linker}; - /// use wasmtime_wasi::{WasiCtx, WasiView, WasiCtxBuilder}; - /// use wasmtime_wasi_http::{WasiHttpCtx, WasiHttpView}; - /// - /// fn main() -> Result<()> { - /// let config = Config::default(); - /// let engine = Engine::new(&config)?; - /// - /// let mut linker = Linker::::new(&engine); - /// wasmtime_wasi_http::proxy::sync::add_to_linker(&mut linker)?; - /// // ... add any further functionality to `linker` if desired ... - /// - /// let mut store = Store::new( - /// &engine, - /// MyState { - /// ctx: WasiCtxBuilder::new().build(), - /// http_ctx: WasiHttpCtx::new(), - /// table: ResourceTable::new(), - /// }, - /// ); - /// - /// // use `linker.instantiate` to instantiate within `store` - /// - /// Ok(()) - /// } - /// - /// struct MyState { - /// ctx: WasiCtx, - /// http_ctx: WasiHttpCtx, - /// table: ResourceTable, - /// } - /// - /// impl WasiHttpView for MyState { - /// fn ctx(&mut self) -> &mut WasiHttpCtx { &mut self.http_ctx } - /// fn table(&mut self) -> &mut ResourceTable { &mut self.table } - /// } - /// impl WasiView for MyState { - /// fn ctx(&mut self) -> &mut WasiCtx { &mut self.ctx } - /// fn table(&mut self) -> &mut ResourceTable { &mut self.table } - /// } - /// ``` - pub fn add_to_linker(l: &mut wasmtime::component::Linker) -> anyhow::Result<()> - where - T: WasiHttpView + wasmtime_wasi::WasiView, - { - let closure = super::type_annotate_wasi::(|t| wasmtime_wasi::WasiImpl(t)); - - wasmtime_wasi::bindings::clocks::wall_clock::add_to_linker_get_host(l, closure)?; - wasmtime_wasi::bindings::clocks::monotonic_clock::add_to_linker_get_host(l, closure)?; - wasmtime_wasi::bindings::sync::io::poll::add_to_linker_get_host(l, closure)?; - wasmtime_wasi::bindings::sync::io::streams::add_to_linker_get_host(l, closure)?; - wasmtime_wasi::bindings::io::error::add_to_linker_get_host(l, closure)?; - 
wasmtime_wasi::bindings::cli::stdin::add_to_linker_get_host(l, closure)?; - wasmtime_wasi::bindings::cli::stdout::add_to_linker_get_host(l, closure)?; - wasmtime_wasi::bindings::cli::stderr::add_to_linker_get_host(l, closure)?; - wasmtime_wasi::bindings::random::random::add_to_linker_get_host(l, closure)?; - - add_only_http_to_linker(l)?; - - Ok(()) - } - - #[doc(hidden)] - // TODO: This is temporary solution until the wasmtime_wasi command functions can be removed - pub fn add_only_http_to_linker(l: &mut wasmtime::component::Linker) -> anyhow::Result<()> - where - T: WasiHttpView, - { - let closure = super::type_annotate_http::(|t| WasiHttpImpl(t)); - - crate::bindings::http::outgoing_handler::add_to_linker_get_host(l, closure)?; - crate::bindings::http::types::add_to_linker_get_host(l, closure)?; - - Ok(()) - } -} diff --git a/crates/wasi-http/src/types.rs b/crates/wasi-http/src/types.rs index 24387ebbfa5c..29dd9375bb63 100644 --- a/crates/wasi-http/src/types.rs +++ b/crates/wasi-http/src/types.rs @@ -8,7 +8,9 @@ use crate::{ error::dns_error, hyper_request_error, }; +use bytes::Bytes; use http_body_util::BodyExt; +use hyper::body::Body; use hyper::header::HeaderName; use std::any::Any; use std::time::Duration; @@ -30,6 +32,45 @@ impl WasiHttpCtx { } /// A trait which provides internal WASI HTTP state. 
+/// +/// # Example +/// +/// ``` +/// use wasmtime::component::ResourceTable; +/// use wasmtime_wasi::{WasiCtx, WasiView, WasiCtxBuilder}; +/// use wasmtime_wasi_http::{WasiHttpCtx, WasiHttpView}; +/// +/// struct MyState { +/// ctx: WasiCtx, +/// http_ctx: WasiHttpCtx, +/// table: ResourceTable, +/// } +/// +/// impl WasiHttpView for MyState { +/// fn ctx(&mut self) -> &mut WasiHttpCtx { &mut self.http_ctx } +/// fn table(&mut self) -> &mut ResourceTable { &mut self.table } +/// } +/// +/// impl WasiView for MyState { +/// fn ctx(&mut self) -> &mut WasiCtx { &mut self.ctx } +/// fn table(&mut self) -> &mut ResourceTable { &mut self.table } +/// } +/// +/// impl MyState { +/// fn new() -> MyState { +/// let mut wasi = WasiCtxBuilder::new(); +/// wasi.arg("./foo.wasm"); +/// wasi.arg("--help"); +/// wasi.env("FOO", "bar"); +/// +/// MyState { +/// ctx: wasi.build(), +/// table: ResourceTable::new(), +/// http_ctx: WasiHttpCtx::new(), +/// } +/// } +/// } +/// ``` pub trait WasiHttpView: Send { /// Returns a mutable reference to the WASI HTTP context. fn ctx(&mut self) -> &mut WasiHttpCtx; @@ -38,14 +79,16 @@ pub trait WasiHttpView: Send { fn table(&mut self) -> &mut ResourceTable; /// Create a new incoming request resource. - fn new_incoming_request( + fn new_incoming_request( &mut self, - req: hyper::Request, + req: hyper::Request, ) -> wasmtime::Result> where + B: Body + Send + Sync + 'static, Self: Sized, { let (parts, body) = req.into_parts(); + let body = body.map_err(crate::hyper_response_error).boxed(); let body = HostIncomingBody::new( body, // TODO: this needs to be plumbed through @@ -151,9 +194,9 @@ impl WasiHttpView for Box { /// themselves (or `add_to_linker_get_host`). 
/// /// This type is automatically used when using -/// [`wasmtime_wasi_http::proxy::add_to_linker`](crate::proxy::add_to_linker) +/// [`add_to_linker_async`](crate::add_to_linker_async) /// or -/// [`wasmtime_wasi_http::proxy::sync::add_to_linker`](crate::proxy::sync::add_to_linker) +/// [`add_to_linker_sync`](crate::add_to_linker_sync) /// and doesn't need to be manually configured. #[repr(transparent)] pub struct WasiHttpImpl(pub T); diff --git a/crates/wasi-http/tests/all/async_.rs b/crates/wasi-http/tests/all/async_.rs index ba56d9c8639c..4ea9c7c0782d 100644 --- a/crates/wasi-http/tests/all/async_.rs +++ b/crates/wasi-http/tests/all/async_.rs @@ -13,7 +13,7 @@ async fn run(path: &str, server: &Server) -> Result<()> { let mut store = store(&engine, server); let mut linker = Linker::new(&engine); wasmtime_wasi::add_to_linker_async(&mut linker)?; - wasmtime_wasi_http::proxy::add_only_http_to_linker(&mut linker)?; + wasmtime_wasi_http::add_only_http_to_linker_async(&mut linker)?; let command = Command::instantiate_async(&mut store, &component, &linker).await?; let result = command.wasi_cli_run().call_run(&mut store).await?; result.map_err(|()| anyhow::anyhow!("run returned an error")) diff --git a/crates/wasi-http/tests/all/main.rs b/crates/wasi-http/tests/all/main.rs index d50277717238..1bdd6ff88815 100644 --- a/crates/wasi-http/tests/all/main.rs +++ b/crates/wasi-http/tests/all/main.rs @@ -2,7 +2,7 @@ use crate::http_server::Server; use anyhow::{anyhow, Context, Result}; use futures::{channel::oneshot, future, stream, FutureExt}; use http_body::Frame; -use http_body_util::{combinators::BoxBody, Collected, Empty, StreamBody}; +use http_body_util::{combinators::BoxBody, BodyExt, Collected, Empty, StreamBody}; use hyper::{body::Bytes, server::conn::http1, service::service_fn, Method, StatusCode}; use sha2::{Digest, Sha256}; use std::{collections::HashMap, iter, net::Ipv4Addr, str, sync::Arc}; @@ -14,7 +14,7 @@ use wasmtime::{ use wasmtime_wasi::{self, 
pipe::MemoryOutputPipe, WasiCtx, WasiCtxBuilder, WasiView}; use wasmtime_wasi_http::{ bindings::http::types::ErrorCode, - body::{HyperIncomingBody, HyperOutgoingBody}, + body::HyperOutgoingBody, io::TokioIo, types::{self, HostFutureIncomingResponse, IncomingResponse, OutgoingRequestConfig}, HttpResult, WasiHttpCtx, WasiHttpView, @@ -128,7 +128,7 @@ mod sync; async fn run_wasi_http( component_filename: &str, - req: hyper::Request, + req: hyper::Request>, send_request: Option, rejected_authority: Option, ) -> anyhow::Result>, ErrorCode>> { @@ -161,9 +161,9 @@ async fn run_wasi_http( let mut store = Store::new(&engine, ctx); let mut linker = Linker::new(&engine); - wasmtime_wasi_http::proxy::add_to_linker(&mut linker)?; + wasmtime_wasi_http::add_to_linker_async(&mut linker)?; let proxy = - wasmtime_wasi_http::proxy::Proxy::instantiate_async(&mut store, &component, &linker) + wasmtime_wasi_http::bindings::Proxy::instantiate_async(&mut store, &component, &linker) .await?; let req = store.data_mut().new_incoming_request(req)?; @@ -182,7 +182,6 @@ async fn run_wasi_http( let resp = match receiver.await { Ok(Ok(resp)) => { - use http_body_util::BodyExt; let (parts, body) = resp.into_parts(); let collected = BodyExt::collect(body).await?; Some(Ok(hyper::Response::from_parts(parts, collected))) @@ -277,7 +276,10 @@ async fn do_wasi_http_hash_all(override_send_request: bool) -> Result<()> { }| { let response = handle(request.into_parts().0).map(|resp| { Ok(IncomingResponse { - resp, + resp: resp.map(|body| { + body.map_err(wasmtime_wasi_http::hyper_response_error) + .boxed() + }), worker: None, between_bytes_timeout, }) @@ -498,7 +500,7 @@ async fn do_wasi_http_echo(uri: &str, url_header: Option<&str>) -> Result<()> { let request = request.body(BoxBody::new(StreamBody::new(stream::iter( body.chunks(16 * 1024) - .map(|chunk| Ok::<_, ErrorCode>(Frame::data(Bytes::copy_from_slice(chunk)))) + .map(|chunk| Ok::<_, hyper::Error>(Frame::data(Bytes::copy_from_slice(chunk)))) 
.collect::>(), ))))?; @@ -554,13 +556,13 @@ async fn wasi_http_without_port() -> Result<()> { mod body { use http_body_util::{combinators::BoxBody, BodyExt, Empty, Full}; use hyper::body::Bytes; - use wasmtime_wasi_http::body::HyperIncomingBody; + use hyper::Error; - pub fn full(bytes: Bytes) -> HyperIncomingBody { + pub fn full(bytes: Bytes) -> BoxBody { BoxBody::new(Full::new(bytes).map_err(|_| unreachable!())) } - pub fn empty() -> HyperIncomingBody { + pub fn empty() -> BoxBody { BoxBody::new(Empty::new().map_err(|_| unreachable!())) } } diff --git a/crates/wasi-http/tests/all/sync.rs b/crates/wasi-http/tests/all/sync.rs index ee12813d4cfd..64147a5a0e30 100644 --- a/crates/wasi-http/tests/all/sync.rs +++ b/crates/wasi-http/tests/all/sync.rs @@ -12,7 +12,7 @@ fn run(path: &str, server: &Server) -> Result<()> { let mut store = store(&engine, server); let mut linker = Linker::new(&engine); wasmtime_wasi::add_to_linker_sync(&mut linker)?; - wasmtime_wasi_http::proxy::sync::add_only_http_to_linker(&mut linker)?; + wasmtime_wasi_http::add_only_http_to_linker_sync(&mut linker)?; let command = Command::instantiate(&mut store, &component, &linker)?; let result = command.wasi_cli_run().call_run(&mut store)?; result.map_err(|()| anyhow::anyhow!("run returned an error")) diff --git a/crates/wasi/src/bindings.rs b/crates/wasi/src/bindings.rs index 5e1ece4d7408..62a0ed020938 100644 --- a/crates/wasi/src/bindings.rs +++ b/crates/wasi/src/bindings.rs @@ -1,7 +1,146 @@ -// Generate traits for synchronous bindings. -// -// Note that this is only done for interfaces which can block, or those which -// have some functions in `only_imports` below for being async. +//! Auto-generated bindings for WASI interfaces. +//! +//! This module contains the output of the [`bindgen!`] macro when run over +//! the `wasi:cli/command` world. That means this module has all the generated +//! types for WASI for all of its base interfaces used by the CLI world. This +//! 
module itself by default contains bindings for `async`-related traits. The +//! [`sync`] module contains bindings for a non-`async` version of types. +//! +//! [`bindgen!`]: https://docs.rs/wasmtime/latest/wasmtime/component/macro.bindgen.html +//! +//! # Examples +//! +//! If you have a WIT world which refers to WASI interfaces you probably want to +//! use this crate's bindings rather than generate fresh bindings. That can be +//! done using the `with` option to [`bindgen!`]: +//! +//! ```rust +//! use wasmtime_wasi::{WasiCtx, ResourceTable, WasiView}; +//! use wasmtime::{Result, Engine, Config}; +//! use wasmtime::component::Linker; +//! +//! wasmtime::component::bindgen!({ +//! inline: " +//! package example:wasi; +//! +//! // An example of extending the `wasi:cli/command` world with a +//! // custom host interface. +//! world my-world { +//! include wasi:cli/command@0.2.0; +//! +//! import custom-host; +//! } +//! +//! interface custom-host { +//! my-custom-function: func(); +//! } +//! ", +//! path: "wit", +//! with: { +//! "wasi": wasmtime_wasi::bindings, +//! }, +//! async: true, +//! }); +//! +//! struct MyState { +//! table: ResourceTable, +//! ctx: WasiCtx, +//! } +//! +//! #[async_trait::async_trait] +//! impl example::wasi::custom_host::Host for MyState { +//! async fn my_custom_function(&mut self) { +//! // .. +//! } +//! } +//! +//! impl WasiView for MyState { +//! fn table(&mut self) -> &mut ResourceTable { &mut self.table } +//! fn ctx(&mut self) -> &mut WasiCtx { &mut self.ctx } +//! } +//! +//! fn main() -> Result<()> { +//! let mut config = Config::default(); +//! config.async_support(true); +//! let engine = Engine::new(&config)?; +//! let mut linker: Linker = Linker::new(&engine); +//! wasmtime_wasi::add_to_linker_async(&mut linker)?; +//! example::wasi::custom_host::add_to_linker(&mut linker, |state| state)?; +//! +//! // .. use `Linker` to instantiate component ... +//! +//! Ok(()) +//! } +//! 
``` + +/// Synchronous-generated bindings for WASI interfaces. +/// +/// This is the same as the top-level [`bindings`](crate::bindings) module of +/// this crate except that it's for synchronous calls. +/// +/// # Examples +/// +/// If you have a WIT world which refers to WASI interfaces you probably want to +/// use this crate's bindings rather than generate fresh bindings. That can be +/// done using the `with` option to `bindgen!`: +/// +/// ```rust +/// use wasmtime_wasi::{WasiCtx, ResourceTable, WasiView}; +/// use wasmtime::{Result, Engine}; +/// use wasmtime::component::Linker; +/// +/// wasmtime::component::bindgen!({ +/// inline: " +/// package example:wasi; +/// +/// // An example of extending the `wasi:cli/command` world with a +/// // custom host interface. +/// world my-world { +/// include wasi:cli/command@0.2.0; +/// +/// import custom-host; +/// } +/// +/// interface custom-host { +/// my-custom-function: func(); +/// } +/// ", +/// path: "wit", +/// with: { +/// "wasi": wasmtime_wasi::bindings::sync, +/// }, +/// // This is required for bindings using `wasmtime-wasi` and it otherwise +/// // isn't the default for non-async bindings. +/// require_store_data_send: true, +/// }); +/// +/// struct MyState { +/// table: ResourceTable, +/// ctx: WasiCtx, +/// } +/// +/// impl example::wasi::custom_host::Host for MyState { +/// fn my_custom_function(&mut self) { +/// // .. +/// } +/// } +/// +/// impl WasiView for MyState { +/// fn table(&mut self) -> &mut ResourceTable { &mut self.table } +/// fn ctx(&mut self) -> &mut WasiCtx { &mut self.ctx } +/// } +/// +/// fn main() -> Result<()> { +/// let engine = Engine::default(); +/// let mut linker: Linker = Linker::new(&engine); +/// wasmtime_wasi::add_to_linker_sync(&mut linker)?; +/// example::wasi::custom_host::add_to_linker(&mut linker, |state| state)?; +/// +/// // .. use `Linker` to instantiate component ... 
+/// +/// Ok(()) +/// } +/// ``` pub mod sync { mod generated { use crate::{FsError, SocketError, StreamError}; @@ -43,7 +182,7 @@ pub mod sync { }); } pub use self::generated::exports; - pub use self::generated::wasi::{filesystem, io, sockets}; + pub use self::generated::wasi::*; /// Synchronous bindings to execute and run a `wasi:cli/command`. /// diff --git a/crates/wasi/src/lib.rs b/crates/wasi/src/lib.rs index d3e76737d4ed..fe8594c74c75 100644 --- a/crates/wasi/src/lib.rs +++ b/crates/wasi/src/lib.rs @@ -50,8 +50,8 @@ //! This crate uses [`wasmtime::component::bindgen!`] to generate bindings for //! all WASI interfaces. Raw bindings are available in the [`bindings`] module //! of this crate. Downstream users can either implement these traits themselves -//! or you can use the built-in implementations in this crate for all -//! `T: WasiVew` +//! or you can use the built-in implementations in this crate for +//! `WasiImpl`. //! //! # The `WasiView` trait //! @@ -61,9 +61,10 @@ //! traits look like: //! //! ``` +//! # use wasmtime_wasi::WasiImpl; //! # trait WasiView {} //! # mod bindings { pub mod wasi { pub trait Host {} } } -//! impl bindings::wasi::Host for T { +//! impl bindings::wasi::Host for WasiImpl { //! // ... //! } //! ``` @@ -177,6 +178,7 @@ //! [`wasi:sockets/udp-create-socket`]: bindings::sockets::udp_create_socket::Host //! [`wasi:sockets/udp`]: bindings::sockets::udp::Host //! [async]: https://docs.rs/wasmtime/latest/wasmtime/struct.Config.html#method.async_support +//! [`ResourceTable`]: wasmtime::component::ResourceTable #![cfg_attr(docsrs, feature(doc_auto_cfg))] diff --git a/crates/wasmtime/src/lib.rs b/crates/wasmtime/src/lib.rs index bedba1e4e846..69f2a8a3ceb3 100644 --- a/crates/wasmtime/src/lib.rs +++ b/crates/wasmtime/src/lib.rs @@ -280,7 +280,7 @@ // documentation is written for the "one build" of the `main` branch which has // most features enabled. 
This will present warnings in stripped-down doc builds // and will prevent the doc build from failing. -#![cfg_attr(feature = "default", deny(rustdoc::broken_intra_doc_links))] +#![cfg_attr(feature = "default", warn(rustdoc::broken_intra_doc_links))] #![no_std] #[cfg(any(feature = "std", unix, windows))] diff --git a/crates/wasmtime/src/runtime/component/instance.rs b/crates/wasmtime/src/runtime/component/instance.rs index 4f1e710919be..bb7c337eb842 100644 --- a/crates/wasmtime/src/runtime/component/instance.rs +++ b/crates/wasmtime/src/runtime/component/instance.rs @@ -9,7 +9,7 @@ use crate::prelude::*; use crate::runtime::vm::component::{ComponentInstance, OwnedComponentInstance}; use crate::runtime::vm::{CompiledModuleId, VMFuncRef}; use crate::store::{StoreOpaque, Stored}; -use crate::{AsContextMut, Module, StoreContextMut}; +use crate::{AsContextMut, Engine, Module, StoreContextMut}; use alloc::sync::Arc; use core::marker; use core::ptr::{self, NonNull}; @@ -788,6 +788,11 @@ impl InstancePre { &self.component } + /// Returns the underlying engine. + pub fn engine(&self) -> &Engine { + self.component.engine() + } + /// Performs the instantiation process into the store specified. 
// // TODO: needs more docs diff --git a/crates/wit-bindgen/src/lib.rs b/crates/wit-bindgen/src/lib.rs index 0d2685f8c0fa..b8bf2bdda865 100644 --- a/crates/wit-bindgen/src/lib.rs +++ b/crates/wit-bindgen/src/lib.rs @@ -542,6 +542,7 @@ impl Wasmtime { } uwriteln!(gen.src, "}}"); + uwriteln!(gen.src, "#[derive(Clone)]"); uwriteln!(gen.src, "pub struct {struct_name}Pre {{"); for (_, func) in iface.functions.iter() { uwriteln!( @@ -746,6 +747,16 @@ pub fn new( } self.src.push_str("}\n"); + uwriteln!(self.src, "impl Clone for {camel}Pre {{"); + uwriteln!(self.src, "fn clone(&self) -> Self {{"); + uwriteln!(self.src, "Self {{ instance_pre: self.instance_pre.clone(),"); + for (name, _field) in self.exports.fields.iter() { + uwriteln!(self.src, "{name}: self.{name}.clone(),"); + } + uwriteln!(self.src, "}}"); // `Self ... + uwriteln!(self.src, "}}"); // `fn clone` + uwriteln!(self.src, "}}"); // `impl Clone` + uwriteln!( self.src, " @@ -828,6 +839,18 @@ pub fn new( } uwriteln!(self.src, "}})"); uwriteln!(self.src, "}}"); // close `fn new` + uwriteln!( + self.src, + " + pub fn engine(&self) -> &{wt}::Engine {{ + self.instance_pre.engine() + }} + + pub fn instance_pre(&self) -> &{wt}::component::InstancePre<_T> {{ + &self.instance_pre + }} + ", + ); uwriteln!(self.src, "}}"); diff --git a/src/commands/run.rs b/src/commands/run.rs index 4a7f9f35c651..317b91aa22d7 100644 --- a/src/commands/run.rs +++ b/src/commands/run.rs @@ -701,7 +701,7 @@ impl RunCommand { bail!("Cannot enable wasi-http for core wasm modules"); } CliLinker::Component(linker) => { - wasmtime_wasi_http::proxy::sync::add_only_http_to_linker(linker)?; + wasmtime_wasi_http::add_only_http_to_linker_sync(linker)?; } } diff --git a/src/commands/serve.rs b/src/commands/serve.rs index 3e8b2f6e954e..56c2af9d3024 100644 --- a/src/commands/serve.rs +++ b/src/commands/serve.rs @@ -12,12 +12,9 @@ use std::{ use wasmtime::component::Linker; use wasmtime::{Config, Engine, Memory, MemoryType, Store, StoreLimits}; use 
wasmtime_wasi::{StreamError, StreamResult, WasiCtx, WasiCtxBuilder, WasiView}; +use wasmtime_wasi_http::bindings::ProxyPre; use wasmtime_wasi_http::io::TokioIo; -use wasmtime_wasi_http::proxy::ProxyPre; -use wasmtime_wasi_http::{ - bindings::http::types as http_types, body::HyperOutgoingBody, hyper_response_error, - WasiHttpCtx, WasiHttpView, -}; +use wasmtime_wasi_http::{body::HyperOutgoingBody, WasiHttpCtx, WasiHttpView}; #[cfg(feature = "wasi-nn")] use wasmtime_wasi_nn::WasiNnCtx; @@ -213,14 +210,14 @@ impl ServeCommand { // those in the `proxy` world. If `-Scli` is present then add all // `command` APIs and then additionally add in the required HTTP APIs. // - // If `-Scli` isn't passed then use the `proxy::add_to_linker` + // If `-Scli` isn't passed then use the `add_to_linker_async` // bindings which adds just those interfaces that the proxy interface // uses. if cli == Some(true) { wasmtime_wasi::add_to_linker_async(linker)?; - wasmtime_wasi_http::proxy::add_only_http_to_linker(linker)?; + wasmtime_wasi_http::add_only_http_to_linker_async(linker)?; } else { - wasmtime_wasi_http::proxy::add_to_linker(linker)?; + wasmtime_wasi_http::add_to_linker_async(linker)?; } if self.run.common.wasi.nn == Some(true) { @@ -405,43 +402,10 @@ async fn handle_request( ProxyHandler(inner): ProxyHandler, req: Request, ) -> Result> { - use http_body_util::BodyExt; - let (sender, receiver) = tokio::sync::oneshot::channel(); let task = tokio::task::spawn(async move { let req_id = inner.next_req_id(); - let (mut parts, body) = req.into_parts(); - - parts.uri = { - let uri_parts = parts.uri.into_parts(); - - let scheme = uri_parts.scheme.unwrap_or(http::uri::Scheme::HTTP); - - let host = if let Some(val) = parts.headers.get(hyper::header::HOST) { - std::str::from_utf8(val.as_bytes()) - .map_err(|_| http_types::ErrorCode::HttpRequestUriInvalid)? - } else { - uri_parts - .authority - .as_ref() - .ok_or(http_types::ErrorCode::HttpRequestUriInvalid)? 
- .host() - }; - - let path_with_query = uri_parts - .path_and_query - .ok_or(http_types::ErrorCode::HttpRequestUriInvalid)?; - - hyper::Uri::builder() - .scheme(scheme) - .authority(host) - .path_and_query(path_with_query) - .build() - .map_err(|_| http_types::ErrorCode::HttpRequestUriInvalid)? - }; - - let req = hyper::Request::from_parts(parts, body.map_err(hyper_response_error).boxed()); log::info!( "Request {req_id} handling {} to {}", From f4b49b8c8942523b5bc44d09aa455fadaf848e41 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Mon, 24 Jun 2024 23:19:46 +0100 Subject: [PATCH 04/10] riscv64: Dynamically emit islands for return calls (#8868) * riscv64: Increase max inst size * riscv64: Emit islands in return call sequence * riscv64: Update worst case size tests Having duplicate registers was preventing some moves from being generated --- cranelift/codegen/src/isa/riscv64/abi.rs | 2 +- .../codegen/src/isa/riscv64/inst/emit.rs | 63 ++- .../src/isa/riscv64/inst/emit_tests.rs | 40 +- cranelift/codegen/src/isa/riscv64/inst/mod.rs | 2 +- .../filetests/isa/riscv64/issue8866.clif | 489 ++++++++++++++++++ .../filetests/isa/riscv64/return-call.clif | 9 +- 6 files changed, 553 insertions(+), 52 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/riscv64/issue8866.clif diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs index d13949c1ddbf..05f3f3a45ffe 100644 --- a/cranelift/codegen/src/isa/riscv64/abi.rs +++ b/cranelift/codegen/src/isa/riscv64/abi.rs @@ -855,7 +855,7 @@ fn compute_clobber_size(clobbers: &[Writable]) -> u32 { align_to(clobbered_size, 16) } -pub(crate) const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty() +const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty() .with(px_reg(1)) .with(px_reg(5)) .with(px_reg(6)) diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 044368a5bc16..90186fb9fbfb 100644 --- 
a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -47,14 +47,14 @@ pub enum EmitVState { #[derive(Default, Clone, Debug)] pub struct EmitState { /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. - pub(crate) stack_map: Option, + stack_map: Option, /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and /// optimized away at compiletime. See [cranelift_control]. - pub(crate) ctrl_plane: ControlPlane, + ctrl_plane: ControlPlane, /// Vector State /// Controls the current state of the vector unit at the emission point. - pub(crate) vstate: EmitVState, - pub(crate) frame_layout: FrameLayout, + vstate: EmitVState, + frame_layout: FrameLayout, } impl EmitState { @@ -242,14 +242,21 @@ impl MachInstEmit for Inst { self.emit_uncompressed(sink, emit_info, state, &mut start_off); } - let end_off = sink.cur_offset(); - assert!( - (end_off - start_off) <= Inst::worst_case_size(), - "Inst:{:?} length:{} worst_case_size:{}", + // We exclude br_table and return call from these checks since they emit + // their own islands, and thus are allowed to exceed the worst case size. + if !matches!( self, - end_off - start_off, - Inst::worst_case_size() - ); + Inst::BrTable { .. } | Inst::ReturnCall { .. } | Inst::ReturnCallInd { .. } + ) { + let end_off = sink.cur_offset(); + assert!( + (end_off - start_off) <= Inst::worst_case_size(), + "Inst:{:?} length:{} worst_case_size:{}", + self, + end_off - start_off, + Inst::worst_case_size() + ); + } } fn pretty_print_inst(&self, state: &mut Self::State) -> String { @@ -2608,6 +2615,40 @@ fn emit_return_call_common_sequence( emit_info: &EmitInfo, state: &mut EmitState, info: &ReturnCallInfo, +) { + // The return call sequence can potentially emit a lot of instructions (up to 634 bytes!) + // So lets emit an island here if we need it. 
+ // + // It is difficult to calculate exactly how many instructions are going to be emitted, so + // we calculate it by emitting it into a disposable buffer, and then checking how many instructions + // were actually emitted. + let mut buffer = MachBuffer::new(); + let mut fake_emit_state = state.clone(); + + return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info); + + // Finalize the buffer and get the number of bytes emitted. + let buffer = buffer.finish(&Default::default(), &mut Default::default()); + let length = buffer.data().len() as u32; + + // And now emit the island inline with this instruction. + if sink.island_needed(length) { + let jump_around_label = sink.get_label(); + Inst::gen_jump(jump_around_label).emit(sink, emit_info, state); + sink.emit_island(length + 4, &mut state.ctrl_plane); + sink.bind_label(jump_around_label, &mut state.ctrl_plane); + } + + // Now that we're done, emit the *actual* return sequence. + return_call_emit_impl(sink, emit_info, state, info); +} + +/// This should not be called directly, Instead prefer to call [emit_return_call_common_sequence]. 
+fn return_call_emit_impl( + sink: &mut MachBuffer, + emit_info: &EmitInfo, + state: &mut EmitState, + info: &ReturnCallInfo, ) { let sp_to_fp_offset = { let frame_layout = state.frame_layout(); diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs index ffa5f118f24a..fe05c646846e 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs @@ -1,6 +1,6 @@ #[allow(unused)] use crate::ir::LibCall; -use crate::isa::riscv64::{abi::DEFAULT_CLOBBERS, inst::*}; +use crate::isa::riscv64::inst::*; use std::borrow::Cow; fn fa7() -> Reg { @@ -2190,47 +2190,21 @@ fn riscv64_worst_case_instruction_size() { .map(|op| Inst::AtomicRmwLoop { op: *op, offset: a0(), - dst: writable_a0(), + dst: writable_a1(), ty: I16, - p: a0(), - x: a0(), + p: a1(), + x: a2(), t0: writable_a0(), }), ); - candidates.push(Inst::ReturnCallInd { - callee: a0(), - info: Box::new(ReturnCallInfo { - opcode: Opcode::ReturnCallIndirect, - new_stack_arg_size: 64, - uses: DEFAULT_CLOBBERS - .into_iter() - .map(|reg| CallArgPair { - vreg: reg.into(), - preg: reg.into(), - }) - .collect(), - }), - }); + // Return Call Indirect and BrTable are the largest instructions possible. However they + // emit their own island, so we don't account them here. let mut max: (u32, MInst) = (0, Inst::Nop0); for i in candidates { let mut buffer = MachBuffer::new(); - let mut emit_state = EmitState { - // This frame layout is important to ensure that the ReturnCallIndirect - // instruction in this test, becomes as large as practically possible. 
- frame_layout: FrameLayout { - tail_args_size: 64, - setup_area_size: 8192, - clobbered_callee_saves: DEFAULT_CLOBBERS - .into_iter() - .filter(|r| r.class() != RegClass::Vector) - .map(|r| Writable::from_reg(r.into())) - .collect(), - ..Default::default() - }, - ..Default::default() - }; + let mut emit_state = Default::default(); i.emit(&mut buffer, &emit_info, &mut emit_state); let buffer = buffer.finish(&Default::default(), &mut Default::default()); let length = buffer.data().len() as u32; diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index b03f8e98e411..f80ca99e29bf 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -845,7 +845,7 @@ impl MachInst for Inst { fn worst_case_size() -> CodeOffset { // Our worst case size is determined by the riscv64_worst_case_instruction_size test - 168 + 84 } fn ref_type_regclass(_settings: &settings::Flags) -> RegClass { diff --git a/cranelift/filetests/filetests/isa/riscv64/issue8866.clif b/cranelift/filetests/filetests/isa/riscv64/issue8866.clif new file mode 100644 index 000000000000..b55f140a902f --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/issue8866.clif @@ -0,0 +1,489 @@ +;; Compile test case + +test compile +set bb_padding_log2_minus_one=4 +set enable_alias_analysis=false +set enable_llvm_abi_extensions=true +set machine_code_cfg_info=true +set enable_jump_tables=false +set enable_heap_access_spectre_mitigation=false +target riscv64 has_zcd has_zbkb has_zbc has_zbs has_zicond has_zvl32b has_zvl64b has_zvl128b has_zvl1024b has_zvl2048b has_zvl4096b has_zvl8192b has_zvl16384b has_zvl32768b + +function u1:0() tail { + ss0 = explicit_slot 50, align = 512 + ss1 = explicit_slot 47, align = 4 + ss2 = explicit_slot 34, align = 32 + ss3 = explicit_slot 103, align = 1024 + ss4 = explicit_slot 110, align = 512 + ss5 = explicit_slot 126, align = 512 + sig0 = (f32, f64, f64, f32, i8 uext, i128, 
i8 uext, i32, i16 sext, i64, i64 sext, i128, i8 sext, i8, i64, i64 sext) -> i16 sext, i64 sext, f64, i32 sext, f64, i8 sext, i64 sext, f32 cold + sig1 = () system_v + sig2 = (i128, i16 sext, i128 sext, i32 sext, i16, i64 uext, f32, i8 sext, f32, i8, i64, i64, i64, i64 uext, f64) -> f64, i8 uext, f32, i128, i64 uext, i8, i16 sext, i64 sext tail + sig3 = () -> i8 sext, f32, i128, i32, f32, i128 uext, i8, i8 uext, f64, i8 sext, f32 system_v + sig4 = (i8, i16, i64 sext, i64 sext, i128 uext, i8, i32, f64, i32, f32, i128 uext, i8, i8 uext, f64, f64) -> i8 uext system_v + sig5 = (i8 sext, i64 uext, i16 sext, i64 sext, i128 uext, i128 sext, f32, i16 uext, i64 sext, i32 sext, i64, i64 uext, f64, f64, i16 sext) -> f32 tail + sig6 = (i64 sext, i64 sext, i64 sext, i64 sext, i64 sext, i64 sext, i64 sext, i8 uext, i16 uext, i16, i64 sext, i64 sext, i128 uext, i8 sext, f32) tail + sig7 = (f32) -> f32 system_v + sig8 = (f64) -> f64 system_v + sig9 = (f32) -> f32 system_v + sig10 = (f64) -> f64 system_v + sig11 = (f32) -> f32 system_v + sig12 = (f64) -> f64 system_v + sig13 = (f32) -> f32 system_v + sig14 = (f64) -> f64 system_v + sig15 = (f32, f32, f32) -> f32 system_v + sig16 = (f64, f64, f64) -> f64 system_v + fn0 = colocated u2:0 sig0 + fn1 = colocated u2:1 sig1 + fn2 = colocated u2:2 sig2 + fn3 = colocated u2:3 sig3 + fn4 = colocated u2:4 sig4 + fn5 = colocated u2:5 sig5 + fn6 = colocated u2:6 sig6 + fn7 = %CeilF32 sig7 + fn8 = %CeilF64 sig8 + fn9 = %FloorF32 sig9 + fn10 = %FloorF64 sig10 + fn11 = colocated %TruncF32 sig11 + fn12 = %TruncF64 sig12 + fn13 = colocated %NearestF32 sig13 + fn14 = %NearestF64 sig14 + fn15 = %FmaF32 sig15 + fn16 = %FmaF64 sig16 + +block0: + v0 = iconst.i64 0xef31_de2a_2352_79ff + v161 -> v0 + v1 = iconst.i64 0x2231_ffd1_ff29_ff26 + v2 = f64const 0x1.8ff2320672823p-225 + v3 = iconst.i16 0xffef + v163 -> v3 + v4 = iconst.i64 0xddde_2a23_52f9_ffff + v5 = iconst.i64 0xc8c8_c8c8_c8c8_c8c8 + v6 = iconst.i64 0xc8c8_c8c8_c8c8_c8c8 + v7 = iconcat v6, v5 ; v6 
= 0xc8c8_c8c8_c8c8_c8c8, v5 = 0xc8c8_c8c8_c8c8_c8c8 + v164 -> v7 + v8 = iconst.i64 0xc8c8_c8c8_c8c8_c8c8 + v9 = iconst.i64 0xc8c8_c8c8_c8c8_c8c8 + v10 = iconcat v9, v8 ; v9 = 0xc8c8_c8c8_c8c8_c8c8, v8 = 0xc8c8_c8c8_c8c8_c8c8 + v11 = iconst.i64 0xcbcb_cbcb_cbc8_c8c8 + v12 = iconst.i64 0xc8c8_c8c8_c8c8_c8c8 + v13 = iconcat v12, v11 ; v12 = 0xc8c8_c8c8_c8c8_c8c8, v11 = 0xcbcb_cbcb_cbc8_c8c8 + v14 = iconst.i8 203 + v162 -> v14 + v15 = f32const -0x1.979796p24 + v165 -> v15 + v16 = iconst.i64 0x0031_2222_2a2f + v17 = iconst.i64 0xcbcb_2adc_9e98_d7d4 + v18 = iconcat v17, v16 ; v17 = 0xcbcb_2adc_9e98_d7d4, v16 = 0x0031_2222_2a2f + v19 = iconst.i8 0 + v20 = iconst.i16 0 + v21 = iconst.i32 0 + v22 = iconst.i64 0 + v23 = uextend.i128 v22 ; v22 = 0 + v24 = stack_addr.i64 ss2 + store notrap table v23, v24 + v25 = stack_addr.i64 ss2+16 + store notrap table v23, v25 + v26 = stack_addr.i64 ss2+32 + store notrap table v20, v26 ; v20 = 0 + v27 = stack_addr.i64 ss1 + store notrap table v23, v27 + v28 = stack_addr.i64 ss1+16 + store notrap table v23, v28 + v29 = stack_addr.i64 ss1+32 + store notrap table v22, v29 ; v22 = 0 + v30 = stack_addr.i64 ss1+40 + store notrap table v21, v30 ; v21 = 0 + v31 = stack_addr.i64 ss1+44 + store notrap table v20, v31 ; v20 = 0 + v32 = stack_addr.i64 ss1+46 + store notrap table v19, v32 ; v19 = 0 + v33 = stack_addr.i64 ss0 + store notrap table v23, v33 + v34 = stack_addr.i64 ss0+16 + store notrap table v23, v34 + v35 = stack_addr.i64 ss0+32 + store notrap table v23, v35 + v36 = stack_addr.i64 ss0+48 + store notrap table v20, v36 ; v20 = 0 + v37 = stack_addr.i64 ss3 + store notrap vmctx v23, v37 + v38 = stack_addr.i64 ss3+16 + store notrap vmctx v23, v38 + v39 = stack_addr.i64 ss3+32 + store notrap vmctx v23, v39 + v40 = stack_addr.i64 ss3+48 + store notrap vmctx v23, v40 + v41 = stack_addr.i64 ss3+64 + store notrap vmctx v23, v41 + v42 = stack_addr.i64 ss3+80 + store notrap vmctx v23, v42 + v43 = stack_addr.i64 ss3+96 + store notrap vmctx v21, v43 ; 
v21 = 0 + v44 = stack_addr.i64 ss3+100 + store notrap vmctx v20, v44 ; v20 = 0 + v45 = stack_addr.i64 ss3+102 + store notrap vmctx v19, v45 ; v19 = 0 + v46 = stack_addr.i64 ss4 + store notrap heap v23, v46 + v47 = stack_addr.i64 ss4+16 + store notrap heap v23, v47 + v48 = stack_addr.i64 ss4+32 + store notrap heap v23, v48 + v49 = stack_addr.i64 ss4+48 + store notrap heap v23, v49 + v50 = stack_addr.i64 ss4+64 + store notrap heap v23, v50 + v51 = stack_addr.i64 ss4+80 + store notrap heap v23, v51 + v52 = stack_addr.i64 ss4+96 + store notrap heap v22, v52 ; v22 = 0 + v53 = stack_addr.i64 ss4+104 + store notrap heap v21, v53 ; v21 = 0 + v54 = stack_addr.i64 ss4+108 + store notrap heap v20, v54 ; v20 = 0 + v55 = stack_addr.i64 ss5 + store notrap vmctx v23, v55 + v56 = stack_addr.i64 ss5+16 + store notrap vmctx v23, v56 + v57 = stack_addr.i64 ss5+32 + store notrap vmctx v23, v57 + v58 = stack_addr.i64 ss5+48 + store notrap vmctx v23, v58 + v59 = stack_addr.i64 ss5+64 + store notrap vmctx v23, v59 + v60 = stack_addr.i64 ss5+80 + store notrap vmctx v23, v60 + v61 = stack_addr.i64 ss5+96 + store notrap vmctx v23, v61 + v62 = stack_addr.i64 ss5+112 + store notrap vmctx v22, v62 ; v22 = 0 + v63 = stack_addr.i64 ss5+120 + store notrap vmctx v21, v63 ; v21 = 0 + v64 = stack_addr.i64 ss5+124 + store notrap vmctx v20, v64 ; v20 = 0 + v65 = icmp_imm uge v3, 0x504d ; v3 = 0xffef + brif v65, block3, block2 + +block3: + v66 = icmp_imm.i16 uge v3, 0xd5d7 ; v3 = 0xffef + brif v66, block5, block4 + +block5: + v67 = icmp_imm.i16 uge v3, 0xf6ff ; v3 = 0xffef + brif v67, block7, block6 + +block7: + v68 = icmp_imm.i16 uge v3, 0xff22 ; v3 = 0xffef + brif v68, block9, block8 + +block9: + v69 = icmp_imm.i16 eq v3, 0xffdd ; v3 = 0xffef + brif v69, block1, block10 + +block10: + v70 = icmp_imm.i16 uge v3, 0xff79 ; v3 = 0xffef + brif v70, block12, block11 + +block12: + v71 = iadd_imm.i16 v3, 0xffff_ffff_ffff_0087 ; v3 = 0xffef + v72 = uextend.i32 v71 + br_table v72, block1, [block1, block1, 
block1, block1, block1, block1] + +block11: + v73 = icmp_imm.i16 uge v3, 0xff22 ; v3 = 0xffef + brif v73, block13, block1 + +block13: + v74 = iadd_imm.i16 v3, 0xffff_ffff_ffff_00de ; v3 = 0xffef + v75 = uextend.i32 v74 + br_table v75, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block8: + v76 = icmp_imm.i16 eq v3, 0xf951 ; v3 = 0xffef + brif v76, block1, block14 + +block14: + v77 = icmp_imm.i16 uge v3, 0xf6ff ; v3 = 0xffef + brif v77, block15, block1 + +block15: + v78 = iadd_imm.i16 v3, 0xffff_ffff_ffff_0901 ; v3 = 0xffef + v79 = uextend.i32 v78 + br_table v79, block1, [block1, block1, block1, block1, block1, block1, block1] + +block6: + v80 = icmp_imm.i16 uge v3, 0xef2a ; v3 = 0xffef + brif v80, block17, block16 + +block17: + v81 = icmp_imm.i16 eq v3, 0xf426 ; v3 = 0xffef + brif v81, block1, block18 + +block18: + v82 = icmp_imm.i16 uge v3, 0xefff ; v3 = 0xffef + brif v82, block20, block19 + +block20: + v83 = iadd_imm.i16 v3, 0xffff_ffff_ffff_1001 ; v3 = 0xffef + v84 = uextend.i32 v83 + br_table v84, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block19: + v85 = icmp_imm.i16 uge v3, 0xef2a ; v3 = 0xffef + brif v85, block21, block1 + +block21: + v86 = iadd_imm.i16 v3, 0xffff_ffff_ffff_10d6 ; v3 = 0xffef + v87 = uextend.i32 v86 + br_table v87, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block16: + v88 = icmp_imm.i16 eq v3, 0xdc2a ; v3 = 0xffef + brif v88, block1, block22 + +block22: + v89 = icmp_imm.i16 uge v3, 0xd5d7 ; v3 = 0xffef + brif v89, block23, block1 + +block23: + v90 = iadd_imm.i16 v3, 0xffff_ffff_ffff_2a29 ; v3 = 0xffef + v91 = uextend.i32 v90 + br_table v91, block1, [block1, 
block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block4: + v92 = icmp_imm.i16 uge v3, 0x7363 ; v3 = 0xffef + brif v92, block25, block24 + +block25: + v93 = icmp_imm.i16 uge v3, 0x9f22 ; v3 = 0xffef + brif v93, block27, block26 + +block27: + v94 = icmp_imm.i16 uge v3, 0xbf41 ; v3 = 0xffef + brif v94, block29, block28 + +block29: + v95 = iadd_imm.i16 v3, 0xffff_ffff_ffff_40bf ; v3 = 0xffef + v96 = uextend.i32 v95 + br_table v96, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block28: + v97 = icmp_imm.i16 eq v3, 0xae73 ; v3 = 0xffef + brif v97, block1, block30 + +block30: + v98 = icmp_imm.i16 eq v3, 0x9f22 ; v3 = 0xffef + brif v98, block1, block1 + +block26: + v99 = icmp_imm.i16 uge v3, 0x9301 ; v3 = 0xffef + brif v99, block32, block31 + +block32: + v100 = iadd_imm.i16 v3, 0xffff_ffff_ffff_6cff ; v3 = 0xffef + v101 = uextend.i32 v100 + br_table v101, block1, [block1, block1, block1, block1, block1, block1, block1] + +block31: + v102 = icmp_imm.i16 uge v3, 0x7363 ; v3 = 0xffef + brif v102, block33, block1 + +block33: + v103 = iadd_imm.i16 v3, 0xffff_ffff_ffff_8c9d ; v3 = 0xffef + v104 = uextend.i32 v103 + br_table v104, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block24: + v105 = icmp_imm.i16 uge v3, 0x5230 ; v3 = 0xffef + brif v105, block35, block34 + +block35: + v106 = icmp_imm.i16 eq v3, 0x6720 ; v3 = 0xffef + brif v106, block1, block36 + +block36: + v107 = icmp_imm.i16 eq v3, 0x56cc ; v3 = 0xffef + brif v107, block1, block37 + +block37: + v108 = icmp_imm.i16 eq v3, 0x5230 ; v3 = 0xffef + brif v108, block1, block1 + +block34: + v109 = icmp_imm.i16 uge v3, 0x5123 ; 
v3 = 0xffef + brif v109, block39, block38 + +block39: + v110 = iadd_imm.i16 v3, 0xffff_ffff_ffff_aedd ; v3 = 0xffef + v111 = uextend.i32 v110 + br_table v111, block1, [block1, block1, block1, block1, block1, block1, block1] + +block38: + v112 = icmp_imm.i16 uge v3, 0x504d ; v3 = 0xffef + brif v112, block40, block1 + +block40: + v113 = iadd_imm.i16 v3, 0xffff_ffff_ffff_afb3 ; v3 = 0xffef + v114 = uextend.i32 v113 + br_table v114, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block2: + v115 = icmp_imm.i16 uge v3, 9983 ; v3 = 0xffef + brif v115, block42, block41 + +block42: + v116 = icmp_imm.i16 uge v3, 0x2ade ; v3 = 0xffef + brif v116, block44, block43 + +block44: + v117 = icmp_imm.i16 uge v3, 0x2f2a ; v3 = 0xffef + brif v117, block46, block45 + +block46: + v118 = icmp_imm.i16 eq v3, 0x4118 ; v3 = 0xffef + brif v118, block1, block47 + +block47: + v119 = icmp_imm.i16 eq v3, 0x3320 ; v3 = 0xffef + brif v119, block1, block48 + +block48: + v120 = icmp_imm.i16 uge v3, 0x2f2a ; v3 = 0xffef + brif v120, block49, block1 + +block49: + v121 = iadd_imm.i16 v3, 0xffff_ffff_ffff_d0d6 ; v3 = 0xffef + v122 = uextend.i32 v121 + br_table v122, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block45: + v123 = icmp_imm.i16 eq v3, 0x2f22 ; v3 = 0xffef + brif v123, block1, block50 + +block50: + v124 = icmp_imm.i16 uge v3, 0x2ade ; v3 = 0xffef + brif v124, block51, block1 + +block51: + v125 = iadd_imm.i16 v3, 0xffff_ffff_ffff_d522 ; v3 = 0xffef + v126 = uextend.i32 v125 + br_table v126, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, 
block1, block1, block1, block1] + +block43: + v127 = icmp_imm.i16 uge v3, 0x2a38 ; v3 = 0xffef + brif v127, block53, block52 + +block53: + v128 = icmp_imm.i16 eq v3, 0x2ab2 ; v3 = 0xffef + brif v128, block1, block54 + +block54: + v129 = icmp_imm.i16 uge v3, 0x2a67 ; v3 = 0xffef + brif v129, block56, block55 + +block56: + v130 = iadd_imm.i16 v3, 0xffff_ffff_ffff_d599 ; v3 = 0xffef + v131 = uextend.i32 v130 + br_table v131, block1, [block1, block1, block1, block1, block1, block1] + +block55: + v132 = icmp_imm.i16 eq v3, 0x2a38 ; v3 = 0xffef + brif v132, block1, block1 + +block52: + v133 = icmp_imm.i16 eq v3, 0x2a20 ; v3 = 0xffef + brif v133, block1, block57 + +block57: + v134 = icmp_imm.i16 eq v3, 9983 ; v3 = 0xffef + brif v134, block1, block1 + +block41: + v135 = icmp_imm.i16 uge v3, 341 ; v3 = 0xffef + brif v135, block59, block58 + +block59: + v136 = icmp_imm.i16 uge v3, 6493 ; v3 = 0xffef + brif v136, block61, block60 + +block61: + v137 = icmp_imm.i16 uge v3, 8738 ; v3 = 0xffef + brif v137, block63, block62 + +block63: + v138 = iadd_imm.i16 v3, -8738 ; v3 = 0xffef + v139 = uextend.i32 v138 + br_table v139, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block62: + v140 = icmp_imm.i16 uge v3, 8241 ; v3 = 0xffef + brif v140, block65, block64 + +block65: + v141 = iadd_imm.i16 v3, -8241 ; v3 = 0xffef + v142 = uextend.i32 v141 + br_table v142, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block64: + v143 = icmp_imm.i16 eq v3, 6493 ; v3 = 0xffef + brif v143, block1, block1 + +block60: + v144 = icmp_imm.i16 uge v3, 512 ; v3 = 0xffef + brif v144, block67, block66 + +block67: + v145 = iadd_imm.i16 v3, -512 ; v3 = 0xffef + v146 = uextend.i32 v145 + br_table v146, block1, [block1, block1, block1] + +block66: + v147 = icmp_imm.i16 uge v3, 341 ; 
v3 = 0xffef + brif v147, block68, block1 + +block68: + v148 = iadd_imm.i16 v3, -341 ; v3 = 0xffef + v149 = uextend.i32 v148 + br_table v149, block1, [block1, block1] + +block58: + v150 = icmp_imm.i16 uge v3, 148 ; v3 = 0xffef + brif v150, block70, block69 + +block70: + v151 = icmp_imm.i16 uge v3, 212 ; v3 = 0xffef + brif v151, block72, block71 + +block72: + v152 = iadd_imm.i16 v3, -212 ; v3 = 0xffef + v153 = uextend.i32 v152 + br_table v153, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block71: + v154 = icmp_imm.i16 uge v3, 148 ; v3 = 0xffef + brif v154, block73, block1 + +block73: + v155 = iadd_imm.i16 v3, -148 ; v3 = 0xffef + v156 = uextend.i32 v155 + br_table v156, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block69: + v157 = icmp_imm.i16 uge v3, 128 ; v3 = 0xffef + brif v157, block75, block74 + +block75: + v158 = iadd_imm.i16 v3, -128 ; v3 = 0xffef + v159 = uextend.i32 v158 + br_table v159, block1, [block1, block1] + +block74: + v160 = uextend.i32 v3 ; v3 = 0xffef + br_table v160, block1, [block1, block1] + +block1 cold: + v166 = func_addr.i64 fn6 + return_call_indirect sig6, v166(v161, v161, v161, v161, v161, v161, v161, v162, v163, v163, v161, v161, v164, v162, v165) ; v161 = 0xef31_de2a_2352_79ff, v161 = 0xef31_de2a_2352_79ff, v161 = 0xef31_de2a_2352_79ff, v161 = 0xef31_de2a_2352_79ff, v161 = 0xef31_de2a_2352_79ff, v161 = 0xef31_de2a_2352_79ff, v161 = 0xef31_de2a_2352_79ff, v162 = 203, v163 = 0xffef, v163 = 0xffef, v161 = 0xef31_de2a_2352_79ff, v161 = 0xef31_de2a_2352_79ff, v162 = 203, v165 = -0x1.979796p24 +} diff --git 
a/cranelift/filetests/filetests/isa/riscv64/return-call.clif b/cranelift/filetests/filetests/isa/riscv64/return-call.clif index 5a575cb1ae2f..e119025b92a4 100644 --- a/cranelift/filetests/filetests/isa/riscv64/return-call.clif +++ b/cranelift/filetests/filetests/isa/riscv64/return-call.clif @@ -693,11 +693,8 @@ block2: ; addi s8, zero, 0x7d ; addi s7, zero, 0x82 ; addi s6, zero, 0x87 -; bnez a0, 8 -; j 0xc -; auipc t6, 0 -; jalr zero, t6, 0xb4 -; block2: ; offset 0xd8 +; bnez a0, 0xb0 +; block2: ; offset 0xcc ; addi a0, zero, 0x8c ; sd a2, 0x90(sp) ; sd a1, 0x98(sp) @@ -741,7 +738,7 @@ block2: ; ld s0, 0x80(sp) ; addi sp, sp, 0x90 ; jr t0 -; block3: ; offset 0x184 +; block3: ; offset 0x178 ; ld a0, 0x10(sp) ; sd a2, 0xa0(sp) ; sd a1, 0xa8(sp) From be67ee8909aeaa971332e6151b436dc3e3b2ed39 Mon Sep 17 00:00:00 2001 From: Xinzhao Xu Date: Thu, 27 Jun 2024 01:16:46 +0800 Subject: [PATCH 05/10] component-macro: normalize the path parameter in component bindgen (#8871) --- crates/component-macro/src/bindgen.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/crates/component-macro/src/bindgen.rs b/crates/component-macro/src/bindgen.rs index 776faa7ddbf4..0be41be6f53a 100644 --- a/crates/component-macro/src/bindgen.rs +++ b/crates/component-macro/src/bindgen.rs @@ -197,7 +197,14 @@ fn parse_source( let root = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap()); let mut parse = |resolve: &mut Resolve, path: &Path| -> anyhow::Result<_> { - let (pkg, sources) = resolve.push_path(path)?; + // Try to normalize the path to make the error message more understandable when + // the path is not correct. Fallback to the original path if normalization fails + // (probably return an error somewhere else). 
+ let normalized_path = match std::fs::canonicalize(path) { + Ok(p) => p, + Err(_) => path.to_path_buf(), + }; + let (pkg, sources) = resolve.push_path(normalized_path)?; files.extend(sources); Ok(pkg) }; From 7ac3fda7f25d3e6efc53181e45db309b63465350 Mon Sep 17 00:00:00 2001 From: beetrees Date: Thu, 27 Jun 2024 01:13:24 +0100 Subject: [PATCH 06/10] Initial `f16` and `f128` support (#8860) --- cranelift/codegen/meta/src/cdsl/types.rs | 30 +- cranelift/codegen/meta/src/cdsl/typevar.rs | 32 +- cranelift/codegen/meta/src/shared/types.rs | 10 +- cranelift/codegen/src/data_value.rs | 27 +- cranelift/codegen/src/ir/immediates.rs | 539 +++++++++++++++++- cranelift/codegen/src/ir/instructions.rs | 15 +- cranelift/codegen/src/ir/types.rs | 40 +- .../filetests/runtests/bitcast-f16-f128.clif | 61 ++ cranelift/interpreter/src/step.rs | 4 +- cranelift/interpreter/src/value.rs | 10 +- cranelift/reader/src/lexer.rs | 6 +- cranelift/reader/src/parser.rs | 44 +- 12 files changed, 741 insertions(+), 77 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/bitcast-f16-f128.clif diff --git a/cranelift/codegen/meta/src/cdsl/types.rs b/cranelift/codegen/meta/src/cdsl/types.rs index 0ff486609fc2..28b23132d928 100644 --- a/cranelift/codegen/meta/src/cdsl/types.rs +++ b/cranelift/codegen/meta/src/cdsl/types.rs @@ -136,6 +136,12 @@ impl LaneType { /// Return a string containing the documentation comment for this lane type. pub fn doc(self) -> String { match self { + LaneType::Float(shared_types::Float::F16) => String::from( + "A 16-bit floating point type represented in the IEEE 754-2008 + *binary16* interchange format. This corresponds to the :c:type:`_Float16` + type in most C implementations. + WARNING: f16 support is a work-in-progress and is incomplete", + ), LaneType::Float(shared_types::Float::F32) => String::from( "A 32-bit floating point type represented in the IEEE 754-2008 *binary32* interchange format. 
This corresponds to the :c:type:`float` @@ -146,6 +152,12 @@ impl LaneType { *binary64* interchange format. This corresponds to the :c:type:`double` type in most C implementations.", ), + LaneType::Float(shared_types::Float::F128) => String::from( + "A 128-bit floating point type represented in the IEEE 754-2008 + *binary128* interchange format. This corresponds to the :c:type:`_Float128` + type in most C implementations. + WARNING: f128 support is a work-in-progress and is incomplete", + ), LaneType::Int(_) if self.lane_bits() < 32 => format!( "An integer type with {} bits. WARNING: arithmetic on {}bit integers is incomplete", @@ -168,13 +180,15 @@ impl LaneType { pub fn number(self) -> u16 { constants::LANE_BASE + match self { - LaneType::Int(shared_types::Int::I8) => 6, - LaneType::Int(shared_types::Int::I16) => 7, - LaneType::Int(shared_types::Int::I32) => 8, - LaneType::Int(shared_types::Int::I64) => 9, - LaneType::Int(shared_types::Int::I128) => 10, - LaneType::Float(shared_types::Float::F32) => 11, - LaneType::Float(shared_types::Float::F64) => 12, + LaneType::Int(shared_types::Int::I8) => 4, + LaneType::Int(shared_types::Int::I16) => 5, + LaneType::Int(shared_types::Int::I32) => 6, + LaneType::Int(shared_types::Int::I64) => 7, + LaneType::Int(shared_types::Int::I128) => 8, + LaneType::Float(shared_types::Float::F16) => 9, + LaneType::Float(shared_types::Float::F32) => 10, + LaneType::Float(shared_types::Float::F64) => 11, + LaneType::Float(shared_types::Float::F128) => 12, } } @@ -191,8 +205,10 @@ impl LaneType { pub fn float_from_bits(num_bits: u16) -> LaneType { LaneType::Float(match num_bits { + 16 => shared_types::Float::F16, 32 => shared_types::Float::F32, 64 => shared_types::Float::F64, + 128 => shared_types::Float::F128, _ => unreachable!("unexpected num bits for float"), }) } diff --git a/cranelift/codegen/meta/src/cdsl/typevar.rs b/cranelift/codegen/meta/src/cdsl/typevar.rs index da0e92cd8c5a..d0a331369a2a 100644 --- 
a/cranelift/codegen/meta/src/cdsl/typevar.rs +++ b/cranelift/codegen/meta/src/cdsl/typevar.rs @@ -9,7 +9,7 @@ use crate::cdsl::types::{LaneType, ReferenceType, ValueType}; const MAX_LANES: u16 = 256; const MAX_BITS: u16 = 128; -const MAX_FLOAT_BITS: u16 = 64; +const MAX_FLOAT_BITS: u16 = 128; /// Type variables can be used in place of concrete types when defining /// instructions. This makes the instructions *polymorphic*. @@ -159,7 +159,7 @@ impl TypeVar { "can't halve all integer types" ); assert!( - ts.floats.is_empty() || *ts.floats.iter().min().unwrap() > 32, + ts.floats.is_empty() || *ts.floats.iter().min().unwrap() > 16, "can't halve all float types" ); } @@ -179,7 +179,7 @@ impl TypeVar { "can't halve all integer types" ); assert!( - ts.floats.is_empty() || *ts.floats.iter().min().unwrap() > 32, + ts.floats.is_empty() || *ts.floats.iter().min().unwrap() > 16, "can't halve all float types" ); assert!( @@ -464,7 +464,7 @@ impl TypeSet { fn half_width(&self) -> TypeSet { let mut copy = self.clone(); copy.ints = NumSet::from_iter(self.ints.iter().filter(|&&x| x > 8).map(|&x| x / 2)); - copy.floats = NumSet::from_iter(self.floats.iter().filter(|&&x| x > 32).map(|&x| x / 2)); + copy.floats = NumSet::from_iter(self.floats.iter().filter(|&&x| x > 16).map(|&x| x / 2)); copy } @@ -643,7 +643,7 @@ impl TypeSetBuilder { range_to_set(self.simd_lanes.to_range(min_lanes..MAX_LANES, Some(1))), range_to_set(self.dynamic_simd_lanes.to_range(2..MAX_LANES, None)), range_to_set(self.ints.to_range(8..MAX_BITS, None)), - range_to_set(self.floats.to_range(32..64, None)), + range_to_set(self.floats.to_range(16..MAX_FLOAT_BITS, None)), range_to_set(self.refs.to_range(32..64, None)), ) } @@ -711,7 +711,7 @@ fn test_typevar_builder() { let type_set = TypeSetBuilder::new().floats(Interval::All).build(); assert_eq!(type_set.lanes, num_set![1]); - assert_eq!(type_set.floats, num_set![32, 64]); + assert_eq!(type_set.floats, num_set![16, 32, 64, 128]); assert!(type_set.ints.is_empty()); 
let type_set = TypeSetBuilder::new() @@ -720,7 +720,7 @@ fn test_typevar_builder() { .includes_scalars(false) .build(); assert_eq!(type_set.lanes, num_set![2, 4, 8, 16, 32, 64, 128, 256]); - assert_eq!(type_set.floats, num_set![32, 64]); + assert_eq!(type_set.floats, num_set![16, 32, 64, 128]); assert!(type_set.ints.is_empty()); let type_set = TypeSetBuilder::new() @@ -729,7 +729,7 @@ fn test_typevar_builder() { .includes_scalars(true) .build(); assert_eq!(type_set.lanes, num_set![1, 2, 4, 8, 16, 32, 64, 128, 256]); - assert_eq!(type_set.floats, num_set![32, 64]); + assert_eq!(type_set.floats, num_set![16, 32, 64, 128]); assert!(type_set.ints.is_empty()); let type_set = TypeSetBuilder::new() @@ -738,7 +738,7 @@ fn test_typevar_builder() { .includes_scalars(false) .build(); assert_eq!(type_set.lanes, num_set![2, 4, 8, 16, 32, 64, 128, 256]); - assert_eq!(type_set.floats, num_set![32, 64]); + assert_eq!(type_set.floats, num_set![16, 32, 64, 128]); assert!(type_set.dynamic_lanes.is_empty()); assert!(type_set.ints.is_empty()); @@ -753,7 +753,7 @@ fn test_typevar_builder() { num_set![2, 4, 8, 16, 32, 64, 128, 256] ); assert_eq!(type_set.ints, num_set![8, 16, 32, 64, 128]); - assert_eq!(type_set.floats, num_set![32, 64]); + assert_eq!(type_set.floats, num_set![16, 32, 64, 128]); assert_eq!(type_set.lanes, num_set![1]); let type_set = TypeSetBuilder::new() @@ -765,7 +765,7 @@ fn test_typevar_builder() { type_set.dynamic_lanes, num_set![2, 4, 8, 16, 32, 64, 128, 256] ); - assert_eq!(type_set.floats, num_set![32, 64]); + assert_eq!(type_set.floats, num_set![16, 32, 64, 128]); assert_eq!(type_set.lanes, num_set![1]); assert!(type_set.ints.is_empty()); @@ -871,12 +871,12 @@ fn test_forward_images() { TypeSetBuilder::new().ints(8..16).build() ); assert_eq!( - TypeSetBuilder::new().floats(32..32).build().half_width(), + TypeSetBuilder::new().floats(16..16).build().half_width(), empty_set ); assert_eq!( - TypeSetBuilder::new().floats(32..64).build().half_width(), - 
TypeSetBuilder::new().floats(32..32).build() + TypeSetBuilder::new().floats(32..128).build().half_width(), + TypeSetBuilder::new().floats(16..64).build() ); // Double width. @@ -893,8 +893,8 @@ fn test_forward_images() { TypeSetBuilder::new().floats(64..64).build() ); assert_eq!( - TypeSetBuilder::new().floats(32..64).build().double_width(), - TypeSetBuilder::new().floats(64..64).build() + TypeSetBuilder::new().floats(16..64).build().double_width(), + TypeSetBuilder::new().floats(32..128).build() ); } diff --git a/cranelift/codegen/meta/src/shared/types.rs b/cranelift/codegen/meta/src/shared/types.rs index 33efd108014b..3bd2e2d55b2b 100644 --- a/cranelift/codegen/meta/src/shared/types.rs +++ b/cranelift/codegen/meta/src/shared/types.rs @@ -43,8 +43,10 @@ impl Iterator for IntIterator { #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] pub(crate) enum Float { + F16 = 16, F32 = 32, F64 = 64, + F128 = 128, } /// Iterator through the variants of the Float enum. @@ -63,8 +65,10 @@ impl Iterator for FloatIterator { type Item = Float; fn next(&mut self) -> Option { let res = match self.index { - 0 => Some(Float::F32), - 1 => Some(Float::F64), + 0 => Some(Float::F16), + 1 => Some(Float::F32), + 2 => Some(Float::F64), + 3 => Some(Float::F128), _ => return None, }; self.index += 1; @@ -122,8 +126,10 @@ mod iter_tests { #[test] fn float_iter_works() { let mut float_iter = FloatIterator::new(); + assert_eq!(float_iter.next(), Some(Float::F16)); assert_eq!(float_iter.next(), Some(Float::F32)); assert_eq!(float_iter.next(), Some(Float::F64)); + assert_eq!(float_iter.next(), Some(Float::F128)); assert_eq!(float_iter.next(), None); } diff --git a/cranelift/codegen/src/data_value.rs b/cranelift/codegen/src/data_value.rs index 25eb7a401a9a..e2feb4946d2a 100644 --- a/cranelift/codegen/src/data_value.rs +++ b/cranelift/codegen/src/data_value.rs @@ -1,7 +1,8 @@ //! This module gives users to instantiate values that Cranelift understands. These values are used, //! 
for example, during interpretation and for wrapping immediates. -use crate::ir::immediates::{Ieee32, Ieee64, Offset32}; +use crate::ir::immediates::{Ieee128, Ieee16, Ieee32, Ieee64, Offset32}; use crate::ir::{types, ConstantData, Type}; +use core::cmp::Ordering; use core::fmt::{self, Display, Formatter}; /// Represent a data value. Where [Value] is an SSA reference, [DataValue] is the type + value @@ -16,8 +17,10 @@ pub enum DataValue { I32(i32), I64(i64), I128(i128), + F16(Ieee16), F32(Ieee32), F64(Ieee64), + F128(Ieee128), V128([u8; 16]), V64([u8; 8]), } @@ -36,10 +39,14 @@ impl PartialEq for DataValue { (I64(_), _) => false, (I128(l), I128(r)) => l == r, (I128(_), _) => false, + (F16(l), F16(r)) => l.partial_cmp(&r) == Some(Ordering::Equal), + (F16(_), _) => false, (F32(l), F32(r)) => l.as_f32() == r.as_f32(), (F32(_), _) => false, (F64(l), F64(r)) => l.as_f64() == r.as_f64(), (F64(_), _) => false, + (F128(l), F128(r)) => l.partial_cmp(&r) == Some(Ordering::Equal), + (F128(_), _) => false, (V128(l), V128(r)) => l == r, (V128(_), _) => false, (V64(l), V64(r)) => l == r, @@ -70,8 +77,10 @@ impl DataValue { DataValue::I32(_) => types::I32, DataValue::I64(_) => types::I64, DataValue::I128(_) => types::I128, + DataValue::F16(_) => types::F16, DataValue::F32(_) => types::F32, DataValue::F64(_) => types::F64, + DataValue::F128(_) => types::F128, DataValue::V128(_) => types::I8X16, // A default type. DataValue::V64(_) => types::I8X8, // A default type. 
} @@ -92,8 +101,10 @@ impl DataValue { DataValue::I32(i) => DataValue::I32(i.swap_bytes()), DataValue::I64(i) => DataValue::I64(i.swap_bytes()), DataValue::I128(i) => DataValue::I128(i.swap_bytes()), + DataValue::F16(f) => DataValue::F16(Ieee16::with_bits(f.bits().swap_bytes())), DataValue::F32(f) => DataValue::F32(Ieee32::with_bits(f.bits().swap_bytes())), DataValue::F64(f) => DataValue::F64(Ieee64::with_bits(f.bits().swap_bytes())), + DataValue::F128(f) => DataValue::F128(Ieee128::with_bits(f.bits().swap_bytes())), DataValue::V128(mut v) => { v.reverse(); DataValue::V128(v) @@ -135,8 +146,10 @@ impl DataValue { DataValue::I32(i) => dst[..4].copy_from_slice(&i.to_ne_bytes()[..]), DataValue::I64(i) => dst[..8].copy_from_slice(&i.to_ne_bytes()[..]), DataValue::I128(i) => dst[..16].copy_from_slice(&i.to_ne_bytes()[..]), + DataValue::F16(f) => dst[..2].copy_from_slice(&f.bits().to_ne_bytes()[..]), DataValue::F32(f) => dst[..4].copy_from_slice(&f.bits().to_ne_bytes()[..]), DataValue::F64(f) => dst[..8].copy_from_slice(&f.bits().to_ne_bytes()[..]), + DataValue::F128(f) => dst[..16].copy_from_slice(&f.bits().to_ne_bytes()[..]), DataValue::V128(v) => dst[..16].copy_from_slice(&v[..]), DataValue::V64(v) => dst[..8].copy_from_slice(&v[..]), }; @@ -172,12 +185,18 @@ impl DataValue { types::I32 => DataValue::I32(i32::from_ne_bytes(src[..4].try_into().unwrap())), types::I64 => DataValue::I64(i64::from_ne_bytes(src[..8].try_into().unwrap())), types::I128 => DataValue::I128(i128::from_ne_bytes(src[..16].try_into().unwrap())), + types::F16 => DataValue::F16(Ieee16::with_bits(u16::from_ne_bytes( + src[..2].try_into().unwrap(), + ))), types::F32 => DataValue::F32(Ieee32::with_bits(u32::from_ne_bytes( src[..4].try_into().unwrap(), ))), types::F64 => DataValue::F64(Ieee64::with_bits(u64::from_ne_bytes( src[..8].try_into().unwrap(), ))), + types::F128 => DataValue::F128(Ieee128::with_bits(u128::from_ne_bytes( + src[..16].try_into().unwrap(), + ))), _ if ty.is_vector() => { if 
ty.bytes() == 16 { DataValue::V128(src[..16].try_into().unwrap()) @@ -233,8 +252,10 @@ impl DataValue { // We need to bit compare the floats to ensure that we produce the correct values // on NaN's. The test suite expects to assert the precise bit pattern on NaN's or // works around it in the tests themselves. + (DataValue::F16(a), DataValue::F16(b)) => a.bits() == b.bits(), (DataValue::F32(a), DataValue::F32(b)) => a.bits() == b.bits(), (DataValue::F64(a), DataValue::F64(b)) => a.bits() == b.bits(), + (DataValue::F128(a), DataValue::F128(b)) => a.bits() == b.bits(), // We don't need to worry about F32x4 / F64x2 Since we compare V128 which is already the // raw bytes anyway @@ -305,8 +326,10 @@ build_conversion_impl!(i16, I16, I16); build_conversion_impl!(i32, I32, I32); build_conversion_impl!(i64, I64, I64); build_conversion_impl!(i128, I128, I128); +build_conversion_impl!(Ieee16, F16, F16); build_conversion_impl!(Ieee32, F32, F32); build_conversion_impl!(Ieee64, F64, F64); +build_conversion_impl!(Ieee128, F128, F128); build_conversion_impl!([u8; 16], V128, I8X16); build_conversion_impl!([u8; 8], V64, I8X8); impl From for DataValue { @@ -324,8 +347,10 @@ impl Display for DataValue { DataValue::I64(dv) => write!(f, "{}", dv), DataValue::I128(dv) => write!(f, "{}", dv), // The Ieee* wrappers here print the expected syntax. + DataValue::F16(dv) => write!(f, "{}", dv), DataValue::F32(dv) => write!(f, "{}", dv), DataValue::F64(dv) => write!(f, "{}", dv), + DataValue::F128(dv) => write!(f, "{}", dv), // Again, for syntax consistency, use ConstantData, which in this case displays as hex. 
DataValue::V128(dv) => write!(f, "{}", ConstantData::from(&dv[..])), DataValue::V64(dv) => write!(f, "{}", ConstantData::from(&dv[..])), diff --git a/cranelift/codegen/src/ir/immediates.rs b/cranelift/codegen/src/ir/immediates.rs index 153f331b9e79..c5f5d7bc6168 100644 --- a/cranelift/codegen/src/ir/immediates.rs +++ b/cranelift/codegen/src/ir/immediates.rs @@ -468,6 +468,21 @@ impl FromStr for Offset32 { } } +/// An IEEE binary16 immediate floating point value, represented as a u16 +/// containing the bit pattern. +/// +/// We specifically avoid using a f16 here since some architectures may silently alter floats. +/// See: +/// +/// The [PartialEq] and [Hash] implementations are over the underlying bit pattern, but +/// [PartialOrd] respects IEEE754 semantics. +/// +/// All bit patterns are allowed. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +#[repr(C)] +pub struct Ieee16(u16); + /// An IEEE binary32 immediate floating point value, represented as a u32 /// containing the bit pattern. /// @@ -498,6 +513,21 @@ pub struct Ieee32(u32); #[repr(C)] pub struct Ieee64(u64); +/// An IEEE binary128 immediate floating point value, represented as a u128 +/// containing the bit pattern. +/// +/// We specifically avoid using a f128 here since some architectures may silently alter floats. +/// See: +/// +/// The [PartialEq] and [Hash] implementations are over the underlying bit pattern, but +/// [PartialOrd] respects IEEE754 semantics. +/// +/// All bit patterns are allowed. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +#[repr(C)] +pub struct Ieee128(u128); + /// Format a floating point number in a way that is reasonably human-readable, and that can be /// converted back to binary without any rounding issues. 
The hexadecimal formatting of normal and /// subnormal numbers is compatible with C99 and the `printf "%a"` format specifier. The NaN and Inf @@ -508,13 +538,13 @@ pub struct Ieee64(u64); /// w - exponent field width in bits /// t - trailing significand field width in bits /// -fn format_float(bits: u64, w: u8, t: u8, f: &mut Formatter) -> fmt::Result { +fn format_float(bits: u128, w: u8, t: u8, f: &mut Formatter) -> fmt::Result { debug_assert!(w > 0 && w <= 16, "Invalid exponent range"); - debug_assert!(1 + w + t <= 64, "Too large IEEE format for u64"); + debug_assert!(1 + w + t <= 128, "Too large IEEE format for u128"); debug_assert!((t + w + 1).is_power_of_two(), "Unexpected IEEE format size"); - let max_e_bits = (1u64 << w) - 1; - let t_bits = bits & ((1u64 << t) - 1); // Trailing significand. + let max_e_bits = (1u128 << w) - 1; + let t_bits = bits & ((1u128 << t) - 1); // Trailing significand. let e_bits = (bits >> t) & max_e_bits; // Biased exponent. let sign_bit = (bits >> (w + t)) & 1; @@ -583,13 +613,13 @@ fn format_float(bits: u64, w: u8, t: u8, f: &mut Formatter) -> fmt::Result { /// w - exponent field width in bits /// t - trailing significand field width in bits /// -fn parse_float(s: &str, w: u8, t: u8) -> Result { +fn parse_float(s: &str, w: u8, t: u8) -> Result { debug_assert!(w > 0 && w <= 16, "Invalid exponent range"); - debug_assert!(1 + w + t <= 64, "Too large IEEE format for u64"); + debug_assert!(1 + w + t <= 128, "Too large IEEE format for u128"); debug_assert!((t + w + 1).is_power_of_two(), "Unexpected IEEE format size"); let (sign_bit, s2) = if s.starts_with('-') { - (1u64 << (t + w), &s[1..]) + (1u128 << (t + w), &s[1..]) } else if s.starts_with('+') { (0, &s[1..]) } else { @@ -597,8 +627,8 @@ fn parse_float(s: &str, w: u8, t: u8) -> Result { }; if !s2.starts_with("0x") { - let max_e_bits = ((1u64 << w) - 1) << t; - let quiet_bit = 1u64 << (t - 1); + let max_e_bits = ((1u128 << w) - 1) << t; + let quiet_bit = 1u128 << (t - 1); // The only 
decimal encoding allowed is 0. if s2 == "0.0" { @@ -615,7 +645,7 @@ fn parse_float(s: &str, w: u8, t: u8) -> Result { } if s2.starts_with("NaN:0x") { // Quiet NaN with payload. - return match u64::from_str_radix(&s2[6..], 16) { + return match u128::from_str_radix(&s2[6..], 16) { Ok(payload) if payload < quiet_bit => { Ok(sign_bit | max_e_bits | quiet_bit | payload) } @@ -624,7 +654,7 @@ fn parse_float(s: &str, w: u8, t: u8) -> Result { } if s2.starts_with("sNaN:0x") { // Signaling NaN with payload. - return match u64::from_str_radix(&s2[7..], 16) { + return match u128::from_str_radix(&s2[7..], 16) { Ok(payload) if 0 < payload && payload < quiet_bit => { Ok(sign_bit | max_e_bits | payload) } @@ -638,7 +668,7 @@ fn parse_float(s: &str, w: u8, t: u8) -> Result { let mut digits = 0u8; let mut digits_before_period: Option = None; - let mut significand = 0u64; + let mut significand = 0u128; let mut exponent = 0i32; for (idx, ch) in s3.char_indices() { @@ -665,10 +695,10 @@ fn parse_float(s: &str, w: u8, t: u8) -> Result { _ => match ch.to_digit(16) { Some(digit) => { digits += 1; - if digits > 16 { + if digits > 32 { return Err("Too many digits"); } - significand = (significand << 4) | u64::from(digit); + significand = (significand << 4) | u128::from(digit); } None => return Err("Invalid character"), }, @@ -691,10 +721,10 @@ fn parse_float(s: &str, w: u8, t: u8) -> Result { }; // Normalize the significand and exponent. - let significant_bits = (64 - significand.leading_zeros()) as u8; + let significant_bits = (128 - significand.leading_zeros()) as u8; if significant_bits > t + 1 { let adjust = significant_bits - (t + 1); - if significand & ((1u64 << adjust) - 1) != 0 { + if significand & ((1u128 << adjust) - 1) != 0 { return Err("Too many significant bits"); } // Adjust significand down. @@ -718,13 +748,13 @@ fn parse_float(s: &str, w: u8, t: u8) -> Result { Err("Magnitude too large") } else if exponent > 0 { // This is a normal number. 
- let e_bits = (exponent as u64) << t; + let e_bits = (exponent as u128) << t; Ok(sign_bit | e_bits | t_bits) } else if 1 - exponent <= i32::from(t) { // This is a subnormal number: e = 0, t = significand bits. // Renormalize significand for exponent = 1. let adjust = 1 - exponent; - if significand & ((1u64 << adjust) - 1) != 0 { + if significand & ((1u128 << adjust) - 1) != 0 { Err("Subnormal underflow") } else { significand >>= adjust; @@ -735,6 +765,84 @@ fn parse_float(s: &str, w: u8, t: u8) -> Result { } } +impl Ieee16 { + const SIGNIFICAND_BITS: u8 = 10; + const EXPONENT_BITS: u8 = 5; + + /// Create a new `Ieee16` containing the bits of `x`. + pub fn with_bits(x: u16) -> Self { + Self(x) + } + + /// Get the bitwise representation. + pub fn bits(self) -> u16 { + self.0 + } + + /// Returns true if self is positive or negative zero + pub fn is_zero(&self) -> bool { + self.partial_cmp(&Self::with_bits(0)) == Some(Ordering::Equal) + } +} + +impl PartialOrd for Ieee16 { + fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> { + // FIXME(#8312): Use Rust `f16` comparisons once `f16` support is stabilised. + let significand_mask = u16::MAX >> (Self::EXPONENT_BITS + 1); + let sign_mask = 1 << (Self::EXPONENT_BITS + Self::SIGNIFICAND_BITS); + let exponent_mask = !sign_mask & !significand_mask; + + let lhs_abs = self.bits() & !sign_mask; + let rhs_abs = rhs.bits() & !sign_mask; + if (lhs_abs & exponent_mask == exponent_mask && lhs_abs & significand_mask != 0) + || (rhs_abs & exponent_mask == exponent_mask && rhs_abs & significand_mask != 0) + { + // At least one of the floats is a NaN, so the pair is unordered. + return None; + } + if lhs_abs == 0 && rhs_abs == 0 { + // Zeros are always equal regardless of sign.
+ return Some(Ordering::Equal); + } + let lhs_positive = self.bits() & sign_mask == 0; + let rhs_positive = rhs.bits() & sign_mask == 0; + if lhs_positive != rhs_positive { + // Different signs: negative < positive + return lhs_positive.partial_cmp(&rhs_positive); + } + // Finite or infinity will order correctly with an integer comparison of the bits. + if lhs_positive { + lhs_abs.partial_cmp(&rhs_abs) + } else { + // Reverse the comparison when both floats are negative. + rhs_abs.partial_cmp(&lhs_abs) + } + } +} + +impl Display for Ieee16 { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let bits: u16 = self.0; + format_float( + u128::from(bits), + Self::EXPONENT_BITS, + Self::SIGNIFICAND_BITS, + f, + ) + } +} + +impl FromStr for Ieee16 { + type Err = &'static str; + + fn from_str(s: &str) -> Result { + match parse_float(s, Self::EXPONENT_BITS, Self::SIGNIFICAND_BITS) { + Ok(b) => Ok(Self(b as u16)), + Err(s) => Err(s), + } + } +} + impl Ieee32 { /// Create a new `Ieee32` containing the bits of `x`. pub fn with_bits(x: u32) -> Self { @@ -853,7 +961,7 @@ impl PartialOrd for Ieee32 { impl Display for Ieee32 { fn fmt(&self, f: &mut Formatter) -> fmt::Result { let bits: u32 = self.0; - format_float(u64::from(bits), 8, 23, f) + format_float(u128::from(bits), 8, 23, f) } } @@ -1071,7 +1179,7 @@ impl PartialOrd for Ieee64 { impl Display for Ieee64 { fn fmt(&self, f: &mut Formatter) -> fmt::Result { let bits: u64 = self.0; - format_float(bits, 11, 52, f) + format_float(u128::from(bits), 11, 52, f) } } @@ -1080,7 +1188,7 @@ impl FromStr for Ieee64 { fn from_str(s: &str) -> Result { match parse_float(s, 11, 52) { - Ok(b) => Ok(Self(b)), + Ok(b) => Ok(Self(b as u64)), Err(s) => Err(s), } } @@ -1176,6 +1284,79 @@ impl Not for Ieee64 { } } +impl Ieee128 { + const SIGNIFICAND_BITS: u8 = 112; + const EXPONENT_BITS: u8 = 15; + + /// Create a new `Ieee128` containing the bits of `x`. + pub fn with_bits(x: u128) -> Self { + Self(x) + } + + /// Get the bitwise representation. 
+ pub fn bits(self) -> u128 { + self.0 + } + + /// Returns true if self is positive or negative zero + pub fn is_zero(&self) -> bool { + self.partial_cmp(&Self::with_bits(0)) == Some(Ordering::Equal) + } +} + +impl PartialOrd for Ieee128 { + fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> { + // FIXME(#8312): Use Rust `f128` comparisons once `f128` support is stabilised. + let significand_mask = u128::MAX >> (Self::EXPONENT_BITS + 1); + let sign_mask = 1 << (Self::EXPONENT_BITS + Self::SIGNIFICAND_BITS); + let exponent_mask = !sign_mask & !significand_mask; + + let lhs_abs = self.bits() & !sign_mask; + let rhs_abs = rhs.bits() & !sign_mask; + if (lhs_abs & exponent_mask == exponent_mask && lhs_abs & significand_mask != 0) + || (rhs_abs & exponent_mask == exponent_mask && rhs_abs & significand_mask != 0) + { + // At least one of the floats is a NaN, so the pair is unordered. + return None; + } + if lhs_abs == 0 && rhs_abs == 0 { + // Zeros are always equal regardless of sign. + return Some(Ordering::Equal); + } + let lhs_positive = self.bits() & sign_mask == 0; + let rhs_positive = rhs.bits() & sign_mask == 0; + if lhs_positive != rhs_positive { + // Different signs: negative < positive + return lhs_positive.partial_cmp(&rhs_positive); + } + // Finite or infinity will order correctly with an integer comparison of the bits. + if lhs_positive { + lhs_abs.partial_cmp(&rhs_abs) + } else { + // Reverse the comparison when both floats are negative.
+ rhs_abs.partial_cmp(&lhs_abs) + } + } +} + +impl Display for Ieee128 { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let bits: u128 = self.0; + format_float(bits, Self::EXPONENT_BITS, Self::SIGNIFICAND_BITS, f) + } +} + +impl FromStr for Ieee128 { + type Err = &'static str; + + fn from_str(s: &str) -> Result { + match parse_float(s, Self::EXPONENT_BITS, Self::SIGNIFICAND_BITS) { + Ok(b) => Ok(Self(b)), + Err(s) => Err(s), + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -1346,6 +1527,134 @@ mod tests { parse_err::("+0x8000_0000", "Offset out of range"); } + #[test] + fn format_ieee16() { + assert_eq!(Ieee16::with_bits(0).to_string(), "0.0"); // 0.0 + assert_eq!(Ieee16::with_bits(0x8000).to_string(), "-0.0"); // -0.0 + assert_eq!(Ieee16::with_bits(0x3c00).to_string(), "0x1.000p0"); // 1.0 + assert_eq!(Ieee16::with_bits(0x3e00).to_string(), "0x1.800p0"); // 1.5 + assert_eq!(Ieee16::with_bits(0x3800).to_string(), "0x1.000p-1"); // 0.5 + assert_eq!( + Ieee16::with_bits(0x1400).to_string(), // `f16::EPSILON` + "0x1.000p-10" + ); + assert_eq!( + Ieee16::with_bits(0xfbff).to_string(), // `f16::MIN` + "-0x1.ffcp15" + ); + assert_eq!( + Ieee16::with_bits(0x7bff).to_string(), // `f16::MAX` + "0x1.ffcp15" + ); + // Smallest positive normal number. + assert_eq!( + Ieee16::with_bits(0x0400).to_string(), // `f16::MIN_POSITIVE` + "0x1.000p-14" + ); + // Subnormals. + assert_eq!( + Ieee16::with_bits(0x0200).to_string(), // `f16::MIN_POSITIVE / 2.0` + "0x0.800p-14" + ); + assert_eq!( + Ieee16::with_bits(0x0001).to_string(), // `f16::MIN_POSITIVE * f16::EPSILON` + "0x0.004p-14" + ); + assert_eq!( + Ieee16::with_bits(0x7c00).to_string(), // `f16::INFINITY` + "+Inf" + ); + assert_eq!( + Ieee16::with_bits(0xfc00).to_string(), // `f16::NEG_INFINITY` + "-Inf" + ); + assert_eq!( + Ieee16::with_bits(0x7e00).to_string(), // `f16::NAN` + "+NaN" + ); + assert_eq!( + Ieee16::with_bits(0xfe00).to_string(), // `-f16::NAN` + "-NaN" + ); + // Construct some qNaNs with payloads. 
+ assert_eq!(Ieee16(0x7e01).to_string(), "+NaN:0x1"); + assert_eq!(Ieee16(0x7f01).to_string(), "+NaN:0x101"); + // Signaling NaNs. + assert_eq!(Ieee16(0x7c01).to_string(), "+sNaN:0x1"); + assert_eq!(Ieee16(0x7d01).to_string(), "+sNaN:0x101"); + } + + #[test] + fn parse_ieee16() { + parse_ok::("0.0", "0.0"); + parse_ok::("+0.0", "0.0"); + parse_ok::("-0.0", "-0.0"); + parse_ok::("0x0", "0.0"); + parse_ok::("0x0.0", "0.0"); + parse_ok::("0x.0", "0.0"); + parse_ok::("0x0.", "0.0"); + parse_ok::("0x1", "0x1.000p0"); + parse_ok::("+0x1", "0x1.000p0"); + parse_ok::("-0x1", "-0x1.000p0"); + parse_ok::("0x10", "0x1.000p4"); + parse_ok::("0x10.0", "0x1.000p4"); + parse_err::("0.", "Float must be hexadecimal"); + parse_err::(".0", "Float must be hexadecimal"); + parse_err::("0", "Float must be hexadecimal"); + parse_err::("-0", "Float must be hexadecimal"); + parse_err::(".", "Float must be hexadecimal"); + parse_err::("", "Float must be hexadecimal"); + parse_err::("-", "Float must be hexadecimal"); + parse_err::("0x", "No digits"); + parse_err::("0x..", "Multiple radix points"); + + // Check significant bits. + parse_ok::("0x0.ffe", "0x1.ffcp-1"); + parse_ok::("0x1.ffc", "0x1.ffcp0"); + parse_ok::("0x3.ff8", "0x1.ffcp1"); + parse_ok::("0x7.ff", "0x1.ffcp2"); + parse_ok::("0xf.fe", "0x1.ffcp3"); + parse_err::("0x1.ffe", "Too many significant bits"); + parse_err::("0x1.ffc00000000000000000000000000000", "Too many digits"); + + // Exponents. + parse_ok::("0x1p3", "0x1.000p3"); + parse_ok::("0x1p-3", "0x1.000p-3"); + parse_ok::("0x1.0p3", "0x1.000p3"); + parse_ok::("0x2.0p3", "0x1.000p4"); + parse_ok::("0x1.0p15", "0x1.000p15"); + parse_ok::("0x1.0p-14", "0x1.000p-14"); + parse_ok::("0x0.1p-10", "0x1.000p-14"); + parse_err::("0x2.0p15", "Magnitude too large"); + + // Subnormals. 
+ parse_ok::("0x1.0p-15", "0x0.800p-14"); + parse_ok::("0x1.0p-24", "0x0.004p-14"); + parse_ok::("0x0.004p-14", "0x0.004p-14"); + parse_err::("0x0.102p-14", "Subnormal underflow"); + parse_err::("0x1.8p-24", "Subnormal underflow"); + parse_err::("0x1.0p-25", "Magnitude too small"); + + // NaNs and Infs. + parse_ok::("Inf", "+Inf"); + parse_ok::("+Inf", "+Inf"); + parse_ok::("-Inf", "-Inf"); + parse_ok::("NaN", "+NaN"); + parse_ok::("+NaN", "+NaN"); + parse_ok::("-NaN", "-NaN"); + parse_ok::("NaN:0x0", "+NaN"); + parse_err::("NaN:", "Float must be hexadecimal"); + parse_err::("NaN:0", "Float must be hexadecimal"); + parse_err::("NaN:0x", "Invalid NaN payload"); + parse_ok::("NaN:0x001", "+NaN:0x1"); + parse_ok::("NaN:0x101", "+NaN:0x101"); + parse_err::("NaN:0x301", "Invalid NaN payload"); + parse_ok::("sNaN:0x1", "+sNaN:0x1"); + parse_err::("sNaN:0x0", "Invalid sNaN payload"); + parse_ok::("sNaN:0x101", "+sNaN:0x101"); + parse_err::("sNaN:0x301", "Invalid sNaN payload"); + } + #[test] fn format_ieee32() { assert_eq!(Ieee32::with_float(0.0).to_string(), "0.0"); @@ -1416,7 +1725,7 @@ mod tests { parse_ok::("0x7.fffff8", "0x1.fffffep2"); parse_ok::("0xf.fffff0", "0x1.fffffep3"); parse_err::("0x1.ffffff", "Too many significant bits"); - parse_err::("0x1.fffffe0000000000", "Too many digits"); + parse_err::("0x1.fffffe00000000000000000000000000", "Too many digits"); // Exponents. parse_ok::("0x1p3", "0x1.000000p3"); @@ -1556,7 +1865,7 @@ mod tests { parse_ok::("0x7.ffffffffffffc", "0x1.fffffffffffffp2"); parse_ok::("0xf.ffffffffffff8", "0x1.fffffffffffffp3"); parse_err::("0x3.fffffffffffff", "Too many significant bits"); - parse_err::("0x001.fffffe00000000", "Too many digits"); + parse_err::("0x001.fffffe000000000000000000000000", "Too many digits"); // Exponents. 
parse_ok::("0x1p3", "0x1.0000000000000p3"); @@ -1613,4 +1922,188 @@ mod tests { }); } } + + #[test] + fn format_ieee128() { + assert_eq!( + Ieee128::with_bits(0x00000000000000000000000000000000).to_string(), // 0.0 + "0.0" + ); + assert_eq!( + Ieee128::with_bits(0x80000000000000000000000000000000).to_string(), // -0.0 + "-0.0" + ); + assert_eq!( + Ieee128::with_bits(0x3fff0000000000000000000000000000).to_string(), // 1.0 + "0x1.0000000000000000000000000000p0" + ); + assert_eq!( + Ieee128::with_bits(0x3fff8000000000000000000000000000).to_string(), // 1.5 + "0x1.8000000000000000000000000000p0" + ); + assert_eq!( + Ieee128::with_bits(0x3ffe0000000000000000000000000000).to_string(), // 0.5 + "0x1.0000000000000000000000000000p-1" + ); + assert_eq!( + Ieee128::with_bits(0x3f8f0000000000000000000000000000).to_string(), // `f128::EPSILON` + "0x1.0000000000000000000000000000p-112" + ); + assert_eq!( + Ieee128::with_bits(0xfffeffffffffffffffffffffffffffff).to_string(), // `f128::MIN` + "-0x1.ffffffffffffffffffffffffffffp16383" + ); + assert_eq!( + Ieee128::with_bits(0x7ffeffffffffffffffffffffffffffff).to_string(), // `f128::MAX` + "0x1.ffffffffffffffffffffffffffffp16383" + ); + // Smallest positive normal number. + assert_eq!( + Ieee128::with_bits(0x00010000000000000000000000000000).to_string(), // `f128::MIN_POSITIVE` + "0x1.0000000000000000000000000000p-16382" + ); + // Subnormals. 
+ assert_eq!( + Ieee128::with_bits(0x00008000000000000000000000000000).to_string(), // `f128::MIN_POSITIVE / 2.0` + "0x0.8000000000000000000000000000p-16382" + ); + assert_eq!( + Ieee128::with_bits(0x00000000000000000000000000000001).to_string(), // `f128::MIN_POSITIVE * f128::EPSILON` + "0x0.0000000000000000000000000001p-16382" + ); + assert_eq!( + Ieee128::with_bits(0x7fff0000000000000000000000000000).to_string(), // `f128::INFINITY` + "+Inf" + ); + assert_eq!( + Ieee128::with_bits(0xffff0000000000000000000000000000).to_string(), // `f128::NEG_INFINITY` + "-Inf" + ); + assert_eq!( + Ieee128::with_bits(0x7fff8000000000000000000000000000).to_string(), // `f128::NAN` + "+NaN" + ); + assert_eq!( + Ieee128::with_bits(0xffff8000000000000000000000000000).to_string(), // `-f128::NAN` + "-NaN" + ); + // Construct some qNaNs with payloads. + assert_eq!( + Ieee128(0x7fff8000000000000000000000000001).to_string(), + "+NaN:0x1" + ); + assert_eq!( + Ieee128(0x7fffc000000000000000000000000001).to_string(), + "+NaN:0x4000000000000000000000000001" + ); + // Signaling NaNs. 
+ assert_eq!( + Ieee128(0x7fff0000000000000000000000000001).to_string(), + "+sNaN:0x1" + ); + assert_eq!( + Ieee128(0x7fff4000000000000000000000000001).to_string(), + "+sNaN:0x4000000000000000000000000001" + ); + } + + #[test] + fn parse_ieee128() { + parse_ok::("0.0", "0.0"); + parse_ok::("-0.0", "-0.0"); + parse_ok::("0x0", "0.0"); + parse_ok::("0x0.0", "0.0"); + parse_ok::("0x.0", "0.0"); + parse_ok::("0x0.", "0.0"); + parse_ok::("0x1", "0x1.0000000000000000000000000000p0"); + parse_ok::("-0x1", "-0x1.0000000000000000000000000000p0"); + parse_ok::("0x10", "0x1.0000000000000000000000000000p4"); + parse_ok::("0x10.0", "0x1.0000000000000000000000000000p4"); + parse_err::("0.", "Float must be hexadecimal"); + parse_err::(".0", "Float must be hexadecimal"); + parse_err::("0", "Float must be hexadecimal"); + parse_err::("-0", "Float must be hexadecimal"); + parse_err::(".", "Float must be hexadecimal"); + parse_err::("", "Float must be hexadecimal"); + parse_err::("-", "Float must be hexadecimal"); + parse_err::("0x", "No digits"); + parse_err::("0x..", "Multiple radix points"); + + // Check significant bits. + parse_ok::( + "0x0.ffffffffffffffffffffffffffff8", + "0x1.ffffffffffffffffffffffffffffp-1", + ); + parse_ok::( + "0x1.ffffffffffffffffffffffffffff", + "0x1.ffffffffffffffffffffffffffffp0", + ); + parse_ok::( + "0x3.fffffffffffffffffffffffffffe", + "0x1.ffffffffffffffffffffffffffffp1", + ); + parse_ok::( + "0x7.fffffffffffffffffffffffffffc", + "0x1.ffffffffffffffffffffffffffffp2", + ); + parse_ok::( + "0xf.fffffffffffffffffffffffffff8", + "0x1.ffffffffffffffffffffffffffffp3", + ); + parse_err::( + "0x3.ffffffffffffffffffffffffffff", + "Too many significant bits", + ); + parse_err::("0x001.fffffe000000000000000000000000", "Too many digits"); + + // Exponents. 
+ parse_ok::("0x1p3", "0x1.0000000000000000000000000000p3"); + parse_ok::("0x1p-3", "0x1.0000000000000000000000000000p-3"); + parse_ok::("0x1.0p3", "0x1.0000000000000000000000000000p3"); + parse_ok::("0x2.0p3", "0x1.0000000000000000000000000000p4"); + parse_ok::("0x1.0p16383", "0x1.0000000000000000000000000000p16383"); + parse_ok::("0x1.0p-16382", "0x1.0000000000000000000000000000p-16382"); + parse_ok::("0x0.1p-16378", "0x1.0000000000000000000000000000p-16382"); + parse_err::("0x2.0p16383", "Magnitude too large"); + + // Subnormals. + parse_ok::("0x1.0p-16383", "0x0.8000000000000000000000000000p-16382"); + parse_ok::("0x1.0p-16494", "0x0.0000000000000000000000000001p-16382"); + parse_ok::( + "0x0.0000000000000000000000000001p-16382", + "0x0.0000000000000000000000000001p-16382", + ); + parse_err::( + "0x0.10000000000000000000000000008p-16382", + "Subnormal underflow", + ); + parse_err::("0x1.8p-16494", "Subnormal underflow"); + parse_err::("0x1.0p-16495", "Magnitude too small"); + + // NaNs and Infs. 
+ parse_ok::("Inf", "+Inf"); + parse_ok::("-Inf", "-Inf"); + parse_ok::("NaN", "+NaN"); + parse_ok::("-NaN", "-NaN"); + parse_ok::("NaN:0x0", "+NaN"); + parse_err::("NaN:", "Float must be hexadecimal"); + parse_err::("NaN:0", "Float must be hexadecimal"); + parse_err::("NaN:0x", "Invalid NaN payload"); + parse_ok::("NaN:0x000001", "+NaN:0x1"); + parse_ok::( + "NaN:0x4000000000000000000000000001", + "+NaN:0x4000000000000000000000000001", + ); + parse_err::("NaN:0x8000000000000000000000000001", "Invalid NaN payload"); + parse_ok::("sNaN:0x1", "+sNaN:0x1"); + parse_err::("sNaN:0x0", "Invalid sNaN payload"); + parse_ok::( + "sNaN:0x4000000000000000000000000001", + "+sNaN:0x4000000000000000000000000001", + ); + parse_err::( + "sNaN:0x8000000000000000000000000001", + "Invalid sNaN payload", + ); + } } diff --git a/cranelift/codegen/src/ir/instructions.rs b/cranelift/codegen/src/ir/instructions.rs index fd5208070f3f..6d483aba33ba 100644 --- a/cranelift/codegen/src/ir/instructions.rs +++ b/cranelift/codegen/src/ir/instructions.rs @@ -794,8 +794,8 @@ impl OperandConstraint { tys.ints = BitSet8::from_range(3, ctrl_type_bits as u8); } else if ctrl_type.is_float() { // The upper bound in from_range is exclusive, and we want to exclude the - // control type to construct the interval of [F32, ctrl_type). - tys.floats = BitSet8::from_range(5, ctrl_type_bits as u8); + // control type to construct the interval of [F16, ctrl_type). + tys.floats = BitSet8::from_range(4, ctrl_type_bits as u8); } else { panic!("The Narrower constraint only operates on floats or ints"); } @@ -822,10 +822,11 @@ impl OperandConstraint { tys.ints = BitSet8::from_range(lower_bound, 8); } } else if ctrl_type.is_float() { - // The interval should include all float types wider than `ctrl_type`, so we - // use `2^7` as the upper bound, and add one to the bits of `ctrl_type` to - // define the interval `(ctrl_type, F64]`. 
- tys.floats = BitSet8::from_range(ctrl_type_bits as u8 + 1, 7); + // Same as above but for `tys.floats`, as the largest float type is F128. + let lower_bound = ctrl_type_bits as u8 + 1; + if lower_bound < BitSet8::capacity() { + tys.floats = BitSet8::from_range(lower_bound, 8); + } } else { panic!("The Wider constraint only operates on floats or ints"); } @@ -964,7 +965,9 @@ mod tests { assert!(vts.contains(I64)); assert!(vts.contains(I32X4)); assert!(vts.contains(I32X4XN)); + assert!(!vts.contains(F16)); assert!(!vts.contains(F32)); + assert!(!vts.contains(F128)); assert!(vts.contains(R32)); assert!(vts.contains(R64)); assert_eq!(vts.example().to_string(), "i32"); diff --git a/cranelift/codegen/src/ir/types.rs b/cranelift/codegen/src/ir/types.rs index 928a28526609..a62fffa61d4b 100644 --- a/cranelift/codegen/src/ir/types.rs +++ b/cranelift/codegen/src/ir/types.rs @@ -14,7 +14,7 @@ use target_lexicon::{PointerWidth, Triple}; /// /// Basic integer types: `I8`, `I16`, `I32`, `I64`, and `I128`. These types are sign-agnostic. /// -/// Basic floating point types: `F32` and `F64`. IEEE single and double precision. +/// Basic floating point types: `F16`, `F32`, `F64`, and `F128`. IEEE half, single, double, and quadruple precision. /// /// SIMD vector types have power-of-two lanes, up to 256. Lanes can be any int/float type. /// @@ -56,10 +56,10 @@ impl Type { pub fn log2_lane_bits(self) -> u32 { match self.lane_type() { I8 => 3, - I16 => 4, + I16 | F16 => 4, I32 | F32 | R32 => 5, I64 | F64 | R64 => 6, - I128 => 7, + I128 | F128 => 7, _ => 0, } } @@ -68,10 +68,10 @@ impl Type { pub fn lane_bits(self) -> u32 { match self.lane_type() { I8 => 8, - I16 => 16, + I16 | F16 => 16, I32 | F32 | R32 => 32, I64 | F64 | R64 => 64, - I128 => 128, + I128 | F128 => 128, _ => 0, } } @@ -136,11 +136,11 @@ impl Type { // Replace the low 4 bits with the boolean version, preserve the high 4 bits. 
self.replace_lanes(match self.lane_type() { I8 => I8, - I16 => I16, + I16 | F16 => I16, I32 | F32 => I32, I64 | F64 => I64, R32 | R64 => panic!("Reference types are not truthy"), - I128 => I128, + I128 | F128 => I128, _ => I8, }) } @@ -161,10 +161,10 @@ impl Type { pub fn as_int(self) -> Self { self.replace_lanes(match self.lane_type() { I8 => I8, - I16 => I16, + I16 | F16 => I16, I32 | F32 | R32 => I32, I64 | F64 | R64 => I64, - I128 => I128, + I128 | F128 => I128, _ => unimplemented!(), }) } @@ -177,7 +177,9 @@ impl Type { I32 => I16, I64 => I32, I128 => I64, + F32 => F16, F64 => F32, + F128 => F64, _ => return None, })) } @@ -190,7 +192,9 @@ impl Type { I16 => I32, I32 => I64, I64 => I128, + F16 => F32, F32 => F64, + F64 => F128, _ => return None, })) } @@ -235,7 +239,7 @@ impl Type { /// Is this a scalar floating point type? pub fn is_float(self) -> bool { match self { - F32 | F64 => true, + F16 | F32 | F64 | F128 => true, _ => false, } } @@ -486,7 +490,9 @@ mod tests { assert_eq!(I64, I64.lane_type()); assert_eq!(I128, I128.lane_type()); assert_eq!(F32, F32.lane_type()); + assert_eq!(F16, F16.lane_type()); assert_eq!(F64, F64.lane_type()); + assert_eq!(F128, F128.lane_type()); assert_eq!(I32, I32X4.lane_type()); assert_eq!(F64, F64X2.lane_type()); assert_eq!(R32, R32.lane_type()); @@ -498,8 +504,10 @@ mod tests { assert_eq!(I32.lane_bits(), 32); assert_eq!(I64.lane_bits(), 64); assert_eq!(I128.lane_bits(), 128); + assert_eq!(F16.lane_bits(), 16); assert_eq!(F32.lane_bits(), 32); assert_eq!(F64.lane_bits(), 64); + assert_eq!(F128.lane_bits(), 128); assert_eq!(R32.lane_bits(), 32); assert_eq!(R64.lane_bits(), 64); } @@ -514,8 +522,10 @@ mod tests { assert_eq!(I32X4.half_width(), Some(I16X4)); assert_eq!(I64.half_width(), Some(I32)); assert_eq!(I128.half_width(), Some(I64)); - assert_eq!(F32.half_width(), None); + assert_eq!(F16.half_width(), None); + assert_eq!(F32.half_width(), Some(F16)); assert_eq!(F64.half_width(), Some(F32)); + assert_eq!(F128.half_width(), 
Some(F64)); assert_eq!(INVALID.double_width(), None); assert_eq!(I8.double_width(), Some(I16)); @@ -524,8 +534,10 @@ mod tests { assert_eq!(I32X4.double_width(), Some(I64X4)); assert_eq!(I64.double_width(), Some(I128)); assert_eq!(I128.double_width(), None); + assert_eq!(F16.double_width(), Some(F32)); assert_eq!(F32.double_width(), Some(F64)); - assert_eq!(F64.double_width(), None); + assert_eq!(F64.double_width(), Some(F128)); + assert_eq!(F128.double_width(), None); } #[test] @@ -558,14 +570,18 @@ mod tests { // Conversions to and from vectors. assert_eq!(I8.by(16).unwrap().vector_to_dynamic(), Some(I8X16XN)); assert_eq!(I16.by(8).unwrap().vector_to_dynamic(), Some(I16X8XN)); + assert_eq!(F16.by(8).unwrap().vector_to_dynamic(), Some(F16X8XN)); assert_eq!(I32.by(4).unwrap().vector_to_dynamic(), Some(I32X4XN)); assert_eq!(F32.by(4).unwrap().vector_to_dynamic(), Some(F32X4XN)); assert_eq!(F64.by(2).unwrap().vector_to_dynamic(), Some(F64X2XN)); assert_eq!(I128.by(2).unwrap().vector_to_dynamic(), Some(I128X2XN)); + assert_eq!(F128.by(2).unwrap().vector_to_dynamic(), Some(F128X2XN)); assert_eq!(I128X2XN.dynamic_to_vector(), Some(I128X2)); + assert_eq!(F16X4XN.dynamic_to_vector(), Some(F16X4)); assert_eq!(F32X4XN.dynamic_to_vector(), Some(F32X4)); assert_eq!(F64X4XN.dynamic_to_vector(), Some(F64X4)); + assert_eq!(F128X4XN.dynamic_to_vector(), Some(F128X4)); assert_eq!(I32X2XN.dynamic_to_vector(), Some(I32X2)); assert_eq!(I32X8XN.dynamic_to_vector(), Some(I32X8)); assert_eq!(I16X16XN.dynamic_to_vector(), Some(I16X16)); diff --git a/cranelift/filetests/filetests/runtests/bitcast-f16-f128.clif b/cranelift/filetests/filetests/runtests/bitcast-f16-f128.clif new file mode 100644 index 000000000000..5448ae48134f --- /dev/null +++ b/cranelift/filetests/filetests/runtests/bitcast-f16-f128.clif @@ -0,0 +1,61 @@ +test interpret + +function %bitcast_i16_f16(i16) -> f16 fast { +block0(v0: i16): + v1 = bitcast.f16 v0 + return v1 +} +; run: %bitcast_i16_f16(0x0000) == 0x0.0 +; run: 
%bitcast_i16_f16(0x0001) == 0x0.004p-14 +; run: %bitcast_i16_f16(0x7c00) == Inf +; run: %bitcast_i16_f16(0xfe00) == -NaN +; run: %bitcast_i16_f16(0x7c01) == +sNaN:0x1 +; run: %bitcast_i16_f16(0x3c00) == 0x1.0 +; run: %bitcast_i16_f16(0x47fe) == 0x7.fe +; run: %bitcast_i16_f16(0xf0e0) == -0x1.380p13 +; run: %bitcast_i16_f16(0xabcd) == -0x1.f34p-5 + +function %bitcast_i128_f128(i128) -> f128 fast { +block0(v0: i128): + v1 = bitcast.f128 v0 + return v1 +} +; run: %bitcast_i128_f128(0x00000000000000000000000000000000) == 0x0.0 +; run: %bitcast_i128_f128(0x00000000000000000000000000000001) == 0x0.0000000000000000000000000001p-16382 +; run: %bitcast_i128_f128(0x7fff0000000000000000000000000000) == Inf +; run: %bitcast_i128_f128(0xffff8000000000000000000000000000) == -NaN +; run: %bitcast_i128_f128(0x7fff0000000000000000000000000001) == +sNaN:0x1 +; run: %bitcast_i128_f128(0x3fff0000000000000000000000000000) == 0x1.0 +; run: %bitcast_i128_f128(0x3fff7fef123456789abcdefcda456987) == 0x1.7fef123456789abcdefcda456987 +; run: %bitcast_i128_f128(0xf0e0d0c0b0a090807060504030201000) == -0x1.d0c0b0a090807060504030201000p12513 +; run: %bitcast_i128_f128(0xabcdef01234567890123456789abcdef) == -0x1.ef01234567890123456789abcdefp-5170 + +function %bitcast_f16_i16(f16) -> i16 fast { +block0(v0: f16): + v1 = bitcast.i16 v0 + return v1 +} +; run: %bitcast_f16_i16(0x0.0) == 0x0000 +; run: %bitcast_f16_i16(0x0.004p-14) == 0x0001 +; run: %bitcast_f16_i16(Inf) == 0x7c00 +; run: %bitcast_f16_i16(-NaN) == 0xfe00 +; run: %bitcast_f16_i16(+sNaN:0x1) == 0x7c01 +; run: %bitcast_f16_i16(0x1.0) == 0x3c00 +; run: %bitcast_f16_i16(0x7.fe) == 0x47fe +; run: %bitcast_f16_i16(-0x1.3c0p13) == 0xf0f0 +; run: %bitcast_f16_i16(-0x1.f34p-5) == 0xabcd + +function %bitcast_f128_i128(f128) -> i128 fast { +block0(v0: f128): + v1 = bitcast.i128 v0 + return v1 +} +; run: %bitcast_f128_i128(0x0.0) == 0x00000000000000000000000000000000 +; run: %bitcast_f128_i128(0x0.0000000000000000000000000001p-16382) == 
0x00000000000000000000000000000001 +; run: %bitcast_f128_i128(Inf) == 0x7fff0000000000000000000000000000 +; run: %bitcast_f128_i128(-NaN) == 0xffff8000000000000000000000000000 +; run: %bitcast_f128_i128(+sNaN:0x1) == 0x7fff0000000000000000000000000001 +; run: %bitcast_f128_i128(0x1.0) == 0x3fff0000000000000000000000000000 +; run: %bitcast_f128_i128(0x1.7fef123456789abcdefcda456987) == 0x3fff7fef123456789abcdefcda456987 +; run: %bitcast_f128_i128(-0x1.d0c0b0a090807060504030201000p12513) == 0xf0e0d0c0b0a090807060504030201000 +; run: %bitcast_f128_i128(-0x1.ef01234567890123456789abcdefp-5170) == 0xabcdef01234567890123456789abcdef diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index bb9703bfc77e..01d0377e4559 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -1419,7 +1419,7 @@ pub(crate) fn extractlanes( let iterations = match lane_type { types::I8 => 1, - types::I16 => 2, + types::I16 | types::F16 => 2, types::I32 | types::F32 => 4, types::I64 | types::F64 => 8, _ => unimplemented!("vectors with lanes wider than 64-bits are currently unsupported."), @@ -1458,7 +1458,7 @@ fn vectorizelanes_all(x: &[DataValue], vector_type: types::Type) -> ValueResult< let lane_type = vector_type.lane_type(); let iterations = match lane_type { types::I8 => 1, - types::I16 => 2, + types::I16 | types::F16 => 2, types::I32 | types::F32 => 4, types::I64 | types::F64 => 8, _ => unimplemented!("vectors with lanes wider than 64-bits are currently unsupported."), diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs index 4689fe6a1d1f..d72ee57149de 100644 --- a/cranelift/interpreter/src/value.rs +++ b/cranelift/interpreter/src/value.rs @@ -5,7 +5,7 @@ use core::fmt::{self, Display, Formatter}; use cranelift_codegen::data_value::{DataValue, DataValueCastFailure}; -use cranelift_codegen::ir::immediates::{Ieee32, Ieee64}; +use cranelift_codegen::ir::immediates::{Ieee128, Ieee16, Ieee32, 
Ieee64}; use cranelift_codegen::ir::{types, Type}; use thiserror::Error; @@ -318,7 +318,7 @@ impl DataValueExt for DataValue { fn is_float(&self) -> bool { match self { - DataValue::F32(_) | DataValue::F64(_) => true, + DataValue::F16(_) | DataValue::F32(_) | DataValue::F64(_) | DataValue::F128(_) => true, _ => false, } } @@ -399,10 +399,14 @@ impl DataValueExt for DataValue { (val, ty) if val.ty().is_int() && ty.is_int() => { DataValue::from_integer(val.into_int_signed()?, ty)? } + (DataValue::I16(n), types::F16) => DataValue::F16(Ieee16::with_bits(n as u16)), (DataValue::I32(n), types::F32) => DataValue::F32(f32::from_bits(n as u32).into()), (DataValue::I64(n), types::F64) => DataValue::F64(f64::from_bits(n as u64).into()), + (DataValue::I128(n), types::F128) => DataValue::F128(Ieee128::with_bits(n as u128)), + (DataValue::F16(n), types::I16) => DataValue::I16(n.bits() as i16), (DataValue::F32(n), types::I32) => DataValue::I32(n.bits() as i32), (DataValue::F64(n), types::I64) => DataValue::I64(n.bits() as i64), + (DataValue::F128(n), types::I128) => DataValue::I128(n.bits() as i128), (DataValue::F32(n), types::F64) => DataValue::F64((n.as_f32() as f64).into()), (dv, t) if (t.is_int() || t.is_float()) && dv.ty() == t => dv, (dv, _) => unimplemented!("conversion: {} -> {:?}", dv.ty(), kind), @@ -502,8 +506,10 @@ impl DataValueExt for DataValue { DataValue::I32(f) => Ok(*f == 0), DataValue::I64(f) => Ok(*f == 0), DataValue::I128(f) => Ok(*f == 0), + DataValue::F16(f) => Ok(f.is_zero()), DataValue::F32(f) => Ok(f.is_zero()), DataValue::F64(f) => Ok(f.is_zero()), + DataValue::F128(f) => Ok(f.is_zero()), DataValue::V64(_) | DataValue::V128(_) => { Err(ValueError::InvalidType(ValueTypeClass::Float, self.ty())) } diff --git a/cranelift/reader/src/lexer.rs b/cranelift/reader/src/lexer.rs index 0fec4fcd2658..afdf3043bfdf 100644 --- a/cranelift/reader/src/lexer.rs +++ b/cranelift/reader/src/lexer.rs @@ -369,8 +369,10 @@ impl<'a> Lexer<'a> { "i32" => types::I32, "i64" => 
types::I64, "i128" => types::I128, + "f16" => types::F16, "f32" => types::F32, "f64" => types::F64, + "f128" => types::F128, "r32" => types::R32, "r64" => types::R64, _ => return None, @@ -626,7 +628,7 @@ mod tests { fn lex_identifiers() { let mut lex = Lexer::new( "v0 v00 vx01 block1234567890 block5234567890 v1x vx1 vxvx4 \ - function0 function i8 i32x4 f32x5", + function0 function i8 i32x4 f32x5 f16 f128", ); assert_eq!( lex.next(), @@ -647,6 +649,8 @@ mod tests { assert_eq!(lex.next(), token(Token::Type(types::I8), 1)); assert_eq!(lex.next(), token(Token::Type(types::I32X4), 1)); assert_eq!(lex.next(), token(Token::Identifier("f32x5"), 1)); + assert_eq!(lex.next(), token(Token::Type(types::F16), 1)); + assert_eq!(lex.next(), token(Token::Type(types::F128), 1)); assert_eq!(lex.next(), None); } diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index 6068af8de3e2..3783bb49e543 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -10,7 +10,9 @@ use crate::testfile::{Comment, Details, Feature, TestFile}; use cranelift_codegen::data_value::DataValue; use cranelift_codegen::entity::{EntityRef, PrimaryMap}; use cranelift_codegen::ir::entities::{AnyEntity, DynamicType, MemoryType}; -use cranelift_codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64}; +use cranelift_codegen::ir::immediates::{ + Ieee128, Ieee16, Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64, +}; use cranelift_codegen::ir::instructions::{InstructionData, InstructionFormat, VariableArgs}; use cranelift_codegen::ir::pcc::{BaseExpr, Expr, Fact}; use cranelift_codegen::ir::types; @@ -855,6 +857,18 @@ impl<'a> Parser<'a> { Ok(Imm64::new(0)) } + // Match and consume an Ieee16 immediate. + fn match_ieee16(&mut self, err_msg: &str) -> ParseResult { + if let Some(Token::Float(text)) = self.token() { + self.consume(); + // Lexer just gives us raw text that looks like a float. 
+ // Parse it as an Ieee16 to check for the right number of digits and other issues. + text.parse().map_err(|e| self.error(e)) + } else { + err!(self.loc, err_msg) + } + } + // Match and consume an Ieee32 immediate. fn match_ieee32(&mut self, err_msg: &str) -> ParseResult { if let Some(Token::Float(text)) = self.token() { @@ -879,6 +893,18 @@ impl<'a> Parser<'a> { } } + // Match and consume an Ieee128 immediate. + fn match_ieee128(&mut self, err_msg: &str) -> ParseResult { + if let Some(Token::Float(text)) = self.token() { + self.consume(); + // Lexer just gives us raw text that looks like a float. + // Parse it as an Ieee128 to check for the right number of digits and other issues. + text.parse().map_err(|e| self.error(e)) + } else { + err!(self.loc, err_msg) + } + } + // Match and consume an enumerated immediate, like one of the condition codes. fn match_enum(&mut self, err_msg: &str) -> ParseResult { if let Some(Token::Identifier(text)) = self.token() { @@ -2793,8 +2819,10 @@ impl<'a> Parser<'a> { I32 => DataValue::from(self.match_imm32("expected an i32")?), I64 => DataValue::from(Into::::into(self.match_imm64("expected an i64")?)), I128 => DataValue::from(self.match_imm128("expected an i128")?), + F16 => DataValue::from(self.match_ieee16("expected an f16")?), F32 => DataValue::from(self.match_ieee32("expected an f32")?), F64 => DataValue::from(self.match_ieee64("expected an f64")?), + F128 => DataValue::from(self.match_ieee128("expected an f128")?), _ if (ty.is_vector() || ty.is_dynamic_vector()) => { let as_vec = self.match_uimm128(ty)?.into_vec(); if as_vec.len() == 16 { @@ -3254,12 +3282,13 @@ mod tests { assert_eq!(sig.returns.len(), 0); assert_eq!(sig.call_conv, CallConv::SystemV); - let sig2 = Parser::new("(i8 uext, f32, f64, i32 sret) -> i32 sext, f64 system_v") - .parse_signature() - .unwrap(); + let sig2 = + Parser::new("(i8 uext, f16, f32, f64, f128, i32 sret) -> i32 sext, f64 system_v") + .parse_signature() + .unwrap(); assert_eq!( sig2.to_string(), 
- "(i8 uext, f32, f64, i32 sret) -> i32 sext, f64 system_v" + "(i8 uext, f16, f32, f64, f128, i32 sret) -> i32 sext, f64 system_v" ); assert_eq!(sig2.call_conv, CallConv::SystemV); @@ -3865,8 +3894,13 @@ mod tests { "1512366032949150931280199141537564007" ); assert_eq!(parse("1234567", I128).to_string(), "1234567"); + assert_eq!(parse("0x16.1", F16).to_string(), "0x1.610p4"); assert_eq!(parse("0x32.32", F32).to_string(), "0x1.919000p5"); assert_eq!(parse("0x64.64", F64).to_string(), "0x1.9190000000000p6"); + assert_eq!( + parse("0x128.128", F128).to_string(), + "0x1.2812800000000000000000000000p8" + ); assert_eq!( parse("[0 1 2 3]", I32X4).to_string(), "0x00000003000000020000000100000000" From 7bf2b8fb2f11af4b00b3692b3061defc2e204ab3 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 27 Jun 2024 10:21:25 -0700 Subject: [PATCH 07/10] Wasmtime: remove indirect-call caching. (#8881) In the original development of this feature, guided by JS AOT compilation to Wasm of a microbenchmark heavily focused on IC sites, I was seeing a ~20% speedup. However, in more recent measurements, on full programs (e.g., the Octane benchmark suite), the benefit is more like 5%. Moreover, in #8870, I attempted to switch over to a direct-mapped cache, to address a current shortcoming of the design, namely that it has a hard-capped number of callsites it can apply to (50k) to limit impact on VMContext struct size. With all of the needed checks for correctness, though, that change results in a 2.5% slowdown relative to no caching at all, so it was dropped. In the process of thinking through that, I discovered the current design on `main` incorrectly handles null funcrefs: it invokes a null code pointer, rather than loading a field from a null struct pointer. The latter was specifically designed to cause the necessary Wasm trap in #8159, but I had missed that the call to a null code pointer would not have the same effect. 
As a result, we actually can crash the VM (safely at least, but still no good vs. a proper Wasm trap!) with the feature enabled. (It's off by default still.) That could be fixed too, but at this point with the small benefit on real programs, together with the limitation on module size for full benefit, I think I'd rather opt for simplicity and remove the cache entirely. Thus, this PR removes call-indirect caching. It's not a direct revert because the original PR refactored the call-indirect generation into smaller helpers and IMHO it's a bit nicer to keep that. But otherwise all traces of the setting, code pre-scan during compilation and special conditions tracked on tables, and codegen changes are gone. --- crates/cli-flags/src/lib.rs | 17 -- crates/cranelift/src/compiler.rs | 16 +- crates/cranelift/src/func_environ.rs | 233 +----------------- crates/environ/src/compile/module_environ.rs | 110 +-------- crates/environ/src/module.rs | 13 +- crates/environ/src/tunables.rs | 10 - crates/environ/src/vmoffsets.rs | 67 +---- crates/fuzzing/src/generators/config.rs | 8 +- crates/types/src/lib.rs | 6 - crates/wasmtime/src/config.rs | 55 ----- crates/wasmtime/src/engine/serialization.rs | 12 - .../wasmtime/src/runtime/vm/gc/enabled/drc.rs | 3 - crates/wasmtime/src/runtime/vm/vmcontext.rs | 47 ---- tests/all/module.rs | 47 ---- .../indirect-call-caching-exclude-0-index.wat | 107 -------- ...rect-call-caching-exclude-table-export.wat | 106 -------- ...rect-call-caching-exclude-table-writes.wat | 138 ----------- .../indirect-call-caching-slot-limit-1.wat | 157 ------------ .../indirect-call-caching-slot-limit-2.wat | 180 -------------- tests/disas/indirect-call-caching.wat | 125 ---------- 20 files changed, 23 insertions(+), 1434 deletions(-) delete mode 100644 tests/disas/indirect-call-caching-exclude-0-index.wat delete mode 100644 tests/disas/indirect-call-caching-exclude-table-export.wat delete mode 100644 tests/disas/indirect-call-caching-exclude-table-writes.wat delete 
mode 100644 tests/disas/indirect-call-caching-slot-limit-1.wat delete mode 100644 tests/disas/indirect-call-caching-slot-limit-2.wat delete mode 100644 tests/disas/indirect-call-caching.wat diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs index 89f1472ba4a2..a45e3fffa540 100644 --- a/crates/cli-flags/src/lib.rs +++ b/crates/cli-flags/src/lib.rs @@ -115,17 +115,6 @@ wasmtime_option_group! { /// The maximum runtime size of each linear memory in the pooling /// allocator, in bytes. pub pooling_max_memory_size: Option, - - /// Whether to enable call-indirect caching. - pub cache_call_indirects: Option, - - /// The maximum call-indirect cache slot count. - /// - /// One slot is allocated per indirect callsite; if the module - /// has more indirect callsites than this limit, then the - /// first callsites in linear order in the code section, up to - /// the limit, will receive a cache slot. - pub max_call_indirect_cache_slots: Option, } enum Optimize { @@ -576,12 +565,6 @@ impl CommonOptions { if let Some(enable) = self.opts.memory_init_cow { config.memory_init_cow(enable); } - if let Some(enable) = self.opts.cache_call_indirects { - config.cache_call_indirects(enable); - } - if let Some(max) = self.opts.max_call_indirect_cache_slots { - config.max_call_indirect_cache_slots(max); - } match_feature! 
{ ["pooling-allocator" : self.opts.pooling_allocator.or(pooling_allocator_default)] diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 80075c648735..c720cf18e401 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -147,14 +147,8 @@ impl wasmtime_environ::Compiler for Compiler { context.func.collect_debug_info(); } - let mut func_env = FuncEnvironment::new( - isa, - translation, - types, - &self.tunables, - self.wmemcheck, - input.call_indirect_start, - ); + let mut func_env = + FuncEnvironment::new(isa, translation, types, &self.tunables, self.wmemcheck); // The `stack_limit` global value below is the implementation of stack // overflow checks in Wasmtime. @@ -206,11 +200,7 @@ impl wasmtime_environ::Compiler for Compiler { flags: MemFlags::trusted(), }); context.func.stack_limit = Some(stack_limit); - let FunctionBodyData { - validator, - body, - call_indirect_start: _, - } = input; + let FunctionBodyData { validator, body } = input; let mut validator = validator.into_validator(mem::take(&mut compiler.cx.validator_allocations)); compiler.cx.func_translator.translate_body( diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index 6ade601af03f..8ec068b39522 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -12,9 +12,9 @@ use cranelift_entity::{EntityRef, PrimaryMap, SecondaryMap}; use cranelift_frontend::FunctionBuilder; use cranelift_frontend::Variable; use cranelift_wasm::{ - CallIndirectSiteIndex, EngineOrModuleTypeIndex, FuncIndex, FuncTranslationState, GlobalIndex, - GlobalVariable, Heap, HeapData, HeapStyle, MemoryIndex, TableData, TableIndex, TableSize, - TargetEnvironment, TypeIndex, WasmHeapTopType, WasmHeapType, WasmResult, + EngineOrModuleTypeIndex, FuncIndex, FuncTranslationState, GlobalIndex, GlobalVariable, Heap, + HeapData, HeapStyle, MemoryIndex, TableData, TableIndex, TableSize, TargetEnvironment, 
+ TypeIndex, WasmHeapTopType, WasmHeapType, WasmResult, }; use std::mem; use wasmparser::Operator; @@ -138,9 +138,6 @@ pub struct FuncEnvironment<'module_environment> { #[cfg(feature = "wmemcheck")] wmemcheck: bool, - - /// The current call-indirect-cache index. - pub call_indirect_index: usize, } impl<'module_environment> FuncEnvironment<'module_environment> { @@ -150,7 +147,6 @@ impl<'module_environment> FuncEnvironment<'module_environment> { types: &'module_environment ModuleTypesBuilder, tunables: &'module_environment Tunables, wmemcheck: bool, - call_indirect_start: usize, ) -> Self { let builtin_functions = BuiltinFunctions::new(isa); @@ -178,8 +174,6 @@ impl<'module_environment> FuncEnvironment<'module_environment> { // functions should consume at least some fuel. fuel_consumed: 1, - call_indirect_index: call_indirect_start, - #[cfg(feature = "wmemcheck")] wmemcheck, #[cfg(feature = "wmemcheck")] @@ -1030,37 +1024,6 @@ impl<'module_environment> FuncEnvironment<'module_environment> { _ => unreachable!(), } } - - /// Allocate the next CallIndirectSiteIndex for indirect-target - /// caching purposes, if slots remain below the slot-count limit. - fn alloc_call_indirect_index(&mut self) -> Option { - // We need to check to see if we have reached the cache-slot - // limit. - // - // There are two kinds of limit behavior: - // - // 1. Our function's start-index is below the limit, but we - // hit the limit in the middle of the function. We will - // allocate slots up to the limit, then stop exactly when we - // hit it. - // - // 2. Our function is beyond the limit-count of - // `call_indirect`s. The counting prescan in - // `ModuleEnvironment` that assigns start indices will - // saturate at the limit, and this function's start index - // will be exactly the limit, so we get zero slots and exit - // immediately at every call to this function. 
- if self.call_indirect_index >= self.tunables.max_call_indirect_cache_slots { - return None; - } - - let idx = CallIndirectSiteIndex::from_u32( - u32::try_from(self.call_indirect_index) - .expect("More than 2^32 callsites; should be limited by impl limits"), - ); - self.call_indirect_index += 1; - Some(idx) - } } struct Call<'a, 'func, 'module_env> { @@ -1172,68 +1135,6 @@ impl<'a, 'func, 'module_env> Call<'a, 'func, 'module_env> { Ok(self.indirect_call_inst(sig_ref, func_addr, &real_call_args)) } - /// Get the address of the call-indirect cache slot for a given callsite. - pub fn call_indirect_cache_slot_addr( - &mut self, - call_site: CallIndirectSiteIndex, - vmctx: ir::Value, - ) -> ir::Value { - let offset = self.env.offsets.vmctx_call_indirect_cache(call_site); - self.builder.ins().iadd_imm(vmctx, i64::from(offset)) - } - - /// Load the cached index and code pointer for an indirect call. - /// - /// Generates IR like: - /// - /// ```ignore - /// v1 = load.i64 cache_ptr+0 ;; cached index (cache key) - /// v2 = load.i64 cache_ptr+8 ;; cached raw code pointer (cache value) - /// ``` - /// - /// and returns `(index, code_ptr)` (e.g. from above, `(v1, v2)`). - fn load_cached_indirect_index_and_code_ptr( - &mut self, - cache_ptr: ir::Value, - ) -> (ir::Value, ir::Value) { - let cached_index = self.builder.ins().load( - I32, - MemFlags::trusted(), - cache_ptr, - Offset32::from(self.env.offsets.ptr.vmcall_indirect_cache_index()), - ); - let cached_code_ptr = self.builder.ins().load( - self.env.pointer_type(), - MemFlags::trusted(), - cache_ptr, - Offset32::from(self.env.offsets.ptr.vmcall_indirect_cache_wasm_call()), - ); - - (cached_index, cached_code_ptr) - } - - /// Update the indirect-call cache: store a new index and raw code - /// pointer in the slot for a given callsite. 
- fn store_cached_indirect_index_and_code_ptr( - &mut self, - cache_ptr: ir::Value, - index: ir::Value, - code_ptr: ir::Value, - ) { - self.builder.ins().store( - MemFlags::trusted(), - index, - cache_ptr, - Offset32::from(self.env.offsets.ptr.vmcall_indirect_cache_index()), - ); - self.builder.ins().store( - MemFlags::trusted(), - code_ptr, - cache_ptr, - Offset32::from(self.env.offsets.ptr.vmcall_indirect_cache_wasm_call()), - ); - } - /// Do an indirect call through the given funcref table. pub fn indirect_call( mut self, @@ -1243,126 +1144,14 @@ impl<'a, 'func, 'module_env> Call<'a, 'func, 'module_env> { callee: ir::Value, call_args: &[ir::Value], ) -> WasmResult> { - // If we are performing call-indirect caching with this table, check the cache. - let caching = if self.env.tunables.cache_call_indirects { - let plan = &self.env.module.table_plans[table_index]; - // We can do the indirect call caching optimization only - // if table elements will not change (no opcodes exist - // that could write the table, and table not exported), - // and if we can use the zero-index as a sentinenl for "no - // cache entry" (initial zeroed vmctx state). - !plan.written && !plan.non_null_zero - } else { - false - }; - - // Allocate a call-indirect cache slot if caching is - // enabled. Note that this still may be `None` if we run out - // of slots. - let call_site = if caching { - self.env.alloc_call_indirect_index() - } else { - None - }; - - let (code_ptr, callee_vmctx) = if let Some(call_site) = call_site { - // Get a local copy of `vmctx`. - let vmctx = self.env.vmctx(self.builder.func); - let vmctx = self - .builder - .ins() - .global_value(self.env.pointer_type(), vmctx); - - // Get the address of the cache slot in the VMContext - // struct. 
- let slot = self.call_indirect_cache_slot_addr(call_site, vmctx); - - // Create the following CFG and generate code with the following outline: - // - // (load cached index and code pointer) - // hit = icmp eq (cached index), (callee) - // brif hit, call_block((cached code ptr), vmctx), miss_block - // - // miss_block: - // (compute actual code pointer, with checks) - // same_instance = icmp eq (callee vmctx), (vmctx) - // brif same_instance update_block, call_block((actual code ptr), (callee vmctx)) - // - // update_block: - // (store actual index and actual code pointer) - // jump call_block((actual code ptr), (callee vmctx)) - // - // call_block(code_ptr, callee_vmctx): - // (unchecked call-indirect sequence) - - // Create two-level conditionals with CFG. - let current_block = self.builder.current_block().unwrap(); - let miss_block = self.builder.create_block(); - let call_block = self.builder.create_block(); - let update_block = self.builder.create_block(); - - self.builder.insert_block_after(miss_block, current_block); - self.builder.insert_block_after(update_block, miss_block); - self.builder.insert_block_after(call_block, update_block); - self.builder.set_cold_block(miss_block); - self.builder.set_cold_block(update_block); - - // Load cached values, check for hit, branch to - // call block or miss block. - - let (cached_index, cached_code_ptr) = - self.load_cached_indirect_index_and_code_ptr(slot); - let hit = self.builder.ins().icmp(IntCC::Equal, cached_index, callee); - self.builder - .ins() - .brif(hit, call_block, &[cached_code_ptr, vmctx], miss_block, &[]); - - // Miss block: compute actual callee code pointer and - // vmctx, and update cache if same-instance. - - self.builder.seal_block(miss_block); - self.builder.switch_to_block(miss_block); - - if let Some((code_ptr, callee_vmctx)) = - self.check_and_load_code_and_callee_vmctx(table_index, ty_index, callee, true)? - { - // If callee vmctx is equal to ours, update the cache. 
- let same_instance = self.builder.ins().icmp(IntCC::Equal, callee_vmctx, vmctx); - - self.builder.ins().brif( - same_instance, - update_block, - &[], - call_block, - &[code_ptr, callee_vmctx], - ); - - self.builder.seal_block(update_block); - self.builder.switch_to_block(update_block); - - self.store_cached_indirect_index_and_code_ptr(slot, callee, code_ptr); - self.builder - .ins() - .jump(call_block, &[code_ptr, callee_vmctx]); - } - - // Call block: do the call. - - self.builder.seal_block(call_block); - self.builder.switch_to_block(call_block); - - let code_ptr = self - .builder - .append_block_param(call_block, self.env.pointer_type()); - let callee_vmctx = self - .builder - .append_block_param(call_block, self.env.pointer_type()); - (code_ptr, callee_vmctx) - } else { - match self.check_and_load_code_and_callee_vmctx(table_index, ty_index, callee, false)? { - Some(pair) => pair, - None => return Ok(None), - } + let (code_ptr, callee_vmctx) = match self.check_and_load_code_and_callee_vmctx( + table_index, + ty_index, + callee, + false, + )? 
{ + Some(pair) => pair, + None => return Ok(None), }; self.unchecked_call_impl(sig_ref, code_ptr, callee_vmctx, call_args) diff --git a/crates/environ/src/compile/module_environ.rs b/crates/environ/src/compile/module_environ.rs index 4548095b8946..f8edcc8455a7 100644 --- a/crates/environ/src/compile/module_environ.rs +++ b/crates/environ/src/compile/module_environ.rs @@ -11,7 +11,6 @@ use crate::{ }; use anyhow::{bail, Result}; use cranelift_entity::packed_option::ReservedValue; -use cranelift_entity::EntityRef; use std::borrow::Cow; use std::collections::HashMap; use std::mem; @@ -19,8 +18,8 @@ use std::path::PathBuf; use std::sync::Arc; use wasmparser::{ types::Types, CustomSectionReader, DataKind, ElementItems, ElementKind, Encoding, ExternalKind, - FuncToValidate, FunctionBody, KnownCustom, NameSectionReader, Naming, Operator, Parser, - Payload, TypeRef, Validator, ValidatorResources, + FuncToValidate, FunctionBody, KnownCustom, NameSectionReader, Naming, Parser, Payload, TypeRef, + Validator, ValidatorResources, }; use wasmtime_types::{ConstExpr, ConstOp, ModuleInternedTypeIndex, SizeOverflow, WasmHeapTopType}; @@ -116,8 +115,6 @@ pub struct FunctionBodyData<'a> { pub body: FunctionBody<'a>, /// Validator for the function body pub validator: FuncToValidate, - /// The start index for call-indirects in this body. 
- pub call_indirect_start: usize, } #[derive(Debug, Default)] @@ -436,9 +433,6 @@ impl<'a, 'data> ModuleEnvironment<'a, 'data> { // this never gets past validation ExternalKind::Tag => unreachable!(), }; - if let EntityIndex::Table(table) = entity { - self.flag_written_table(table); - } self.result .module .exports @@ -504,10 +498,6 @@ impl<'a, 'data> ModuleEnvironment<'a, 'data> { let (offset, escaped) = ConstExpr::from_wasmparser(offset_expr)?; debug_assert!(escaped.is_empty()); - if !offset.provably_nonzero_i32() { - self.flag_table_possibly_non_null_zero_element(table_index); - } - self.result .module .table_initialization @@ -547,8 +537,6 @@ impl<'a, 'data> ModuleEnvironment<'a, 'data> { self.result.code_index + self.result.module.num_imported_funcs as u32; let func_index = FuncIndex::from_u32(func_index); - let call_indirect_start = self.result.module.num_call_indirect_caches; - if self.tunables.generate_native_debuginfo { let sig_index = self.result.module.functions[func_index].signature; let sig = self.types[sig_index].unwrap_func(); @@ -567,12 +555,9 @@ impl<'a, 'data> ModuleEnvironment<'a, 'data> { params: sig.params().into(), }); } - self.prescan_code_section(&body)?; - self.result.function_body_inputs.push(FunctionBodyData { - validator, - body, - call_indirect_start, - }); + self.result + .function_body_inputs + .push(FunctionBodyData { validator, body }); self.result.code_index += 1; } @@ -684,83 +669,6 @@ and for re-adding support for interface types you can see this issue: Ok(()) } - /// Check various properties in function bodies in a "pre-pass" as - /// needed, before we actually generate code. Currently this is - /// used for: - /// - /// - Call-indirect caching: we need to know whether a table is - /// "immutable", i.e., there are opcodes that could update its - /// entries. If this is the case then the optimization isn't - /// applicable. We can check this by simply scanning all functions - /// for the relevant opcodes. 
- /// - /// We also need to know how many `call_indirect` opcodes are in - /// the whole module so that we know how large a `vmctx` struct - /// to reserve and what its layout will be; and the starting - /// index in this count for each function, so we can generate - /// its code (with accesses to its own `call_indirect` callsite - /// caches) in parallel. - fn prescan_code_section(&mut self, body: &FunctionBody<'_>) -> Result<()> { - if self.tunables.cache_call_indirects { - for op in body.get_operators_reader()? { - let op = op?; - match op { - // Check whether a table may be mutated by any - // opcode. (Note that we separately check for - // table exports so we can detect mutations from - // the outside; here we are only concerned with - // mutations by our own module's code.) - Operator::TableSet { table } - | Operator::TableFill { table } - | Operator::TableInit { table, .. } - | Operator::TableCopy { - dst_table: table, .. - } => { - // We haven't yet validated the body during - // this pre-scan, so we need to check that - // `dst_table` is in bounds. Ignore if not: - // we'll catch the error later. - let table = TableIndex::from_u32(table); - if table.index() < self.result.module.table_plans.len() { - self.flag_written_table(table); - } - } - // Count the `call_indirect` sites so we can - // assign them unique slots. - // - // We record the value of this counter as a - // start-index as we start to scan each function, - // and that function's compilation (which is - // normally a separate parallel task) counts on - // its own from that start index. - Operator::CallIndirect { .. } => { - self.result.module.num_call_indirect_caches += 1; - - // Cap the `num_call_indirect_caches` counter - // at `max_call_indirect_cache_slots` so that - // we don't allocate more than that amount of - // space in the VMContext struct. 
- // - // Note that we also separately check against - // this limit when emitting code for each - // individual slot because we may cross the - // limit in the middle of a function; also - // once we hit the limit, the start-index for - // each subsequent function will be saturated - // at the limit. - self.result.module.num_call_indirect_caches = core::cmp::min( - self.result.module.num_call_indirect_caches, - self.tunables.max_call_indirect_cache_slots, - ); - } - - _ => {} - } - } - } - Ok(()) - } - fn register_custom_section(&mut self, section: &CustomSectionReader<'data>) { match section.as_known() { KnownCustom::Name(name) => { @@ -886,14 +794,6 @@ and for re-adding support for interface types you can see this issue: self.result.module.num_escaped_funcs += 1; } - fn flag_written_table(&mut self, table: TableIndex) { - self.result.module.table_plans[table].written = true; - } - - fn flag_table_possibly_non_null_zero_element(&mut self, table: TableIndex) { - self.result.module.table_plans[table].non_null_zero = true; - } - /// Parses the Name section of the wasm module. fn name_section(&mut self, names: NameSectionReader<'data>) -> WasmResult<()> { for subsection in names { diff --git a/crates/environ/src/module.rs b/crates/environ/src/module.rs index 724d0aad5578..e983333c3326 100644 --- a/crates/environ/src/module.rs +++ b/crates/environ/src/module.rs @@ -338,24 +338,13 @@ pub struct TablePlan { pub table: Table, /// Our chosen implementation style. pub style: TableStyle, - /// Whether the table is observed to be written or possibly - /// written: either by some opcode present in the code section, or - /// by the fact that the table is exported. - pub written: bool, - /// Whether this table may have a non-null zero element. - pub non_null_zero: bool, } impl TablePlan { /// Draw up a plan for implementing a `Table`. 
pub fn for_table(table: Table, tunables: &Tunables) -> Self { let style = TableStyle::for_table(table, tunables); - Self { - table, - style, - written: false, - non_null_zero: false, - } + Self { table, style } } } diff --git a/crates/environ/src/tunables.rs b/crates/environ/src/tunables.rs index d32cd5bb72c1..c6fb03aa19b3 100644 --- a/crates/environ/src/tunables.rs +++ b/crates/environ/src/tunables.rs @@ -65,14 +65,6 @@ pub struct Tunables { /// Whether or not Wasm functions target the winch abi. pub winch_callable: bool, - - /// Whether we implement a one-entry cache at each call_indirect - /// site. - pub cache_call_indirects: bool, - - /// The maximum number of call-indirect cache slots that we will - /// allocate for one instance. - pub max_call_indirect_cache_slots: usize, } impl Tunables { @@ -125,8 +117,6 @@ impl Tunables { relaxed_simd_deterministic: false, tail_callable: false, winch_callable: false, - cache_call_indirects: false, - max_call_indirect_cache_slots: 50_000, } } diff --git a/crates/environ/src/vmoffsets.rs b/crates/environ/src/vmoffsets.rs index d913f2d62c6e..852c76d094e7 100644 --- a/crates/environ/src/vmoffsets.rs +++ b/crates/environ/src/vmoffsets.rs @@ -24,12 +24,11 @@ // owned_memories: [VMMemoryDefinition; module.num_owned_memories], // globals: [VMGlobalDefinition; module.num_defined_globals], // func_refs: [VMFuncRef; module.num_escaped_funcs], -// call_indirect_caches: [VMCallIndirectCache; module.num_call_indirect_caches], // } use crate::{ - CallIndirectSiteIndex, DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex, FuncIndex, - FuncRefIndex, GlobalIndex, MemoryIndex, Module, TableIndex, + DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex, FuncIndex, FuncRefIndex, + GlobalIndex, MemoryIndex, Module, TableIndex, }; use cranelift_entity::packed_option::ReservedValue; use wasmtime_types::OwnedMemoryIndex; @@ -74,8 +73,6 @@ pub struct VMOffsets

{ /// The number of escaped functions in the module, the size of the func_refs /// array. pub num_escaped_funcs: u32, - /// The number of call_indirect cache entries in the cache array. - pub num_call_indirect_caches: u32, // precalculated offsets of various member fields imported_functions: u32, @@ -87,7 +84,6 @@ pub struct VMOffsets

{ owned_memories: u32, defined_globals: u32, defined_func_refs: u32, - call_indirect_caches: u32, size: u32, } @@ -219,23 +215,6 @@ pub trait PtrSize { .unwrap() } - // Offsets within `VMCallIndirectCache`. - - /// Return the offset of `VMCallIndirectCache::wasm_call`. - fn vmcall_indirect_cache_wasm_call(&self) -> u8 { - 0 - } - - /// Return the offset of `VMCallIndirectCache::index`. - fn vmcall_indirect_cache_index(&self) -> u8 { - self.size() - } - - /// Return the size of a `VMCallIndirectCache`. - fn size_of_vmcall_indirect_cache(&self) -> u8 { - 2 * self.size() - } - /// Return the offset to the `magic` value in this `VMContext`. #[inline] fn vmctx_magic(&self) -> u8 { @@ -354,8 +333,6 @@ pub struct VMOffsetsFields

{ /// The number of escaped functions in the module, the size of the function /// references array. pub num_escaped_funcs: u32, - /// The number of call_indirect cache entries in the cache array. - pub num_call_indirect_caches: u32, } impl VMOffsets

{ @@ -382,7 +359,6 @@ impl VMOffsets

{ num_owned_memories, num_defined_globals: cast_to_u32(module.globals.len() - module.num_imported_globals), num_escaped_funcs: cast_to_u32(module.num_escaped_funcs), - num_call_indirect_caches: cast_to_u32(module.num_call_indirect_caches), }) } @@ -412,7 +388,6 @@ impl VMOffsets

{ num_defined_memories: _, num_owned_memories: _, num_escaped_funcs: _, - num_call_indirect_caches: _, // used as the initial size below size, @@ -441,7 +416,6 @@ impl VMOffsets

{ } calculate_sizes! { - call_indirect_caches: "call_indirect caches", defined_func_refs: "module functions", defined_globals: "defined globals", owned_memories: "owned memories", @@ -468,7 +442,6 @@ impl From> for VMOffsets

{ num_owned_memories: fields.num_owned_memories, num_defined_globals: fields.num_defined_globals, num_escaped_funcs: fields.num_escaped_funcs, - num_call_indirect_caches: fields.num_call_indirect_caches, imported_functions: 0, imported_tables: 0, imported_memories: 0, @@ -478,7 +451,6 @@ impl From> for VMOffsets

{ owned_memories: 0, defined_globals: 0, defined_func_refs: 0, - call_indirect_caches: 0, size: 0, }; @@ -533,10 +505,6 @@ impl From> for VMOffsets

{ ret.num_escaped_funcs, ret.ptr.size_of_vm_func_ref(), ), - size(call_indirect_caches) = cmul( - ret.num_call_indirect_caches, - ret.ptr.size_of_vmcall_indirect_cache(), - ), } ret.size = next_field_offset; @@ -727,12 +695,6 @@ impl VMOffsets

{ self.defined_func_refs } - /// The offset of the `call_indirect_caches` array. - #[inline] - pub fn vmctx_call_indirec_caches_begin(&self) -> u32 { - self.call_indirect_caches - } - /// Return the size of the `VMContext` allocation. #[inline] pub fn size_of_vmctx(&self) -> u32 { @@ -877,31 +839,6 @@ impl VMOffsets

{ pub fn vmctx_vmglobal_import_from(&self, index: GlobalIndex) -> u32 { self.vmctx_vmglobal_import(index) + u32::from(self.vmglobal_import_from()) } - - /// Return the offset to the `VMCallIndirectCache` for the given - /// call-indirect site. - #[inline] - pub fn vmctx_call_indirect_cache(&self, call_site: CallIndirectSiteIndex) -> u32 { - assert!(call_site.as_u32() < self.num_call_indirect_caches); - self.vmctx_call_indirec_caches_begin() - + call_site.as_u32() * u32::from(self.ptr.size_of_vmcall_indirect_cache()) - } - - /// Return the offset to the `wasm_call` field in `*const - /// VMCallIndirectCache` with call-site ID `call_site`. - #[inline] - pub fn vmctx_call_indirect_cache_wasm_call(&self, call_site: CallIndirectSiteIndex) -> u32 { - self.vmctx_call_indirect_cache(call_site) - + u32::from(self.ptr.vmcall_indirect_cache_wasm_call()) - } - - /// Return the offset to the `index` field in `*const - /// VMCallIndirectCache` with call-site ID `call_site`. - #[inline] - pub fn vmctx_call_indirect_cache_index(&self, call_site: CallIndirectSiteIndex) -> u32 { - self.vmctx_call_indirect_cache(call_site) - + u32::from(self.ptr.vmcall_indirect_cache_index()) - } } /// Offsets for `VMDrcHeader`. 
diff --git a/crates/fuzzing/src/generators/config.rs b/crates/fuzzing/src/generators/config.rs index 2946137004df..d6fd64824af2 100644 --- a/crates/fuzzing/src/generators/config.rs +++ b/crates/fuzzing/src/generators/config.rs @@ -180,9 +180,7 @@ impl Config { self.wasmtime.memory_guaranteed_dense_image_size, )) .allocation_strategy(self.wasmtime.strategy.to_wasmtime()) - .generate_address_map(self.wasmtime.generate_address_map) - .cache_call_indirects(self.wasmtime.cache_call_indirects) - .max_call_indirect_cache_slots(self.wasmtime.max_call_indirect_cache_slots); + .generate_address_map(self.wasmtime.generate_address_map); if !self.module_config.config.simd_enabled { cfg.wasm_relaxed_simd(false); @@ -501,10 +499,6 @@ pub struct WasmtimeConfig { native_unwind_info: bool, /// Configuration for the compiler to use. pub compiler_strategy: CompilerStrategy, - /// Whether we enable indirect-call caching. - cache_call_indirects: bool, - /// The maximum number of call-indirect cache slots. - max_call_indirect_cache_slots: usize, table_lazy_init: bool, /// Whether or not fuzzing should enable PCC. diff --git a/crates/types/src/lib.rs b/crates/types/src/lib.rs index 2fe7cb056c1a..dd9946fb2ac4 100644 --- a/crates/types/src/lib.rs +++ b/crates/types/src/lib.rs @@ -1146,12 +1146,6 @@ entity_impl!(TagIndex); pub struct StaticModuleIndex(u32); entity_impl!(StaticModuleIndex); -/// Index of a `call_indirect` instruction in a module, used for -/// caching that callsite's target in the VMContext. -#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Serialize, Deserialize)] -pub struct CallIndirectSiteIndex(u32); -entity_impl!(CallIndirectSiteIndex); - /// An index of an entity. 
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Serialize, Deserialize)] pub enum EntityIndex { diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 2e678f65a0a8..3c4e49b00716 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -151,8 +151,6 @@ struct ConfigTunables { debug_adapter_modules: Option, relaxed_simd_deterministic: Option, tail_callable: Option, - cache_call_indirects: Option, - max_call_indirect_cache_slots: Option, } /// User-provided configuration for the compiler. @@ -986,57 +984,6 @@ impl Config { self } - /// Configures whether we enable the "indirect call cache" optimization. - /// - /// This feature adds, for each `call_indirect` instruction in a - /// Wasm module (i.e., a function-pointer call in guest code), a - /// one-entry cache that speeds up the translation from a table - /// index to the actual machine code. By default, the VM's - /// implementation of this translation requires several - /// indirections and checks (table bounds-check, function - /// signature-check, table lazy-initialization logic). The intent - /// of this feature is to speed up indirect calls substantially - /// when they are repeated frequently in hot code. - /// - /// While it accelerates repeated calls, this feature has the - /// potential to slow down instantiation slightly, because it adds - /// additional state (the cache storage -- usually 16 bytes per - /// `call_indirect` instruction for each instance) that has to be - /// initialized. In practice, we have not seen - /// measurable/statistically-significant impact from this, though. - /// - /// Until we have further experience with this feature, it will - /// remain off: it is `false` by default. - pub fn cache_call_indirects(&mut self, enable: bool) -> &mut Self { - self.tunables.cache_call_indirects = Some(enable); - self - } - - /// Configures the "indirect call cache" maximum capacity. 
- /// - /// If the [`Config::cache_call_indirects`] configuration option - /// is enabled, the engine allocates "cache slots" directly in its - /// per-instance state struct for each `call_indirect` in the - /// module's code. We place a limit on this count in order to - /// avoid inflating the state too much with very large modules. If - /// a module exceeds the limit, the first `max` indirect - /// call-sites will still have a one-entry cache, but any indirect - /// call-sites beyond the limit (in linear order in the module's - /// code section) do not participate in the caching, as if the - /// option were turned off. - /// - /// There is also an internal hard cap to this limit: - /// configurations with `max` beyond `50_000` will effectively cap - /// the limit at `50_000`. This is so that instance state does not - /// become unreasonably large. - /// - /// This is `50_000` by default. - pub fn max_call_indirect_cache_slots(&mut self, max: usize) -> &mut Self { - const HARD_CAP: usize = 50_000; // See doc-comment above. - self.tunables.max_call_indirect_cache_slots = Some(core::cmp::min(max, HARD_CAP)); - self - } - /// Configures which compilation strategy will be used for wasm modules. /// /// This method can be used to configure which compiler is used for wasm @@ -1849,8 +1796,6 @@ impl Config { debug_adapter_modules relaxed_simd_deterministic tail_callable - cache_call_indirects - max_call_indirect_cache_slots } // If we're going to compile with winch, we must use the winch calling convention. diff --git a/crates/wasmtime/src/engine/serialization.rs b/crates/wasmtime/src/engine/serialization.rs index 5da038859808..f350f9286821 100644 --- a/crates/wasmtime/src/engine/serialization.rs +++ b/crates/wasmtime/src/engine/serialization.rs @@ -360,8 +360,6 @@ impl Metadata<'_> { relaxed_simd_deterministic, tail_callable, winch_callable, - cache_call_indirects, - max_call_indirect_cache_slots, // This doesn't affect compilation, it's just a runtime setting. 
dynamic_memory_growth_reserve: _, @@ -429,16 +427,6 @@ impl Metadata<'_> { other.winch_callable, "Winch calling convention", )?; - Self::check_bool( - cache_call_indirects, - other.cache_call_indirects, - "caching of call-indirect targets", - )?; - Self::check_int( - max_call_indirect_cache_slots, - other.max_call_indirect_cache_slots, - "maximum slot count for caching of call-indirect targets", - )?; Ok(()) } diff --git a/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs b/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs index ce49c34743d5..a0bbcb08fe9c 100644 --- a/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs +++ b/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs @@ -900,7 +900,6 @@ mod tests { num_owned_memories: 0, num_defined_globals: 0, num_escaped_funcs: 0, - num_call_indirect_caches: 0, }); assert_eq!( @@ -929,7 +928,6 @@ mod tests { num_owned_memories: 0, num_defined_globals: 0, num_escaped_funcs: 0, - num_call_indirect_caches: 0, }); assert_eq!( offsets.vm_gc_ref_activation_table_next() as usize, @@ -957,7 +955,6 @@ mod tests { num_owned_memories: 0, num_defined_globals: 0, num_escaped_funcs: 0, - num_call_indirect_caches: 0, }); assert_eq!( offsets.vm_gc_ref_activation_table_end() as usize, diff --git a/crates/wasmtime/src/runtime/vm/vmcontext.rs b/crates/wasmtime/src/runtime/vm/vmcontext.rs index d9da984eb987..073f6d80adac 100644 --- a/crates/wasmtime/src/runtime/vm/vmcontext.rs +++ b/crates/wasmtime/src/runtime/vm/vmcontext.rs @@ -871,53 +871,6 @@ mod test_vmruntime_limits { } } -/// One call-indirect cache entry. -/// -/// It consists of the last observed function-pointer index, and the -/// direct code pointer (with the same vmctx, i.e., in the same -/// instance) to call if this index matches. -#[derive(Debug, Clone)] -#[allow(dead_code)] // not actually used in Rust runtime code; only in generated code. -#[repr(C)] -pub struct VMCallIndirectCache { - /// Function pointer for this funcref if being called via the Wasm - /// calling convention. 
- pub wasm_call: NonNull, - - /// Table index corresponding to the above function pointer. - pub index: usize, - // If more elements are added here, remember to add offset_of tests below! -} - -unsafe impl Send for VMCallIndirectCache {} -unsafe impl Sync for VMCallIndirectCache {} - -#[cfg(test)] -mod test_vm_call_indirect_cache { - use super::VMCallIndirectCache; - use core::mem::offset_of; - use std::mem::size_of; - use wasmtime_environ::{Module, PtrSize, VMOffsets}; - - #[test] - fn check_vm_call_indirect_cache_offsets() { - let module = Module::new(); - let offsets = VMOffsets::new(size_of::<*mut u8>() as u8, &module); - assert_eq!( - size_of::(), - usize::from(offsets.ptr.size_of_vmcall_indirect_cache()) - ); - assert_eq!( - offset_of!(VMCallIndirectCache, wasm_call), - usize::from(offsets.ptr.vmcall_indirect_cache_wasm_call()) - ); - assert_eq!( - offset_of!(VMCallIndirectCache, index), - usize::from(offsets.ptr.vmcall_indirect_cache_index()) - ); - } -} - /// The VM "context", which is pointed to by the `vmctx` arg in Cranelift. /// This has information about globals, memories, tables, and other runtime /// state associated with the current instance. 
diff --git a/tests/all/module.rs b/tests/all/module.rs index eb658f69fec1..94a56acf56cb 100644 --- a/tests/all/module.rs +++ b/tests/all/module.rs @@ -251,53 +251,6 @@ fn compile_a_component() -> Result<()> { Ok(()) } -#[test] -fn call_indirect_caching_and_memory64() -> Result<()> { - let mut config = Config::new(); - config.wasm_memory64(true); - config.cache_call_indirects(true); - let engine = Engine::new(&config)?; - Module::new( - &engine, - "(module - (memory i64 1) - (func (param i64) (result i32) - local.get 0 - i32.load offset=0x100000000 - ) - )", - )?; - Ok(()) -} - -#[test] -fn call_indirect_caching_out_of_bounds_table_index() -> Result<()> { - let mut config = Config::new(); - config.cache_call_indirects(true); - let engine = Engine::new(&config)?; - // Test an out-of-bounds table index: this is exposed to the prescan - // that call-indirect caching must perform during compilation, so we - // need to make sure the error is properly handled by the validation - // that comes later. - let err = Module::new( - &engine, - "(module - (func (param i32) - ref.null func - local.get 0 - table.set 32 ;; out-of-bounds table index - ) - )", - ) - .unwrap_err(); - let err = format!("{err:?}"); - assert!( - err.contains("table index out of bounds"), - "bad error: {err}" - ); - Ok(()) -} - #[test] fn tail_call_defaults() -> Result<()> { let wasm_with_tail_calls = "(module (func $a return_call $a))"; diff --git a/tests/disas/indirect-call-caching-exclude-0-index.wat b/tests/disas/indirect-call-caching-exclude-0-index.wat deleted file mode 100644 index c1446fb231b9..000000000000 --- a/tests/disas/indirect-call-caching-exclude-0-index.wat +++ /dev/null @@ -1,107 +0,0 @@ -;;! target = "x86_64" -;;! 
flags = [ "-Ocache-call-indirects=y" ] - -;; This test checks that we do *not* get the indirect-call caching optimization -;; when it must not be used: in this case, because there is a non-null element -;; at index 0 in the table (`0` is the default value for vmctx struct initialization -;; and so is our "not yet cached" sentinel). - -(module - (table 10 10 funcref) - - (func $f1 (result i32) i32.const 1) - (func $f2 (result i32) i32.const 2) - (func $f3 (result i32) i32.const 3) - - (func (export "call_it") (param i32) (result i32) - local.get 0 - call_indirect (result i32)) - - (elem (i32.const 0) func $f1 $f2 $f3)) -;; function u0:0(i64 vmctx, i64) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64): -;; @003f v3 = iconst.i32 1 -;; @0041 jump block1(v3) ; v3 = 1 -;; -;; block1(v2: i32): -;; @0041 return v2 -;; } -;; -;; function u0:1(i64 vmctx, i64) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64): -;; @0044 v3 = iconst.i32 2 -;; @0046 jump block1(v3) ; v3 = 2 -;; -;; block1(v2: i32): -;; @0046 return v2 -;; } -;; -;; function u0:2(i64 vmctx, i64) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64): -;; @0049 v3 = iconst.i32 3 -;; @004b jump block1(v3) ; v3 = 3 -;; -;; block1(v2: i32): -;; @004b return v2 -;; } -;; -;; function u0:3(i64 vmctx, i64, i32) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; gv3 = vmctx -;; gv4 = load.i64 notrap aligned readonly gv3+88 -;; sig0 = (i64 vmctx, i64) -> i32 tail -;; sig1 = (i64 vmctx, i32 uext, i32 uext) -> i64 system_v -;; fn0 = colocated u1:9 sig1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, 
v1: i64, v2: i32): -;; @0050 v4 = iconst.i32 10 -;; @0050 v5 = icmp uge v2, v4 ; v4 = 10 -;; @0050 v6 = uextend.i64 v2 -;; @0050 v7 = global_value.i64 gv4 -;; @0050 v8 = ishl_imm v6, 3 -;; @0050 v9 = iadd v7, v8 -;; @0050 v10 = iconst.i64 0 -;; @0050 v11 = select_spectre_guard v5, v10, v9 ; v10 = 0 -;; @0050 v12 = load.i64 table_oob aligned table v11 -;; @0050 v13 = band_imm v12, -2 -;; @0050 brif v12, block3(v13), block2 -;; -;; block2 cold: -;; @0050 v15 = iconst.i32 0 -;; @0050 v16 = global_value.i64 gv3 -;; @0050 v17 = call fn0(v16, v15, v2) ; v15 = 0 -;; @0050 jump block3(v17) -;; -;; block3(v14: i64): -;; @0050 v18 = global_value.i64 gv3 -;; @0050 v19 = load.i64 notrap aligned readonly v18+80 -;; @0050 v20 = load.i32 notrap aligned readonly v19 -;; @0050 v21 = load.i32 icall_null aligned readonly v14+16 -;; @0050 v22 = icmp eq v21, v20 -;; @0050 trapz v22, bad_sig -;; @0050 v23 = load.i64 notrap aligned readonly v14+8 -;; @0050 v24 = load.i64 notrap aligned readonly v14+24 -;; @0050 v25 = call_indirect sig0, v23(v24, v0) -;; @0053 jump block1(v25) -;; -;; block1(v3: i32): -;; @0053 return v3 -;; } diff --git a/tests/disas/indirect-call-caching-exclude-table-export.wat b/tests/disas/indirect-call-caching-exclude-table-export.wat deleted file mode 100644 index 1e7ee89f53c6..000000000000 --- a/tests/disas/indirect-call-caching-exclude-table-export.wat +++ /dev/null @@ -1,106 +0,0 @@ -;;! target = "x86_64" -;;! flags = [ "-Ocache-call-indirects=y" ] - -;; This test checks that we do *not* get the indirect-call caching optimization -;; when it must not be used: in this case, because the table is exported so -;; could be mutated (invalidating the cache, which we would not detect). 
- -(module - (table (export "t") 10 10 funcref) - - (func $f1 (result i32) i32.const 1) - (func $f2 (result i32) i32.const 2) - (func $f3 (result i32) i32.const 3) - - (func (export "call_it") (param i32) (result i32) - local.get 0 - call_indirect (result i32)) - - (elem (i32.const 1) func $f1 $f2 $f3)) -;; function u0:0(i64 vmctx, i64) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64): -;; @0043 v3 = iconst.i32 1 -;; @0045 jump block1(v3) ; v3 = 1 -;; -;; block1(v2: i32): -;; @0045 return v2 -;; } -;; -;; function u0:1(i64 vmctx, i64) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64): -;; @0048 v3 = iconst.i32 2 -;; @004a jump block1(v3) ; v3 = 2 -;; -;; block1(v2: i32): -;; @004a return v2 -;; } -;; -;; function u0:2(i64 vmctx, i64) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64): -;; @004d v3 = iconst.i32 3 -;; @004f jump block1(v3) ; v3 = 3 -;; -;; block1(v2: i32): -;; @004f return v2 -;; } -;; -;; function u0:3(i64 vmctx, i64, i32) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; gv3 = vmctx -;; gv4 = load.i64 notrap aligned readonly gv3+88 -;; sig0 = (i64 vmctx, i64) -> i32 tail -;; sig1 = (i64 vmctx, i32 uext, i32 uext) -> i64 system_v -;; fn0 = colocated u1:9 sig1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64, v2: i32): -;; @0054 v4 = iconst.i32 10 -;; @0054 v5 = icmp uge v2, v4 ; v4 = 10 -;; @0054 v6 = uextend.i64 v2 -;; @0054 v7 = global_value.i64 gv4 -;; @0054 v8 = ishl_imm v6, 3 -;; @0054 v9 = iadd v7, v8 -;; @0054 v10 = iconst.i64 0 -;; @0054 v11 = select_spectre_guard v5, v10, v9 ; v10 = 0 -;; @0054 v12 = 
load.i64 table_oob aligned table v11 -;; @0054 v13 = band_imm v12, -2 -;; @0054 brif v12, block3(v13), block2 -;; -;; block2 cold: -;; @0054 v15 = iconst.i32 0 -;; @0054 v16 = global_value.i64 gv3 -;; @0054 v17 = call fn0(v16, v15, v2) ; v15 = 0 -;; @0054 jump block3(v17) -;; -;; block3(v14: i64): -;; @0054 v18 = global_value.i64 gv3 -;; @0054 v19 = load.i64 notrap aligned readonly v18+80 -;; @0054 v20 = load.i32 notrap aligned readonly v19 -;; @0054 v21 = load.i32 icall_null aligned readonly v14+16 -;; @0054 v22 = icmp eq v21, v20 -;; @0054 trapz v22, bad_sig -;; @0054 v23 = load.i64 notrap aligned readonly v14+8 -;; @0054 v24 = load.i64 notrap aligned readonly v14+24 -;; @0054 v25 = call_indirect sig0, v23(v24, v0) -;; @0057 jump block1(v25) -;; -;; block1(v3: i32): -;; @0057 return v3 -;; } diff --git a/tests/disas/indirect-call-caching-exclude-table-writes.wat b/tests/disas/indirect-call-caching-exclude-table-writes.wat deleted file mode 100644 index 387da41b742c..000000000000 --- a/tests/disas/indirect-call-caching-exclude-table-writes.wat +++ /dev/null @@ -1,138 +0,0 @@ -;;! target = "x86_64" -;;! flags = [ "-Ocache-call-indirects=y" ] - -;; This test checks that we do *not* get the indirect-call caching optimization -;; when it must not be used: in this case, because the table is updated with a -;; `table.set` instruction (invalidating the cache, which we would not detect). 
- -(module - (table 10 10 funcref) - - (func $f1 (result i32) i32.const 1) - (func $f2 (result i32) i32.const 2) - (func $f3 (result i32) i32.const 3) - - (func (export "call_it") (param i32) (result i32) - local.get 0 - call_indirect (result i32)) - - (func (export "update_table") - i32.const 1 - ref.null func - table.set) - - (elem (i32.const 1) func $f1 $f2 $f3)) -;; function u0:0(i64 vmctx, i64) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64): -;; @0052 v3 = iconst.i32 1 -;; @0054 jump block1(v3) ; v3 = 1 -;; -;; block1(v2: i32): -;; @0054 return v2 -;; } -;; -;; function u0:1(i64 vmctx, i64) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64): -;; @0057 v3 = iconst.i32 2 -;; @0059 jump block1(v3) ; v3 = 2 -;; -;; block1(v2: i32): -;; @0059 return v2 -;; } -;; -;; function u0:2(i64 vmctx, i64) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64): -;; @005c v3 = iconst.i32 3 -;; @005e jump block1(v3) ; v3 = 3 -;; -;; block1(v2: i32): -;; @005e return v2 -;; } -;; -;; function u0:3(i64 vmctx, i64, i32) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; gv3 = vmctx -;; gv4 = load.i64 notrap aligned readonly gv3+88 -;; sig0 = (i64 vmctx, i64) -> i32 tail -;; sig1 = (i64 vmctx, i32 uext, i32 uext) -> i64 system_v -;; fn0 = colocated u1:9 sig1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64, v2: i32): -;; @0063 v4 = iconst.i32 10 -;; @0063 v5 = icmp uge v2, v4 ; v4 = 10 -;; @0063 v6 = uextend.i64 v2 -;; @0063 v7 = global_value.i64 gv4 -;; @0063 v8 = ishl_imm v6, 3 -;; @0063 v9 = iadd v7, v8 -;; @0063 v10 = iconst.i64 0 -;; @0063 v11 = 
select_spectre_guard v5, v10, v9 ; v10 = 0 -;; @0063 v12 = load.i64 table_oob aligned table v11 -;; @0063 v13 = band_imm v12, -2 -;; @0063 brif v12, block3(v13), block2 -;; -;; block2 cold: -;; @0063 v15 = iconst.i32 0 -;; @0063 v16 = global_value.i64 gv3 -;; @0063 v17 = call fn0(v16, v15, v2) ; v15 = 0 -;; @0063 jump block3(v17) -;; -;; block3(v14: i64): -;; @0063 v18 = global_value.i64 gv3 -;; @0063 v19 = load.i64 notrap aligned readonly v18+80 -;; @0063 v20 = load.i32 notrap aligned readonly v19 -;; @0063 v21 = load.i32 icall_null aligned readonly v14+16 -;; @0063 v22 = icmp eq v21, v20 -;; @0063 trapz v22, bad_sig -;; @0063 v23 = load.i64 notrap aligned readonly v14+8 -;; @0063 v24 = load.i64 notrap aligned readonly v14+24 -;; @0063 v25 = call_indirect sig0, v23(v24, v0) -;; @0066 jump block1(v25) -;; -;; block1(v3: i32): -;; @0066 return v3 -;; } -;; -;; function u0:4(i64 vmctx, i64) tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; gv3 = vmctx -;; gv4 = load.i64 notrap aligned readonly gv3+88 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64): -;; @0069 v2 = iconst.i32 1 -;; @006b v3 = iconst.i64 0 -;; @006d v4 = iconst.i32 10 -;; @006d v5 = icmp uge v2, v4 ; v2 = 1, v4 = 10 -;; @006d v6 = uextend.i64 v2 ; v2 = 1 -;; @006d v7 = global_value.i64 gv4 -;; @006d v8 = ishl_imm v6, 3 -;; @006d v9 = iadd v7, v8 -;; @006d v10 = iconst.i64 0 -;; @006d v11 = select_spectre_guard v5, v10, v9 ; v10 = 0 -;; @006d v12 = bor_imm v3, 1 ; v3 = 0 -;; @006d store table_oob aligned table v12, v11 -;; @006f jump block1 -;; -;; block1: -;; @006f return -;; } diff --git a/tests/disas/indirect-call-caching-slot-limit-1.wat b/tests/disas/indirect-call-caching-slot-limit-1.wat deleted file mode 100644 index cedf4aaf4e6a..000000000000 --- a/tests/disas/indirect-call-caching-slot-limit-1.wat +++ /dev/null @@ -1,157 +0,0 @@ -;;! target = "x86_64" -;;! 
flags = [ "-Ocache-call-indirects=y", "-Omax-call-indirect-cache-slots=2" ] - -;; This test checks that we properly bound the number of call-indirect -;; cache slots. The first case (here) is when the limit falls in the -;; middle of a function. We set the limit to 2 above; we have 3 -;; `call_indirect`s below; the last should not have caching code. -;; -;; In particular, below we see the cache probe sequence in block0 -;; (first) and block3 (second); but the third call, starting in -;; block8, has no cache slot access and just performs the checks -;; unconditionally. - -(module - (table 10 10 funcref) - - (func (export "call_it") (param i32) (result i32) - local.get 0 - call_indirect (result i32) - call_indirect (result i32) - call_indirect (result i32))) - -;; function u0:0(i64 vmctx, i64, i32) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; gv3 = vmctx -;; gv4 = load.i64 notrap aligned readonly gv3+88 -;; sig0 = (i64 vmctx, i64) -> i32 tail -;; sig1 = (i64 vmctx, i32 uext, i32 uext) -> i64 system_v -;; fn0 = colocated u1:9 sig1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64, v2: i32): -;; @0033 v4 = global_value.i64 gv3 -;; @0033 v5 = iadd_imm v4, 144 -;; @0033 v6 = load.i32 notrap aligned v5+8 -;; @0033 v7 = load.i64 notrap aligned v5 -;; @0033 v8 = icmp eq v6, v2 -;; @0033 brif v8, block3(v7, v4), block2 -;; -;; block2 cold: -;; @0033 v9 = iconst.i32 10 -;; @0033 v10 = icmp.i32 uge v2, v9 ; v9 = 10 -;; @0033 v11 = uextend.i64 v2 -;; @0033 v12 = global_value.i64 gv4 -;; @0033 v13 = ishl_imm v11, 3 -;; @0033 v14 = iadd v12, v13 -;; @0033 v15 = iconst.i64 0 -;; @0033 v16 = select_spectre_guard v10, v15, v14 ; v15 = 0 -;; @0033 v17 = load.i64 table_oob aligned table v16 -;; @0033 v18 = band_imm v17, -2 -;; @0033 brif v17, block6(v18), block5 -;; -;; block4 cold: -;; @0033 store.i32 notrap aligned v2, v5+8 -;; @0033 store.i64 notrap aligned v28, v5 -;; @0033 jump block3(v28, v29) -;; 
-;; block3(v31: i64, v32: i64): -;; @0033 v33 = call_indirect sig0, v31(v32, v0) -;; @0036 v34 = global_value.i64 gv3 -;; @0036 v35 = iadd_imm v34, 160 -;; @0036 v36 = load.i32 notrap aligned v35+8 -;; @0036 v37 = load.i64 notrap aligned v35 -;; @0036 v38 = icmp eq v36, v33 -;; @0036 brif v38, block8(v37, v34), block7 -;; -;; block7 cold: -;; @0036 v39 = iconst.i32 10 -;; @0036 v40 = icmp.i32 uge v33, v39 ; v39 = 10 -;; @0036 v41 = uextend.i64 v33 -;; @0036 v42 = global_value.i64 gv4 -;; @0036 v43 = ishl_imm v41, 3 -;; @0036 v44 = iadd v42, v43 -;; @0036 v45 = iconst.i64 0 -;; @0036 v46 = select_spectre_guard v40, v45, v44 ; v45 = 0 -;; @0036 v47 = load.i64 table_oob aligned table v46 -;; @0036 v48 = band_imm v47, -2 -;; @0036 brif v47, block11(v48), block10 -;; -;; block9 cold: -;; @0036 store.i32 notrap aligned v33, v35+8 -;; @0036 store.i64 notrap aligned v58, v35 -;; @0036 jump block8(v58, v59) -;; -;; block8(v61: i64, v62: i64): -;; @0036 v63 = call_indirect sig0, v61(v62, v0) -;; @0039 v64 = iconst.i32 10 -;; @0039 v65 = icmp uge v63, v64 ; v64 = 10 -;; @0039 v66 = uextend.i64 v63 -;; @0039 v67 = global_value.i64 gv4 -;; @0039 v68 = ishl_imm v66, 3 -;; @0039 v69 = iadd v67, v68 -;; @0039 v70 = iconst.i64 0 -;; @0039 v71 = select_spectre_guard v65, v70, v69 ; v70 = 0 -;; @0039 v72 = load.i64 table_oob aligned table v71 -;; @0039 v73 = band_imm v72, -2 -;; @0039 brif v72, block13(v73), block12 -;; -;; block5 cold: -;; @0033 v20 = iconst.i32 0 -;; @0033 v21 = global_value.i64 gv3 -;; @0033 v22 = call fn0(v21, v20, v2) ; v20 = 0 -;; @0033 jump block6(v22) -;; -;; block6(v19: i64) cold: -;; @0033 v23 = global_value.i64 gv3 -;; @0033 v24 = load.i64 notrap aligned readonly v23+80 -;; @0033 v25 = load.i32 notrap aligned readonly v24+4 -;; @0033 v26 = load.i32 icall_null aligned readonly v19+16 -;; @0033 v27 = icmp eq v26, v25 -;; @0033 trapz v27, bad_sig -;; @0033 v28 = load.i64 notrap aligned readonly v19+8 -;; @0033 v29 = load.i64 notrap aligned readonly v19+24 -;; 
@0033 v30 = icmp eq v29, v4 -;; @0033 brif v30, block4, block3(v28, v29) -;; -;; block10 cold: -;; @0036 v50 = iconst.i32 0 -;; @0036 v51 = global_value.i64 gv3 -;; @0036 v52 = call fn0(v51, v50, v33) ; v50 = 0 -;; @0036 jump block11(v52) -;; -;; block11(v49: i64) cold: -;; @0036 v53 = global_value.i64 gv3 -;; @0036 v54 = load.i64 notrap aligned readonly v53+80 -;; @0036 v55 = load.i32 notrap aligned readonly v54+4 -;; @0036 v56 = load.i32 icall_null aligned readonly v49+16 -;; @0036 v57 = icmp eq v56, v55 -;; @0036 trapz v57, bad_sig -;; @0036 v58 = load.i64 notrap aligned readonly v49+8 -;; @0036 v59 = load.i64 notrap aligned readonly v49+24 -;; @0036 v60 = icmp eq v59, v34 -;; @0036 brif v60, block9, block8(v58, v59) -;; -;; block12 cold: -;; @0039 v75 = iconst.i32 0 -;; @0039 v76 = global_value.i64 gv3 -;; @0039 v77 = call fn0(v76, v75, v63) ; v75 = 0 -;; @0039 jump block13(v77) -;; -;; block13(v74: i64): -;; @0039 v78 = global_value.i64 gv3 -;; @0039 v79 = load.i64 notrap aligned readonly v78+80 -;; @0039 v80 = load.i32 notrap aligned readonly v79+4 -;; @0039 v81 = load.i32 icall_null aligned readonly v74+16 -;; @0039 v82 = icmp eq v81, v80 -;; @0039 trapz v82, bad_sig -;; @0039 v83 = load.i64 notrap aligned readonly v74+8 -;; @0039 v84 = load.i64 notrap aligned readonly v74+24 -;; @0039 v85 = call_indirect sig0, v83(v84, v0) -;; @003c jump block1(v85) -;; -;; block1(v3: i32): -;; @003c return v3 -;; } diff --git a/tests/disas/indirect-call-caching-slot-limit-2.wat b/tests/disas/indirect-call-caching-slot-limit-2.wat deleted file mode 100644 index 719d5e68b7c3..000000000000 --- a/tests/disas/indirect-call-caching-slot-limit-2.wat +++ /dev/null @@ -1,180 +0,0 @@ -;;! target = "x86_64" -;;! flags = [ "-Ocache-call-indirects=y", "-Omax-call-indirect-cache-slots=2" ] - -;; This test checks that we properly bound the number of call-indirect -;; cache slots. The second case (here) is when the limit falls -;; entirely before a function. 
We set the limit to 2 above; we have 2 -;; callsites in the first function; the second function should have no -;; caching. -;; -;; In particular, below we see the cache probe sequence in block0 -;; (first) and block3 (second) in `u0:0` (`call_it`); but the call in -;; the second function in `u0:1` (`call_it_2`), starting in block0 in -;; that function, has no cache slot access and just performs the -;; checks unconditionally. - -(module - (table 10 10 funcref) - - (func (export "call_it") (param i32) (result i32) - local.get 0 - call_indirect (result i32) - call_indirect (result i32)) - - (func (export "call_it_2") (param i32) (result i32) - local.get 0 - call_indirect (result i32))) - -;; function u0:0(i64 vmctx, i64, i32) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; gv3 = vmctx -;; gv4 = load.i64 notrap aligned readonly gv3+88 -;; sig0 = (i64 vmctx, i64) -> i32 tail -;; sig1 = (i64 vmctx, i32 uext, i32 uext) -> i64 system_v -;; fn0 = colocated u1:9 sig1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64, v2: i32): -;; @0040 v4 = global_value.i64 gv3 -;; @0040 v5 = iadd_imm v4, 176 -;; @0040 v6 = load.i32 notrap aligned v5+8 -;; @0040 v7 = load.i64 notrap aligned v5 -;; @0040 v8 = icmp eq v6, v2 -;; @0040 brif v8, block3(v7, v4), block2 -;; -;; block2 cold: -;; @0040 v9 = iconst.i32 10 -;; @0040 v10 = icmp.i32 uge v2, v9 ; v9 = 10 -;; @0040 v11 = uextend.i64 v2 -;; @0040 v12 = global_value.i64 gv4 -;; @0040 v13 = ishl_imm v11, 3 -;; @0040 v14 = iadd v12, v13 -;; @0040 v15 = iconst.i64 0 -;; @0040 v16 = select_spectre_guard v10, v15, v14 ; v15 = 0 -;; @0040 v17 = load.i64 table_oob aligned table v16 -;; @0040 v18 = band_imm v17, -2 -;; @0040 brif v17, block6(v18), block5 -;; -;; block4 cold: -;; @0040 store.i32 notrap aligned v2, v5+8 -;; @0040 store.i64 notrap aligned v28, v5 -;; @0040 jump block3(v28, v29) -;; -;; block3(v31: i64, v32: i64): -;; @0040 v33 = call_indirect sig0, 
v31(v32, v0) -;; @0043 v34 = global_value.i64 gv3 -;; @0043 v35 = iadd_imm v34, 192 -;; @0043 v36 = load.i32 notrap aligned v35+8 -;; @0043 v37 = load.i64 notrap aligned v35 -;; @0043 v38 = icmp eq v36, v33 -;; @0043 brif v38, block8(v37, v34), block7 -;; -;; block7 cold: -;; @0043 v39 = iconst.i32 10 -;; @0043 v40 = icmp.i32 uge v33, v39 ; v39 = 10 -;; @0043 v41 = uextend.i64 v33 -;; @0043 v42 = global_value.i64 gv4 -;; @0043 v43 = ishl_imm v41, 3 -;; @0043 v44 = iadd v42, v43 -;; @0043 v45 = iconst.i64 0 -;; @0043 v46 = select_spectre_guard v40, v45, v44 ; v45 = 0 -;; @0043 v47 = load.i64 table_oob aligned table v46 -;; @0043 v48 = band_imm v47, -2 -;; @0043 brif v47, block11(v48), block10 -;; -;; block9 cold: -;; @0043 store.i32 notrap aligned v33, v35+8 -;; @0043 store.i64 notrap aligned v58, v35 -;; @0043 jump block8(v58, v59) -;; -;; block8(v61: i64, v62: i64): -;; @0043 v63 = call_indirect sig0, v61(v62, v0) -;; @0046 jump block1(v63) -;; -;; block5 cold: -;; @0040 v20 = iconst.i32 0 -;; @0040 v21 = global_value.i64 gv3 -;; @0040 v22 = call fn0(v21, v20, v2) ; v20 = 0 -;; @0040 jump block6(v22) -;; -;; block6(v19: i64) cold: -;; @0040 v23 = global_value.i64 gv3 -;; @0040 v24 = load.i64 notrap aligned readonly v23+80 -;; @0040 v25 = load.i32 notrap aligned readonly v24+4 -;; @0040 v26 = load.i32 icall_null aligned readonly v19+16 -;; @0040 v27 = icmp eq v26, v25 -;; @0040 trapz v27, bad_sig -;; @0040 v28 = load.i64 notrap aligned readonly v19+8 -;; @0040 v29 = load.i64 notrap aligned readonly v19+24 -;; @0040 v30 = icmp eq v29, v4 -;; @0040 brif v30, block4, block3(v28, v29) -;; -;; block10 cold: -;; @0043 v50 = iconst.i32 0 -;; @0043 v51 = global_value.i64 gv3 -;; @0043 v52 = call fn0(v51, v50, v33) ; v50 = 0 -;; @0043 jump block11(v52) -;; -;; block11(v49: i64) cold: -;; @0043 v53 = global_value.i64 gv3 -;; @0043 v54 = load.i64 notrap aligned readonly v53+80 -;; @0043 v55 = load.i32 notrap aligned readonly v54+4 -;; @0043 v56 = load.i32 icall_null aligned 
readonly v49+16 -;; @0043 v57 = icmp eq v56, v55 -;; @0043 trapz v57, bad_sig -;; @0043 v58 = load.i64 notrap aligned readonly v49+8 -;; @0043 v59 = load.i64 notrap aligned readonly v49+24 -;; @0043 v60 = icmp eq v59, v34 -;; @0043 brif v60, block9, block8(v58, v59) -;; -;; block1(v3: i32): -;; @0046 return v3 -;; } -;; -;; function u0:1(i64 vmctx, i64, i32) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; gv3 = vmctx -;; gv4 = load.i64 notrap aligned readonly gv3+88 -;; sig0 = (i64 vmctx, i64) -> i32 tail -;; sig1 = (i64 vmctx, i32 uext, i32 uext) -> i64 system_v -;; fn0 = colocated u1:9 sig1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64, v2: i32): -;; @004b v4 = iconst.i32 10 -;; @004b v5 = icmp uge v2, v4 ; v4 = 10 -;; @004b v6 = uextend.i64 v2 -;; @004b v7 = global_value.i64 gv4 -;; @004b v8 = ishl_imm v6, 3 -;; @004b v9 = iadd v7, v8 -;; @004b v10 = iconst.i64 0 -;; @004b v11 = select_spectre_guard v5, v10, v9 ; v10 = 0 -;; @004b v12 = load.i64 table_oob aligned table v11 -;; @004b v13 = band_imm v12, -2 -;; @004b brif v12, block3(v13), block2 -;; -;; block2 cold: -;; @004b v15 = iconst.i32 0 -;; @004b v16 = global_value.i64 gv3 -;; @004b v17 = call fn0(v16, v15, v2) ; v15 = 0 -;; @004b jump block3(v17) -;; -;; block3(v14: i64): -;; @004b v18 = global_value.i64 gv3 -;; @004b v19 = load.i64 notrap aligned readonly v18+80 -;; @004b v20 = load.i32 notrap aligned readonly v19+4 -;; @004b v21 = load.i32 icall_null aligned readonly v14+16 -;; @004b v22 = icmp eq v21, v20 -;; @004b trapz v22, bad_sig -;; @004b v23 = load.i64 notrap aligned readonly v14+8 -;; @004b v24 = load.i64 notrap aligned readonly v14+24 -;; @004b v25 = call_indirect sig0, v23(v24, v0) -;; @004e jump block1(v25) -;; -;; block1(v3: i32): -;; @004e return v3 -;; } diff --git a/tests/disas/indirect-call-caching.wat b/tests/disas/indirect-call-caching.wat deleted file mode 100644 index a32015292b62..000000000000 --- 
a/tests/disas/indirect-call-caching.wat +++ /dev/null @@ -1,125 +0,0 @@ -;;! target = "x86_64" -;;! flags = [ "-Ocache-call-indirects=y" ] - -;; This test checks that we get the indirect-call caching optimization -;; where it should be applicable (immutable table, null 0-index). -;; -;; The key bit in the expectation below is the cached-index load (v6), -;; compare (v7), branch, fastpath in block2/block4. - -(module - (table 10 10 funcref) - - (func $f1 (result i32) i32.const 1) - (func $f2 (result i32) i32.const 2) - (func $f3 (result i32) i32.const 3) - - (func (export "call_it") (param i32) (result i32) - local.get 0 - call_indirect (result i32)) - - (elem (i32.const 1) func $f1 $f2 $f3)) -;; function u0:0(i64 vmctx, i64) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64): -;; @003f v3 = iconst.i32 1 -;; @0041 jump block1(v3) ; v3 = 1 -;; -;; block1(v2: i32): -;; @0041 return v2 -;; } -;; -;; function u0:1(i64 vmctx, i64) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64): -;; @0044 v3 = iconst.i32 2 -;; @0046 jump block1(v3) ; v3 = 2 -;; -;; block1(v2: i32): -;; @0046 return v2 -;; } -;; -;; function u0:2(i64 vmctx, i64) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64): -;; @0049 v3 = iconst.i32 3 -;; @004b jump block1(v3) ; v3 = 3 -;; -;; block1(v2: i32): -;; @004b return v2 -;; } -;; -;; function u0:3(i64 vmctx, i64, i32) -> i32 tail { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0+8 -;; gv2 = load.i64 notrap aligned gv1 -;; gv3 = vmctx -;; gv4 = load.i64 notrap aligned readonly gv3+88 -;; sig0 = (i64 vmctx, i64) -> i32 tail -;; sig1 = (i64 vmctx, i32 uext, i32 uext) -> i64 system_v 
-;; fn0 = colocated u1:9 sig1 -;; stack_limit = gv2 -;; -;; block0(v0: i64, v1: i64, v2: i32): -;; @0050 v4 = global_value.i64 gv3 -;; @0050 v5 = iadd_imm v4, 240 -;; @0050 v6 = load.i32 notrap aligned v5+8 -;; @0050 v7 = load.i64 notrap aligned v5 -;; @0050 v8 = icmp eq v6, v2 -;; @0050 brif v8, block3(v7, v4), block2 -;; -;; block2 cold: -;; @0050 v9 = iconst.i32 10 -;; @0050 v10 = icmp.i32 uge v2, v9 ; v9 = 10 -;; @0050 v11 = uextend.i64 v2 -;; @0050 v12 = global_value.i64 gv4 -;; @0050 v13 = ishl_imm v11, 3 -;; @0050 v14 = iadd v12, v13 -;; @0050 v15 = iconst.i64 0 -;; @0050 v16 = select_spectre_guard v10, v15, v14 ; v15 = 0 -;; @0050 v17 = load.i64 table_oob aligned table v16 -;; @0050 v18 = band_imm v17, -2 -;; @0050 brif v17, block6(v18), block5 -;; -;; block4 cold: -;; @0050 store.i32 notrap aligned v2, v5+8 -;; @0050 store.i64 notrap aligned v28, v5 -;; @0050 jump block3(v28, v29) -;; -;; block3(v31: i64, v32: i64): -;; @0050 v33 = call_indirect sig0, v31(v32, v0) -;; @0053 jump block1(v33) -;; -;; block5 cold: -;; @0050 v20 = iconst.i32 0 -;; @0050 v21 = global_value.i64 gv3 -;; @0050 v22 = call fn0(v21, v20, v2) ; v20 = 0 -;; @0050 jump block6(v22) -;; -;; block6(v19: i64) cold: -;; @0050 v23 = global_value.i64 gv3 -;; @0050 v24 = load.i64 notrap aligned readonly v23+80 -;; @0050 v25 = load.i32 notrap aligned readonly v24 -;; @0050 v26 = load.i32 icall_null aligned readonly v19+16 -;; @0050 v27 = icmp eq v26, v25 -;; @0050 trapz v27, bad_sig -;; @0050 v28 = load.i64 notrap aligned readonly v19+8 -;; @0050 v29 = load.i64 notrap aligned readonly v19+24 -;; @0050 v30 = icmp eq v29, v4 -;; @0050 brif v30, block4, block3(v28, v29) -;; -;; block1(v3: i32): -;; @0053 return v3 -;; } From e20b4244b945b27fdc3e3bcec6b9480986798934 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Thu, 27 Jun 2024 11:40:54 -0700 Subject: [PATCH 08/10] Cranelift: Take user stack maps through lowering and emission (#8876) * Cranelift: Take user stack maps through lowering and 
emission Previously, user stack maps were inserted by the frontend and preserved in the mid-end. This commit takes them from the mid-end CLIF into the backend vcode, and then from that vcode into the finalized mach buffer during emission. During lowering, we compile the `UserStackMapEntry`s into packed `UserStackMap`s. This is the appropriate moment in time to do that coalescing, packing, and compiling because the stack map entries are immutable from this point on. Additionally, we include user stack maps in the `Debug` and disassembly implementations for vcode, just after their associated safepoint instructions. This allows us to see the stack maps we are generating when debugging, as well as write filetests that check we are generating the expected stack maps for the correct instructions. Co-Authored-By: Trevor Elliott * uncomment debug assert that was commented out for debugging * Address review feedback * remove new method that was actually never needed --------- Co-authored-by: Trevor Elliott --- cranelift/codegen/src/ir/mod.rs | 2 +- cranelift/codegen/src/ir/user_stack_maps.rs | 55 +++- .../codegen/src/isa/aarch64/inst/emit.rs | 37 ++- .../codegen/src/isa/riscv64/inst/emit.rs | 41 ++- cranelift/codegen/src/isa/s390x/inst/emit.rs | 44 ++- cranelift/codegen/src/isa/x64/inst/emit.rs | 17 +- .../codegen/src/isa/x64/inst/emit_state.rs | 23 +- cranelift/codegen/src/machinst/abi.rs | 48 ++-- cranelift/codegen/src/machinst/buffer.rs | 46 +++- cranelift/codegen/src/machinst/lower.rs | 40 ++- cranelift/codegen/src/machinst/mod.rs | 19 +- cranelift/codegen/src/machinst/vcode.rs | 109 +++++++- .../isa/aarch64/user_stack_maps.clif | 221 ++++++++++++++++ .../isa/riscv64/user_stack_maps.clif | 250 ++++++++++++++++++ .../filetests/isa/s390x/user_stack_maps.clif | 221 ++++++++++++++++ .../filetests/isa/x64/user_stack_maps.clif | 241 +++++++++++++++++ 16 files changed, 1341 insertions(+), 73 deletions(-) create mode 100644 
cranelift/filetests/filetests/isa/aarch64/user_stack_maps.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/user_stack_maps.clif create mode 100644 cranelift/filetests/filetests/isa/s390x/user_stack_maps.clif create mode 100644 cranelift/filetests/filetests/isa/x64/user_stack_maps.clif diff --git a/cranelift/codegen/src/ir/mod.rs b/cranelift/codegen/src/ir/mod.rs index b17e2b111b65..e6f082d70a8d 100644 --- a/cranelift/codegen/src/ir/mod.rs +++ b/cranelift/codegen/src/ir/mod.rs @@ -65,7 +65,7 @@ pub use crate::ir::stackslot::{ }; pub use crate::ir::trapcode::TrapCode; pub use crate::ir::types::Type; -pub use crate::ir::user_stack_maps::UserStackMapEntry; +pub use crate::ir::user_stack_maps::{UserStackMap, UserStackMapEntry}; use crate::entity::{entity_impl, PrimaryMap, SecondaryMap}; diff --git a/cranelift/codegen/src/ir/user_stack_maps.rs b/cranelift/codegen/src/ir/user_stack_maps.rs index 4f9daf910bf9..48ae47c82bf8 100644 --- a/cranelift/codegen/src/ir/user_stack_maps.rs +++ b/cranelift/codegen/src/ir/user_stack_maps.rs @@ -29,13 +29,19 @@ //! contrast to the old system and its `r64` values). use crate::ir; +use cranelift_bitset::CompoundBitSet; +use cranelift_entity::PrimaryMap; use smallvec::SmallVec; pub(crate) type UserStackMapEntryVec = SmallVec<[UserStackMapEntry; 4]>; -/// A stack map entry describes a GC-managed value and its location at a -/// particular instruction. -#[derive(Clone, PartialEq, Hash)] +/// A stack map entry describes a single GC-managed value and its location on +/// the stack. +/// +/// A stack map entry is associated with a particular instruction, and that +/// instruction must be a safepoint. The GC-managed value must be stored in the +/// described location across this entry's instruction. 
+#[derive(Clone, Debug, PartialEq, Hash)] #[cfg_attr( feature = "enable-serde", derive(serde_derive::Serialize, serde_derive::Deserialize) @@ -50,3 +56,46 @@ pub struct UserStackMapEntry { /// The offset within the stack slot where this entry's value can be found. pub offset: u32, } + +/// A compiled stack map, describing the location of many GC-managed values. +/// +/// A stack map is associated with a particular instruction, and that +/// instruction is a safepoint. +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr( + feature = "enable-serde", + derive(serde_derive::Deserialize, serde_derive::Serialize) +)] +pub struct UserStackMap { + by_type: SmallVec<[(ir::Type, CompoundBitSet); 1]>, +} + +impl UserStackMap { + /// Coalesce the given entries into a new `UserStackMap`. + pub fn new( + entries: &[UserStackMapEntry], + stack_slot_offsets: &PrimaryMap, + ) -> Self { + let mut by_type = SmallVec::<[(ir::Type, CompoundBitSet); 1]>::default(); + + for entry in entries { + let offset = stack_slot_offsets[entry.slot] + entry.offset; + let offset = usize::try_from(offset).unwrap(); + + // Don't bother trying to avoid an `O(n)` search here: `n` is + // basically always one in practice; even if it isn't, there aren't + // that many different CLIF types. + let index = by_type + .iter() + .position(|(ty, _)| *ty == entry.ty) + .unwrap_or_else(|| { + by_type.push((entry.ty, CompoundBitSet::with_capacity(offset + 1))); + by_type.len() - 1 + }); + + by_type[index].1.insert(offset); + } + + UserStackMap { by_type } + } +} diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 42afe51d455f..c1f91b07a432 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -651,11 +651,18 @@ fn enc_asimd_mod_imm(rd: Writable, q_op: u32, cmode: u32, imm: u8) -> u32 { /// State carried between emissions of a sequence of instructions. 
#[derive(Default, Clone, Debug)] pub struct EmitState { - /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. + /// Safepoint stack map for upcoming instruction, as provided to + /// `pre_safepoint()`. stack_map: Option, + + /// The user stack map for the upcoming instruction, as provided to + /// `pre_safepoint()`. + user_stack_map: Option, + /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and /// optimized away at compiletime. See [cranelift_control]. ctrl_plane: ControlPlane, + frame_layout: FrameLayout, } @@ -663,13 +670,19 @@ impl MachInstEmitState for EmitState { fn new(abi: &Callee, ctrl_plane: ControlPlane) -> Self { EmitState { stack_map: None, + user_stack_map: None, ctrl_plane, frame_layout: abi.frame_layout().clone(), } } - fn pre_safepoint(&mut self, stack_map: StackMap) { - self.stack_map = Some(stack_map); + fn pre_safepoint( + &mut self, + stack_map: Option, + user_stack_map: Option, + ) { + self.stack_map = stack_map; + self.user_stack_map = user_stack_map; } fn ctrl_plane_mut(&mut self) -> &mut ControlPlane { @@ -686,8 +699,8 @@ impl MachInstEmitState for EmitState { } impl EmitState { - fn take_stack_map(&mut self) -> Option { - self.stack_map.take() + fn take_stack_map(&mut self) -> (Option, Option) { + (self.stack_map.take(), self.user_stack_map.take()) } fn clear_post_insn(&mut self) { @@ -2921,11 +2934,16 @@ impl MachInstEmit for Inst { } } &Inst::Call { ref info } => { - if let Some(s) = state.take_stack_map() { + let (stack_map, user_stack_map) = state.take_stack_map(); + if let Some(s) = stack_map { sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); } sink.add_reloc(Reloc::Arm64Call, &info.dest, 0); sink.put4(enc_jump26(0b100101, 0)); + if let Some(s) = user_stack_map { + let offset = sink.cur_offset(); + sink.push_user_stack_map(state, offset, s); + } if info.opcode.is_call() { sink.add_call_site(info.opcode); } @@ -2939,11 +2957,16 @@ impl MachInstEmit for Inst { } } 
&Inst::CallInd { ref info } => { - if let Some(s) = state.take_stack_map() { + let (stack_map, user_stack_map) = state.take_stack_map(); + if let Some(s) = stack_map { sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); } let rn = info.rn; sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)); + if let Some(s) = user_stack_map { + let offset = sink.cur_offset(); + sink.push_user_stack_map(state, offset, s); + } if info.opcode.is_call() { sink.add_call_site(info.opcode); } diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 90186fb9fbfb..fb63c355fc29 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -46,20 +46,28 @@ pub enum EmitVState { /// State carried between emissions of a sequence of instructions. #[derive(Default, Clone, Debug)] pub struct EmitState { - /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. + /// Safepoint stack map for upcoming instruction, as provided to + /// `pre_safepoint()`. stack_map: Option, + + /// The user stack map for the upcoming instruction, as provided to + /// `pre_safepoint()`. + user_stack_map: Option, + /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and /// optimized away at compiletime. See [cranelift_control]. ctrl_plane: ControlPlane, + /// Vector State /// Controls the current state of the vector unit at the emission point. 
vstate: EmitVState, + frame_layout: FrameLayout, } impl EmitState { - fn take_stack_map(&mut self) -> Option { - self.stack_map.take() + fn take_stack_map(&mut self) -> (Option, Option) { + (self.stack_map.take(), self.user_stack_map.take()) } } @@ -70,14 +78,20 @@ impl MachInstEmitState for EmitState { ) -> Self { EmitState { stack_map: None, + user_stack_map: None, ctrl_plane, vstate: EmitVState::Unknown, frame_layout: abi.frame_layout().clone(), } } - fn pre_safepoint(&mut self, stack_map: StackMap) { - self.stack_map = Some(stack_map); + fn pre_safepoint( + &mut self, + stack_map: Option, + user_stack_map: Option, + ) { + self.stack_map = stack_map; + self.user_stack_map = user_stack_map; } fn ctrl_plane_mut(&mut self) -> &mut ControlPlane { @@ -1134,13 +1148,21 @@ impl Inst { sink.add_call_site(info.opcode); } sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0); - if let Some(s) = state.take_stack_map() { + + let (stack_map, user_stack_map) = state.take_stack_map(); + if let Some(s) = stack_map { sink.add_stack_map(StackMapExtent::UpcomingBytes(8), s); } + Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0) .into_iter() .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off)); + if let Some(s) = user_stack_map { + let offset = sink.cur_offset(); + sink.push_user_stack_map(state, offset, s); + } + let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap(); if callee_pop_size > 0 { for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) { @@ -1158,9 +1180,14 @@ impl Inst { } .emit(sink, emit_info, state); - if let Some(s) = state.take_stack_map() { + let (stack_map, user_stack_map) = state.take_stack_map(); + if let Some(s) = stack_map { sink.add_stack_map(StackMapExtent::StartedAtOffset(start_offset), s); } + if let Some(s) = user_stack_map { + let offset = sink.cur_offset(); + sink.push_user_stack_map(state, offset, s); + } if info.opcode.is_call() { sink.add_call_site(info.opcode); diff --git 
a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs index b98e064ad225..5c72b887e9e7 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs @@ -1,7 +1,7 @@ //! S390x ISA: binary code emission. use crate::binemit::StackMap; -use crate::ir::{MemFlags, TrapCode}; +use crate::ir::{self, MemFlags, TrapCode}; use crate::isa::s390x::inst::*; use crate::isa::s390x::settings as s390x_settings; use cranelift_control::ControlPlane; @@ -1306,11 +1306,19 @@ fn put_with_trap(sink: &mut MachBuffer, enc: &[u8], trap_code: TrapCode) { #[derive(Default, Clone, Debug)] pub struct EmitState { pub(crate) initial_sp_offset: i64, - /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. + + /// Safepoint stack map for upcoming instruction, as provided to + /// `pre_safepoint()`. stack_map: Option, + + /// The user stack map for the upcoming instruction, as provided to + /// `pre_safepoint()`. + user_stack_map: Option, + /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and /// optimized away at compiletime. See [cranelift_control]. 
ctrl_plane: ControlPlane, + frame_layout: FrameLayout, } @@ -1319,13 +1327,19 @@ impl MachInstEmitState for EmitState { EmitState { initial_sp_offset: abi.frame_size() as i64, stack_map: None, + user_stack_map: None, ctrl_plane, frame_layout: abi.frame_layout().clone(), } } - fn pre_safepoint(&mut self, stack_map: StackMap) { - self.stack_map = Some(stack_map); + fn pre_safepoint( + &mut self, + stack_map: Option, + user_stack_map: Option, + ) { + self.stack_map = stack_map; + self.user_stack_map = user_stack_map; } fn ctrl_plane_mut(&mut self) -> &mut ControlPlane { @@ -1342,8 +1356,8 @@ impl MachInstEmitState for EmitState { } impl EmitState { - fn take_stack_map(&mut self) -> Option { - self.stack_map.take() + fn take_stack_map(&mut self) -> (Option, Option) { + (self.stack_map.take(), self.user_stack_map.take()) } fn clear_post_insn(&mut self) { @@ -3243,9 +3257,15 @@ impl Inst { _ => unreachable!(), } - if let Some(s) = state.take_stack_map() { + let (stack_map, user_stack_map) = state.take_stack_map(); + if let Some(s) = stack_map { sink.add_stack_map(StackMapExtent::UpcomingBytes(6), s); } + if let Some(s) = user_stack_map { + let offset = sink.cur_offset() + 6; + sink.push_user_stack_map(state, offset, s); + } + put(sink, &enc_ril_b(opcode, link.to_reg(), 0)); if info.opcode.is_call() { sink.add_call_site(info.opcode); @@ -3255,10 +3275,16 @@ impl Inst { debug_assert_eq!(link.to_reg(), gpr(14)); let rn = info.rn; - let opcode = 0x0d; // BASR - if let Some(s) = state.take_stack_map() { + let (stack_map, user_stack_map) = state.take_stack_map(); + if let Some(s) = stack_map { sink.add_stack_map(StackMapExtent::UpcomingBytes(2), s); } + if let Some(s) = user_stack_map { + let offset = sink.cur_offset() + 2; + sink.push_user_stack_map(state, offset, s); + } + + let opcode = 0x0d; // BASR put(sink, &enc_rr(opcode, link.to_reg(), rn)); if info.opcode.is_call() { sink.add_call_site(info.opcode); diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs 
b/cranelift/codegen/src/isa/x64/inst/emit.rs index abfcc447770c..3e0d44059c83 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1599,9 +1599,15 @@ pub(crate) fn emit( opcode, info: call_info, } => { - if let Some(s) = state.take_stack_map() { + let (stack_map, user_stack_map) = state.take_stack_map(); + if let Some(s) = stack_map { sink.add_stack_map(StackMapExtent::UpcomingBytes(5), s); } + if let Some(s) = user_stack_map { + let offset = sink.cur_offset() + 5; + sink.push_user_stack_map(state, offset, s); + } + sink.put1(0xE8); // The addend adjusts for the difference between the end of the instruction and the // beginning of the immediate field. @@ -1696,9 +1702,16 @@ pub(crate) fn emit( ); } } - if let Some(s) = state.take_stack_map() { + + let (stack_map, user_stack_map) = state.take_stack_map(); + if let Some(s) = stack_map { sink.add_stack_map(StackMapExtent::StartedAtOffset(start_offset), s); } + if let Some(s) = user_stack_map { + let offset = sink.cur_offset(); + sink.push_user_stack_map(state, offset, s); + } + if opcode.is_call() { sink.add_call_site(*opcode); } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_state.rs b/cranelift/codegen/src/isa/x64/inst/emit_state.rs index 21f01dcc01b7..9cca7d7895d8 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_state.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_state.rs @@ -1,11 +1,18 @@ use super::*; +use crate::ir; use cranelift_control::ControlPlane; /// State carried between emissions of a sequence of instructions. #[derive(Default, Clone, Debug)] pub struct EmitState { - /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. + /// Safepoint stack map for upcoming instruction, as provided to + /// `pre_safepoint()`. stack_map: Option, + + /// The user stack map for the upcoming instruction, as provided to + /// `pre_safepoint()`. + user_stack_map: Option, + /// Only used during fuzz-testing. 
Otherwise, it is a zero-sized struct and /// optimized away at compiletime. See [cranelift_control]. ctrl_plane: ControlPlane, @@ -19,13 +26,19 @@ impl MachInstEmitState for EmitState { fn new(abi: &Callee, ctrl_plane: ControlPlane) -> Self { EmitState { stack_map: None, + user_stack_map: None, ctrl_plane, frame_layout: abi.frame_layout().clone(), } } - fn pre_safepoint(&mut self, stack_map: StackMap) { - self.stack_map = Some(stack_map); + fn pre_safepoint( + &mut self, + stack_map: Option, + user_stack_map: Option, + ) { + self.stack_map = stack_map; + self.user_stack_map = user_stack_map; } fn ctrl_plane_mut(&mut self) -> &mut ControlPlane { @@ -42,8 +55,8 @@ impl MachInstEmitState for EmitState { } impl EmitState { - pub(crate) fn take_stack_map(&mut self) -> Option { - self.stack_map.take() + pub(crate) fn take_stack_map(&mut self) -> (Option, Option) { + (self.stack_map.take(), self.user_stack_map.take()) } pub(crate) fn clear_post_insn(&mut self) { diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 713fcec2540e..fd9555b7c499 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -59,8 +59,10 @@ //! //! ```plain //! (high address) -//! -//! +---------------------------+ +//! | ... | +//! | caller frames | +//! | ... | +//! +===========================+ //! | ... | //! | stack args | //! Canonical Frame Address --> | (accessed via FP) | @@ -68,24 +70,24 @@ //! SP at function entry -----> | return address | //! +---------------------------+ //! FP after prologue --------> | FP (pushed by prologue) | -//! +---------------------------+ -//! | ... | -//! | clobbered callee-saves | -//! unwind-frame base --------> | (pushed by prologue) | -//! +---------------------------+ -//! | ... | -//! | spill slots | -//! | (accessed via SP) | -//! | ... | -//! | stack slots | -//! | (accessed via SP) | -//! | (alloc'd by prologue) | -//! +---------------------------+ -//! 
| [alignment as needed] | -//! | ... | -//! | args for largest call | -//! SP -----------------------> | (alloc'd by prologue) | -//! +---------------------------+ +//! +---------------------------+ ----- +//! | ... | | +//! | clobbered callee-saves | | +//! unwind-frame base --------> | (pushed by prologue) | | +//! +---------------------------+ | +//! | ... | | +//! | spill slots | | +//! | (accessed via SP) | active +//! | ... | size +//! | stack slots | | +//! | (accessed via SP) | | +//! | (alloc'd by prologue) | | +//! +---------------------------+ | +//! | [alignment as needed] | | +//! | ... | | +//! | args for largest call | | +//! SP -----------------------> | (alloc'd by prologue) | | +//! +===========================+ ----- //! //! (low address) //! ``` @@ -1012,6 +1014,12 @@ impl FrameLayout { debug_assert!(floats.iter().all(|r| r.to_reg().class() == RegClass::Float)); (ints, floats) } + + /// The size of FP to SP while the frame is active (not during prologue + /// setup or epilogue tear down). + pub fn active_size(&self) -> u32 { + self.outgoing_args_size + self.fixed_frame_storage_size + self.clobber_size + } } /// ABI object for a function body. diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index c4ffbb2f8976..c02b8596d05d 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -178,6 +178,7 @@ use crate::machinst::{ BlockIndex, MachInstLabelUse, TextSectionBuilder, VCodeConstant, VCodeConstants, VCodeInst, }; use crate::trace; +use crate::{ir, MachInstEmitState}; use crate::{timing, VCodeConstantData}; use cranelift_control::ControlPlane; use cranelift_entity::{entity_impl, PrimaryMap}; @@ -250,6 +251,11 @@ pub struct MachBuffer { srclocs: SmallVec<[MachSrcLoc; 64]>, /// Any stack maps referring to this code. stack_maps: SmallVec<[MachStackMap; 8]>, + /// Any user stack maps for this code. 
+ /// + /// Each entry is an `(offset, span, stack_map)` triple. Entries are sorted + /// by code offset, and each stack map covers `span` bytes on the stack. + user_stack_maps: SmallVec<[(CodeOffset, u32, ir::UserStackMap); 8]>, /// Any unwind info at a given location. unwind_info: SmallVec<[(CodeOffset, UnwindInst); 8]>, /// The current source location in progress (after `start_srcloc()` and @@ -329,6 +335,7 @@ impl MachBufferFinalized { .map(|srcloc| srcloc.apply_base_srcloc(base_srcloc)) .collect(), stack_maps: self.stack_maps, + user_stack_maps: self.user_stack_maps, unwind_info: self.unwind_info, alignment: self.alignment, } @@ -357,9 +364,14 @@ pub struct MachBufferFinalized { pub(crate) srclocs: SmallVec<[T::MachSrcLocType; 64]>, /// Any stack maps referring to this code. pub(crate) stack_maps: SmallVec<[MachStackMap; 8]>, + /// Any user stack maps for this code. + /// + /// Each entry is an `(offset, span, stack_map)` triple. Entries are sorted + /// by code offset, and each stack map covers `span` bytes on the stack. + pub(crate) user_stack_maps: SmallVec<[(CodeOffset, u32, ir::UserStackMap); 8]>, /// Any unwind info at a given location. pub unwind_info: SmallVec<[(CodeOffset, UnwindInst); 8]>, - /// The requireed alignment of this buffer + /// The required alignment of this buffer. pub alignment: u32, } @@ -447,6 +459,7 @@ impl MachBuffer { call_sites: SmallVec::new(), srclocs: SmallVec::new(), stack_maps: SmallVec::new(), + user_stack_maps: SmallVec::new(), unwind_info: SmallVec::new(), cur_srcloc: None, label_offsets: SmallVec::new(), @@ -1532,6 +1545,7 @@ impl MachBuffer { call_sites: self.call_sites, srclocs, stack_maps: self.stack_maps, + user_stack_maps: self.user_stack_maps, unwind_info: self.unwind_info, alignment, } @@ -1667,6 +1681,36 @@ impl MachBuffer { stack_map, }); } + + /// Push a user stack map onto this buffer. + /// + /// The stack map is associated with the given `return_addr` code + /// offset. 
This must be the PC for the instruction just *after* this stack + /// map's associated instruction. For example in the sequence `call $foo; + /// add r8, rax`, the `return_addr` must be the offset of the start of the + /// `add` instruction. + /// + /// Stack maps must be pushed in sorted `return_addr` order. + pub fn push_user_stack_map( + &mut self, + emit_state: &I::State, + return_addr: CodeOffset, + stack_map: ir::UserStackMap, + ) { + let span = emit_state.frame_layout().active_size(); + trace!("Adding user stack map @ {return_addr:#x} spanning {span} bytes: {stack_map:?}"); + + debug_assert!( + self.user_stack_maps + .last() + .map_or(true, |(prev_addr, _, _)| *prev_addr < return_addr), + "pushed stack maps out of order: {} is not less than {}", + self.user_stack_maps.last().unwrap().0, + return_addr, + ); + + self.user_stack_maps.push((return_addr, span, stack_map)); + } } impl MachBufferFinalized { diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index ea2be76d485b..32db947a259e 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -14,9 +14,9 @@ use crate::ir::{ Value, ValueDef, ValueLabelAssignments, ValueLabelStart, }; use crate::machinst::{ - writable_value_regs, BlockIndex, BlockLoweringOrder, Callee, InsnIndex, LoweredBlock, - MachLabel, Reg, SigSet, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants, - VCodeInst, ValueRegs, Writable, + writable_value_regs, BackwardsInsnIndex, BlockIndex, BlockLoweringOrder, Callee, InsnIndex, + LoweredBlock, MachLabel, Reg, SigSet, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, + VCodeConstants, VCodeInst, ValueRegs, Writable, }; use crate::settings::Flags; use crate::{trace, CodegenResult}; @@ -485,8 +485,8 @@ impl<'func, I: VCodeInst> Lower<'func, I> { /// Pre-analysis: compute `value_ir_uses`. See comment on /// `ValueUseState` for a description of what this analysis /// computes. 
- fn compute_use_states<'a>( - f: &'a Function, + fn compute_use_states( + f: &Function, sret_param: Option, ) -> SecondaryMap { // We perform the analysis without recursion, so we don't @@ -803,9 +803,39 @@ impl<'func, I: VCodeInst> Lower<'func, I> { } } + let start = self.vcode.vcode.num_insts(); let loc = self.srcloc(inst); self.finish_ir_inst(loc); + // If the instruction had a user stack map, forward it from the CLIF + // to the vcode. + if let Some(entries) = self.f.dfg.user_stack_map_entries(inst) { + let end = self.vcode.vcode.num_insts(); + debug_assert!(end > start); + debug_assert_eq!( + (start..end) + .filter(|i| self.vcode.vcode[InsnIndex::new(*i)].is_safepoint()) + .count(), + 1 + ); + for i in start..end { + let iix = InsnIndex::new(i); + if self.vcode.vcode[iix].is_safepoint() { + trace!( + "Adding user stack map from clif\n\n\ + {inst:?} `{}`\n\n\ + to vcode\n\n\ + {iix:?} `{}`", + self.f.dfg.display_inst(inst), + &self.vcode.vcode[iix].pretty_print_inst(&mut Default::default()), + ); + self.vcode + .add_user_stack_map(BackwardsInsnIndex::new(iix.index()), entries); + break; + } + } + } + // maybe insert random instruction if ctrl_plane.get_decision() { if ctrl_plane.get_decision() { diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index bf15ec462dae..af87c3053a6b 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -45,8 +45,9 @@ //! ``` use crate::binemit::{Addend, CodeInfo, CodeOffset, Reloc, StackMap}; -use crate::ir::function::FunctionParameters; -use crate::ir::{DynamicStackSlot, RelSourceLoc, StackSlot, Type}; +use crate::ir::{ + self, function::FunctionParameters, DynamicStackSlot, RelSourceLoc, StackSlot, Type, +}; use crate::isa::FunctionAlignment; use crate::result::CodegenResult; use crate::settings; @@ -284,10 +285,13 @@ pub enum MachTerminator { pub trait MachInstEmit: MachInst { /// Persistent state carried across `emit` invocations. 
type State: MachInstEmitState; + /// Constant information used in `emit` invocations. type Info; + /// Emit the instruction. fn emit(&self, code: &mut MachBuffer, info: &Self::Info, state: &mut Self::State); + /// Pretty-print the instruction. fn pretty_print_inst(&self, state: &mut Self::State) -> String; } @@ -297,20 +301,29 @@ pub trait MachInstEmit: MachInst { pub trait MachInstEmitState: Default + Clone + Debug { /// Create a new emission state given the ABI object. fn new(abi: &Callee, ctrl_plane: ControlPlane) -> Self; + /// Update the emission state before emitting an instruction that is a /// safepoint. - fn pre_safepoint(&mut self, _stack_map: StackMap) {} + fn pre_safepoint( + &mut self, + stack_map: Option, + user_stack_map: Option, + ); + /// The emission state holds ownership of a control plane, so it doesn't /// have to be passed around explicitly too much. `ctrl_plane_mut` may /// be used if temporary access to the control plane is needed by some /// other function that doesn't have access to the emission state. fn ctrl_plane_mut(&mut self) -> &mut ControlPlane; + /// Used to continue using a control plane after the emission state is /// not needed anymore. fn take_ctrl_plane(self) -> ControlPlane; + /// A hook that triggers when first emitting a new block. /// It is guaranteed to be called before any instructions are emitted. fn on_new_block(&mut self) {} + /// The [`FrameLayout`] for the function currently being compiled. fn frame_layout(&self) -> &FrameLayout; } diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 7a971be74c84..48a45c90eb8f 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -40,6 +40,33 @@ use std::fmt; /// Index referring to an instruction in VCode. pub type InsnIndex = regalloc2::Inst; +/// Extension trait for `InsnIndex` to allow conversion to a +/// `BackwardsInsnIndex`. 
+trait ToBackwardsInsnIndex { + fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex; +} + +impl ToBackwardsInsnIndex for InsnIndex { + fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex { + BackwardsInsnIndex::new(num_insts - self.index() - 1) + } +} + +/// An index referring to an instruction in the VCode when it is backwards, +/// during VCode construction. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr( + feature = "enable-serde", + derive(::serde::Serialize, ::serde::Deserialize) +)] +pub struct BackwardsInsnIndex(InsnIndex); + +impl BackwardsInsnIndex { + pub fn new(i: usize) -> Self { + BackwardsInsnIndex(InsnIndex::new(i)) + } +} + /// Index referring to a basic block in VCode. pub type BlockIndex = regalloc2::Block; @@ -67,6 +94,14 @@ pub struct VCode { /// Lowered machine instructions in order corresponding to the original IR. insts: Vec, + /// A map from backwards instruction index to the user stack map for that + /// instruction. + /// + /// This is a sparse side table that only has entries for instructions that + /// are safepoints, and only for a subset of those that have an associated + /// user stack map. + user_stack_maps: FxHashMap, + /// Operands: pre-regalloc references to virtual registers with /// constraints, in one flattened array. This allows the regalloc /// to efficiently access all operands without requiring expensive @@ -251,7 +286,7 @@ impl VCodeBuilder { block_order: BlockLoweringOrder, constants: VCodeConstants, direction: VCodeBuildDirection, - ) -> VCodeBuilder { + ) -> Self { let vcode = VCode::new(sigs, abi, emit_info, block_order, constants); VCodeBuilder { @@ -565,6 +600,17 @@ impl VCodeBuilder { self.vcode } + + /// Add a user stack map for the associated instruction. 
+ pub fn add_user_stack_map( + &mut self, + inst: BackwardsInsnIndex, + entries: &[ir::UserStackMapEntry], + ) { + let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets()); + let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map); + debug_assert!(old_entry.is_none()); + } } /// Is this type a reference type? @@ -582,12 +628,13 @@ impl VCode { emit_info: I::Info, block_order: BlockLoweringOrder, constants: VCodeConstants, - ) -> VCode { + ) -> Self { let n_blocks = block_order.lowered_order().len(); VCode { sigs, vreg_types: vec![], insts: Vec::with_capacity(10 * n_blocks), + user_stack_maps: FxHashMap::default(), operands: Vec::with_capacity(30 * n_blocks), operand_ranges: Ranges::with_capacity(10 * n_blocks), clobbers: FxHashMap::default(), @@ -864,7 +911,7 @@ impl VCode { // If this is a safepoint, compute a stack map // and pass it to the emit state. - if self.insts[iix.index()].is_safepoint() { + let stack_map_disasm = if self.insts[iix.index()].is_safepoint() { let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![]; // Find the contiguous range of // (progpoint, allocation) safepoint slot @@ -888,13 +935,36 @@ impl VCode { let slot = alloc.as_stack().unwrap(); safepoint_slots.push(slot); } - if !safepoint_slots.is_empty() { - let stack_map = self - .abi - .spillslots_to_stack_map(&safepoint_slots[..], &state); - state.pre_safepoint(stack_map); - } - } + + let stack_map = if safepoint_slots.is_empty() { + None + } else { + Some( + self.abi + .spillslots_to_stack_map(&safepoint_slots[..], &state), + ) + }; + + let (user_stack_map, user_stack_map_disasm) = { + // The `user_stack_maps` is keyed by reverse + // instruction index, so we must flip the + // index. We can't put this into a helper method + // due to borrowck issues because parts of + // `self` are borrowed mutably elsewhere in this + // function. 
+ let index = iix.to_backwards_insn_index(self.num_insts()); + let user_stack_map = self.user_stack_maps.remove(&index); + let user_stack_map_disasm = + user_stack_map.as_ref().map(|m| format!(" ; {m:?}")); + (user_stack_map, user_stack_map_disasm) + }; + + state.pre_safepoint(stack_map, user_stack_map); + + user_stack_map_disasm + } else { + None + }; // If the instruction we are about to emit is // a return, place an epilogue at this point @@ -932,6 +1002,10 @@ impl VCode { &mut buffer, &mut state, ); + if let Some(stack_map_disasm) = stack_map_disasm { + disasm.push_str(&stack_map_disasm); + disasm.push('\n'); + } } } @@ -1014,6 +1088,12 @@ impl VCode { } } + debug_assert!( + self.user_stack_maps.is_empty(), + "any stack maps should have been consumed by instruction emission, still have: {:#?}", + self.user_stack_maps, + ); + // Do any optimizations on branches at tail of buffer, as if we had // bound one last label. buffer.optimize_branches(ctrl_plane); @@ -1224,6 +1304,12 @@ impl VCode { .map(|o| o.vreg()) .any(|vreg| self.facts[vreg.vreg()].is_some()) } + + /// Get the user stack map associated with the given forward instruction index. 
+ pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> { + let index = inst.to_backwards_insn_index(self.num_insts()); + self.user_stack_maps.get(&index) + } } impl std::ops::Index for VCode { @@ -1385,6 +1471,9 @@ impl fmt::Debug for VCode { } } } + if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) { + writeln!(f, " {user_stack_map:?}")?; + } } } diff --git a/cranelift/filetests/filetests/isa/aarch64/user_stack_maps.clif b/cranelift/filetests/filetests/isa/aarch64/user_stack_maps.clif new file mode 100644 index 000000000000..5c1d93b6235d --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/user_stack_maps.clif @@ -0,0 +1,221 @@ +test compile precise-output +set unwind_info=false +set enable_probestack=false +target aarch64 + +function %foo() system_v { + ss0 = explicit_slot 12, align = 4 + sig0 = (i32) system_v + fn0 = colocated u0:0 sig0 + +block0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + v2 = iconst.i32 2 + v3 = iconst.i32 3 + + stack_store v0, ss0 + stack_store v1, ss0+4 + stack_store v2, ss0+8 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4, i32 @ ss0+8] + + stack_store v1, ss0 + stack_store v2, ss0+4 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4] + + stack_store v2, ss0 + call fn0(v1), stack_map=[i32 @ ss0+0] + + call fn0(v2) + + return +} + +; VCode: +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x24, [sp, #-16]! +; stp x19, x22, [sp, #-16]! 
+; sub sp, sp, #16 +; block0: +; movz w9, #0 +; movz w8, #1 +; movz w0, #2 +; mov x10, sp +; str w9, [x10] +; mov x24, x9 +; add x9, sp, #4 +; str w8, [x9] +; mov x19, x8 +; add x10, sp, #8 +; str w0, [x10] +; mov x22, x0 +; mov x0, x24 +; bl 0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4, 8})] } +; mov x12, sp +; mov x0, x19 +; str w0, [x12] +; add x13, sp, #4 +; mov x0, x22 +; str w0, [x13] +; mov x22, x0 +; mov x0, x24 +; bl 0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4})] } +; mov x15, sp +; mov x0, x22 +; str w0, [x15] +; mov x0, x19 +; bl 0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0})] } +; mov x0, x22 +; bl 0 +; add sp, sp, #16 +; ldp x19, x22, [sp], #16 +; ldr x24, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; stp x29, x30, [sp, #-0x10]! +; mov x29, sp +; str x24, [sp, #-0x10]! +; stp x19, x22, [sp, #-0x10]! +; sub sp, sp, #0x10 +; block1: ; offset 0x14 +; mov w9, #0 +; mov w8, #1 +; mov w0, #2 +; mov x10, sp +; str w9, [x10] +; mov x24, x9 +; add x9, sp, #4 +; str w8, [x9] +; mov x19, x8 +; add x10, sp, #8 +; str w0, [x10] +; mov x22, x0 +; mov x0, x24 +; bl #0x48 ; reloc_external Call u0:0 0 +; mov x12, sp +; mov x0, x19 +; str w0, [x12] +; add x13, sp, #4 +; mov x0, x22 +; str w0, [x13] +; mov x22, x0 +; mov x0, x24 +; bl #0x6c ; reloc_external Call u0:0 0 +; mov x15, sp +; mov x0, x22 +; str w0, [x15] +; mov x0, x19 +; bl #0x80 ; reloc_external Call u0:0 0 +; mov x0, x22 +; bl #0x88 ; reloc_external Call u0:0 0 +; add sp, sp, #0x10 +; ldp x19, x22, [sp], #0x10 +; ldr x24, [sp], #0x10 +; ldp x29, x30, [sp], #0x10 +; ret + +function %different_types(i8, i16, i32, i64, f32, f64) -> i8, i16, i32, i64, f32, f64 { + ss0 = explicit_slot 1 + ss1 = explicit_slot 2, align = 2 + ss2 = explicit_slot 8, align = 4 + ss3 = explicit_slot 16, align = 8 + ss4 = explicit_slot 48, align = 16 + sig0 = () system_v + fn0 = colocated u0:0 sig0 + +block0(v0: i8, v1: i16, v2: i32, 
v3: i64, v4: f32, v5: f64): + stack_store v0, ss0 + stack_store v1, ss1 + stack_store v2, ss2 + stack_store v4, ss2+4 + stack_store v3, ss3 + stack_store v5, ss3+8 + call fn0(), stack_map=[i8 @ ss0+0, i16 @ ss1+0, i32 @ ss2+0, f32 @ ss2+4, i64 @ ss3+0, f64 @ ss3+8] + return v0, v1, v2, v3, v4, v5 +} + +; VCode: +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; stp x23, x27, [sp, #-16]! +; stp x20, x21, [sp, #-16]! +; sub sp, sp, #128 +; block0: +; mov x12, sp +; strb w0, [x12] +; mov x23, x0 +; add x13, sp, #8 +; strh w1, [x13] +; mov x20, x1 +; add x14, sp, #16 +; str w2, [x14] +; mov x21, x2 +; add x15, sp, #20 +; str s0, [x15] +; str q0, [sp, #96] +; add x0, sp, #24 +; str x3, [x0] +; mov x27, x3 +; add x1, sp, #32 +; str d1, [x1] +; str q1, [sp, #112] +; bl 0 +; ; UserStackMap { by_type: [(types::I8, CompoundBitSet {0}), (types::I16, CompoundBitSet {8}), (types::I32, CompoundBitSet {16}), (types::F32, CompoundBitSet {20}), (types::I64, CompoundBitSet {24}), (types::F64, CompoundBitSet {32})] } +; mov x0, x23 +; mov x1, x20 +; mov x2, x21 +; mov x3, x27 +; ldr q0, [sp, #96] +; ldr q1, [sp, #112] +; add sp, sp, #128 +; ldp x20, x21, [sp], #16 +; ldp x23, x27, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; stp x29, x30, [sp, #-0x10]! +; mov x29, sp +; stp x23, x27, [sp, #-0x10]! +; stp x20, x21, [sp, #-0x10]! 
+; sub sp, sp, #0x80 +; block1: ; offset 0x14 +; mov x12, sp +; strb w0, [x12] +; mov x23, x0 +; add x13, sp, #8 +; strh w1, [x13] +; mov x20, x1 +; add x14, sp, #0x10 +; str w2, [x14] +; mov x21, x2 +; add x15, sp, #0x14 +; str s0, [x15] +; stur q0, [sp, #0x60] +; add x0, sp, #0x18 +; str x3, [x0] +; mov x27, x3 +; add x1, sp, #0x20 +; str d1, [x1] +; stur q1, [sp, #0x70] +; bl #0x5c ; reloc_external Call u0:0 0 +; mov x0, x23 +; mov x1, x20 +; mov x2, x21 +; mov x3, x27 +; ldur q0, [sp, #0x60] +; ldur q1, [sp, #0x70] +; add sp, sp, #0x80 +; ldp x20, x21, [sp], #0x10 +; ldp x23, x27, [sp], #0x10 +; ldp x29, x30, [sp], #0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/user_stack_maps.clif b/cranelift/filetests/filetests/isa/riscv64/user_stack_maps.clif new file mode 100644 index 000000000000..b265508b9622 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/user_stack_maps.clif @@ -0,0 +1,250 @@ +test compile precise-output +set unwind_info=false +set enable_probestack=false +target riscv64 + +function %foo() system_v { + ss0 = explicit_slot 12, align = 4 + sig0 = (i32) system_v + fn0 = colocated u0:0 sig0 + +block0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + v2 = iconst.i32 2 + v3 = iconst.i32 3 + + stack_store v0, ss0 + stack_store v1, ss0+4 + stack_store v2, ss0+8 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4, i32 @ ss0+8] + + stack_store v1, ss0 + stack_store v2, ss0+4 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4] + + stack_store v2, ss0 + call fn0(v1), stack_map=[i32 @ ss0+0] + + call fn0(v2) + + return +} + +; VCode: +; addi sp,sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; addi sp,sp,-48 +; sd s1,40(sp) +; sd s2,32(sp) +; sd s3,24(sp) +; block0: +; li a0,0 +; mv s3,a0 +; li a0,1 +; li a2,2 +; sw zero,0(slot) +; sw a0,4(slot) +; mv s1,a0 +; sw a2,8(slot) +; mv s2,a2 +; mv a0,s3 +; call userextname0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4, 8})] } +; mv a0,s1 +; sw a0,0(slot) +; mv a0,s2 +; 
sw a0,4(slot) +; mv s2,a0 +; mv a0,s3 +; call userextname0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4})] } +; mv a0,s2 +; sw a0,0(slot) +; mv a0,s1 +; call userextname0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0})] } +; mv a0,s2 +; call userextname0 +; ld s1,40(sp) +; ld s2,32(sp) +; ld s3,24(sp) +; addi sp,sp,48 +; ld ra,8(sp) +; ld fp,0(sp) +; addi sp,sp,16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; addi sp, sp, -0x30 +; sd s1, 0x28(sp) +; sd s2, 0x20(sp) +; sd s3, 0x18(sp) +; block1: ; offset 0x20 +; mv a0, zero +; mv s3, a0 +; addi a0, zero, 1 +; addi a2, zero, 2 +; sw zero, 0(sp) +; sw a0, 4(sp) +; mv s1, a0 +; sw a2, 8(sp) +; mv s2, a2 +; mv a0, s3 +; auipc ra, 0 ; reloc_external RiscvCallPlt u0:0 0 +; jalr ra +; mv a0, s1 +; sw a0, 0(sp) +; mv a0, s2 +; sw a0, 4(sp) +; mv s2, a0 +; mv a0, s3 +; auipc ra, 0 ; reloc_external RiscvCallPlt u0:0 0 +; jalr ra +; mv a0, s2 +; sw a0, 0(sp) +; mv a0, s1 +; auipc ra, 0 ; reloc_external RiscvCallPlt u0:0 0 +; jalr ra +; mv a0, s2 +; auipc ra, 0 ; reloc_external RiscvCallPlt u0:0 0 +; jalr ra +; ld s1, 0x28(sp) +; ld s2, 0x20(sp) +; ld s3, 0x18(sp) +; addi sp, sp, 0x30 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %different_types(i8, i16, i32, i64, f32, f64) -> i8, i16, i32, i64, f32, f64 { + ss0 = explicit_slot 1 + ss1 = explicit_slot 2, align = 2 + ss2 = explicit_slot 8, align = 4 + ss3 = explicit_slot 16, align = 8 + ss4 = explicit_slot 48, align = 16 + sig0 = () system_v + fn0 = colocated u0:0 sig0 + +block0(v0: i8, v1: i16, v2: i32, v3: i64, v4: f32, v5: f64): + stack_store v0, ss0 + stack_store v1, ss1 + stack_store v2, ss2 + stack_store v4, ss2+4 + stack_store v3, ss3 + stack_store v5, ss3+8 + call fn0(), stack_map=[i8 @ ss0+0, i16 @ ss1+0, i32 @ ss2+0, f32 @ ss2+4, i64 @ ss3+0, f64 @ ss3+8] + return v0, v1, v2, v3, v4, v5 +} + +; VCode: +; addi sp,sp,-16 +; sd ra,8(sp) 
+; sd fp,0(sp) +; mv fp,sp +; addi sp,sp,-160 +; sd s1,152(sp) +; sd s2,144(sp) +; sd s6,136(sp) +; sd s8,128(sp) +; sd s10,120(sp) +; fsd fs0,112(sp) +; fsd fs2,104(sp) +; block0: +; mv s10,a4 +; sb a0,0(slot) +; mv s8,a0 +; sh a1,8(slot) +; mv s6,a1 +; sw a2,16(slot) +; mv s2,a2 +; fsw fa0,20(slot) +; fmv.d fs2,fa0 +; sd a3,24(slot) +; mv s1,a3 +; fsd fa1,32(slot) +; fmv.d fs0,fa1 +; call userextname0 +; ; UserStackMap { by_type: [(types::I8, CompoundBitSet {0}), (types::I16, CompoundBitSet {8}), (types::I32, CompoundBitSet {16}), (types::F32, CompoundBitSet {20}), (types::I64, CompoundBitSet {24}), (types::F64, CompoundBitSet {32})] } +; mv a2,s2 +; mv a4,s10 +; sw a2,0(a4) +; mv a3,s1 +; sd a3,8(a4) +; mv a0,s8 +; mv a1,s6 +; fmv.d fa0,fs2 +; fmv.d fa1,fs0 +; ld s1,152(sp) +; ld s2,144(sp) +; ld s6,136(sp) +; ld s8,128(sp) +; ld s10,120(sp) +; fld fs0,112(sp) +; fld fs2,104(sp) +; addi sp,sp,160 +; ld ra,8(sp) +; ld fp,0(sp) +; addi sp,sp,16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; addi sp, sp, -0xa0 +; sd s1, 0x98(sp) +; sd s2, 0x90(sp) +; sd s6, 0x88(sp) +; sd s8, 0x80(sp) +; sd s10, 0x78(sp) +; fsd fs0, 0x70(sp) +; fsd fs2, 0x68(sp) +; block1: ; offset 0x30 +; mv s10, a4 +; sb a0, 0(sp) +; mv s8, a0 +; sh a1, 8(sp) +; mv s6, a1 +; sw a2, 0x10(sp) +; mv s2, a2 +; fsw fa0, 0x14(sp) +; fmv.d fs2, fa0 +; sd a3, 0x18(sp) +; mv s1, a3 +; fsd fa1, 0x20(sp) +; fmv.d fs0, fa1 +; auipc ra, 0 ; reloc_external RiscvCallPlt u0:0 0 +; jalr ra +; mv a2, s2 +; mv a4, s10 +; sw a2, 0(a4) +; mv a3, s1 +; sd a3, 8(a4) +; mv a0, s8 +; mv a1, s6 +; fmv.d fa0, fs2 +; fmv.d fa1, fs0 +; ld s1, 0x98(sp) +; ld s2, 0x90(sp) +; ld s6, 0x88(sp) +; ld s8, 0x80(sp) +; ld s10, 0x78(sp) +; fld fs0, 0x70(sp) +; fld fs2, 0x68(sp) +; addi sp, sp, 0xa0 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/s390x/user_stack_maps.clif 
b/cranelift/filetests/filetests/isa/s390x/user_stack_maps.clif new file mode 100644 index 000000000000..911a93f61a97 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/user_stack_maps.clif @@ -0,0 +1,221 @@ +test compile precise-output +set unwind_info=false +set enable_probestack=false +target s390x + +function %foo() system_v { + ss0 = explicit_slot 12, align = 4 + sig0 = (i32) system_v + fn0 = colocated u0:0 sig0 + +block0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + v2 = iconst.i32 2 + v3 = iconst.i32 3 + + stack_store v0, ss0 + stack_store v1, ss0+4 + stack_store v2, ss0+8 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4, i32 @ ss0+8] + + stack_store v1, ss0 + stack_store v2, ss0+4 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4] + + stack_store v2, ss0 + call fn0(v1), stack_map=[i32 @ ss0+0] + + call fn0(v2) + + return +} + +; VCode: +; stmg %r7, %r15, 56(%r15) +; aghi %r15, -176 +; block0: +; lhi %r2, 0 +; lgr %r11, %r2 +; lhi %r2, 1 +; lgr %r7, %r2 +; lhi %r2, 2 +; lgr %r9, %r2 +; la %r2, 160(%r15) +; mvhi 0(%r2), 0 +; la %r3, 164(%r15) +; mvhi 0(%r3), 1 +; la %r4, 168(%r15) +; mvhi 0(%r4), 2 +; lgr %r2, %r11 +; brasl %r14, userextname0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4, 8})] } +; la %r2, 160(%r15) +; mvhi 0(%r2), 1 +; la %r3, 164(%r15) +; mvhi 0(%r3), 2 +; lgr %r2, %r11 +; brasl %r14, userextname0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4})] } +; la %r5, 160(%r15) +; mvhi 0(%r5), 2 +; lgr %r2, %r7 +; brasl %r14, userextname0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0})] } +; lgr %r2, %r9 +; brasl %r14, userextname0 +; lmg %r7, %r15, 232(%r15) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; stmg %r7, %r15, 0x38(%r15) +; aghi %r15, -0xb0 +; block1: ; offset 0xa +; lhi %r2, 0 +; lgr %r11, %r2 +; lhi %r2, 1 +; lgr %r7, %r2 +; lhi %r2, 2 +; lgr %r9, %r2 +; la %r2, 0xa0(%r15) +; mvhi 0(%r2), 0 +; la %r3, 0xa4(%r15) +; mvhi 0(%r3), 1 +; la %r4, 0xa8(%r15) +; mvhi 0(%r4), 
2 +; lgr %r2, %r11 +; brasl %r14, 0x44 ; reloc_external PLTRel32Dbl u0:0 2 +; la %r2, 0xa0(%r15) +; mvhi 0(%r2), 1 +; la %r3, 0xa4(%r15) +; mvhi 0(%r3), 2 +; lgr %r2, %r11 +; brasl %r14, 0x62 ; reloc_external PLTRel32Dbl u0:0 2 +; la %r5, 0xa0(%r15) +; mvhi 0(%r5), 2 +; lgr %r2, %r7 +; brasl %r14, 0x76 ; reloc_external PLTRel32Dbl u0:0 2 +; lgr %r2, %r9 +; brasl %r14, 0x80 ; reloc_external PLTRel32Dbl u0:0 2 +; lmg %r7, %r15, 0xe8(%r15) +; br %r14 + +function %different_types(i8, i16, i32, i64, f32, f64) -> i8, i16, i32, i64, f32, f64 { + ss0 = explicit_slot 1 + ss1 = explicit_slot 2, align = 2 + ss2 = explicit_slot 8, align = 4 + ss3 = explicit_slot 16, align = 8 + ss4 = explicit_slot 48, align = 16 + sig0 = () system_v + fn0 = colocated u0:0 sig0 + +block0(v0: i8, v1: i16, v2: i32, v3: i64, v4: f32, v5: f64): + stack_store v0, ss0 + stack_store v1, ss1 + stack_store v2, ss2 + stack_store v4, ss2+4 + stack_store v3, ss3 + stack_store v5, ss3+8 + call fn0(), stack_map=[i8 @ ss0+0, i16 @ ss1+0, i32 @ ss2+0, f32 @ ss2+4, i64 @ ss3+0, f64 @ ss3+8] + return v0, v1, v2, v3, v4, v5 +} + +; VCode: +; stmg %r6, %r15, 48(%r15) +; aghi %r15, -352 +; std %f8, 288(%r15) +; std %f9, 296(%r15) +; std %f10, 304(%r15) +; std %f11, 312(%r15) +; std %f12, 320(%r15) +; std %f13, 328(%r15) +; std %f14, 336(%r15) +; std %f15, 344(%r15) +; block0: +; la %r9, 160(%r15) +; stc %r2, 0(%r9) +; lgr %r11, %r2 +; la %r2, 168(%r15) +; sth %r3, 0(%r2) +; lgr %r9, %r3 +; la %r2, 176(%r15) +; st %r4, 0(%r2) +; lgr %r7, %r4 +; la %r2, 180(%r15) +; ste %f0, 0(%r2) +; vst %v0, 256(%r15) +; la %r2, 184(%r15) +; stg %r5, 0(%r2) +; lgr %r6, %r5 +; la %r3, 192(%r15) +; std %f2, 0(%r3) +; vst %v2, 272(%r15) +; brasl %r14, userextname0 +; ; UserStackMap { by_type: [(types::I8, CompoundBitSet {0}), (types::I16, CompoundBitSet {8}), (types::I32, CompoundBitSet {16}), (types::F32, CompoundBitSet {20}), (types::I64, CompoundBitSet {24}), (types::F64, CompoundBitSet {32})] } +; lgr %r2, %r11 +; lgr %r3, %r9 +; 
lgr %r4, %r7 +; lgr %r5, %r6 +; vl %v0, 256(%r15) +; vl %v2, 272(%r15) +; ld %f8, 288(%r15) +; ld %f9, 296(%r15) +; ld %f10, 304(%r15) +; ld %f11, 312(%r15) +; ld %f12, 320(%r15) +; ld %f13, 328(%r15) +; ld %f14, 336(%r15) +; ld %f15, 344(%r15) +; lmg %r6, %r15, 400(%r15) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; stmg %r6, %r15, 0x30(%r15) +; aghi %r15, -0x160 +; std %f8, 0x120(%r15) +; std %f9, 0x128(%r15) +; std %f10, 0x130(%r15) +; std %f11, 0x138(%r15) +; std %f12, 0x140(%r15) +; std %f13, 0x148(%r15) +; std %f14, 0x150(%r15) +; std %f15, 0x158(%r15) +; block1: ; offset 0x2a +; la %r9, 0xa0(%r15) +; stc %r2, 0(%r9) +; lgr %r11, %r2 +; la %r2, 0xa8(%r15) +; sth %r3, 0(%r2) +; lgr %r9, %r3 +; la %r2, 0xb0(%r15) +; st %r4, 0(%r2) +; lgr %r7, %r4 +; la %r2, 0xb4(%r15) +; ste %f0, 0(%r2) +; vst %v0, 0x100(%r15) +; la %r2, 0xb8(%r15) +; stg %r5, 0(%r2) +; lgr %r6, %r5 +; la %r3, 0xc0(%r15) +; std %f2, 0(%r3) +; vst %v2, 0x110(%r15) +; brasl %r14, 0x78 ; reloc_external PLTRel32Dbl u0:0 2 +; lgr %r2, %r11 +; lgr %r3, %r9 +; lgr %r4, %r7 +; lgr %r5, %r6 +; vl %v0, 0x100(%r15) +; vl %v2, 0x110(%r15) +; ld %f8, 0x120(%r15) +; ld %f9, 0x128(%r15) +; ld %f10, 0x130(%r15) +; ld %f11, 0x138(%r15) +; ld %f12, 0x140(%r15) +; ld %f13, 0x148(%r15) +; ld %f14, 0x150(%r15) +; ld %f15, 0x158(%r15) +; lmg %r6, %r15, 0x190(%r15) +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/x64/user_stack_maps.clif b/cranelift/filetests/filetests/isa/x64/user_stack_maps.clif new file mode 100644 index 000000000000..bddc779df2c7 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/user_stack_maps.clif @@ -0,0 +1,241 @@ +test compile precise-output +set unwind_info=false +set enable_probestack=false +target x86_64 + +function %foo() system_v { + ss0 = explicit_slot 12, align = 4 + sig0 = (i32) system_v + fn0 = colocated u0:0 sig0 + +block0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + v2 = iconst.i32 2 + v3 = iconst.i32 3 + + stack_store v0, ss0 + stack_store v1, ss0+4 + 
stack_store v2, ss0+8 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4, i32 @ ss0+8] + + stack_store v1, ss0 + stack_store v2, ss0+4 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4] + + stack_store v2, ss0 + call fn0(v1), stack_map=[i32 @ ss0+0] + + call fn0(v2) + + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $48, %rsp +; movq %rbx, 16(%rsp) +; movq %r13, 24(%rsp) +; movq %r15, 32(%rsp) +; block0: +; xorl %edi, %edi, %edi +; movq %rdi, %r15 +; movl $1, %edi +; movq %rdi, %rbx +; movl $2, %edi +; movq %rdi, %r13 +; lea rsp(0 + virtual offset), %r11 +; movl $0, 0(%r11) +; lea rsp(4 + virtual offset), %rsi +; movl $1, 0(%rsi) +; lea rsp(8 + virtual offset), %rdi +; movl $2, 0(%rdi) +; movq %r15, %rdi +; call User(userextname0) +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4, 8})] } +; lea rsp(0 + virtual offset), %rcx +; movl $1, 0(%rcx) +; lea rsp(4 + virtual offset), %rdx +; movl $2, 0(%rdx) +; movq %r15, %rdi +; call User(userextname0) +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4})] } +; lea rsp(0 + virtual offset), %r9 +; movl $2, 0(%r9) +; movq %rbx, %rdi +; call User(userextname0) +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0})] } +; movq %r13, %rdi +; call User(userextname0) +; movq 16(%rsp), %rbx +; movq 24(%rsp), %r13 +; movq 32(%rsp), %r15 +; addq %rsp, $48, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x30, %rsp +; movq %rbx, 0x10(%rsp) +; movq %r13, 0x18(%rsp) +; movq %r15, 0x20(%rsp) +; block1: ; offset 0x17 +; xorl %edi, %edi +; movq %rdi, %r15 +; movl $1, %edi +; movq %rdi, %rbx +; movl $2, %edi +; movq %rdi, %r13 +; leaq (%rsp), %r11 +; movl $0, (%r11) +; leaq 4(%rsp), %rsi +; movl $1, (%rsi) +; leaq 8(%rsp), %rdi +; movl $2, (%rdi) +; movq %r15, %rdi +; callq 0x55 ; reloc_external CallPCRel4 u0:0 -4 +; leaq (%rsp), %rcx +; movl $1, (%rcx) +; leaq 4(%rsp), %rdx +; movl $2, (%rdx) +; movq 
%r15, %rdi +; callq 0x72 ; reloc_external CallPCRel4 u0:0 -4 +; leaq (%rsp), %r9 +; movl $2, (%r9) +; movq %rbx, %rdi +; callq 0x85 ; reloc_external CallPCRel4 u0:0 -4 +; movq %r13, %rdi +; callq 0x8d ; reloc_external CallPCRel4 u0:0 -4 +; movq 0x10(%rsp), %rbx +; movq 0x18(%rsp), %r13 +; movq 0x20(%rsp), %r15 +; addq $0x30, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + +function %different_types(i8, i16, i32, i64, f32, f64) -> i8, i16, i32, i64, f32, f64 { + ss0 = explicit_slot 1 + ss1 = explicit_slot 2, align = 2 + ss2 = explicit_slot 8, align = 4 + ss3 = explicit_slot 16, align = 8 + ss4 = explicit_slot 48, align = 16 + sig0 = () system_v + fn0 = colocated u0:0 sig0 + +block0(v0: i8, v1: i16, v2: i32, v3: i64, v4: f32, v5: f64): + stack_store v0, ss0 + stack_store v1, ss1 + stack_store v2, ss2 + stack_store v4, ss2+4 + stack_store v3, ss3 + stack_store v5, ss3+8 + call fn0(), stack_map=[i8 @ ss0+0, i16 @ ss1+0, i32 @ ss2+0, f32 @ ss2+4, i64 @ ss3+0, f64 @ ss3+8] + return v0, v1, v2, v3, v4, v5 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $176, %rsp +; movq %rbx, 128(%rsp) +; movq %r12, 136(%rsp) +; movq %r13, 144(%rsp) +; movq %r14, 152(%rsp) +; movq %r15, 160(%rsp) +; block0: +; movq %r8, %r13 +; lea rsp(0 + virtual offset), %r8 +; movb %dil, 0(%r8) +; movq %rdi, %rbx +; lea rsp(8 + virtual offset), %r8 +; movw %si, 0(%r8) +; movq %rsi, %r14 +; lea rsp(16 + virtual offset), %r9 +; movl %edx, 0(%r9) +; movq %rdx, %r12 +; lea rsp(20 + virtual offset), %r10 +; movss %xmm0, 0(%r10) +; movdqu %xmm0, rsp(96 + virtual offset) +; lea rsp(24 + virtual offset), %r11 +; movq %rcx, 0(%r11) +; movq %rcx, %r15 +; lea rsp(32 + virtual offset), %rsi +; movsd %xmm1, 0(%rsi) +; movdqu %xmm1, rsp(112 + virtual offset) +; call User(userextname0) +; ; UserStackMap { by_type: [(types::I8, CompoundBitSet {0}), (types::I16, CompoundBitSet {8}), (types::I32, CompoundBitSet {16}), (types::F32, CompoundBitSet {20}), (types::I64, CompoundBitSet {24}), (types::F64, 
CompoundBitSet {32})] } +; movq %r12, %rdx +; movq %r13, %r8 +; movl %edx, 0(%r8) +; movq %r15, %rcx +; movq %rcx, 8(%r8) +; movq %rbx, %rax +; movq %r14, %rdx +; movdqu rsp(96 + virtual offset), %xmm0 +; movdqu rsp(112 + virtual offset), %xmm1 +; movq 128(%rsp), %rbx +; movq 136(%rsp), %r12 +; movq 144(%rsp), %r13 +; movq 152(%rsp), %r14 +; movq 160(%rsp), %r15 +; addq %rsp, $176, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0xb0, %rsp +; movq %rbx, 0x80(%rsp) +; movq %r12, 0x88(%rsp) +; movq %r13, 0x90(%rsp) +; movq %r14, 0x98(%rsp) +; movq %r15, 0xa0(%rsp) +; block1: ; offset 0x33 +; movq %r8, %r13 +; leaq (%rsp), %r8 +; movb %dil, (%r8) +; movq %rdi, %rbx +; leaq 8(%rsp), %r8 +; movw %si, (%r8) +; movq %rsi, %r14 +; leaq 0x10(%rsp), %r9 +; movl %edx, (%r9) +; movq %rdx, %r12 +; leaq 0x14(%rsp), %r10 +; movss %xmm0, (%r10) +; movdqu %xmm0, 0x60(%rsp) +; leaq 0x18(%rsp), %r11 +; movq %rcx, (%r11) +; movq %rcx, %r15 +; leaq 0x20(%rsp), %rsi +; movsd %xmm1, (%rsi) +; movdqu %xmm1, 0x70(%rsp) +; callq 0x86 ; reloc_external CallPCRel4 u0:0 -4 +; movq %r12, %rdx +; movq %r13, %r8 +; movl %edx, (%r8) +; movq %r15, %rcx +; movq %rcx, 8(%r8) +; movq %rbx, %rax +; movq %r14, %rdx +; movdqu 0x60(%rsp), %xmm0 +; movdqu 0x70(%rsp), %xmm1 +; movq 0x80(%rsp), %rbx +; movq 0x88(%rsp), %r12 +; movq 0x90(%rsp), %r13 +; movq 0x98(%rsp), %r14 +; movq 0xa0(%rsp), %r15 +; addq $0xb0, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + From 0f4ae88a7ade1fc188cbd7e79bf76625e5aa853c Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Thu, 27 Jun 2024 12:14:03 -0700 Subject: [PATCH 09/10] wasi-nn: use resources (#8873) * wasi-nn: use resources Recent discussion in the wasi-nn proposal (see [wasi-nn#59], e.g.) has concluded that the right approach for representing wasi-nn "things" (tensors, graph, etc.) is with a component model _resource_. 
This sweeping change brings Wasmtime's implementation in line with that decision. Initially I had structured this PR to remove all of the WITX-based implementation (#8530). But, after consulting in a Zulip [thread] on what other WASI proposals aim to do, this PR pivoted to support _both_ the WITX-based and WIT-based ABIs (e.g., preview1 era versus preview2, component model era). What is clear is that the WITX-based specification will remain "frozen in time" while the WIT-based implementation moves forward. What that means for this PR is a "split world" paradigm. In many places, we have to distinguish between the `wit` and `witx` versions of the same thing. This change isn't the end state yet: it's a big step forward towards bringing Wasmtime back in line with the WIT spec but, despite my best efforts, doesn't fully fix all the TODOs left behind over several years of development. I have, however, taken the liberty to refactor and fix various parts as I came across them (e.g., the ONNX backend). I plan to continue working on this in future PRs to figure out a good error paradigm (the current one is too wordy) and device residence. [wasi-nn#59]: https://github.com/WebAssembly/wasi-nn/pull/59 [thread]: https://bytecodealliance.zulipchat.com/#narrow/stream/219900-wasi/topic/wasi-nn's.20preview1.20vs.20preview2.20timeline prtest:full * vet: audit `ort`-related crate updates * Simplify `WasiNnView` With @alexcrichton's help, this change removes the `trait WasiNnView` and `struct WasiNnImpl` wrapping that the WIT-based implementation used for accessing the host context. Instead, `WasiNnView` is now a `struct` containing the mutable references it needs to make things work. This unwraps one complex layer of abstraction, though it does have the downside that it complicates CLI code to split borrows of `Host`. 
* Temporarily disable WIT check * Refactor errors to use `trappable_error_type` This change simplifies the return types of the host implementations of the WIT-based wasi-nn. There is more work to be done with errors, e.g., to catch up with the upstream decision to return errors as resources. But this is better than the previous mess. --- Cargo.lock | 50 ++- ci/vendor-wit.sh | 6 +- crates/bench-api/src/lib.rs | 4 +- crates/test-programs/artifacts/build.rs | 5 +- .../src/bin/nn_image_classification_winml.rs | 16 - ...rs => nn_wit_image_classification_onnx.rs} | 14 +- .../nn_wit_image_classification_openvino.rs | 25 ++ ...wit_image_classification_openvino_named.rs | 17 + ...n_wit_image_classification_winml_named.rs} | 9 +- .../bin/nn_witx_image_classification_onnx.rs | 22 ++ ... nn_witx_image_classification_openvino.rs} | 12 +- ...itx_image_classification_openvino_named.rs | 17 + ...n_witx_image_classification_winml_named.rs | 18 + crates/test-programs/src/nn.rs | 180 ++++++++-- crates/wasi-nn/Cargo.toml | 17 +- crates/wasi-nn/src/backend/mod.rs | 31 +- crates/wasi-nn/src/backend/onnx.rs | 338 ++++++++++++++++++ crates/wasi-nn/src/backend/onnxruntime.rs | 149 -------- crates/wasi-nn/src/backend/openvino.rs | 36 +- crates/wasi-nn/src/backend/winml.rs | 146 +++++--- crates/wasi-nn/src/ctx.rs | 146 -------- crates/wasi-nn/src/lib.rs | 51 ++- crates/wasi-nn/src/registry/in_memory.rs | 5 +- crates/wasi-nn/src/registry/mod.rs | 1 + crates/wasi-nn/src/wit.rs | 330 +++++++++++++---- crates/wasi-nn/src/witx.rs | 122 ++++++- crates/wasi-nn/tests/check/mod.rs | 8 +- crates/wasi-nn/tests/exec/mod.rs | 54 +-- crates/wasi-nn/tests/exec/wit.rs | 73 ++++ crates/wasi-nn/tests/exec/witx.rs | 52 +++ .../tests/fixtures/{readme.md => README.md} | 0 crates/wasi-nn/tests/test-programs.rs | 200 +++++++---- crates/wasi-nn/wit/wasi-nn.wit | 57 +-- src/commands/run.rs | 63 ++-- src/commands/serve.rs | 22 +- supply-chain/audits.toml | 18 + 36 files changed, 1571 insertions(+), 743 deletions(-) 
delete mode 100644 crates/test-programs/src/bin/nn_image_classification_winml.rs rename crates/test-programs/src/bin/{nn_image_classification_onnx.rs => nn_wit_image_classification_onnx.rs} (67%) create mode 100644 crates/test-programs/src/bin/nn_wit_image_classification_openvino.rs create mode 100644 crates/test-programs/src/bin/nn_wit_image_classification_openvino_named.rs rename crates/test-programs/src/bin/{nn_image_classification_named.rs => nn_wit_image_classification_winml_named.rs} (53%) create mode 100644 crates/test-programs/src/bin/nn_witx_image_classification_onnx.rs rename crates/test-programs/src/bin/{nn_image_classification.rs => nn_witx_image_classification_openvino.rs} (66%) create mode 100644 crates/test-programs/src/bin/nn_witx_image_classification_openvino_named.rs create mode 100644 crates/test-programs/src/bin/nn_witx_image_classification_winml_named.rs create mode 100644 crates/wasi-nn/src/backend/onnx.rs delete mode 100644 crates/wasi-nn/src/backend/onnxruntime.rs delete mode 100644 crates/wasi-nn/src/ctx.rs create mode 100644 crates/wasi-nn/tests/exec/wit.rs create mode 100644 crates/wasi-nn/tests/exec/witx.rs rename crates/wasi-nn/tests/fixtures/{readme.md => README.md} (100%) diff --git a/Cargo.lock b/Cargo.lock index eccee7eb5785..47e695e49d79 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -371,15 +371,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" -[[package]] -name = "castaway" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a17ed5635fc8536268e5d4de1e22e81ac34419e5f052d4d51f4e01dcc263fcc" -dependencies = [ - "rustversion", -] - [[package]] name = "cc" version = "1.0.83" @@ -486,19 +477,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" -[[package]] -name = 
"compact_str" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f86b9c4c00838774a6d902ef931eff7470720c51d90c2e32cfe15dc304737b3f" -dependencies = [ - "castaway", - "cfg-if", - "itoa", - "ryu", - "static_assertions", -] - [[package]] name = "component-fuzz-util" version = "0.0.0" @@ -1908,9 +1886,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.15" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] @@ -2028,21 +2006,22 @@ dependencies = [ [[package]] name = "ort" -version = "2.0.0-rc.0" +version = "2.0.0-rc.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8e5caf4eb2ead4bc137c3ff4e347940e3e556ceb11a4180627f04b63d7342dd" +checksum = "0bc80894094c6a875bfac64415ed456fa661081a278a035e22be661305c87e14" dependencies = [ - "compact_str", + "js-sys", "ort-sys", "thiserror", "tracing", + "web-sys", ] [[package]] name = "ort-sys" -version = "2.0.0-rc.0" +version = "2.0.0-rc.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f48b5623df2187e0db543ecb2032a6a999081086b7ffddd318000c00b23ace46" +checksum = "b3d9c1373fc813d3f024d394f621f4c6dde0734c79b1c17113c3bb5bf0084bbe" dependencies = [ "flate2", "sha2", @@ -3936,9 +3915,10 @@ dependencies = [ "test-programs-artifacts", "thiserror", "tracing", + "tracing-subscriber", "walkdir", - "wasi-common", "wasmtime", + "wasmtime-wasi", "wiggle", "windows", ] @@ -4024,6 +4004,16 @@ dependencies = [ "wast 211.0.1", ] +[[package]] +name = "web-sys" +version = "0.3.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b17e741662c70c8bd24ac5c5b18de314a2c26c32bf8346ee1e6f53de919c283" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "webpki-roots" 
version = "0.26.1" diff --git a/ci/vendor-wit.sh b/ci/vendor-wit.sh index 30c1cbb9283c..53ce9ad80cb2 100755 --- a/ci/vendor-wit.sh +++ b/ci/vendor-wit.sh @@ -36,5 +36,9 @@ cp -r $dst crates/wasi-http/wit # slightly different than above. repo=https://raw.githubusercontent.com/WebAssembly/wasi-nn revision=e2310b -curl -L $repo/$revision/wit/wasi-nn.wit -o crates/wasi-nn/wit/wasi-nn.wit curl -L $repo/$revision/wasi-nn.witx -o crates/wasi-nn/witx/wasi-nn.witx +# TODO: the in-tree `wasi-nn` implementation does not yet fully support the +# latest WIT specification on `main`. To create a baseline for moving forward, +# the in-tree WIT incorporates some but not all of the upstream changes. This +# TODO can be removed once the implementation catches up with the spec. +# curl -L $repo/$revision/wit/wasi-nn.wit -o crates/wasi-nn/wit/wasi-nn.wit diff --git a/crates/bench-api/src/lib.rs b/crates/bench-api/src/lib.rs index 166db3dc2fe4..9b89faddcbda 100644 --- a/crates/bench-api/src/lib.rs +++ b/crates/bench-api/src/lib.rs @@ -418,7 +418,7 @@ struct BenchState { struct HostState { wasi: WasiCtx, #[cfg(feature = "wasi-nn")] - wasi_nn: wasmtime_wasi_nn::WasiNnCtx, + wasi_nn: wasmtime_wasi_nn::witx::WasiNnCtx, } impl BenchState { @@ -509,7 +509,7 @@ impl BenchState { #[cfg(feature = "wasi-nn")] wasi_nn: { let (backends, registry) = wasmtime_wasi_nn::preload(&[])?; - wasmtime_wasi_nn::WasiNnCtx::new(backends, registry) + wasmtime_wasi_nn::witx::WasiNnCtx::new(backends, registry) }, }; diff --git a/crates/test-programs/artifacts/build.rs b/crates/test-programs/artifacts/build.rs index 20209eaf6de6..7fd653f9b72a 100644 --- a/crates/test-programs/artifacts/build.rs +++ b/crates/test-programs/artifacts/build.rs @@ -90,7 +90,10 @@ fn build_and_generate_tests() { } // Generate a component from each test. 
- if kind == "nn" || target == "dwarf_imported_memory" || target == "dwarf_shared_memory" { + if target == "dwarf_imported_memory" + || target == "dwarf_shared_memory" + || target.starts_with("nn_witx") + { continue; } let adapter = match target.as_str() { diff --git a/crates/test-programs/src/bin/nn_image_classification_winml.rs b/crates/test-programs/src/bin/nn_image_classification_winml.rs deleted file mode 100644 index 0dc7e8843525..000000000000 --- a/crates/test-programs/src/bin/nn_image_classification_winml.rs +++ /dev/null @@ -1,16 +0,0 @@ -use anyhow::{Context, Result}; -use std::fs; -use test_programs::nn::{classify, sort_results}; -use wasi_nn::{ExecutionTarget, GraphBuilder, GraphEncoding}; - -pub fn main() -> Result<()> { - let graph = GraphBuilder::new(GraphEncoding::Onnx, ExecutionTarget::CPU) - .build_from_cache("mobilenet")?; - let tensor = fs::read("fixture/kitten.rgb") - .context("the tensor file to be mapped to the fixture directory")?; - let results = classify(graph, tensor)?; - let top_five = &sort_results(&results)[..5]; - println!("found results, sorted top 5: {:?}", top_five); - assert_eq!(top_five[0].class_id(), 284); - Ok(()) -} diff --git a/crates/test-programs/src/bin/nn_image_classification_onnx.rs b/crates/test-programs/src/bin/nn_wit_image_classification_onnx.rs similarity index 67% rename from crates/test-programs/src/bin/nn_image_classification_onnx.rs rename to crates/test-programs/src/bin/nn_wit_image_classification_onnx.rs index abb77d0e7339..5900e664afde 100644 --- a/crates/test-programs/src/bin/nn_image_classification_onnx.rs +++ b/crates/test-programs/src/bin/nn_wit_image_classification_onnx.rs @@ -1,18 +1,20 @@ use anyhow::{Context, Result}; use std::fs; -use test_programs::nn::{classify, sort_results}; -use wasi_nn::{ExecutionTarget, GraphBuilder, GraphEncoding}; +use test_programs::nn::{sort_results, wit}; pub fn main() -> Result<()> { let model = fs::read("fixture/model.onnx") .context("the model file to be mapped to the 
fixture directory")?; - let graph = - GraphBuilder::new(GraphEncoding::Onnx, ExecutionTarget::CPU).build_from_bytes([&model])?; + let graph = wit::load( + &[model], + wit::GraphEncoding::Onnx, + wit::ExecutionTarget::Cpu, + )?; let tensor = fs::read("fixture/000000062808.rgb") .context("the tensor file to be mapped to the fixture directory")?; - let results = classify(graph, tensor)?; + let results = wit::classify(graph, ("input", tensor), "output")?; let top_five = &sort_results(&results)[..5]; - // 963 is meat loaf, meatloaf. + // 963 is "meat loaf, meatloaf." // https://github.com/onnx/models/blob/bec48b6a70e5e9042c0badbaafefe4454e072d08/validated/vision/classification/synset.txt#L963 assert_eq!(top_five[0].class_id(), 963); println!("found results, sorted top 5: {:?}", top_five); diff --git a/crates/test-programs/src/bin/nn_wit_image_classification_openvino.rs b/crates/test-programs/src/bin/nn_wit_image_classification_openvino.rs new file mode 100644 index 000000000000..52bb6eb5967c --- /dev/null +++ b/crates/test-programs/src/bin/nn_wit_image_classification_openvino.rs @@ -0,0 +1,25 @@ +use anyhow::{Context, Result}; +use std::fs; +use test_programs::nn::{sort_results, wit}; + +pub fn main() -> Result<()> { + let xml = fs::read("fixture/model.xml") + .context("the model file to be mapped to the fixture directory")?; + let weights = fs::read("fixture/model.bin") + .context("the weights file to be mapped to the fixture directory")?; + let graph = wit::load( + &[xml, weights], + wit::GraphEncoding::Openvino, + wit::ExecutionTarget::Cpu, + )?; + let tensor = fs::read("fixture/tensor.bgr") + .context("the tensor file to be mapped to the fixture directory")?; + let results = wit::classify( + graph, + ("input", tensor), + "MobilenetV2/Predictions/Reshape_1", + )?; + let top_five = &sort_results(&results)[..5]; + println!("found results, sorted top 5: {:?}", top_five); + Ok(()) +} diff --git a/crates/test-programs/src/bin/nn_wit_image_classification_openvino_named.rs 
b/crates/test-programs/src/bin/nn_wit_image_classification_openvino_named.rs new file mode 100644 index 000000000000..482c77043206 --- /dev/null +++ b/crates/test-programs/src/bin/nn_wit_image_classification_openvino_named.rs @@ -0,0 +1,17 @@ +use anyhow::{Context, Result}; +use std::fs; +use test_programs::nn::{sort_results, wit}; + +pub fn main() -> Result<()> { + let graph = wit::load_by_name("fixtures")?; + let tensor: Vec = fs::read("fixture/tensor.bgr") + .context("the tensor file to be mapped to the fixture directory")?; + let results = wit::classify( + graph, + ("input", tensor), + "MobilenetV2/Predictions/Reshape_1", + )?; + let top_five = &sort_results(&results)[..5]; + println!("found results, sorted top 5: {:?}", top_five); + Ok(()) +} diff --git a/crates/test-programs/src/bin/nn_image_classification_named.rs b/crates/test-programs/src/bin/nn_wit_image_classification_winml_named.rs similarity index 53% rename from crates/test-programs/src/bin/nn_image_classification_named.rs rename to crates/test-programs/src/bin/nn_wit_image_classification_winml_named.rs index 9b75a5afb4c6..f3840062e207 100644 --- a/crates/test-programs/src/bin/nn_image_classification_named.rs +++ b/crates/test-programs/src/bin/nn_wit_image_classification_winml_named.rs @@ -1,15 +1,14 @@ use anyhow::{Context, Result}; use std::fs; -use test_programs::nn::{classify, sort_results}; -use wasi_nn::{ExecutionTarget, GraphBuilder, GraphEncoding}; +use test_programs::nn::{sort_results, wit}; pub fn main() -> Result<()> { - let graph = GraphBuilder::new(GraphEncoding::Openvino, ExecutionTarget::CPU) - .build_from_cache("fixtures")?; + let graph = wit::load_by_name("mobilenet")?; let tensor = fs::read("fixture/tensor.bgr") .context("the tensor file to be mapped to the fixture directory")?; - let results = classify(graph, tensor)?; + let results = wit::classify(graph, ("input", tensor), "output")?; let top_five = &sort_results(&results)[..5]; println!("found results, sorted top 5: {:?}", 
top_five); + assert_eq!(top_five[0].class_id(), 284); Ok(()) } diff --git a/crates/test-programs/src/bin/nn_witx_image_classification_onnx.rs b/crates/test-programs/src/bin/nn_witx_image_classification_onnx.rs new file mode 100644 index 000000000000..fc4991ca44ff --- /dev/null +++ b/crates/test-programs/src/bin/nn_witx_image_classification_onnx.rs @@ -0,0 +1,22 @@ +use anyhow::{Context, Result}; +use std::fs; +use test_programs::nn::{sort_results, witx}; + +pub fn main() -> Result<()> { + let model = fs::read("fixture/model.onnx") + .context("the model file to be mapped to the fixture directory")?; + let graph = witx::load( + &[&model], + witx::GraphEncoding::Onnx, + witx::ExecutionTarget::CPU, + )?; + let tensor = fs::read("fixture/000000062808.rgb") + .context("the tensor file to be mapped to the fixture directory")?; + let results = witx::classify(graph, tensor)?; + let top_five = &sort_results(&results)[..5]; + // 963 is "meat loaf, meatloaf." + // https://github.com/onnx/models/blob/bec48b6a70e5e9042c0badbaafefe4454e072d08/validated/vision/classification/synset.txt#L963 + assert_eq!(top_five[0].class_id(), 963); + println!("found results, sorted top 5: {:?}", top_five); + Ok(()) +} diff --git a/crates/test-programs/src/bin/nn_image_classification.rs b/crates/test-programs/src/bin/nn_witx_image_classification_openvino.rs similarity index 66% rename from crates/test-programs/src/bin/nn_image_classification.rs rename to crates/test-programs/src/bin/nn_witx_image_classification_openvino.rs index 5815503c3f76..5d557e3b8274 100644 --- a/crates/test-programs/src/bin/nn_image_classification.rs +++ b/crates/test-programs/src/bin/nn_witx_image_classification_openvino.rs @@ -1,18 +1,20 @@ use anyhow::{Context, Result}; use std::fs; -use test_programs::nn::{classify, sort_results}; -use wasi_nn::{ExecutionTarget, GraphBuilder, GraphEncoding}; +use test_programs::nn::{sort_results, witx}; pub fn main() -> Result<()> { let xml = fs::read("fixture/model.xml") .context("the 
model file to be mapped to the fixture directory")?; let weights = fs::read("fixture/model.bin") .context("the weights file to be mapped to the fixture directory")?; - let graph = GraphBuilder::new(GraphEncoding::Openvino, ExecutionTarget::CPU) - .build_from_bytes([&xml, &weights])?; + let graph = witx::load( + &[&xml, &weights], + witx::GraphEncoding::Openvino, + witx::ExecutionTarget::CPU, + )?; let tensor = fs::read("fixture/tensor.bgr") .context("the tensor file to be mapped to the fixture directory")?; - let results = classify(graph, tensor)?; + let results = witx::classify(graph, tensor)?; let top_five = &sort_results(&results)[..5]; println!("found results, sorted top 5: {:?}", top_five); Ok(()) diff --git a/crates/test-programs/src/bin/nn_witx_image_classification_openvino_named.rs b/crates/test-programs/src/bin/nn_witx_image_classification_openvino_named.rs new file mode 100644 index 000000000000..d91c78a5c7b5 --- /dev/null +++ b/crates/test-programs/src/bin/nn_witx_image_classification_openvino_named.rs @@ -0,0 +1,17 @@ +use anyhow::{Context, Result}; +use std::fs; +use test_programs::nn::{sort_results, witx}; + +pub fn main() -> Result<()> { + let graph = witx::load_by_name( + "fixtures", + witx::GraphEncoding::Openvino, + witx::ExecutionTarget::CPU, + )?; + let tensor: Vec = fs::read("fixture/tensor.bgr") + .context("the tensor file to be mapped to the fixture directory")?; + let results = witx::classify(graph, tensor)?; + let top_five = &sort_results(&results)[..5]; + println!("found results, sorted top 5: {:?}", top_five); + Ok(()) +} diff --git a/crates/test-programs/src/bin/nn_witx_image_classification_winml_named.rs b/crates/test-programs/src/bin/nn_witx_image_classification_winml_named.rs new file mode 100644 index 000000000000..87808f32c2c3 --- /dev/null +++ b/crates/test-programs/src/bin/nn_witx_image_classification_winml_named.rs @@ -0,0 +1,18 @@ +use anyhow::{Context, Result}; +use std::fs; +use test_programs::nn::{sort_results, witx}; + +pub 
fn main() -> Result<()> { + let graph = witx::load_by_name( + "mobilenet", + witx::GraphEncoding::Onnx, + witx::ExecutionTarget::CPU, + )?; + let tensor = fs::read("fixture/tensor.bgr") + .context("the tensor file to be mapped to the fixture directory")?; + let results = witx::classify(graph, tensor)?; + let top_five = &sort_results(&results)[..5]; + println!("found results, sorted top 5: {:?}", top_five); + assert_eq!(top_five[0].class_id(), 284); + Ok(()) +} diff --git a/crates/test-programs/src/nn.rs b/crates/test-programs/src/nn.rs index f3b54b460901..361a7c1f6282 100644 --- a/crates/test-programs/src/nn.rs +++ b/crates/test-programs/src/nn.rs @@ -1,39 +1,147 @@ -use anyhow::Result; -use std::time::Instant; -use wasi_nn::{Graph, TensorType}; - -/// Run a wasi-nn inference using a simple classifier model (single input, -/// single output). -pub fn classify(graph: Graph, tensor: Vec) -> Result> { - let mut context = graph.init_execution_context()?; - println!( - "[nn] created wasi-nn execution context with ID: {}", - context - ); - - // Many classifiers have a single input; currently, this test suite also - // uses tensors of the same shape, though this is not usually the case. - context.set_input(0, TensorType::F32, &[1, 3, 224, 224], &tensor)?; - println!("[nn] set input tensor: {} bytes", tensor.len()); - - let before = Instant::now(); - context.compute()?; - println!( - "[nn] executed graph inference in {} ms", - before.elapsed().as_millis() - ); - - // Many classifiers emit probabilities as floating point values; here we - // convert the raw bytes to `f32` knowing all models used here use that - // type. - let mut output_buffer = vec![0u8; 1001 * std::mem::size_of::()]; - let num_bytes = context.get_output(0, &mut output_buffer)?; - println!("[nn] retrieved output tensor: {} bytes", num_bytes); - let output: Vec = output_buffer[..num_bytes] - .chunks(4) - .map(|b| f32::from_le_bytes([b[0], b[1], b[2], b[3]])) - .collect(); - Ok(output) +//! 
This module attempts to paper over the differences between the two +//! implementations of wasi-nn: the legacy WITX-based version (`mod witx`) and +//! the up-to-date WIT version (`mod wit`). Since the tests are mainly a simple +//! classifier, this exposes a high-level `classify` function to go along with +//! `load`, etc. +//! +//! This module exists solely for convenience--e.g., reduces test duplication. +//! In the future it can be safely disposed of or altered as more tests are added. + +/// Call `wasi-nn` functions from WebAssembly using the canonical ABI of the +/// component model via WIT-based tooling. Used by `bin/nn_wit_*.rs` tests. +pub mod wit { + use anyhow::{anyhow, Result}; + use std::time::Instant; + + // Generate the wasi-nn bindings based on the `*.wit` files. + wit_bindgen::generate!({ + path: "../wasi-nn/wit", + world: "ml", + default_bindings_module: "test_programs::ml" + }); + use self::wasi::nn::errors; + use self::wasi::nn::graph::{self, Graph}; + pub use self::wasi::nn::graph::{ExecutionTarget, GraphEncoding}; // Used by tests. + use self::wasi::nn::tensor::{Tensor, TensorType}; + + /// Load a wasi-nn graph from a set of bytes. + pub fn load( + bytes: &[Vec], + encoding: GraphEncoding, + target: ExecutionTarget, + ) -> Result { + graph::load(bytes, encoding, target).map_err(err_as_anyhow) + } + + /// Load a wasi-nn graph by name. + pub fn load_by_name(name: &str) -> Result { + graph::load_by_name(name).map_err(err_as_anyhow) + } + + /// Run a wasi-nn inference using a simple classifier model (single input, + /// single output). + pub fn classify(graph: Graph, input: (&str, Vec), output: &str) -> Result> { + let context = graph.init_execution_context().map_err(err_as_anyhow)?; + println!( + "[nn] created wasi-nn execution context with ID: {:?}", + context + ); + + // Many classifiers have a single input; currently, this test suite also + // uses tensors of the same shape, though this is not usually the case. 
+ let tensor = Tensor::new(&vec![1, 3, 224, 224], TensorType::Fp32, &input.1); + context.set_input(input.0, tensor).map_err(err_as_anyhow)?; + println!("[nn] set input tensor: {} bytes", input.1.len()); + + let before = Instant::now(); + context.compute().map_err(err_as_anyhow)?; + println!( + "[nn] executed graph inference in {} ms", + before.elapsed().as_millis() + ); + + // Many classifiers emit probabilities as floating point values; here we + // convert the raw bytes to `f32` knowing all models used here use that + // type. + let output = context.get_output(output).map_err(err_as_anyhow)?; + println!( + "[nn] retrieved output tensor: {} bytes", + output.data().len() + ); + let output: Vec = output + .data() + .chunks(4) + .map(|b| f32::from_le_bytes([b[0], b[1], b[2], b[3]])) + .collect(); + Ok(output) + } + + fn err_as_anyhow(e: errors::Error) -> anyhow::Error { + anyhow!("error: {e:?}") + } +} + +/// Call `wasi-nn` functions from WebAssembly using the legacy WITX-based +/// tooling. This older API has been deprecated for the newer WIT-based API but +/// retained for backwards compatibility testing--i.e., `bin/nn_witx_*.rs` +/// tests. +pub mod witx { + use anyhow::Result; + use std::time::Instant; + pub use wasi_nn::{ExecutionTarget, GraphEncoding}; + use wasi_nn::{Graph, GraphBuilder, TensorType}; + + /// Load a wasi-nn graph from a set of bytes. + pub fn load( + bytes: &[&[u8]], + encoding: GraphEncoding, + target: ExecutionTarget, + ) -> Result { + Ok(GraphBuilder::new(encoding, target).build_from_bytes(bytes)?) + } + + /// Load a wasi-nn graph by name. + pub fn load_by_name( + name: &str, + encoding: GraphEncoding, + target: ExecutionTarget, + ) -> Result { + Ok(GraphBuilder::new(encoding, target).build_from_cache(name)?) + } + + /// Run a wasi-nn inference using a simple classifier model (single input, + /// single output). 
+ pub fn classify(graph: Graph, tensor: Vec) -> Result> { + let mut context = graph.init_execution_context()?; + println!( + "[nn] created wasi-nn execution context with ID: {}", + context + ); + + // Many classifiers have a single input; currently, this test suite also + // uses tensors of the same shape, though this is not usually the case. + context.set_input(0, TensorType::F32, &[1, 3, 224, 224], &tensor)?; + println!("[nn] set input tensor: {} bytes", tensor.len()); + + let before = Instant::now(); + context.compute()?; + println!( + "[nn] executed graph inference in {} ms", + before.elapsed().as_millis() + ); + + // Many classifiers emit probabilities as floating point values; here we + // convert the raw bytes to `f32` knowing all models used here use that + // type. + let mut output_buffer = vec![0u8; 1001 * std::mem::size_of::()]; + let num_bytes = context.get_output(0, &mut output_buffer)?; + println!("[nn] retrieved output tensor: {} bytes", num_bytes); + let output: Vec = output_buffer[..num_bytes] + .chunks(4) + .map(|b| f32::from_le_bytes([b[0], b[1], b[2], b[3]])) + .collect(); + Ok(output) + } } /// Sort some classification probabilities. 
diff --git a/crates/wasi-nn/Cargo.toml b/crates/wasi-nn/Cargo.toml index 7390c1e33146..a5ace788d03a 100644 --- a/crates/wasi-nn/Cargo.toml +++ b/crates/wasi-nn/Cargo.toml @@ -20,7 +20,11 @@ anyhow = { workspace = true, features = ['std'] } wiggle = { workspace = true, features = ["wasmtime"] } # This dependency is necessary for the WIT-generation macros to work: -wasmtime = { workspace = true, features = ["component-model", "runtime"] } +wasmtime = { workspace = true, features = [ + "component-model", + "runtime", + "std", +] } # These dependencies are necessary for the wasi-nn implementation: tracing = { workspace = true } @@ -29,7 +33,7 @@ openvino = { version = "0.6.0", features = [ "runtime-linking", ], optional = true } -ort = { version = "2.0.0-rc.0", default-features = false, features = [ +ort = { version = "2.0.0-rc.2", default-features = false, features = [ "copy-dylibs", "download-binaries", ], optional = true } @@ -46,16 +50,17 @@ walkdir = { workspace = true } cap-std = { workspace = true } libtest-mimic = { workspace = true } test-programs-artifacts = { workspace = true } -wasi-common = { workspace = true, features = ["sync"] } +wasmtime-wasi = { workspace = true, features = ["preview1"] } wasmtime = { workspace = true, features = ["cranelift"] } +tracing-subscriber = { workspace = true } [features] default = ["openvino", "winml"] -# openvino is available on all platforms, it requires openvino installed. +# OpenVINO is available on all platforms; it requires OpenVINO to be installed. openvino = ["dep:openvino"] -# onnx is available on all platforms. +# ONNX is available on all platforms. onnx = ["dep:ort"] -# winml is only available on Windows 10 1809 and later. +# WinML is only available on Windows 10 1809 and later. 
winml = ["dep:windows"] [[test]] diff --git a/crates/wasi-nn/src/backend/mod.rs b/crates/wasi-nn/src/backend/mod.rs index e06f9e364925..710c93b980f4 100644 --- a/crates/wasi-nn/src/backend/mod.rs +++ b/crates/wasi-nn/src/backend/mod.rs @@ -3,20 +3,20 @@ //! implementations to maintain backend-specific state between calls. #[cfg(feature = "onnx")] -pub mod onnxruntime; +pub mod onnx; #[cfg(feature = "openvino")] pub mod openvino; #[cfg(all(feature = "winml", target_os = "windows"))] pub mod winml; #[cfg(feature = "onnx")] -use self::onnxruntime::OnnxBackend; +use self::onnx::OnnxBackend; #[cfg(feature = "openvino")] use self::openvino::OpenvinoBackend; #[cfg(all(feature = "winml", target_os = "windows"))] use self::winml::WinMLBackend; -use crate::wit::types::{ExecutionTarget, GraphEncoding, Tensor}; +use crate::wit::{ExecutionTarget, GraphEncoding, Tensor}; use crate::{Backend, ExecutionContext, Graph}; use std::fs::File; use std::io::Read; @@ -69,9 +69,30 @@ pub trait BackendGraph: Send + Sync { /// A [BackendExecutionContext] performs the actual inference; this is the /// backing implementation for a user-facing execution context. pub trait BackendExecutionContext: Send + Sync { - fn set_input(&mut self, index: u32, tensor: &Tensor) -> Result<(), BackendError>; + fn set_input(&mut self, id: Id, tensor: &Tensor) -> Result<(), BackendError>; fn compute(&mut self) -> Result<(), BackendError>; - fn get_output(&mut self, index: u32, destination: &mut [u8]) -> Result; + fn get_output(&mut self, id: Id) -> Result; +} + +/// An identifier for a tensor in a [Graph]. 
+#[derive(Debug)] +pub enum Id { + Index(u32), + Name(String), +} +impl Id { + pub fn index(&self) -> Option { + match self { + Id::Index(i) => Some(*i), + Id::Name(_) => None, + } + } + pub fn name(&self) -> Option<&str> { + match self { + Id::Index(_) => None, + Id::Name(n) => Some(n), + } + } } /// Errors returned by a backend; [BackendError::BackendAccess] is a catch-all diff --git a/crates/wasi-nn/src/backend/onnx.rs b/crates/wasi-nn/src/backend/onnx.rs new file mode 100644 index 000000000000..b6ae2ddf278e --- /dev/null +++ b/crates/wasi-nn/src/backend/onnx.rs @@ -0,0 +1,338 @@ +//! Implements a `wasi-nn` [`BackendInner`] using ONNX via the `ort` crate. + +use super::{BackendError, BackendExecutionContext, BackendFromDir, BackendGraph, BackendInner}; +use crate::backend::{read, Id}; +use crate::wit::types::{ExecutionTarget, GraphEncoding, Tensor, TensorType}; +use crate::{ExecutionContext, Graph}; +use anyhow::Context; +use ort::{inputs, GraphOptimizationLevel, Session}; +use std::path::Path; +use std::sync::{Arc, Mutex}; + +#[derive(Default)] +pub struct OnnxBackend(); +unsafe impl Send for OnnxBackend {} +unsafe impl Sync for OnnxBackend {} + +impl BackendInner for OnnxBackend { + fn encoding(&self) -> GraphEncoding { + GraphEncoding::Onnx + } + + fn load(&mut self, builders: &[&[u8]], target: ExecutionTarget) -> Result { + if builders.len() != 1 { + return Err(BackendError::InvalidNumberOfBuilders(1, builders.len()).into()); + } + + let session = Session::builder()? + .with_optimization_level(GraphOptimizationLevel::Level3)? 
+ .commit_from_memory(builders[0])?; + + let box_: Box = + Box::new(OnnxGraph(Arc::new(Mutex::new(session)), target)); + Ok(box_.into()) + } + + fn as_dir_loadable<'a>(&'a mut self) -> Option<&'a mut dyn BackendFromDir> { + Some(self) + } +} + +impl BackendFromDir for OnnxBackend { + fn load_from_dir( + &mut self, + path: &Path, + target: ExecutionTarget, + ) -> Result { + let model = read(&path.join("model.onnx"))?; + self.load(&[&model], target) + } +} + +struct OnnxGraph(Arc>, #[allow(dead_code)] ExecutionTarget); +unsafe impl Send for OnnxGraph {} +unsafe impl Sync for OnnxGraph {} + +impl BackendGraph for OnnxGraph { + fn init_execution_context(&self) -> Result { + let session = self.0.lock().unwrap(); + // We need to hold on to the names of the inputs in order for + // `set_input` to work with both indexes and names. Having the + // dimensions and type around is useful for validation but could be + // retrieved from the session. + let mut inputs = vec![]; + for input in &session.inputs { + let shape = Shape::from_onnx_input(input)?; + inputs.push(TensorSlot { + shape, + tensor: None, + }); + } + // We need to keep track of the output shapes since they are used for + // creating the output tensor. + let mut outputs = vec![]; + for output in &session.outputs { + let shape = Shape::from_onnx_output(output)?; + outputs.push(TensorSlot { + shape, + tensor: None, + }); + } + let box_: Box = Box::new(OnnxExecutionContext { + session: self.0.clone(), + inputs, + outputs, + }); + Ok(box_.into()) + } +} + +struct OnnxExecutionContext { + session: Arc>, + inputs: Vec, + outputs: Vec, +} + +unsafe impl Send for OnnxExecutionContext {} +unsafe impl Sync for OnnxExecutionContext {} + +impl OnnxExecutionContext { + /// Helper function for finding the internal index of a tensor by [`Id`]. 
+ fn find(&self, id: Id, list: &[TensorSlot]) -> Result { + let index = match id { + Id::Index(i) => { + let i = i as usize; + if i < list.len() { + i + } else { + return Err(BackendError::BackendAccess(anyhow::anyhow!( + "incorrect tensor index: {i} >= {}", + list.len() + ))); + } + } + Id::Name(n) => list.iter().position(|s| s.shape.name == n).ok_or_else(|| { + BackendError::BackendAccess(anyhow::anyhow!("unknown tensor name: {n}")) + })?, + }; + Ok(index) + } +} + +impl BackendExecutionContext for OnnxExecutionContext { + fn set_input(&mut self, id: Id, tensor: &Tensor) -> Result<(), BackendError> { + let index = self.find(id, &self.inputs)?; + let input = &mut self.inputs[index]; + if let Err(e) = input.shape.matches(tensor) { + return Err(e.into()); + } + // Hold the tensor data on the context until `compute` is called. + input.tensor.replace(tensor.clone()); + Ok(()) + } + + fn compute(&mut self) -> Result<(), BackendError> { + let mut session_inputs: Vec> = vec![]; + for i in &self.inputs { + session_inputs.extend(to_input_value(i)?); + } + let session = self.session.lock().unwrap(); + let session_outputs = session.run(session_inputs.as_slice())?; + for i in 0..self.outputs.len() { + // TODO: fix preexisting gap--this only handles f32 tensors. 
+ let raw: (Vec, &[f32]) = session_outputs[i].try_extract_raw_tensor()?; + let f32s = raw.1.to_vec(); + let output = &mut self.outputs[i]; + output.tensor.replace(Tensor { + dimensions: output.shape.dimensions_as_u32()?, + ty: output.shape.ty, + data: f32_vec_to_bytes(f32s), + }); + } + Ok(()) + } + + fn get_output(&mut self, id: Id) -> Result { + let index = self.find(id, &self.outputs)?; + let output = &self.outputs[index]; + if let Some(tensor) = &output.tensor { + Ok(tensor.clone()) + } else { + Err(BackendError::BackendAccess(anyhow::anyhow!( + "missing output tensor: {}; has `compute` been called?", + output.shape.name + ))) + } + } +} + +impl From for BackendError { + fn from(e: ort::Error) -> Self { + BackendError::BackendAccess(e.into()) + } +} + +/// Holds a slot for ONNX session inputs and outputs. +/// +/// TODO: it seems unfortunate that we have to "hold" some extra data per +/// session but in the input case, this is necessary for name-based indexing. +struct TensorSlot { + shape: Shape, + tensor: Option, +} + +/// Describes a tensor in ONNX terms. 
+struct Shape { + name: String, + dimensions: Vec, + ty: TensorType, +} + +impl Shape { + fn from_onnx_input(input: &ort::Input) -> Result { + let name = input.name.clone(); + let (dimensions, ty) = convert_value_type(&input.input_type)?; + Ok(Self { + name, + dimensions, + ty, + }) + } + + fn from_onnx_output(output: &ort::Output) -> Result { + let name = output.name.clone(); + let (dimensions, ty) = convert_value_type(&output.output_type)?; + Ok(Self { + name, + dimensions, + ty, + }) + } + + fn dimensions_as_u32(&self) -> Result, BackendError> { + self.dimensions + .iter() + .map(|d| if *d == -1 { Ok(1) } else { convert_i64(d) }) + .collect() + } + + fn matches(&self, tensor: &Tensor) -> anyhow::Result<()> { + if self.dimensions.len() != tensor.dimensions.len() { + return Err(anyhow::anyhow!( + "input tensor cardinality does not match model: {:?} != {:?}", + self.dimensions, + tensor.dimensions + )); + } else { + for (&shape_dim, &tensor_dim) in self.dimensions.iter().zip(tensor.dimensions.iter()) { + let tensor_dim = tensor_dim as i64; + if !is_dynamic_dimension(shape_dim) && shape_dim != tensor_dim { + return Err(anyhow::anyhow!( + "input tensor dimensions do not match model: {:?} != {:?}", + self.dimensions, + tensor.dimensions + )); + } + } + } + if self.ty != tensor.ty { + return Err(anyhow::anyhow!( + "input tensor type does not match model: {:?} != {:?}", + self.ty, + tensor.ty + )); + } + Ok(()) + } +} + +fn convert_value_type(vt: &ort::ValueType) -> Result<(Vec, TensorType), BackendError> { + match vt { + ort::ValueType::Tensor { ty, dimensions } => { + let dims = dimensions.clone(); + let ty = (*ty).try_into()?; + Ok((dims, ty)) + } + _ => Err(BackendError::BackendAccess(anyhow::anyhow!( + "unsupported input type: {vt:?}" + ))), + } +} + +fn convert_i64(i: &i64) -> Result { + u32::try_from(*i).map_err(|d| -> BackendError { + anyhow::anyhow!("unable to convert dimension to u32: {d}").into() + }) +} + +impl TryFrom for TensorType { + type Error = 
BackendError; + fn try_from(ty: ort::TensorElementType) -> Result { + match ty { + ort::TensorElementType::Float32 => Ok(TensorType::Fp32), + ort::TensorElementType::Float64 => Ok(TensorType::Fp64), + ort::TensorElementType::Uint8 => Ok(TensorType::U8), + ort::TensorElementType::Int32 => Ok(TensorType::I32), + ort::TensorElementType::Int64 => Ok(TensorType::I64), + _ => Err(BackendError::BackendAccess(anyhow::anyhow!( + "unsupported tensor type: {ty:?}" + ))), + } + } +} + +fn to_input_value(slot: &TensorSlot) -> Result<[ort::SessionInputValue<'_>; 1], BackendError> { + match &slot.tensor { + Some(tensor) => match tensor.ty { + TensorType::Fp32 => { + let data = bytes_to_f32_vec(tensor.data.to_vec()); + let dimensions = tensor + .dimensions + .iter() + .map(|d| *d as i64) // TODO: fewer conversions + .collect::>(); + Ok(inputs![(dimensions, Arc::new(data.into_boxed_slice()))] + .context("failed to create ONNX session input")?) + } + _ => { + unimplemented!("{:?} not supported by ONNX", tensor.ty); + } + }, + None => { + return Err(BackendError::BackendAccess(anyhow::anyhow!( + "missing input tensor: {}", + slot.shape.name + ))); + } + } +} + +pub fn f32_vec_to_bytes(data: Vec) -> Vec { + let chunks: Vec<[u8; 4]> = data.into_iter().map(|f| f.to_le_bytes()).collect(); + let result: Vec = chunks.iter().flatten().copied().collect(); + result +} + +pub fn bytes_to_f32_vec(data: Vec) -> Vec { + let chunks: Vec<&[u8]> = data.chunks(4).collect(); + let v: Vec = chunks + .into_iter() + .map(|c| f32::from_le_bytes(c.try_into().unwrap())) + .collect(); + + v.into_iter().collect() +} + +/// Returns whether the dimension is dynamic. +/// +/// ONNX uses [dimensional variables] (i.e., name strings) to indicate that the +/// value of a tensor dimension is user-defined, not fixed by the model. This is +/// useful, e.g., for batching up several inference requests. When `ort` returns a +/// dimension of this kind, though, it uses `-1` to indicate that the dimension +/// is dynamic. 
+/// +/// [dimensional variables]: +/// https://onnx.ai/onnx/repo-docs/IR.html#static-tensor-shapes +fn is_dynamic_dimension(d: i64) -> bool { + d == -1 +} diff --git a/crates/wasi-nn/src/backend/onnxruntime.rs b/crates/wasi-nn/src/backend/onnxruntime.rs deleted file mode 100644 index bddb03dc9b35..000000000000 --- a/crates/wasi-nn/src/backend/onnxruntime.rs +++ /dev/null @@ -1,149 +0,0 @@ -//! Implements a `wasi-nn` [`BackendInner`] using ONNX via ort. - -use super::{BackendError, BackendExecutionContext, BackendFromDir, BackendGraph, BackendInner}; -use crate::backend::read; -use crate::wit::types::{ExecutionTarget, GraphEncoding, Tensor, TensorType}; -use crate::{ExecutionContext, Graph}; -use ort::{inputs, GraphOptimizationLevel, Session}; -use std::path::Path; -use std::sync::{Arc, Mutex}; - -#[derive(Default)] -pub struct OnnxBackend(); -unsafe impl Send for OnnxBackend {} -unsafe impl Sync for OnnxBackend {} - -impl BackendInner for OnnxBackend { - fn encoding(&self) -> GraphEncoding { - GraphEncoding::Onnx - } - - fn load(&mut self, builders: &[&[u8]], target: ExecutionTarget) -> Result { - if builders.len() != 1 { - return Err(BackendError::InvalidNumberOfBuilders(1, builders.len()).into()); - } - - let session = Session::builder()? - .with_optimization_level(GraphOptimizationLevel::Level3)? 
- .with_model_from_memory(builders[0])?; - - let box_: Box = - Box::new(ONNXGraph(Arc::new(Mutex::new(session)), target)); - Ok(box_.into()) - } - - fn as_dir_loadable<'a>(&'a mut self) -> Option<&'a mut dyn BackendFromDir> { - Some(self) - } -} - -impl BackendFromDir for OnnxBackend { - fn load_from_dir( - &mut self, - path: &Path, - target: ExecutionTarget, - ) -> Result { - let model = read(&path.join("model.onnx"))?; - self.load(&[&model], target) - } -} - -struct ONNXGraph(Arc>, #[allow(dead_code)] ExecutionTarget); - -unsafe impl Send for ONNXGraph {} -unsafe impl Sync for ONNXGraph {} - -impl BackendGraph for ONNXGraph { - fn init_execution_context(&self) -> Result { - let session = self.0.lock().unwrap(); - let inputs = session.inputs.iter().map(|_| None).collect::>(); - let outputs = session.outputs.iter().map(|_| None).collect::>(); - let box_: Box = Box::new(ONNXExecutionContext { - session: self.0.clone(), - inputs, - outputs, - }); - Ok(box_.into()) - } -} - -struct ONNXExecutionContext { - session: Arc>, - inputs: Vec>, - outputs: Vec>>, -} - -unsafe impl Send for ONNXExecutionContext {} -unsafe impl Sync for ONNXExecutionContext {} - -impl BackendExecutionContext for ONNXExecutionContext { - fn set_input(&mut self, index: u32, tensor: &Tensor) -> Result<(), BackendError> { - self.inputs[index as usize].replace(tensor.clone()); - Ok(()) - } - - fn compute(&mut self) -> Result<(), BackendError> { - let shaped_inputs: Vec<_> = self - .inputs - .iter() - .enumerate() - .map(|(i, _o)| { - let input = self.inputs[i].as_ref().unwrap(); - let dims = input - .dimensions - .as_slice() - .iter() - .map(|d| *d as i64) - .collect::>(); - match input.tensor_type { - TensorType::Fp32 => { - let data = bytes_to_f32_vec(input.data.to_vec()); - inputs![(dims, Arc::new(data.into_boxed_slice()))].unwrap() - } - _ => { - unimplemented!("{:?} not supported by ONNX", input.tensor_type); - } - } - }) - .flatten() - .collect(); - - let session = self.session.lock().unwrap(); 
- let res = session.run(shaped_inputs.as_slice())?; - - for i in 0..self.outputs.len() { - let raw: (Vec, &[f32]) = res[i].extract_raw_tensor()?; - let f32s = raw.1.to_vec(); - self.outputs[i].replace(f32_vec_to_bytes(f32s)); - } - Ok(()) - } - - fn get_output(&mut self, index: u32, destination: &mut [u8]) -> Result { - let output = self.outputs[index as usize].as_ref().unwrap(); - destination[..output.len()].copy_from_slice(output); - Ok(output.len() as u32) - } -} - -impl From for BackendError { - fn from(e: ort::Error) -> Self { - BackendError::BackendAccess(e.into()) - } -} - -pub fn f32_vec_to_bytes(data: Vec) -> Vec { - let chunks: Vec<[u8; 4]> = data.into_iter().map(|f| f.to_le_bytes()).collect(); - let result: Vec = chunks.iter().flatten().copied().collect(); - result -} - -pub fn bytes_to_f32_vec(data: Vec) -> Vec { - let chunks: Vec<&[u8]> = data.chunks(4).collect(); - let v: Vec = chunks - .into_iter() - .map(|c| f32::from_le_bytes(c.try_into().unwrap())) - .collect(); - - v.into_iter().collect() -} diff --git a/crates/wasi-nn/src/backend/openvino.rs b/crates/wasi-nn/src/backend/openvino.rs index 65c96629eead..b24f93838cab 100644 --- a/crates/wasi-nn/src/backend/openvino.rs +++ b/crates/wasi-nn/src/backend/openvino.rs @@ -1,9 +1,9 @@ //! Implements a `wasi-nn` [`BackendInner`] using OpenVINO. 
use super::{ - read, BackendError, BackendExecutionContext, BackendFromDir, BackendGraph, BackendInner, + read, BackendError, BackendExecutionContext, BackendFromDir, BackendGraph, BackendInner, Id, }; -use crate::wit::types::{ExecutionTarget, GraphEncoding, Tensor, TensorType}; +use crate::wit::{self, ExecutionTarget, GraphEncoding, Tensor, TensorType}; use crate::{ExecutionContext, Graph}; use openvino::{InferenceError, Layout, Precision, SetupError, TensorDesc}; use std::path::Path; @@ -99,12 +99,15 @@ impl BackendGraph for OpenvinoGraph { struct OpenvinoExecutionContext(Arc, openvino::InferRequest); impl BackendExecutionContext for OpenvinoExecutionContext { - fn set_input(&mut self, index: u32, tensor: &Tensor) -> Result<(), BackendError> { - let input_name = self.0.get_input_name(index as usize)?; + fn set_input(&mut self, id: Id, tensor: &Tensor) -> Result<(), BackendError> { + let input_name = match id { + Id::Index(i) => self.0.get_input_name(i as usize)?, + Id::Name(name) => name, + }; // Construct the blob structure. TODO: there must be some good way to // discover the layout here; `desc` should not have to default to NHWC. - let precision = map_tensor_type_to_precision(tensor.tensor_type); + let precision = map_tensor_type_to_precision(tensor.ty); let dimensions = tensor .dimensions .iter() @@ -123,17 +126,20 @@ impl BackendExecutionContext for OpenvinoExecutionContext { Ok(()) } - fn get_output(&mut self, index: u32, destination: &mut [u8]) -> Result { - let output_name = self.0.get_output_name(index as usize)?; + fn get_output(&mut self, id: Id) -> Result { + let output_name = match id { + Id::Index(i) => self.0.get_output_name(i as usize)?, + Id::Name(name) => name, + }; + let dimensions = vec![]; // TODO: get actual shape + let ty = wit::TensorType::Fp32; // TODO: get actual type. 
let blob = self.1.get_blob(&output_name)?; - let blob_size = blob.byte_len()?; - if blob_size > destination.len() { - return Err(BackendError::NotEnoughMemory(blob_size)); - } - - // Copy the tensor data into the destination buffer. - destination[..blob_size].copy_from_slice(blob.buffer()?); - Ok(blob_size as u32) + let data = blob.buffer()?.to_vec(); + Ok(Tensor { + dimensions, + ty, + data, + }) } } diff --git a/crates/wasi-nn/src/backend/winml.rs b/crates/wasi-nn/src/backend/winml.rs index e11761f86732..b87510bfc877 100644 --- a/crates/wasi-nn/src/backend/winml.rs +++ b/crates/wasi-nn/src/backend/winml.rs @@ -1,16 +1,27 @@ //! Implements a `wasi-nn` [`BackendInner`] using WinML. - -use super::{BackendError, BackendExecutionContext, BackendFromDir, BackendGraph, BackendInner}; -use crate::wit::types::{ExecutionTarget, GraphEncoding, Tensor}; +//! +//! Note that the [docs.rs] documentation for the `windows` crate does have the +//! right features turned on to read about the functions used; see Microsoft's +//! private documentation instead: [microsoft.github.io/windows-docs-rs]. +//! +//! [docs.rs]: https://docs.rs/windows +//! 
[microsoft.github.io/windows-docs-rs]: https://microsoft.github.io/windows-docs-rs/doc/windows/AI/MachineLearning + +use crate::backend::{ + BackendError, BackendExecutionContext, BackendFromDir, BackendGraph, BackendInner, Id, +}; +use crate::wit::{ExecutionTarget, GraphEncoding, Tensor, TensorType}; use crate::{ExecutionContext, Graph}; use std::{fs::File, io::Read, mem::size_of, path::Path}; use windows::core::{ComInterface, HSTRING}; +use windows::Foundation::Collections::IVectorView; use windows::Storage::Streams::{ DataWriter, InMemoryRandomAccessStream, RandomAccessStreamReference, }; use windows::AI::MachineLearning::{ - LearningModel, LearningModelBinding, LearningModelDevice, LearningModelDeviceKind, - LearningModelEvaluationResult, LearningModelSession, TensorFeatureDescriptor, TensorFloat, + ILearningModelFeatureDescriptor, LearningModel, LearningModelBinding, LearningModelDevice, + LearningModelDeviceKind, LearningModelEvaluationResult, LearningModelSession, + TensorFeatureDescriptor, TensorFloat, }; #[derive(Default)] @@ -94,29 +105,64 @@ impl WinMLExecutionContext { } } +impl WinMLExecutionContext { + /// Helper function for finding the internal index of a tensor by [`Id`]. + fn find( + &self, + id: Id, + list: &IVectorView, + ) -> Result { + let index = match id { + Id::Index(i) => { + if i < list.Size()? { + i + } else { + return Err(BackendError::BackendAccess(anyhow::anyhow!( + "incorrect tensor index: {i} >= {}", + list.Size()? + ))); + } + } + Id::Name(name) => list + .into_iter() + .position(|d| d.Name().unwrap() == name) + .ok_or_else(|| { + BackendError::BackendAccess(anyhow::anyhow!("unknown tensor name: {name}")) + })? 
as u32, + }; + Ok(index) + } +} + impl BackendExecutionContext for WinMLExecutionContext { - fn set_input(&mut self, index: u32, tensor: &Tensor) -> Result<(), BackendError> { + fn set_input(&mut self, id: Id, tensor: &Tensor) -> Result<(), BackendError> { + let input_features = self.session.Model()?.InputFeatures()?; + let index = self.find(id, &input_features)?; + let input = input_features.GetAt(index)?; + // TODO: Support other tensor types. Only FP32 is supported right now. - match tensor.tensor_type { + match tensor.ty { crate::wit::types::TensorType::Fp32 => {} _ => unimplemented!(), } - let input = self.session.Model()?.InputFeatures()?.GetAt(index)?; - unsafe { - let data = std::slice::from_raw_parts( + // TODO: this is quite unsafe and probably incorrect--will the slice + // still be around by the time the binding is used?! + let data = unsafe { + std::slice::from_raw_parts( tensor.data.as_ptr() as *const f32, - tensor.data.len() / 4, - ); - - self.binding.Bind( - &input.Name()?, - &TensorFloat::CreateFromArray( - &input.cast::()?.Shape()?, - data, - )?, - )?; - } + tensor.data.len() / size_of::(), + ) + }; + + self.binding.Bind( + &input.Name()?, + &TensorFloat::CreateFromArray( + &input.cast::()?.Shape()?, + data, + )?, + )?; + Ok(()) } @@ -125,33 +171,32 @@ impl BackendExecutionContext for WinMLExecutionContext { Ok(()) } - fn get_output(&mut self, index: u32, destination: &mut [u8]) -> Result { - if self.result.is_none() { + fn get_output(&mut self, id: Id) -> Result { + if let Some(result) = &self.result { + let output_features = self.session.Model()?.OutputFeatures()?; + let index = self.find(id, &output_features)?; + let output = output_features.GetAt(index)?; + // TODO: this only handles FP32! + let tensor = result + .Outputs()? + .Lookup(&output.Name()?)? + .cast::()?; + let dimensions = dimensions_as_u32(&tensor.Shape()?)?; + let view = tensor.GetAsVectorView()?; + let mut data = Vec::with_capacity(view.Size()? 
as usize * size_of::()); + for f in view.into_iter() { + data.extend(f.to_le_bytes()); + } + Ok(Tensor { + ty: TensorType::Fp32, + dimensions, + data, + }) + } else { return Err(BackendError::BackendAccess(anyhow::Error::msg( "Output is not ready.", ))); } - let output_name = self.session.Model()?.OutputFeatures()?.GetAt(index)?; - let output_name_hstring = output_name.Name()?; - - let vector_view = self - .result - .as_ref() - .unwrap() - .Outputs()? - .Lookup(&output_name_hstring)? - .cast::()? - .GetAsVectorView()?; - let output: Vec = vector_view.into_iter().collect(); - let len_to_copy = output.len() * size_of::(); - unsafe { - destination[..len_to_copy].copy_from_slice(std::slice::from_raw_parts( - output.as_ptr() as *const u8, - len_to_copy, - )); - } - - Ok(len_to_copy as u32) } } @@ -168,3 +213,16 @@ impl From for BackendError { BackendError::BackendAccess(anyhow::Error::new(e)) } } + +fn dimensions_as_u32(dimensions: &IVectorView) -> Result, BackendError> { + dimensions + .into_iter() + .map(|d| if d == -1 { Ok(1) } else { convert_i64(d) }) + .collect() +} + +fn convert_i64(i: i64) -> Result { + u32::try_from(i).map_err(|d| -> BackendError { + anyhow::anyhow!("unable to convert dimension to u32: {d}").into() + }) +} diff --git a/crates/wasi-nn/src/ctx.rs b/crates/wasi-nn/src/ctx.rs deleted file mode 100644 index 40cfb9d53d2d..000000000000 --- a/crates/wasi-nn/src/ctx.rs +++ /dev/null @@ -1,146 +0,0 @@ -//! Implements the host state for the `wasi-nn` API: [WasiNnCtx]. 
- -use crate::backend::{self, BackendError}; -use crate::wit::types::GraphEncoding; -use crate::{Backend, ExecutionContext, Graph, InMemoryRegistry, Registry}; -use anyhow::anyhow; -use std::{collections::HashMap, hash::Hash, path::Path}; -use thiserror::Error; -use wiggle::GuestError; - -type GraphId = u32; -type GraphExecutionContextId = u32; -type BackendName = String; -type GraphDirectory = String; - -/// Construct an in-memory registry from the available backends and a list of -/// `(, )`. This assumes graphs can be loaded -/// from a local directory, which is a safe assumption currently for the current -/// model types. -pub fn preload( - preload_graphs: &[(BackendName, GraphDirectory)], -) -> anyhow::Result<(impl IntoIterator, Registry)> { - let mut backends = backend::list(); - let mut registry = InMemoryRegistry::new(); - for (kind, path) in preload_graphs { - let kind_ = kind.parse()?; - let backend = backends - .iter_mut() - .find(|b| b.encoding() == kind_) - .ok_or(anyhow!("unsupported backend: {}", kind))? - .as_dir_loadable() - .ok_or(anyhow!("{} does not support directory loading", kind))?; - registry.load(backend, Path::new(path))?; - } - Ok((backends, Registry::from(registry))) -} - -/// Capture the state necessary for calling into the backend ML libraries. -pub struct WasiNnCtx { - pub(crate) backends: HashMap, - pub(crate) registry: Registry, - pub(crate) graphs: Table, - pub(crate) executions: Table, -} - -impl WasiNnCtx { - /// Make a new context from the default state. - pub fn new(backends: impl IntoIterator, registry: Registry) -> Self { - let backends = backends.into_iter().map(|b| (b.encoding(), b)).collect(); - Self { - backends, - registry, - graphs: Table::default(), - executions: Table::default(), - } - } -} - -/// Possible errors while interacting with [WasiNnCtx]. 
-#[derive(Debug, Error)] -pub enum WasiNnError { - #[error("backend error")] - BackendError(#[from] BackendError), - #[error("guest error")] - GuestError(#[from] GuestError), - #[error("usage error")] - UsageError(#[from] UsageError), -} - -#[derive(Debug, Error)] -pub enum UsageError { - #[error("Invalid context; has the load function been called?")] - InvalidContext, - #[error("Only OpenVINO's IR is currently supported, passed encoding: {0:?}")] - InvalidEncoding(GraphEncoding), - #[error("OpenVINO expects only two buffers (i.e. [ir, weights]), passed: {0}")] - InvalidNumberOfBuilders(u32), - #[error("Invalid graph handle; has it been loaded?")] - InvalidGraphHandle, - #[error("Invalid execution context handle; has it been initialized?")] - InvalidExecutionContextHandle, - #[error("Not enough memory to copy tensor data of size: {0}")] - NotEnoughMemory(u32), - #[error("No graph found with name: {0}")] - NotFound(String), -} - -pub(crate) type WasiNnResult = std::result::Result; - -/// Record handle entries in a table. 
-pub struct Table { - entries: HashMap, - next_key: u32, -} - -impl Default for Table { - fn default() -> Self { - Self { - entries: HashMap::new(), - next_key: 0, - } - } -} - -impl Table -where - K: Eq + Hash + From + Copy, -{ - pub fn insert(&mut self, value: V) -> K { - let key = self.use_next_key(); - self.entries.insert(key, value); - key - } - - pub fn get(&self, key: K) -> Option<&V> { - self.entries.get(&key) - } - - pub fn get_mut(&mut self, key: K) -> Option<&mut V> { - self.entries.get_mut(&key) - } - - fn use_next_key(&mut self) -> K { - let current = self.next_key; - self.next_key += 1; - K::from(current) - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::registry::GraphRegistry; - - #[test] - fn example() { - struct FakeRegistry; - impl GraphRegistry for FakeRegistry { - fn get_mut(&mut self, _: &str) -> Option<&mut Graph> { - None - } - } - - let _ctx = WasiNnCtx::new([], Registry::from(FakeRegistry)); - } -} diff --git a/crates/wasi-nn/src/lib.rs b/crates/wasi-nn/src/lib.rs index 71d089d07489..a9f86f2da5a2 100644 --- a/crates/wasi-nn/src/lib.rs +++ b/crates/wasi-nn/src/lib.rs @@ -1,14 +1,34 @@ -mod ctx; -mod registry; - pub mod backend; -pub use ctx::{preload, WasiNnCtx}; -pub use registry::{GraphRegistry, InMemoryRegistry}; +mod registry; pub mod wit; pub mod witx; +use anyhow::anyhow; +use core::fmt; +pub use registry::{GraphRegistry, InMemoryRegistry}; +use std::path::Path; use std::sync::Arc; +/// Construct an in-memory registry from the available backends and a list of +/// `(, )`. This assumes graphs can be loaded +/// from a local directory, which is a safe assumption currently for the current +/// model types. 
+pub fn preload(preload_graphs: &[(String, String)]) -> anyhow::Result<(Vec, Registry)> { + let mut backends = backend::list(); + let mut registry = InMemoryRegistry::new(); + for (kind, path) in preload_graphs { + let kind_ = kind.parse()?; + let backend = backends + .iter_mut() + .find(|b| b.encoding() == kind_) + .ok_or(anyhow!("unsupported backend: {}", kind))? + .as_dir_loadable() + .ok_or(anyhow!("{} does not support directory loading", kind))?; + registry.load(backend, Path::new(path))?; + } + Ok((backends, Registry::from(registry))) +} + /// A machine learning backend. pub struct Backend(Box); impl std::ops::Deref for Backend { @@ -43,6 +63,27 @@ impl std::ops::Deref for Graph { } } +/// A host-side tensor. +/// +/// Eventually, this may be defined in each backend as they gain the ability to +/// hold tensors on various devices (TODO: +/// https://github.com/WebAssembly/wasi-nn/pull/70). +#[derive(Clone)] +pub struct Tensor { + dimensions: Vec, + ty: wit::TensorType, + data: Vec, +} +impl fmt::Debug for Tensor { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Tensor") + .field("dimensions", &self.dimensions) + .field("ty", &self.ty) + .field("data (bytes)", &self.data.len()) + .finish() + } +} + /// A backend-defined execution context. 
pub struct ExecutionContext(Box); impl From> for ExecutionContext { diff --git a/crates/wasi-nn/src/registry/in_memory.rs b/crates/wasi-nn/src/registry/in_memory.rs index b008f7f43684..86f81203d646 100644 --- a/crates/wasi-nn/src/registry/in_memory.rs +++ b/crates/wasi-nn/src/registry/in_memory.rs @@ -2,7 +2,7 @@ use super::{Graph, GraphRegistry}; use crate::backend::BackendFromDir; -use crate::wit::types::ExecutionTarget; +use crate::wit::ExecutionTarget; use anyhow::{anyhow, bail}; use std::{collections::HashMap, path::Path}; @@ -37,6 +37,9 @@ impl InMemoryRegistry { } impl GraphRegistry for InMemoryRegistry { + fn get(&self, name: &str) -> Option<&Graph> { + self.0.get(name) + } fn get_mut(&mut self, name: &str) -> Option<&mut Graph> { self.0.get_mut(name) } diff --git a/crates/wasi-nn/src/registry/mod.rs b/crates/wasi-nn/src/registry/mod.rs index 83f88e4dca0e..5f4d959132dc 100644 --- a/crates/wasi-nn/src/registry/mod.rs +++ b/crates/wasi-nn/src/registry/mod.rs @@ -12,5 +12,6 @@ use crate::Graph; pub use in_memory::InMemoryRegistry; pub trait GraphRegistry: Send + Sync { + fn get(&self, name: &str) -> Option<&Graph>; fn get_mut(&mut self, name: &str) -> Option<&mut Graph>; } diff --git a/crates/wasi-nn/src/wit.rs b/crates/wasi-nn/src/wit.rs index dbe894357cdf..40f6fc4c1ff6 100644 --- a/crates/wasi-nn/src/wit.rs +++ b/crates/wasi-nn/src/wit.rs @@ -15,8 +15,69 @@ //! [`Backend`]: crate::Backend //! [`types`]: crate::wit::types -use crate::{ctx::UsageError, WasiNnCtx}; -use std::{error::Error, fmt, hash::Hash, str::FromStr}; +use crate::backend::Id; +use crate::{Backend, Registry}; +use std::collections::HashMap; +use std::hash::Hash; +use std::{fmt, str::FromStr}; +use wasmtime::component::{Resource, ResourceTable}; + +/// Capture the state necessary for calling into the backend ML libraries. +pub struct WasiNnCtx { + pub(crate) backends: HashMap, + pub(crate) registry: Registry, +} + +impl WasiNnCtx { + /// Make a new context from the default state. 
+ pub fn new(backends: impl IntoIterator, registry: Registry) -> Self { + let backends = backends.into_iter().map(|b| (b.encoding(), b)).collect(); + Self { backends, registry } + } +} + +/// A wrapper capturing the needed internal wasi-nn state. +/// +/// Unlike other WASI proposals (see `wasmtime-wasi`, `wasmtime-wasi-http`), +/// this wrapper is not a `trait` but rather holds the references directly. This +/// remove one layer of abstraction for simplicity only, and could be added back +/// in the future if embedders need more control here. +pub struct WasiNnView<'a> { + ctx: &'a mut WasiNnCtx, + table: &'a mut ResourceTable, +} + +impl<'a> WasiNnView<'a> { + /// Create a new view into the wasi-nn state. + pub fn new(table: &'a mut ResourceTable, ctx: &'a mut WasiNnCtx) -> Self { + Self { ctx, table } + } +} + +pub enum Error { + /// Caller module passed an invalid argument. + InvalidArgument, + /// Invalid encoding. + InvalidEncoding, + /// The operation timed out. + Timeout, + /// Runtime Error. + RuntimeError, + /// Unsupported operation. + UnsupportedOperation, + /// Graph is too large. + TooLarge, + /// Graph not found. + NotFound, + /// A runtime error occurred that we should trap on; see `StreamError`. + Trap(anyhow::Error), +} + +impl From for Error { + fn from(error: wasmtime::component::ResourceTableError) -> Self { + Self::Trap(error.into()) + } +} /// Generate the traits and types from the `wasi-nn` WIT specification. mod gen_ { @@ -24,126 +85,241 @@ mod gen_ { world: "ml", path: "wit/wasi-nn.wit", trappable_imports: true, + with: { + // Configure all WIT http resources to be defined types in this + // crate to use the `ResourceTable` helper methods. 
+ "wasi:nn/graph/graph": crate::Graph, + "wasi:nn/tensor/tensor": crate::Tensor, + "wasi:nn/inference/graph-execution-context": crate::ExecutionContext, + }, + trappable_error_type: { + "wasi:nn/errors/error" => super::Error, + }, }); } -use gen_::wasi::nn as gen; // Shortcut to the module containing the types we need. +use gen_::wasi::nn::{self as gen}; // Shortcut to the module containing the types we need. // Export the `types` used in this crate as well as `ML::add_to_linker`. pub mod types { use super::gen; - pub use gen::graph::{ExecutionTarget, Graph, GraphEncoding}; + pub use gen::errors::Error; + pub use gen::graph::{ExecutionTarget, Graph, GraphBuilder, GraphEncoding}; pub use gen::inference::GraphExecutionContext; pub use gen::tensor::{Tensor, TensorType}; } +pub use gen::graph::{ExecutionTarget, Graph, GraphBuilder, GraphEncoding}; +pub use gen::inference::GraphExecutionContext; +pub use gen::tensor::{Tensor, TensorData, TensorDimensions, TensorType}; pub use gen_::Ml as ML; -impl gen::graph::Host for WasiNnCtx { - /// Load an opaque sequence of bytes to use for inference. +/// Add the WIT-based version of the `wasi-nn` API to a +/// [`wasmtime::component::Linker`]. 
+pub fn add_to_linker( + l: &mut wasmtime::component::Linker, + f: impl Fn(&mut T) -> WasiNnView<'_> + Send + Sync + Copy + 'static, +) -> anyhow::Result<()> { + gen::graph::add_to_linker_get_host(l, f)?; + gen::tensor::add_to_linker_get_host(l, f)?; + gen::inference::add_to_linker_get_host(l, f)?; + gen::errors::add_to_linker_get_host(l, f)?; + Ok(()) +} + +impl gen::graph::Host for WasiNnView<'_> { fn load( &mut self, - builders: Vec, - encoding: gen::graph::GraphEncoding, - target: gen::graph::ExecutionTarget, - ) -> wasmtime::Result> { - let graph = if let Some(backend) = self.backends.get_mut(&encoding) { + builders: Vec, + encoding: GraphEncoding, + target: ExecutionTarget, + ) -> Result, Error> { + tracing::debug!("load {encoding:?} {target:?}"); + if let Some(backend) = self.ctx.backends.get_mut(&encoding) { let slices = builders.iter().map(|s| s.as_slice()).collect::>(); - backend.load(&slices, target.into())? + match backend.load(&slices, target.into()) { + Ok(graph) => { + let graph = self.table.push(graph)?; + Ok(graph) + } + Err(error) => { + tracing::error!("failed to load graph: {error:?}"); + Err(Error::RuntimeError) + } + } } else { - return Err(UsageError::InvalidEncoding(encoding.into()).into()); - }; - let graph_id = self.graphs.insert(graph); - Ok(Ok(graph_id)) + Err(Error::InvalidEncoding) + } } - fn load_by_name( - &mut self, - name: String, - ) -> wasmtime::Result> { - if let Some(graph) = self.registry.get_mut(&name) { - let graph_id = self.graphs.insert(graph.clone().into()); - Ok(Ok(graph_id)) + fn load_by_name(&mut self, name: String) -> Result, Error> { + use core::result::Result::*; + tracing::debug!("load by name {name:?}"); + let registry = &self.ctx.registry; + if let Some(graph) = registry.get(&name) { + let graph = graph.clone(); + let graph = self.table.push(graph)?; + Ok(graph) } else { - return Err(UsageError::NotFound(name.to_string()).into()); + tracing::error!("failed to find graph with name: {name}"); + Err(Error::NotFound) 
} } } -impl gen::inference::Host for WasiNnCtx { - /// Create an execution instance of a loaded graph. - /// - /// TODO: remove completely? +impl gen::graph::HostGraph for WasiNnView<'_> { fn init_execution_context( &mut self, - graph_id: gen::graph::Graph, - ) -> wasmtime::Result> { - let exec_context = if let Some(graph) = self.graphs.get(graph_id) { - graph.init_execution_context()? - } else { - return Err(UsageError::InvalidGraphHandle.into()); - }; + graph: Resource, + ) -> Result, Error> { + use core::result::Result::*; + tracing::debug!("initialize execution context"); + let graph = self.table.get(&graph)?; + match graph.init_execution_context() { + Ok(exec_context) => { + let exec_context = self.table.push(exec_context)?; + Ok(exec_context) + } + Err(error) => { + tracing::error!("failed to initialize execution context: {error:?}"); + Err(Error::RuntimeError) + } + } + } - let exec_context_id = self.executions.insert(exec_context); - Ok(Ok(exec_context_id)) + fn drop(&mut self, graph: Resource) -> wasmtime::Result<()> { + self.table.delete(graph)?; + Ok(()) } +} - /// Define the inputs to use for inference. 
+impl gen::inference::HostGraphExecutionContext for WasiNnView<'_> { fn set_input( &mut self, - exec_context_id: gen::inference::GraphExecutionContext, - index: u32, - tensor: gen::tensor::Tensor, - ) -> wasmtime::Result> { - if let Some(exec_context) = self.executions.get_mut(exec_context_id) { - exec_context.set_input(index, &tensor)?; - Ok(Ok(())) + exec_context: Resource, + name: String, + tensor: Resource, + ) -> Result<(), Error> { + let tensor = self.table.get(&tensor)?; + tracing::debug!("set input {name:?}: {tensor:?}"); + let tensor = tensor.clone(); // TODO: avoid copying the tensor + let exec_context = self.table.get_mut(&exec_context)?; + if let Err(e) = exec_context.set_input(Id::Name(name), &tensor) { + tracing::error!("failed to set input: {e:?}"); + Err(Error::InvalidArgument) } else { - Err(UsageError::InvalidGraphHandle.into()) + Ok(()) } } - /// Compute the inference on the given inputs. - /// - /// TODO: refactor to compute(list) -> result, error> - fn compute( - &mut self, - exec_context_id: gen::inference::GraphExecutionContext, - ) -> wasmtime::Result> { - if let Some(exec_context) = self.executions.get_mut(exec_context_id) { - exec_context.compute()?; - Ok(Ok(())) - } else { - Err(UsageError::InvalidExecutionContextHandle.into()) + fn compute(&mut self, exec_context: Resource) -> Result<(), Error> { + let exec_context = &mut self.table.get_mut(&exec_context)?; + tracing::debug!("compute"); + match exec_context.compute() { + Ok(()) => Ok(()), + Err(error) => { + tracing::error!("failed to compute: {error:?}"); + Err(Error::RuntimeError) + } } } - /// Extract the outputs after inference. + #[doc = r" Extract the outputs after inference."] fn get_output( &mut self, - exec_context_id: gen::inference::GraphExecutionContext, - index: u32, - ) -> wasmtime::Result> { - if let Some(exec_context) = self.executions.get_mut(exec_context_id) { - // Read the output bytes. 
TODO: this involves a hard-coded upper - // limit on the tensor size that is necessary because there is no - // way to introspect the graph outputs - // (https://github.com/WebAssembly/wasi-nn/issues/37). - let mut destination = vec![0; 1024 * 1024]; - let bytes_read = exec_context.get_output(index, &mut destination)?; - destination.truncate(bytes_read as usize); - Ok(Ok(destination)) - } else { - Err(UsageError::InvalidGraphHandle.into()) + exec_context: Resource, + name: String, + ) -> Result, Error> { + let exec_context = self.table.get_mut(&exec_context)?; + tracing::debug!("get output {name:?}"); + match exec_context.get_output(Id::Name(name)) { + Ok(tensor) => { + let tensor = self.table.push(tensor)?; + Ok(tensor) + } + Err(error) => { + tracing::error!("failed to get output: {error:?}"); + Err(Error::RuntimeError) + } } } + + fn drop(&mut self, exec_context: Resource) -> wasmtime::Result<()> { + self.table.delete(exec_context)?; + Ok(()) + } } -impl gen::errors::Host for WasiNnCtx {} +impl gen::tensor::HostTensor for WasiNnView<'_> { + fn new( + &mut self, + dimensions: TensorDimensions, + ty: TensorType, + data: TensorData, + ) -> wasmtime::Result> { + let tensor = Tensor { + dimensions, + ty, + data, + }; + let tensor = self.table.push(tensor)?; + Ok(tensor) + } + + fn dimensions(&mut self, tensor: Resource) -> wasmtime::Result { + let tensor = self.table.get(&tensor)?; + Ok(tensor.dimensions.clone()) + } + + fn ty(&mut self, tensor: Resource) -> wasmtime::Result { + let tensor = self.table.get(&tensor)?; + Ok(tensor.ty) + } + + fn data(&mut self, tensor: Resource) -> wasmtime::Result { + let tensor = self.table.get(&tensor)?; + Ok(tensor.data.clone()) + } -impl gen::tensor::Host for WasiNnCtx {} + fn drop(&mut self, tensor: Resource) -> wasmtime::Result<()> { + self.table.delete(tensor)?; + Ok(()) + } +} + +impl gen::tensor::Host for WasiNnView<'_> {} +impl gen::errors::Host for WasiNnView<'_> { + fn convert_error(&mut self, err: Error) -> 
wasmtime::Result { + match err { + Error::InvalidArgument => Ok(gen::errors::Error::InvalidArgument), + Error::InvalidEncoding => Ok(gen::errors::Error::InvalidEncoding), + Error::Timeout => Ok(gen::errors::Error::Timeout), + Error::RuntimeError => Ok(gen::errors::Error::RuntimeError), + Error::UnsupportedOperation => Ok(gen::errors::Error::UnsupportedOperation), + Error::TooLarge => Ok(gen::errors::Error::TooLarge), + Error::NotFound => Ok(gen::errors::Error::NotFound), + Error::Trap(e) => Err(e), + } + } +} +impl gen::inference::Host for WasiNnView<'_> {} impl Hash for gen::graph::GraphEncoding { fn hash(&self, state: &mut H) { - core::mem::discriminant(self).hash(state); + self.to_string().hash(state) + } +} + +impl fmt::Display for gen::graph::GraphEncoding { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use gen::graph::GraphEncoding::*; + match self { + Openvino => write!(f, "openvino"), + Onnx => write!(f, "onnx"), + Pytorch => write!(f, "pytorch"), + Tensorflow => write!(f, "tensorflow"), + Tensorflowlite => write!(f, "tensorflowlite"), + Autodetect => write!(f, "autodetect"), + Ggml => write!(f, "ggml"), + } } } @@ -168,4 +344,4 @@ impl fmt::Display for GraphEncodingParseError { write!(f, "unknown graph encoding: {}", self.0) } } -impl Error for GraphEncodingParseError {} +impl std::error::Error for GraphEncodingParseError {} diff --git a/crates/wasi-nn/src/witx.rs b/crates/wasi-nn/src/witx.rs index 9d78a595ec90..f4f2eab90647 100644 --- a/crates/wasi-nn/src/witx.rs +++ b/crates/wasi-nn/src/witx.rs @@ -13,11 +13,83 @@ //! //! 
[`types`]: crate::wit::types -use crate::ctx::{UsageError, WasiNnCtx, WasiNnError, WasiNnResult as Result}; -use wiggle::{GuestMemory, GuestPtr}; +use crate::backend::BackendError; +use crate::backend::Id; +use crate::wit::GraphEncoding; +use crate::{Backend, ExecutionContext, Graph, Registry}; +use std::collections::HashMap; +use std::hash::Hash; +use thiserror::Error; +use wiggle::{GuestError, GuestMemory, GuestPtr}; pub use gen::wasi_ephemeral_nn::add_to_linker; +pub(crate) type WasiNnResult = std::result::Result; +type Result = WasiNnResult; +type GraphId = u32; +type GraphExecutionContextId = u32; + +/// Capture the state necessary for calling into the backend ML libraries. +pub struct WasiNnCtx { + pub(crate) backends: HashMap, + pub(crate) registry: Registry, + pub(crate) graphs: Table, + pub(crate) executions: Table, +} + +impl WasiNnCtx { + /// Make a new context from the default state. + pub fn new(backends: impl IntoIterator, registry: Registry) -> Self { + let backends = backends.into_iter().map(|b| (b.encoding(), b)).collect(); + Self { + backends, + registry, + graphs: Table::default(), + executions: Table::default(), + } + } +} + +/// Record handle entries in a table. +pub struct Table { + entries: HashMap, + next_key: u32, +} + +impl Default for Table { + fn default() -> Self { + Self { + entries: HashMap::new(), + next_key: 0, + } + } +} + +impl Table +where + K: Eq + Hash + From + Copy, +{ + pub fn insert(&mut self, value: V) -> K { + let key = self.use_next_key(); + self.entries.insert(key, value); + key + } + + pub fn get(&self, key: K) -> Option<&V> { + self.entries.get(&key) + } + + pub fn get_mut(&mut self, key: K) -> Option<&mut V> { + self.entries.get_mut(&key) + } + + fn use_next_key(&mut self) -> K { + let current = self.next_key; + self.next_key += 1; + K::from(current) + } +} + /// Generate the traits and types from the `wasi-nn` WITX specification. 
mod gen { use super::*; @@ -42,9 +114,10 @@ mod gen { ) -> anyhow::Result { tracing::debug!("host error: {:?}", e); match e { - WasiNnError::BackendError(_) => unimplemented!(), - WasiNnError::GuestError(_) => unimplemented!(), - WasiNnError::UsageError(_) => unimplemented!(), + WasiNnError::BackendError(_) => Ok(types::NnErrno::RuntimeError), + WasiNnError::GuestError(_) => unimplemented!("guest error conversion"), + WasiNnError::UsageError(_) => Ok(types::NnErrno::UnsupportedOperation), + WasiNnError::NotEnoughMemory(_) => Ok(types::NnErrno::TooLarge), } } } @@ -119,10 +192,10 @@ impl gen::wasi_ephemeral_nn::WasiEphemeralNn for WasiNnCtx { if let Some(exec_context) = self.executions.get_mut(exec_context_id.into()) { let tensor = crate::wit::types::Tensor { dimensions: memory.to_vec(tensor.dimensions)?, - tensor_type: tensor.type_.into(), + ty: tensor.type_.into(), data: memory.to_vec(tensor.data)?, }; - Ok(exec_context.set_input(index, &tensor)?) + Ok(exec_context.set_input(Id::Index(index), &tensor)?) } else { Err(UsageError::InvalidGraphHandle.into()) } @@ -149,13 +222,19 @@ impl gen::wasi_ephemeral_nn::WasiEphemeralNn for WasiNnCtx { out_buffer_max_size: u32, ) -> Result { if let Some(exec_context) = self.executions.get_mut(exec_context_id.into()) { - let mut destination = memory + let tensor = exec_context.get_output(Id::Index(index))?; + let destination = memory .as_slice_mut(out_buffer.as_array(out_buffer_max_size))? .expect( "cannot use with shared memories; \ see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)", ); - Ok(exec_context.get_output(index, &mut destination)?) 
+ if tensor.data.len() > destination.len() { + Err(WasiNnError::NotEnoughMemory(tensor.data.len())) + } else { + destination[..tensor.data.len()].copy_from_slice(&tensor.data); + Ok(tensor.data.len() as u32) + } } else { Err(UsageError::InvalidGraphHandle.into()) } @@ -199,3 +278,28 @@ impl From for crate::wit::types::TensorType { } } } + +/// Possible errors while interacting with [WasiNnCtx]. +#[derive(Debug, Error)] +pub enum WasiNnError { + #[error("backend error")] + BackendError(#[from] BackendError), + #[error("guest error")] + GuestError(#[from] GuestError), + #[error("usage error")] + UsageError(#[from] UsageError), + #[error("not enough memory: requested {0} bytes")] + NotEnoughMemory(usize), +} + +#[derive(Debug, Error)] +pub enum UsageError { + #[error("Only OpenVINO's IR is currently supported, passed encoding: {0:?}")] + InvalidEncoding(GraphEncoding), + #[error("Invalid graph handle; has it been loaded?")] + InvalidGraphHandle, + #[error("Invalid execution context handle; has it been initialized?")] + InvalidExecutionContextHandle, + #[error("No graph found with name: {0}")] + NotFound(String), +} diff --git a/crates/wasi-nn/tests/check/mod.rs b/crates/wasi-nn/tests/check/mod.rs index ffdc099b3009..9f59d38130af 100644 --- a/crates/wasi-nn/tests/check/mod.rs +++ b/crates/wasi-nn/tests/check/mod.rs @@ -1,10 +1,8 @@ -//! This is testing-specific code--it is public only so that it can be -//! accessible both in unit and integration tests. +//! Check that the environment is set up correctly for running tests. //! //! This module checks: -//! - that OpenVINO can be found in the environment -//! - that WinML is available -//! - that some ML model artifacts can be downloaded and cached. +//! - that various backends can be located on the system (see sub-modules) +//! - that certain ML model artifacts can be downloaded and cached. 
#[allow(unused_imports)] use anyhow::{anyhow, Context, Result}; diff --git a/crates/wasi-nn/tests/exec/mod.rs b/crates/wasi-nn/tests/exec/mod.rs index 23840e7a5d3a..5fd25cbd3cbf 100644 --- a/crates/wasi-nn/tests/exec/mod.rs +++ b/crates/wasi-nn/tests/exec/mod.rs @@ -1,52 +1,6 @@ -use crate::check::artifacts_dir; -use anyhow::Result; -use std::path::Path; -use wasi_common::sync::{Dir, WasiCtxBuilder}; -use wasi_common::WasiCtx; -use wasmtime::{Config, Engine, Linker, Module, Store}; -use wasmtime_wasi_nn::{Backend, InMemoryRegistry, WasiNnCtx}; +//! Provide a Wasmtime embedding for executing wasi-nn test programs. -const PREOPENED_DIR_NAME: &str = "fixture"; +pub mod wit; +pub mod witx; -/// Run a wasi-nn test program. This is modeled after -/// `crates/wasi/tests/all/main.rs` but still uses the older preview1 API -/// for file reads. -pub fn run(path: &str, backend: Backend, preload_model: bool) -> Result<()> { - let path = Path::new(path); - let engine = Engine::new(&Config::new())?; - let mut linker = Linker::new(&engine); - wasmtime_wasi_nn::witx::add_to_linker(&mut linker, |s: &mut Ctx| &mut s.wasi_nn)?; - wasi_common::sync::add_to_linker(&mut linker, |s: &mut Ctx| &mut s.wasi)?; - let module = Module::from_file(&engine, path)?; - let mut store = Store::new(&engine, Ctx::new(&artifacts_dir(), preload_model, backend)?); - let instance = linker.instantiate(&mut store, &module)?; - let start = instance.get_typed_func::<(), ()>(&mut store, "_start")?; - start.call(&mut store, ())?; - Ok(()) -} - -/// The host state for running wasi-nn tests. 
-struct Ctx { - wasi: WasiCtx, - wasi_nn: WasiNnCtx, -} - -impl Ctx { - fn new(preopen_dir: &Path, preload_model: bool, mut backend: Backend) -> Result { - let preopen_dir = Dir::open_ambient_dir(preopen_dir, cap_std::ambient_authority())?; - let mut builder = WasiCtxBuilder::new(); - builder - .inherit_stdio() - .preopened_dir(preopen_dir, PREOPENED_DIR_NAME)?; - let wasi = builder.build(); - - let mut registry = InMemoryRegistry::new(); - let mobilenet_dir = artifacts_dir(); - if preload_model { - registry.load((backend).as_dir_loadable().unwrap(), &mobilenet_dir)?; - } - let wasi_nn = WasiNnCtx::new([backend.into()], registry.into()); - - Ok(Self { wasi, wasi_nn }) - } -} +pub const PREOPENED_DIR_NAME: &str = "fixture"; diff --git a/crates/wasi-nn/tests/exec/wit.rs b/crates/wasi-nn/tests/exec/wit.rs new file mode 100644 index 000000000000..5f2d546d667d --- /dev/null +++ b/crates/wasi-nn/tests/exec/wit.rs @@ -0,0 +1,73 @@ +use super::PREOPENED_DIR_NAME; +use crate::check::artifacts_dir; +use anyhow::{anyhow, Result}; +use std::path::Path; +use wasmtime::component::{Component, Linker, ResourceTable}; +use wasmtime::{Config, Engine, Store}; +use wasmtime_wasi::bindings::sync::Command; +use wasmtime_wasi::{DirPerms, FilePerms, WasiCtx, WasiCtxBuilder}; +use wasmtime_wasi_nn::wit::WasiNnView; +use wasmtime_wasi_nn::{wit::WasiNnCtx, Backend, InMemoryRegistry}; + +/// Run a wasi-nn test program. This is modeled after +/// `crates/wasi/tests/all/main.rs` but still uses the older preview1 API for +/// file reads. 
+pub fn run(path: &str, backend: Backend, preload_model: bool) -> Result<()> { + let path = Path::new(path); + let engine = Engine::new(&Config::new())?; + let mut linker = Linker::new(&engine); + wasmtime_wasi_nn::wit::add_to_linker(&mut linker, |c: &mut Ctx| { + WasiNnView::new(&mut c.table, &mut c.wasi_nn) + })?; + wasmtime_wasi::add_to_linker_sync(&mut linker)?; + let module = Component::from_file(&engine, path)?; + let mut store = Store::new(&engine, Ctx::new(&artifacts_dir(), preload_model, backend)?); + let command = Command::instantiate(&mut store, &module, &linker)?; + let result = command.wasi_cli_run().call_run(&mut store)?; + result.map_err(|_| anyhow!("failed to run command")) +} + +/// The host state for running wasi-nn component tests. +struct Ctx { + wasi: WasiCtx, + wasi_nn: WasiNnCtx, + table: ResourceTable, +} + +impl Ctx { + fn new(preopen_dir: &Path, preload_model: bool, mut backend: Backend) -> Result { + let mut builder = WasiCtxBuilder::new(); + builder.inherit_stdio().preopened_dir( + preopen_dir, + PREOPENED_DIR_NAME, + DirPerms::READ, + FilePerms::READ, + )?; + let wasi = builder.build(); + + let mut registry = InMemoryRegistry::new(); + let mobilenet_dir = artifacts_dir(); + if preload_model { + registry.load((backend).as_dir_loadable().unwrap(), &mobilenet_dir)?; + } + let wasi_nn = WasiNnCtx::new([backend.into()], registry.into()); + + let table = ResourceTable::new(); + + Ok(Self { + wasi, + wasi_nn, + table, + }) + } +} + +impl wasmtime_wasi::WasiView for Ctx { + fn ctx(&mut self) -> &mut WasiCtx { + &mut self.wasi + } + + fn table(&mut self) -> &mut ResourceTable { + &mut self.table + } +} diff --git a/crates/wasi-nn/tests/exec/witx.rs b/crates/wasi-nn/tests/exec/witx.rs new file mode 100644 index 000000000000..21feea1dd641 --- /dev/null +++ b/crates/wasi-nn/tests/exec/witx.rs @@ -0,0 +1,52 @@ +use super::PREOPENED_DIR_NAME; +use crate::check::artifacts_dir; +use anyhow::Result; +use std::path::Path; +use wasmtime::{Config, Engine, 
Linker, Module, Store}; +use wasmtime_wasi::{preview1::WasiP1Ctx, DirPerms, FilePerms, WasiCtxBuilder}; +use wasmtime_wasi_nn::{witx::WasiNnCtx, Backend, InMemoryRegistry}; + +/// Run a wasi-nn test program. This is modeled after +/// `crates/wasi/tests/all/main.rs` but still uses the older preview1 API +/// for file reads. +pub fn run(path: &str, backend: Backend, preload_model: bool) -> Result<()> { + let path = Path::new(path); + let engine = Engine::new(&Config::new())?; + let mut linker = Linker::new(&engine); + wasmtime_wasi_nn::witx::add_to_linker(&mut linker, |s: &mut Ctx| &mut s.wasi_nn)?; + wasmtime_wasi::preview1::add_to_linker_sync(&mut linker, |s: &mut Ctx| &mut s.wasi)?; + let module = Module::from_file(&engine, path)?; + let mut store = Store::new(&engine, Ctx::new(&artifacts_dir(), preload_model, backend)?); + let instance = linker.instantiate(&mut store, &module)?; + let start = instance.get_typed_func::<(), ()>(&mut store, "_start")?; + start.call(&mut store, ())?; + Ok(()) +} + +/// The host state for running wasi-nn tests. 
+struct Ctx { + wasi: WasiP1Ctx, + wasi_nn: WasiNnCtx, +} + +impl Ctx { + fn new(preopen_dir: &Path, preload_model: bool, mut backend: Backend) -> Result { + let mut builder = WasiCtxBuilder::new(); + builder.inherit_stdio().preopened_dir( + preopen_dir, + PREOPENED_DIR_NAME, + DirPerms::READ, + FilePerms::READ, + )?; + let wasi = builder.build_p1(); + + let mut registry = InMemoryRegistry::new(); + let mobilenet_dir = artifacts_dir(); + if preload_model { + registry.load((backend).as_dir_loadable().unwrap(), &mobilenet_dir)?; + } + let wasi_nn = WasiNnCtx::new([backend.into()], registry.into()); + + Ok(Self { wasi, wasi_nn }) + } +} diff --git a/crates/wasi-nn/tests/fixtures/readme.md b/crates/wasi-nn/tests/fixtures/README.md similarity index 100% rename from crates/wasi-nn/tests/fixtures/readme.md rename to crates/wasi-nn/tests/fixtures/README.md diff --git a/crates/wasi-nn/tests/test-programs.rs b/crates/wasi-nn/tests/test-programs.rs index 6dfb89a90e58..e375063b1cd8 100644 --- a/crates/wasi-nn/tests/test-programs.rs +++ b/crates/wasi-nn/tests/test-programs.rs @@ -23,6 +23,8 @@ use test_programs_artifacts::*; use wasmtime_wasi_nn::{backend, Backend}; fn main() -> Result<()> { + tracing_subscriber::fmt::init(); + if cfg!(miri) { return Ok(()); } @@ -45,7 +47,7 @@ fn main() -> Result<()> { let mut trials = Vec::new(); for program in programs { // Either ignore the test if it cannot run (i.e., downgrade `Fail` to - // `Ignore`) or pre-emptively fail it if `error_on_failed_check` is set. + // `Ignore`) or preemptively fail it if `error_on_failed_check` is set. let (run_test, mut check) = check_test_program(program); if !error_on_failed_check { check = check.downgrade_failure(); // Downgrade `Fail` to `Ignore`. @@ -68,103 +70,122 @@ fn main() -> Result<()> { /// Return the test program to run and a check that must pass for the test to /// run. 
fn check_test_program(name: &str) -> (fn() -> Result<()>, IgnoreCheck) { - use IgnoreCheck::*; match name { - "nn_image_classification" => ( - nn_image_classification, - if !cfg!(target_arch = "x86_64") { - Fail("requires x86_64".into()) - } else if !cfg!(target_os = "linux") && !cfg!(target_os = "windows") { - Fail("requires linux or windows".into()) - } else if let Err(e) = check::openvino::is_installed() { - Fail(e.to_string().into()) - } else { - Run - }, + // Legacy WITX-based tests: + "nn_witx_image_classification_openvino" => ( + nn_witx_image_classification_openvino, + IgnoreCheck::for_openvino(), + ), + "nn_witx_image_classification_openvino_named" => ( + nn_witx_image_classification_openvino_named, + IgnoreCheck::for_openvino(), + ), + "nn_witx_image_classification_onnx" => { + (nn_witx_image_classification_onnx, IgnoreCheck::for_onnx()) + } + "nn_witx_image_classification_winml_named" => ( + nn_witx_image_classification_winml_named, + IgnoreCheck::for_winml(), ), - "nn_image_classification_named" => ( - nn_image_classification_named, - if !cfg!(target_arch = "x86_64") { - Fail("requires x86_64".into()) - } else if !cfg!(target_os = "linux") && !cfg!(target_os = "windows") { - Fail("requires linux or windows or macos".into()) - } else if let Err(e) = check::openvino::is_installed() { - Fail(e.to_string().into()) - } else { - Run - }, + // WIT-based tests: + "nn_wit_image_classification_openvino" => ( + nn_wit_image_classification_openvino, + IgnoreCheck::for_openvino(), ), - "nn_image_classification_onnx" => ( - nn_image_classification_onnx, - #[cfg(feature = "onnx")] - if !cfg!(target_arch = "x86_64") && !cfg!(target_arch = "aarch64") { - Fail("requires x86_64 or aarch64".into()) - } else if !cfg!(target_os = "linux") - && !cfg!(target_os = "windows") - && !cfg!(target_os = "macos") - { - Fail("requires linux, windows, or macos".into()) - } else { - Run - }, - #[cfg(not(feature = "onnx"))] - Ignore("requires the `onnx` feature".into()), + 
"nn_wit_image_classification_openvino_named" => ( + nn_wit_image_classification_openvino_named, + IgnoreCheck::for_openvino(), ), - "nn_image_classification_winml" => ( - nn_image_classification_winml, - #[cfg(all(feature = "winml", target_os = "windows"))] - if !cfg!(target_arch = "x86_64") { - Fail("requires x86_64".into()) - } else if cfg!(target_os = "windows") { - Fail("requires windows".into()) - } else if let Err(e) = check::winml::is_available() { - Fail(e.to_string().into()) - } else { - Run - }, - #[cfg(not(all(feature = "winml", target_os = "windows")))] - Ignore("requires the `winml` feature on windows".into()), + "nn_wit_image_classification_onnx" => { + (nn_wit_image_classification_onnx, IgnoreCheck::for_onnx()) + } + "nn_wit_image_classification_winml_named" => ( + nn_wit_image_classification_winml_named, + IgnoreCheck::for_winml(), ), _ => panic!("unknown test program: {} (add to this `match`)", name), } } -fn nn_image_classification() -> Result<()> { +fn nn_witx_image_classification_openvino() -> Result<()> { check::openvino::is_installed()?; check::openvino::are_artifacts_available()?; let backend = Backend::from(backend::openvino::OpenvinoBackend::default()); - exec::run(NN_IMAGE_CLASSIFICATION, backend, false) + exec::witx::run(NN_WITX_IMAGE_CLASSIFICATION_OPENVINO, backend, false) } -fn nn_image_classification_named() -> Result<()> { +fn nn_witx_image_classification_openvino_named() -> Result<()> { check::openvino::is_installed()?; check::openvino::are_artifacts_available()?; let backend = Backend::from(backend::openvino::OpenvinoBackend::default()); - exec::run(NN_IMAGE_CLASSIFICATION_NAMED, backend, true) + exec::witx::run(NN_WITX_IMAGE_CLASSIFICATION_OPENVINO_NAMED, backend, true) } #[cfg(feature = "onnx")] -fn nn_image_classification_onnx() -> Result<()> { +fn nn_witx_image_classification_onnx() -> Result<()> { check::onnx::are_artifacts_available()?; - let backend = Backend::from(backend::onnxruntime::OnnxBackend::default()); - 
exec::run(NN_IMAGE_CLASSIFICATION_ONNX, backend, false) + let backend = Backend::from(backend::onnx::OnnxBackend::default()); + exec::witx::run(NN_WITX_IMAGE_CLASSIFICATION_ONNX, backend, false) } - #[cfg(not(feature = "onnx"))] -fn nn_image_classification_onnx() -> Result<()> { +fn nn_witx_image_classification_onnx() -> Result<()> { anyhow::bail!("this test requires the `onnx` feature") } #[cfg(all(feature = "winml", target_os = "windows"))] -fn nn_image_classification_winml() -> Result<()> { +fn nn_witx_image_classification_winml_named() -> Result<()> { check::winml::is_available()?; check::onnx::are_artifacts_available()?; let backend = Backend::from(backend::winml::WinMLBackend::default()); - exec::run(NN_IMAGE_CLASSIFICATION_ONNX, backend, false) + exec::witx::run(NN_WITX_IMAGE_CLASSIFICATION_ONNX, backend, false) +} +#[cfg(not(all(feature = "winml", target_os = "windows")))] +fn nn_witx_image_classification_winml_named() -> Result<()> { + anyhow::bail!("this test requires the `winml` feature and only runs on windows") } +fn nn_wit_image_classification_openvino() -> Result<()> { + check::openvino::is_installed()?; + check::openvino::are_artifacts_available()?; + let backend = Backend::from(backend::openvino::OpenvinoBackend::default()); + exec::wit::run( + NN_WIT_IMAGE_CLASSIFICATION_OPENVINO_COMPONENT, + backend, + false, + ) +} + +fn nn_wit_image_classification_openvino_named() -> Result<()> { + check::openvino::is_installed()?; + check::openvino::are_artifacts_available()?; + let backend = Backend::from(backend::openvino::OpenvinoBackend::default()); + exec::wit::run( + NN_WIT_IMAGE_CLASSIFICATION_OPENVINO_NAMED_COMPONENT, + backend, + true, + ) +} + +#[cfg(feature = "onnx")] +fn nn_wit_image_classification_onnx() -> Result<()> { + check::onnx::are_artifacts_available()?; + let backend = Backend::from(backend::onnx::OnnxBackend::default()); + exec::wit::run(NN_WIT_IMAGE_CLASSIFICATION_ONNX_COMPONENT, backend, false) +} +#[cfg(not(feature = "onnx"))] +fn 
nn_wit_image_classification_onnx() -> Result<()> { + anyhow::bail!("this test requires the `onnx` feature") +} + +#[cfg(all(feature = "winml", target_os = "windows"))] +fn nn_wit_image_classification_winml_named() -> Result<()> { + check::winml::is_available()?; + check::onnx::are_artifacts_available()?; + let backend = Backend::from(backend::winml::WinMLBackend::default()); + exec::wit::run(NN_WIT_IMAGE_CLASSIFICATION_ONNX_COMPONENT, backend, false) +} #[cfg(not(all(feature = "winml", target_os = "windows")))] -fn nn_image_classification_winml() -> Result<()> { +fn nn_wit_image_classification_winml_named() -> Result<()> { anyhow::bail!("this test requires the `winml` feature and only runs on windows") } @@ -197,3 +218,52 @@ impl IgnoreCheck { matches!(self, IgnoreCheck::Ignore(_)) } } + +/// Some pre-test checks for various backends. +impl IgnoreCheck { + fn for_openvino() -> IgnoreCheck { + use IgnoreCheck::*; + if !cfg!(target_arch = "x86_64") { + Fail("requires x86_64".into()) + } else if !cfg!(target_os = "linux") && !cfg!(target_os = "windows") { + Fail("requires linux or windows or macos".into()) + } else if let Err(e) = check::openvino::is_installed() { + Fail(e.to_string().into()) + } else { + Run + } + } + + fn for_onnx() -> Self { + use IgnoreCheck::*; + #[cfg(feature = "onnx")] + if !cfg!(target_arch = "x86_64") && !cfg!(target_arch = "aarch64") { + Fail("requires x86_64 or aarch64".into()) + } else if !cfg!(target_os = "linux") + && !cfg!(target_os = "windows") + && !cfg!(target_os = "macos") + { + Fail("requires linux, windows, or macos".into()) + } else { + Run + } + #[cfg(not(feature = "onnx"))] + Ignore("requires the `onnx` feature".into()) + } + + fn for_winml() -> IgnoreCheck { + use IgnoreCheck::*; + #[cfg(all(feature = "winml", target_os = "windows"))] + if !cfg!(target_arch = "x86_64") { + Fail("requires x86_64".into()) + } else if !cfg!(target_os = "windows") { + Fail("requires windows".into()) + } else if let Err(e) = 
check::winml::is_available() { + Fail(e.to_string().into()) + } else { + Run + } + #[cfg(not(all(feature = "winml", target_os = "windows")))] + Ignore("requires the `winml` feature on windows".into()) + } +} diff --git a/crates/wasi-nn/wit/wasi-nn.wit b/crates/wasi-nn/wit/wasi-nn.wit index 19e3de875d61..b8ffd22e8c04 100644 --- a/crates/wasi-nn/wit/wasi-nn.wit +++ b/crates/wasi-nn/wit/wasi-nn.wit @@ -43,16 +43,18 @@ interface tensor { /// memory--e.g., using row-major ordering--and could perhaps be improved. type tensor-data = list; - record tensor { + resource tensor { + constructor(dimensions: tensor-dimensions, ty: tensor-type, data: tensor-data); + // Describe the size of the tensor (e.g., 2x2x2x2 -> [2, 2, 2, 2]). To represent a tensor // containing a single value, use `[1]` for the tensor dimensions. - dimensions: tensor-dimensions, + dimensions: func() -> tensor-dimensions; // Describe the type of element in the tensor (e.g., `f32`). - tensor-type: tensor-type, + ty: func() -> tensor-type; - // Contains the tensor data. - data: tensor-data, + // Return the tensor data. + data: func() -> tensor-data; } } @@ -61,11 +63,12 @@ interface tensor { interface graph { use errors.{error}; use tensor.{tensor}; + use inference.{graph-execution-context}; /// An execution graph for performing inference (i.e., a model). - /// - /// TODO: replace with `resource` (https://github.com/WebAssembly/wasi-nn/issues/47). - type graph = u32; + resource graph { + init-execution-context: func() -> result; + } /// Describes the encoding of the graph. This allows the API to be implemented by various /// backends that encode (i.e., serialize) their graph IR with different formats. @@ -75,6 +78,7 @@ interface graph { tensorflow, pytorch, tensorflowlite, + ggml, autodetect, } @@ -107,27 +111,25 @@ interface graph { interface inference { use errors.{error}; use tensor.{tensor, tensor-data}; - use graph.{graph}; /// Bind a `graph` to the input and output tensors for an inference. 
/// - /// TODO: this is no longer necessary in WIT (https://github.com/WebAssembly/wasi-nn/issues/43) - type graph-execution-context = u32; - - /// Create an execution instance of a loaded graph. - init-execution-context: func(graph: graph) -> result; - - /// Define the inputs to use for inference. - set-input: func(ctx: graph-execution-context, index: u32, tensor: tensor) -> result<_, error>; - - /// Compute the inference on the given inputs. - /// - /// Note the expected sequence of calls: `set-input`, `compute`, `get-output`. TODO: this - /// expectation could be removed as a part of https://github.com/WebAssembly/wasi-nn/issues/43. - compute: func(ctx: graph-execution-context) -> result<_, error>; - - /// Extract the outputs after inference. - get-output: func(ctx: graph-execution-context, index: u32) -> result; + /// TODO: this may no longer be necessary in WIT + /// (https://github.com/WebAssembly/wasi-nn/issues/43) + resource graph-execution-context { + /// Define the inputs to use for inference. + set-input: func(name: string, tensor: tensor) -> result<_, error>; + + /// Compute the inference on the given inputs. + /// + /// Note the expected sequence of calls: `set-input`, `compute`, `get-output`. TODO: this + /// expectation could be removed as a part of + /// https://github.com/WebAssembly/wasi-nn/issues/43. + compute: func() -> result<_, error>; + + /// Extract the outputs after inference. + get-output: func(name: string) -> result; + } } /// TODO: create function-specific errors (https://github.com/WebAssembly/wasi-nn/issues/42) @@ -137,7 +139,8 @@ interface errors { invalid-argument, // Invalid encoding. invalid-encoding, - busy, + // The operation timed out. + timeout, // Runtime Error. runtime-error, // Unsupported operation. 
diff --git a/src/commands/run.rs b/src/commands/run.rs index 317b91aa22d7..e5b3b816a28a 100644 --- a/src/commands/run.rs +++ b/src/commands/run.rs @@ -18,7 +18,7 @@ use wasmtime::{Engine, Func, Module, Store, StoreLimits, Val, ValType}; use wasmtime_wasi::WasiView; #[cfg(feature = "wasi-nn")] -use wasmtime_wasi_nn::WasiNnCtx; +use wasmtime_wasi_nn::wit::WasiNnView; #[cfg(feature = "wasi-threads")] use wasmtime_wasi_threads::WasiThreadsCtx; @@ -624,40 +624,37 @@ impl RunCommand { { bail!("Cannot enable wasi-nn when the binary is not compiled with this feature."); } - #[cfg(feature = "wasi-nn")] + #[cfg(all(feature = "wasi-nn", feature = "component-model"))] { + let (backends, registry) = self.collect_preloaded_nn_graphs()?; match linker { CliLinker::Core(linker) => { wasmtime_wasi_nn::witx::add_to_linker(linker, |host| { - // This WASI proposal is currently not protected against - // concurrent access--i.e., when wasi-threads is actively - // spawning new threads, we cannot (yet) safely allow access and - // fail if more than one thread has `Arc`-references to the - // context. Once this proposal is updated (as wasi-common has - // been) to allow concurrent access, this `Arc::get_mut` - // limitation can be removed. 
- Arc::get_mut(host.wasi_nn.as_mut().unwrap()) + Arc::get_mut(host.wasi_nn_witx.as_mut().unwrap()) .expect("wasi-nn is not implemented with multi-threading support") })?; + store.data_mut().wasi_nn_witx = Some(Arc::new( + wasmtime_wasi_nn::witx::WasiNnCtx::new(backends, registry), + )); } #[cfg(feature = "component-model")] CliLinker::Component(linker) => { - wasmtime_wasi_nn::wit::ML::add_to_linker(linker, |host| { - Arc::get_mut(host.wasi_nn.as_mut().unwrap()) - .expect("wasi-nn is not implemented with multi-threading support") + wasmtime_wasi_nn::wit::add_to_linker(linker, |h: &mut Host| { + let preview2_ctx = + h.preview2_ctx.as_mut().expect("wasip2 is not configured"); + let preview2_ctx = Arc::get_mut(preview2_ctx) + .expect("wasmtime_wasi is not compatible with threads") + .get_mut() + .unwrap(); + let nn_ctx = Arc::get_mut(h.wasi_nn_wit.as_mut().unwrap()) + .expect("wasi-nn is not implemented with multi-threading support"); + WasiNnView::new(preview2_ctx.table(), nn_ctx) })?; + store.data_mut().wasi_nn_wit = Some(Arc::new( + wasmtime_wasi_nn::wit::WasiNnCtx::new(backends, registry), + )); } } - let graphs = self - .run - .common - .wasi - .nn_graph - .iter() - .map(|g| (g.format.clone(), g.dir.clone())) - .collect::>(); - let (backends, registry) = wasmtime_wasi_nn::preload(&graphs)?; - store.data_mut().wasi_nn = Some(Arc::new(WasiNnCtx::new(backends, registry))); } } @@ -767,6 +764,21 @@ impl RunCommand { store.data_mut().preview2_ctx = Some(Arc::new(Mutex::new(ctx))); Ok(()) } + + #[cfg(feature = "wasi-nn")] + fn collect_preloaded_nn_graphs( + &self, + ) -> Result<(Vec, wasmtime_wasi_nn::Registry)> { + let graphs = self + .run + .common + .wasi + .nn_graph + .iter() + .map(|g| (g.format.clone(), g.dir.clone())) + .collect::>(); + wasmtime_wasi_nn::preload(&graphs) + } } #[derive(Default, Clone)] @@ -779,7 +791,10 @@ struct Host { preview2_ctx: Option>>, #[cfg(feature = "wasi-nn")] - wasi_nn: Option>, + wasi_nn_wit: Option>, + #[cfg(feature = "wasi-nn")] + 
wasi_nn_witx: Option>, + #[cfg(feature = "wasi-threads")] wasi_threads: Option>>, #[cfg(feature = "wasi-http")] diff --git a/src/commands/serve.rs b/src/commands/serve.rs index 56c2af9d3024..8a200ee093f3 100644 --- a/src/commands/serve.rs +++ b/src/commands/serve.rs @@ -17,7 +17,7 @@ use wasmtime_wasi_http::io::TokioIo; use wasmtime_wasi_http::{body::HyperOutgoingBody, WasiHttpCtx, WasiHttpView}; #[cfg(feature = "wasi-nn")] -use wasmtime_wasi_nn::WasiNnCtx; +use wasmtime_wasi_nn::wit::WasiNnCtx; struct Host { table: wasmtime::component::ResourceTable, @@ -75,15 +75,8 @@ impl ServeCommand { pub fn execute(mut self) -> Result<()> { self.run.common.init_logging()?; - // We force cli errors before starting to listen for connections so then we don't - // accidentally delay them to the first request. - if self.run.common.wasi.nn == Some(true) { - #[cfg(not(feature = "wasi-nn"))] - { - bail!("Cannot enable wasi-nn when the binary is not compiled with this feature."); - } - } - + // We force cli errors before starting to listen for connections so then + // we don't accidentally delay them to the first request. if let Some(Profile::Guest { .. }) = &self.run.profile { bail!("Cannot use the guest profiler with components"); } @@ -99,8 +92,8 @@ impl ServeCommand { bail!("wasi-threads does not support components yet") } - // The serve command requires both wasi-http and the component model, so we enable those by - // default here. + // The serve command requires both wasi-http and the component model, so + // we enable those by default here. 
if self.run.common.wasi.http.replace(true) == Some(false) { bail!("wasi-http is required for the serve command, and must not be disabled"); } @@ -227,7 +220,10 @@ impl ServeCommand { } #[cfg(feature = "wasi-nn")] { - wasmtime_wasi_nn::wit::ML::add_to_linker(linker, |host| host.nn.as_mut().unwrap())?; + wasmtime_wasi_nn::wit::add_to_linker(linker, |h: &mut Host| { + let ctx = h.nn.as_mut().unwrap(); + wasmtime_wasi_nn::wit::WasiNnView::new(&mut h.table, ctx) + })?; } } diff --git a/supply-chain/audits.toml b/supply-chain/audits.toml index e29b1cfc4fd0..7cc1fa51a95a 100644 --- a/supply-chain/audits.toml +++ b/supply-chain/audits.toml @@ -2028,6 +2028,12 @@ criteria = "safe-to-deploy" version = "0.46.0" notes = "one use of unsafe to call windows specific api to get console handle." +[[audits.num-traits]] +who = "Andrew Brown " +criteria = "safe-to-deploy" +version = "0.2.19" +notes = "As advertised: a numeric library. The only `unsafe` is from some float-to-int conversions, which seems expected." + [[audits.num_cpus]] who = "Alex Crichton " criteria = "safe-to-deploy" @@ -2145,12 +2151,24 @@ criteria = "safe-to-deploy" version = "2.0.0-rc.0" notes = "As expected, this crate uses `unsafe` to access the `unsafe` `ort-sys` FFI functions; it also includes several `unsafe` implementations of `Send` for several structures. With the `load-dynamic` feature enabled, this crate will be `libloading` external libraries to call FFI functions. With the `fetch-models` feature enabled, this crate can also download arbitrary models to the local filesystem." +[[audits.ort]] +who = "Andrew Brown " +criteria = "safe-to-deploy" +delta = "2.0.0-rc.0 -> 2.0.0-rc.2" +notes = "Same as previous audit: the crate inherently uses `unsafe` FFI calls for using ONNX through `ort-sys` (e.g., logging C error strings). The changes are relatively uninteresting: a lot of documentation, some `must_use`, and general refactoring due to changes in the underlying API." 
+ [[audits.ort-sys]] who = "Andrew Brown " criteria = "safe-to-deploy" version = "2.0.0-rc.0" notes = "As expected, this crate contains a significant number of `unsafe` definitions to expose the FFI surface of the ONNX libraries. Perhaps surprisingly, it also contains some `unsafe` system calls to locate the user's home directory. Another interesting bit is the `build.rs` script: with the `download-binaries` feature enabled, this script will retrieve and link various ONNX libraries from https://parcel.pyke.io. This seems par for the course with this kind of library, though; the alternative--attempting to find the library on an arbitrary system--can be quite complex." +[[audits.ort-sys]] +who = "Andrew Brown " +criteria = "safe-to-deploy" +delta = "2.0.0-rc.0 -> 2.0.0-rc.2" +notes = "This crate still downloads the ONNX libraries as a part of the `build.rs` script; now with more platform options for pre-built binaries stored in a `dist.txt` file. Otherwise largely unchanged since the previous audit." + [[audits.overload]] who = "Pat Hickey " criteria = "safe-to-deploy" From 9dff778cab1701209358b1e3d359ba78a83b0248 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 27 Jun 2024 18:01:20 -0500 Subject: [PATCH 10/10] serve: Fix logging prints to stdout/stderr (#8877) This commit fixes writes to stdout/stderr which don't end in a newline to not get split across lines with a prefix on each line. Instead internally a flag is used to track whether a prefix is required at the beginning of each chunk. 
--- .../src/bin/cli_serve_with_print.rs | 30 +++++++++ src/commands/serve.rs | 62 +++++++++++++------ tests/all/cli_tests.rs | 54 +++++++++++++++- 3 files changed, 125 insertions(+), 21 deletions(-) create mode 100644 crates/test-programs/src/bin/cli_serve_with_print.rs diff --git a/crates/test-programs/src/bin/cli_serve_with_print.rs b/crates/test-programs/src/bin/cli_serve_with_print.rs new file mode 100644 index 000000000000..424eb841cf47 --- /dev/null +++ b/crates/test-programs/src/bin/cli_serve_with_print.rs @@ -0,0 +1,30 @@ +use std::io::Write; +use test_programs::proxy; +use test_programs::wasi::http::types::{ + Fields, IncomingRequest, OutgoingResponse, ResponseOutparam, +}; + +struct T; + +proxy::export!(T); + +impl proxy::exports::wasi::http::incoming_handler::Guest for T { + fn handle(_request: IncomingRequest, outparam: ResponseOutparam) { + print!("this is half a print "); + std::io::stdout().flush().unwrap(); + println!("to stdout"); + println!(); // empty line + println!("after empty"); + + eprint!("this is half a print "); + std::io::stderr().flush().unwrap(); + eprintln!("to stderr"); + eprintln!(); // empty line + eprintln!("after empty"); + + let resp = OutgoingResponse::new(Fields::new()); + ResponseOutparam::set(outparam, Ok(resp)); + } +} + +fn main() {} diff --git a/src/commands/serve.rs b/src/commands/serve.rs index 8a200ee093f3..001688ce7030 100644 --- a/src/commands/serve.rs +++ b/src/commands/serve.rs @@ -127,15 +127,15 @@ impl ServeCommand { builder.env("REQUEST_ID", req_id.to_string()); - builder.stdout(LogStream { - prefix: format!("stdout [{req_id}] :: "), - output: Output::Stdout, - }); + builder.stdout(LogStream::new( + format!("stdout [{req_id}] :: "), + Output::Stdout, + )); - builder.stderr(LogStream { - prefix: format!("stderr [{req_id}] :: "), - output: Output::Stderr, - }); + builder.stderr(LogStream::new( + format!("stderr [{req_id}] :: "), + Output::Stderr, + )); let mut host = Host { table: 
wasmtime::component::ResourceTable::new(), @@ -470,6 +470,17 @@ impl Output { struct LogStream { prefix: String, output: Output, + needs_prefix_on_next_write: bool, +} + +impl LogStream { + fn new(prefix: String, output: Output) -> LogStream { + LogStream { + prefix, + output, + needs_prefix_on_next_write: true, + } + } } impl wasmtime_wasi::StdoutStream for LogStream { @@ -489,19 +500,34 @@ impl wasmtime_wasi::StdoutStream for LogStream { impl wasmtime_wasi::HostOutputStream for LogStream { fn write(&mut self, bytes: bytes::Bytes) -> StreamResult<()> { - let mut msg = Vec::new(); - - for line in bytes.split(|c| *c == b'\n') { - if !line.is_empty() { - msg.extend_from_slice(&self.prefix.as_bytes()); - msg.extend_from_slice(line); - msg.push(b'\n'); + let mut bytes = &bytes[..]; + + while !bytes.is_empty() { + if self.needs_prefix_on_next_write { + self.output + .write_all(self.prefix.as_bytes()) + .map_err(StreamError::LastOperationFailed)?; + self.needs_prefix_on_next_write = false; + } + match bytes.iter().position(|b| *b == b'\n') { + Some(i) => { + let (a, b) = bytes.split_at(i + 1); + bytes = b; + self.output + .write_all(a) + .map_err(StreamError::LastOperationFailed)?; + self.needs_prefix_on_next_write = true; + } + None => { + self.output + .write_all(bytes) + .map_err(StreamError::LastOperationFailed)?; + break; + } } } - self.output - .write_all(&msg) - .map_err(StreamError::LastOperationFailed) + Ok(()) } fn flush(&mut self) -> StreamResult<()> { diff --git a/tests/all/cli_tests.rs b/tests/all/cli_tests.rs index 20078eb9835c..069a0e532e36 100644 --- a/tests/all/cli_tests.rs +++ b/tests/all/cli_tests.rs @@ -1507,7 +1507,7 @@ mod test_programs { } /// Completes this server gracefully by printing the output on failure. 
- fn finish(mut self) -> Result { + fn finish(mut self) -> Result<(String, String)> { let mut child = self.child.take().unwrap(); // If the child process has already exited then collect the output @@ -1525,7 +1525,10 @@ mod test_programs { bail!("child failed {output:?}"); } - Ok(String::from_utf8_lossy(&output.stderr).into_owned()) + Ok(( + String::from_utf8_lossy(&output.stdout).into_owned(), + String::from_utf8_lossy(&output.stderr).into_owned(), + )) } /// Send a request to this server and wait for the response. @@ -1660,7 +1663,7 @@ mod test_programs { ) .await; assert!(result.is_err()); - let stderr = server.finish()?; + let (_, stderr) = server.finish()?; assert!( stderr.contains("maximum concurrent memory limit of 0 reached"), "bad stderr: {stderr}", @@ -1766,6 +1769,51 @@ mod test_programs { Ok(()) } + + #[tokio::test] + async fn cli_serve_with_print() -> Result<()> { + let server = WasmtimeServe::new(CLI_SERVE_WITH_PRINT_COMPONENT, |cmd| { + cmd.arg("-Scli"); + })?; + + for _ in 0..2 { + let resp = server + .send_request( + hyper::Request::builder() + .uri("http://localhost/") + .body(String::new()) + .context("failed to make request")?, + ) + .await?; + assert!(resp.status().is_success()); + } + + let (out, err) = server.finish()?; + assert_eq!( + out, + "\ +stdout [0] :: this is half a print to stdout +stdout [0] :: \n\ +stdout [0] :: after empty +stdout [1] :: this is half a print to stdout +stdout [1] :: \n\ +stdout [1] :: after empty +" + ); + assert_eq!( + err, + "\ +stderr [0] :: this is half a print to stderr +stderr [0] :: \n\ +stderr [0] :: after empty +stderr [1] :: this is half a print to stderr +stderr [1] :: \n\ +stderr [1] :: after empty +" + ); + + Ok(()) + } } #[test]