diff --git a/crates/componentize/src/bugs.rs b/crates/componentize/src/bugs.rs index 1e7ed9e63c..053dc4aa38 100644 --- a/crates/componentize/src/bugs.rs +++ b/crates/componentize/src/bugs.rs @@ -1,6 +1,6 @@ -use anyhow::bail; -use wasm_metadata::Producers; -use wasmparser::{Encoding, ExternalKind, Parser, Payload}; +use crate::module_info::ModuleInfo; + +pub const EARLIEST_PROBABLY_SAFE_CLANG_VERSION: &str = "15.0.7"; /// Represents the detected likelihood of the allocation bug fixed in /// https://github.com/WebAssembly/wasi-libc/pull/377 being present in a Wasm @@ -8,53 +8,35 @@ use wasmparser::{Encoding, ExternalKind, Parser, Payload}; #[derive(Debug, PartialEq)] pub enum WasiLibc377Bug { ProbablySafe, - ProbablyUnsafe, + ProbablyUnsafe { clang_version: String }, Unknown, } impl WasiLibc377Bug { - pub fn detect(module: &[u8]) -> anyhow::Result { - for payload in Parser::new(0).parse_all(module) { - match payload? { - Payload::Version { encoding, .. } if encoding != Encoding::Module => { - bail!("detection only applicable to modules"); - } - Payload::ExportSection(reader) => { - for export in reader { - let export = export?; - if export.kind == ExternalKind::Func && export.name == "cabi_realloc" { - // `cabi_realloc` is a good signal that this module - // uses wit-bindgen, making it probably-safe. - tracing::debug!("Found cabi_realloc export"); - return Ok(Self::ProbablySafe); - } - } - } - Payload::CustomSection(c) if c.name() == "producers" => { - let producers = Producers::from_bytes(c.data(), c.data_offset())?; - if let Some(clang_version) = - producers.get("processed-by").and_then(|f| f.get("clang")) - { - tracing::debug!(clang_version, "Parsed producers.processed-by.clang"); - - // Clang/LLVM version is a good proxy for wasi-sdk - // version; the allocation bug was fixed in wasi-sdk-18 - // and LLVM was updated to 15.0.7 in wasi-sdk-19. - if let Some((major, minor, patch)) = parse_clang_version(clang_version) { - return if (major, minor, patch) >= (15, 0, 7) { - Ok(Self::ProbablySafe) - } else { - Ok(Self::ProbablyUnsafe) - }; - } else { - tracing::warn!( - clang_version, - "Unexpected producers.processed-by.clang version" - ); - } - } - } - _ => (), + pub fn detect(module_info: &ModuleInfo) -> anyhow::Result { + if module_info.probably_uses_wit_bindgen() { + // Modules built with wit-bindgen are probably safe. + return Ok(Self::ProbablySafe); + } + if let Some(clang_version) = &module_info.clang_version { + // Clang/LLVM version is a good proxy for wasi-sdk + // version; the allocation bug was fixed in wasi-sdk-18 + // and LLVM was updated to 15.0.7 in wasi-sdk-19. + if let Some((major, minor, patch)) = parse_clang_version(clang_version) { + let earliest_safe = + parse_clang_version(EARLIEST_PROBABLY_SAFE_CLANG_VERSION).unwrap(); + return if (major, minor, patch) >= earliest_safe { + Ok(Self::ProbablySafe) + } else { + Ok(Self::ProbablyUnsafe { + clang_version: clang_version.clone(), + }) + }; + } else { + tracing::warn!( + clang_version, + "Unexpected producers.processed-by.clang version" + ); } } Ok(Self::Unknown) @@ -98,11 +80,15 @@ mod tests { ), ( r#"(module (@producers (processed-by "clang" "15.0.6")))"#, - ProbablyUnsafe, + ProbablyUnsafe { + clang_version: "15.0.6".into(), + }, ), ( - r#"(module (@producers (processed-by "clang" "14.0.0")))"#, - ProbablyUnsafe, + r#"(module (@producers (processed-by "clang" "14.0.0 extra-stuff")))"#, + ProbablyUnsafe { + clang_version: "14.0.0 extra-stuff".into(), + }, ), ( r#"(module (@producers (processed-by "clang" "a.b.c")))"#, @@ -111,7 +97,8 @@ mod tests { ] { eprintln!("WAT: {wasm}"); let module = wat::parse_str(wasm).unwrap(); - let detected = WasiLibc377Bug::detect(&module).unwrap(); + let module_info = ModuleInfo::from_module(&module).unwrap(); + let detected = WasiLibc377Bug::detect(&module_info).unwrap(); assert_eq!(detected, expected); } } diff --git a/crates/componentize/src/lib.rs b/crates/componentize/src/lib.rs index f4be1bcb13..f59caefb7d 100644 --- a/crates/componentize/src/lib.rs +++ b/crates/componentize/src/lib.rs @@ -3,6 +3,7 @@ use { anyhow::{anyhow, Context, Result}, convert::{IntoEntityType, IntoExportKind}, + module_info::ModuleInfo, std::{borrow::Cow, collections::HashSet}, wasm_encoder::{CustomSection, ExportSection, ImportSection, Module, RawSection}, wasmparser::{Encoding, Parser, Payload}, @@ -14,6 +15,7 @@ pub mod bugs; #[cfg(test)] mod abi_conformance; mod convert; +mod module_info; const SPIN_ADAPTER: &[u8] = include_bytes!(concat!( env!("OUT_DIR"), @@ -51,8 +53,9 @@ pub fn componentize_if_necessary(module_or_component: &[u8]) -> Result } pub fn componentize(module: &[u8]) -> Result> { - match WitBindgenVersion::from_module(module)? { - WitBindgenVersion::V0_2 => componentize_old_bindgen(module), + let module_info = ModuleInfo::from_module(module)?; + match WitBindgenVersion::detect(&module_info)? { + WitBindgenVersion::V0_2OrNone => componentize_old_module(module, &module_info), WitBindgenVersion::GreaterThanV0_4 => componentize_new_bindgen(module), WitBindgenVersion::Other(other) => Err(anyhow::anyhow!( "cannot adapt modules created with wit-bindgen version {other}" @@ -65,40 +68,36 @@ pub fn componentize(module: &[u8]) -> Result> { #[derive(Debug)] enum WitBindgenVersion { GreaterThanV0_4, - V0_2, + V0_2OrNone, Other(String), } impl WitBindgenVersion { - fn from_module(module: &[u8]) -> Result { - let (_, bindgen) = metadata::decode(module)?; - if let Some(producers) = bindgen.producers { - if let Some(processors) = producers.get("processed-by") { - let bindgen_version = processors.iter().find_map(|(key, value)| { - key.starts_with("wit-bindgen").then_some(value.as_str()) - }); - if let Some(v) = bindgen_version { - let mut parts = v.split('.'); - let Some(major) = parts.next().and_then(|p| p.parse::().ok()) else { - return Ok(Self::Other(v.to_owned())); - }; - let Some(minor) = parts.next().and_then(|p| p.parse::().ok()) else { - return Ok(Self::Other(v.to_owned())); - }; - if (major == 0 && minor < 5) || major >= 1 { - return Ok(Self::Other(v.to_owned())); - } - // Either there should be no patch version or nothing after patch - if parts.next().is_none() || parts.next().is_none() { - return Ok(Self::GreaterThanV0_4); - } else { - return Ok(Self::Other(v.to_owned())); - } + fn detect(module_info: &ModuleInfo) -> Result { + if let Some(processors) = module_info.bindgen_processors() { + let bindgen_version = processors + .iter() + .find_map(|(key, value)| key.starts_with("wit-bindgen").then_some(value.as_str())); + if let Some(v) = bindgen_version { + let mut parts = v.split('.'); + let Some(major) = parts.next().and_then(|p| p.parse::().ok()) else { + return Ok(Self::Other(v.to_owned())); + }; + let Some(minor) = parts.next().and_then(|p| p.parse::().ok()) else { + return Ok(Self::Other(v.to_owned())); + }; + if (major == 0 && minor < 5) || major >= 1 { + return Ok(Self::Other(v.to_owned())); + } + // Either there should be no patch version or nothing after patch + if parts.next().is_none() || parts.next().is_none() { + return Ok(Self::GreaterThanV0_4); + } else { + return Ok(Self::Other(v.to_owned())); } } } - - Ok(Self::V0_2) + Ok(Self::V0_2OrNone) } } @@ -111,6 +110,17 @@ pub fn componentize_new_bindgen(module: &[u8]) -> Result> { .encode() } +/// Modules *not* produced with wit-bindgen >= 0.5 could be old wit-bindgen or no wit-bindgen +pub fn componentize_old_module(module: &[u8], module_info: &ModuleInfo) -> Result> { + // If the module has a _start export and doesn't obviously use wit-bindgen + // it is likely an old p1 command module. + if module_info.has_start_export && !module_info.probably_uses_wit_bindgen() { + componentize_command(module) + } else { + componentize_old_bindgen(module) + } +} + /// Modules produced with wit-bindgen 0.2 need more extensive adaption pub fn componentize_old_bindgen(module: &[u8]) -> Result> { let (module, exports) = retarget_imports_and_get_exports(ADAPTER_NAME, module)?; diff --git a/crates/componentize/src/module_info.rs b/crates/componentize/src/module_info.rs new file mode 100644 index 0000000000..e02ece13c5 --- /dev/null +++ b/crates/componentize/src/module_info.rs @@ -0,0 +1,111 @@ +use wasm_metadata::Producers; +use wasmparser::{Encoding, ExternalKind, Parser, Payload}; +use wit_component::metadata::Bindgen; + +// wit-bindgen has used both of these historically. +const CANONICAL_ABI_REALLOC_EXPORTS: &[&str] = &["cabi_realloc", "canonical_abi_realloc"]; + +/// Stores various bits of info parsed from a Wasm module that are relevant to +/// componentization. +#[derive(Default)] +pub struct ModuleInfo { + pub bindgen: Option, + pub clang_version: Option, + pub realloc_export: Option, + pub has_start_export: bool, +} + +impl ModuleInfo { + /// Parses info from the given binary module bytes. + pub fn from_module(module: &[u8]) -> anyhow::Result { + let mut info = Self::default(); + for payload in Parser::new(0).parse_all(module) { + match payload? { + Payload::Version { encoding, .. } => { + anyhow::ensure!( + encoding == Encoding::Module, + "ModuleInfo::from_module is only applicable to Modules; got a {encoding:?}" + ); + } + Payload::ExportSection(reader) => { + for export in reader { + let export = export?; + if export.kind == ExternalKind::Func { + if CANONICAL_ABI_REALLOC_EXPORTS.contains(&export.name) { + tracing::debug!( + "Found canonical ABI realloc export {:?}", + export.name + ); + info.realloc_export = Some(export.name.to_string()); + } else if export.name == "_start" { + tracing::debug!("Found _start export"); + info.has_start_export = true; + } + } + } + } + Payload::CustomSection(c) => { + let section_name = c.name(); + if section_name == "producers" { + let producers = Producers::from_bytes(c.data(), c.data_offset())?; + if let Some(clang_version) = + producers.get("processed-by").and_then(|f| f.get("clang")) + { + tracing::debug!(clang_version, "Parsed producers.processed-by.clang"); + info.clang_version = Some(clang_version.to_string()); + } + } else if section_name.starts_with("component-type") { + match decode_bindgen_custom_section(section_name, c.data()) { + Ok(bindgen) => { + tracing::debug!("Parsed bindgen section {section_name:?}"); + info.bindgen = Some(bindgen); + } + Err(err) => tracing::warn!( + "Error parsing bindgen section {section_name:?}: {err}" + ), + } + } + } + _ => (), + } + } + Ok(info) + } + + /// Returns true if the given module was heuristically probably compiled + /// with wit-bindgen. + pub fn probably_uses_wit_bindgen(&self) -> bool { + if self.bindgen.is_some() { + // Presence of bindgen metadata is a strong signal + true + } else if self.realloc_export.is_some() { + // A canonical ABI realloc export is a decent signal + true + } else { + false + } + } + + /// Returns the wit-bindgen metadata producers processed-by field, if + /// present. + pub fn bindgen_processors(&self) -> Option { + self.bindgen + .as_ref()? + .producers + .as_ref()? + .get("processed-by") + } +} + +/// This is a silly workaround for the limited public interface available in +/// [`wit_component::metadata`]. +// TODO: Make Bindgen::decode_custom_section public? +fn decode_bindgen_custom_section(name: &str, data: &[u8]) -> anyhow::Result { + let mut module = wasm_encoder::Module::new(); + module.section(&wasm_encoder::CustomSection { + name: name.into(), + data: data.into(), + }); + let (_, bindgen) = wit_component::metadata::decode(module.as_slice())?; + Ok(bindgen) +} diff --git a/crates/http/src/config.rs b/crates/http/src/config.rs index 2c675f1ca2..b64b5f5dac 100644 --- a/crates/http/src/config.rs +++ b/crates/http/src/config.rs @@ -63,9 +63,6 @@ pub enum HttpExecutorType { #[derive(Clone, Debug, Deserialize, Serialize)] #[serde(default, deny_unknown_fields)] pub struct WagiTriggerConfig { - /// The name of the entrypoint. - pub entrypoint: String, - /// A string representation of the argv array. /// /// This should be a space-separate list of strings. The value @@ -81,11 +78,9 @@ pub struct WagiTriggerConfig { impl Default for WagiTriggerConfig { fn default() -> Self { /// This is the default Wagi entrypoint. - const WAGI_DEFAULT_ENTRYPOINT: &str = "_start"; const WAGI_DEFAULT_ARGV: &str = "${SCRIPT_NAME} ${ARGS}"; Self { - entrypoint: WAGI_DEFAULT_ENTRYPOINT.to_owned(), argv: WAGI_DEFAULT_ARGV.to_owned(), } } @@ -101,7 +96,6 @@ mod tests { else { panic!("wrong type"); }; - assert_eq!(config.entrypoint, "_start"); assert_eq!(config.argv, "${SCRIPT_NAME} ${ARGS}"); } } diff --git a/crates/trigger-http/src/server.rs b/crates/trigger-http/src/server.rs index 9fab3d6f1d..377282f8ce 100644 --- a/crates/trigger-http/src/server.rs +++ b/crates/trigger-http/src/server.rs @@ -98,10 +98,15 @@ impl HttpServer { let component_trigger_configs = HashMap::from_iter(component_trigger_configs); let component_handler_types = component_trigger_configs - .keys() - .map(|component_id| { - let component = trigger_app.get_component(component_id)?; - let handler_type = HandlerType::from_component(trigger_app.engine(), component)?; + .iter() + .map(|(component_id, trigger_config)| { + let handler_type = match trigger_config.executor { + None | Some(HttpExecutorType::Http) => { + let component = trigger_app.get_component(component_id)?; + HandlerType::from_component(trigger_app.engine(), component)? + } + Some(HttpExecutorType::Wagi(_)) => HandlerType::Wagi, + }; Ok((component_id.clone(), handler_type)) }) .collect::>()?; @@ -253,6 +258,7 @@ impl HttpServer { .execute(instance_builder, &route_match, req, client_addr) .await } + HandlerType::Wagi => unreachable!(), }, HttpExecutorType::Wagi(wagi_config) => { let executor = WagiHttpExecutor { @@ -446,9 +452,10 @@ pub(crate) trait HttpExecutor: Clone + Send + Sync + 'static { } /// Whether this handler uses the custom Spin http handler interface for wasi-http -#[derive(Copy, Clone)] +#[derive(Clone, Copy)] pub enum HandlerType { Spin, + Wagi, Wasi0_2, Wasi2023_11_10, Wasi2023_10_18, @@ -490,8 +497,10 @@ impl HandlerType { handler_ty.ok_or_else(|| { anyhow::anyhow!( - "Expected component to either export `{WASI_HTTP_EXPORT_2023_10_18}`, \ - `{WASI_HTTP_EXPORT_2023_11_10}`, `{WASI_HTTP_EXPORT_0_2_0}`, \ + "Expected component to export one of \ + `{WASI_HTTP_EXPORT_2023_10_18}`, \ + `{WASI_HTTP_EXPORT_2023_11_10}`, \ + `{WASI_HTTP_EXPORT_0_2_0}`, \ or `fermyon:spin/inbound-http` but it exported none of those" ) }) diff --git a/crates/trigger-http/src/wagi.rs b/crates/trigger-http/src/wagi.rs index c0a28485f0..e447e27e00 100644 --- a/crates/trigger-http/src/wagi.rs +++ b/crates/trigger-http/src/wagi.rs @@ -1,6 +1,6 @@ use std::{io::Cursor, net::SocketAddr}; -use anyhow::{anyhow, ensure, Context, Result}; +use anyhow::{ensure, Result}; use http_body_util::BodyExt; use hyper::{Request, Response}; use spin_http::{config::WagiTriggerConfig, routes::RouteMatch, wagi}; @@ -83,27 +83,18 @@ impl HttpExecutor for WagiHttpExecutor { let (instance, mut store) = instance_builder.instantiate(()).await?; - let start = instance - .get_func(&mut store, &self.wagi_config.entrypoint) - .ok_or_else(|| { - anyhow::anyhow!( - "No such function '{}' in {}", - self.wagi_config.entrypoint, - component - ) - })?; + let command = wasmtime_wasi::bindings::Command::new(&mut store, &instance)?; + tracing::trace!("Calling Wasm entry point"); - start - .call_async(&mut store, &[], &mut []) + if let Err(()) = command + .wasi_cli_run() + .call_run(&mut store) .await - .or_else(ignore_successful_proc_exit_trap) - .with_context(|| { - anyhow!( - "invoking {} for component {component}", - self.wagi_config.entrypoint - ) - })?; - tracing::info!("Module execution complete"); + .or_else(ignore_successful_proc_exit_trap)? + { + tracing::error!("Wagi main function returned unsuccessful result"); + } + tracing::info!("Wagi execution complete"); // Drop the store so we're left with a unique reference to `stdout`: drop(store); @@ -119,13 +110,13 @@ impl HttpExecutor for WagiHttpExecutor { } } -fn ignore_successful_proc_exit_trap(guest_err: anyhow::Error) -> Result<()> { +fn ignore_successful_proc_exit_trap(guest_err: anyhow::Error) -> Result> { match guest_err .root_cause() .downcast_ref::() { Some(trap) => match trap.0 { - 0 => Ok(()), + 0 => Ok(Ok(())), _ => Err(guest_err), }, None => Err(guest_err), diff --git a/crates/trigger-http/src/wasi.rs b/crates/trigger-http/src/wasi.rs index 05d6b33b75..d61f3bdb21 100644 --- a/crates/trigger-http/src/wasi.rs +++ b/crates/trigger-http/src/wasi.rs @@ -93,7 +93,8 @@ impl HttpExecutor for WasiHttpExecutor { drop(exports); Handler::Latest(Proxy::new(&mut store, &instance)?) } - HandlerType::Spin => panic!("should have used execute_spin instead"), + HandlerType::Spin => unreachable!("should have used SpinHttpExecutor"), + HandlerType::Wagi => unreachable!("should have used WagiExecutor instead"), } }; diff --git a/crates/trigger/src/cli.rs b/crates/trigger/src/cli.rs index 4b6dc422b9..b5800cc9c1 100644 --- a/crates/trigger/src/cli.rs +++ b/crates/trigger/src/cli.rs @@ -358,7 +358,7 @@ impl TriggerAppBuilder { ) })?; let component = spin_componentize::componentize_if_necessary(&bytes)?; - spin_core::Component::new(engine, component.as_ref()) + spin_core::Component::new(engine, component) .with_context(|| format!("loading module {}", quoted_path(&path))) } } diff --git a/examples/spin-wagi-http/spin.toml b/examples/spin-wagi-http/spin.toml index e6280308fd..37ecaee466 100644 --- a/examples/spin-wagi-http/spin.toml +++ b/examples/spin-wagi-http/spin.toml @@ -9,7 +9,7 @@ version = "1.0.0" [[trigger.http]] route = "/hello" component = "hello" -executor = { type = "wagi" } # _start (the default entrypoint) is automatically mapped to main() +executor = { type = "wagi" } [[trigger.http]] route = "/goodbye" diff --git a/examples/wagi-http-rust/spin.toml b/examples/wagi-http-rust/spin.toml index c1cc3f0657..c5d1a7821a 100644 --- a/examples/wagi-http-rust/spin.toml +++ b/examples/wagi-http-rust/spin.toml @@ -9,7 +9,7 @@ version = "1.0.0" [[trigger.http]] route = "/env" component = "env" -executor = { type = "wagi" } # _start (the default entrypoint) is automatically mapped to main() +executor = { type = "wagi" } [component.env] source = "target/wasm32-wasi/release/wagihelloworld.wasm"