From b4d7ab36f9af0615d128adc914e3227af4a336af Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 4 Aug 2022 15:42:06 -0500 Subject: [PATCH] Add a dataflow-based representation of components (#4597) * Add a dataflow-based representation of components This commit updates the inlining phase of compiling a component to create a dataflow-based representation of a component instead of creating a final `Component` with a linear list of initializers. This dataflow graph is then linearized in a final step to create the actual final `Component`. The motivation for this commit stems primarily from my work implementing strings in fused adapters. In doing this my plan is to defer most low-level transcoding to the host itself rather than implementing that in the core wasm adapter modules. This means that small cranelift-generated trampolines will be used for adapter modules to call which then call "transcoding libcalls". The cranelift-generated trampolines will get raw pointers into linear memory and pass those to the libcall which core wasm doesn't have access to when passing arguments to an import. Implementing this with the previous representation of a `Component` was becoming too tricky to bear. The initialization of a transcoder needed to happen at just the right time: before the adapter module which needed it was instantiated but after the linear memories referenced had been extracted into the `VMComponentContext`. The difficulty here is further compounded by the current adapter module injection pass already being quite complicated. Adapter modules are already renumbering the index space of runtime instances and shuffling items around in the `GlobalInitializer` list. Perhaps the worst part of this was that memories could already be referenced by host function imports or exports to the host, and if adapters referenced the same memory it shouldn't be referenced twice in the component. 
This meant that `ExtractMemory` initializers ideally needed to be shuffled around in the initializer list to happen as early as possible instead of wherever they happened to show up during translation. Overall I did my best to implement the transcoders but everything always came up short. I have decided to throw my hands up in the air and try a completely different approach to this, namely the dataflow-based representation in this commit. This makes it much easier to edit the component after initial translation for injection of adapters, injection of transcoders, adding dependencies on possibly-already-existing items, etc. The adapter module partitioning pass in this commit was greatly simplified to something which I believe is functionally equivalent but is probably an order of magnitude easier to understand. The biggest downside of this representation I believe is having a duplicate representation of a component. The `component::info` was largely duplicated into the `component::dfg` module in this commit. Personally though I think this is a more appropriate tradeoff than before because it's very easy to reason about "convert representation A to B" code whereas it was very difficult to reason about shuffling around `GlobalInitializer` items in optimal fashions. This may also have a cost at compile-time in terms of shuffling data around, but my hope is that we have lots of other low-hanging fruit to optimize if it ever comes to that which allows keeping this easier-to-understand representation. Finally, to reiterate, the final representation of components is not changed by this PR. To the runtime internals everything is still the same. 
* Fix compile of factc --- Cargo.lock | 1 + crates/environ/examples/factc.rs | 6 +- crates/environ/src/component.rs | 1 + crates/environ/src/component/dfg.rs | 574 +++++++++++++++ crates/environ/src/component/info.rs | 13 +- crates/environ/src/component/translate.rs | 6 +- .../environ/src/component/translate/adapt.rs | 655 +++++------------- .../environ/src/component/translate/inline.rs | 181 ++--- crates/environ/src/component/types.rs | 4 - crates/environ/src/fact.rs | 5 +- crates/wasmtime/src/component/instance.rs | 2 - crates/wast/Cargo.toml | 1 + crates/wast/src/wast.rs | 4 + 13 files changed, 827 insertions(+), 626 deletions(-) create mode 100644 crates/environ/src/component/dfg.rs diff --git a/Cargo.lock b/Cargo.lock index 278d9b9c2594..4b4a8cd7da80 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3718,6 +3718,7 @@ name = "wasmtime-wast" version = "0.40.0" dependencies = [ "anyhow", + "log", "wasmtime", "wast 45.0.0", ] diff --git a/crates/environ/examples/factc.rs b/crates/environ/examples/factc.rs index 98e7c68682c6..38300a830c0a 100644 --- a/crates/environ/examples/factc.rs +++ b/crates/environ/examples/factc.rs @@ -89,7 +89,7 @@ impl Factc { let mut next_def = 0; let mut dummy_def = || { next_def += 1; - CoreDef::Adapter(AdapterIndex::from_u32(next_def)) + dfg::CoreDef::Adapter(dfg::AdapterId::from_u32(next_def)) }; // Manufactures a `CoreExport` for a memory with the shape specified. 
Note @@ -112,8 +112,8 @@ impl Factc { } else { dst[0] }; - CoreExport { - instance: RuntimeInstanceIndex::from_u32(idx), + dfg::CoreExport { + instance: dfg::InstanceId::from_u32(idx), item: ExportItem::Name(String::new()), } }; diff --git a/crates/environ/src/component.rs b/crates/environ/src/component.rs index e130ce82066b..d6c1c2838492 100644 --- a/crates/environ/src/component.rs +++ b/crates/environ/src/component.rs @@ -38,6 +38,7 @@ pub const MAX_FLAT_PARAMS: usize = 16; pub const MAX_FLAT_RESULTS: usize = 1; mod compiler; +pub mod dfg; mod info; mod translate; mod types; diff --git a/crates/environ/src/component/dfg.rs b/crates/environ/src/component/dfg.rs new file mode 100644 index 000000000000..4b059177e867 --- /dev/null +++ b/crates/environ/src/component/dfg.rs @@ -0,0 +1,574 @@ +//! A dataflow-graph-like intermediate representation of a component +//! +//! This module contains `ComponentDfg` which is an intermediate step towards +//! becoming a full-fledged `Component`. The main purpose for the existence of +//! this representation of a component is to track dataflow between various +//! items within a component and support edits to them after the initial inlined +//! translation of a component. +//! +//! Currently fused adapters are represented with a core WebAssembly module +//! which gets "injected" into the final component as-if the component already +//! bundled it. In doing so the adapter modules need to be partitioned and +//! inserted into the final sequence of modules to instantiate. While this is +//! possible to do with a flat `GlobalInitializer` list it gets unwieldy really +//! quickly especially when other translation features are added. +//! +//! This module is largely a duplicate of the `component::info` module in this +//! crate. The hierarchy here uses `*Id` types instead of `*Index` types to +//! represent that they don't have any necessary implicit ordering. Additionally +//! 
nothing is kept in an ordered list and instead this is worked with in a +//! general dataflow fashion where dependencies are walked during processing. +//! +//! The `ComponentDfg::finish` method will convert the dataflow graph to a +//! linearized `GlobalInitializer` list which is intended to not be edited after +//! it's created. +//! +//! The `ComponentDfg` is created as part of the `component::inline` phase of +//! translation where the dataflow performed there allows identification of +//! fused adapters, what arguments make their way to core wasm modules, etc. + +use crate::component::*; +use crate::{EntityIndex, EntityRef, PrimaryMap, SignatureIndex}; +use indexmap::IndexMap; +use std::collections::HashMap; +use std::hash::Hash; +use std::ops::Index; + +#[derive(Default)] +#[allow(missing_docs)] +pub struct ComponentDfg { + /// Same as `Component::import_types` + pub import_types: PrimaryMap, + + /// Same as `Component::imports` + pub imports: PrimaryMap)>, + + /// Same as `Component::exports` + pub exports: IndexMap, + + /// All known lowered host functions along with the configuration for each + /// lowering. + pub lowerings: Intern, + + /// All known "always trapping" trampolines and the function signature they + /// have. + pub always_trap: Intern, + + /// Known reallocation functions which are used by `lowerings` (e.g. will be + /// used by the host) + pub reallocs: Intern, + + /// Same as `reallocs`, but for post-return. + pub post_returns: Intern, + + /// Same as `reallocs`, but for memories. + pub memories: Intern>, + + /// Metadata about identified fused adapters. + /// + /// Note that this list is required to be populated in-order where the + /// "left" adapters cannot depend on "right" adapters. Currently this falls + /// out of the inlining pass of translation. + pub adapters: Intern, + + /// Metadata about all known core wasm instances created. 
+ /// + /// This is mostly an ordered list and is not deduplicated based on contents + /// unlike the items above. Creation of an `Instance` is side-effectful and + /// all instances here are always required to be created. These are + /// considered "roots" in dataflow. + pub instances: Intern, + + /// Number of component instances that were created during the inlining + /// phase (this is not edited after creation). + pub num_runtime_component_instances: u32, + + /// Known adapter modules and how they are instantiated. + /// + /// This map is not filled in on the initial creation of a `ComponentDfg`. + /// Instead these modules are filled in by the `inline::adapt` phase where + /// adapter modules are identified and filled in here. + /// + /// The payload here is the static module index representing the core wasm + /// adapter module that was generated as well as the arguments to the + /// instantiation of the adapter module. + pub adapter_modules: PrimaryMap)>, + + /// Metadata about where adapters can be found within their respective + /// adapter modules. + /// + /// Like `adapter_modules` this is not filled on the initial creation of + /// `ComponentDfg` but rather is created alongside `adapter_modules` during + /// the `inline::adapt` phase of translation. + /// + /// The values here are the module that the adapter is present within as well + /// as the core wasm index of the export corresponding to the lowered + /// version of the adapter. + pub adapter_paritionings: PrimaryMap, +} + +macro_rules! id { + ($(pub struct $name:ident(u32);)*) => ($( + #[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] + #[allow(missing_docs)] + pub struct $name(u32); + cranelift_entity::entity_impl!($name); + )*) +} + +id! 
{ + pub struct InstanceId(u32); + pub struct LowerImportId(u32); + pub struct MemoryId(u32); + pub struct ReallocId(u32); + pub struct AdapterId(u32); + pub struct PostReturnId(u32); + pub struct AlwaysTrapId(u32); + pub struct AdapterModuleId(u32); +} + +/// Same as `info::InstantiateModule` +#[allow(missing_docs)] +pub enum Instance { + Static(StaticModuleIndex, Box<[CoreDef]>), + Import( + RuntimeImportIndex, + IndexMap>, + ), +} + +/// Same as `info::Export` +#[allow(missing_docs)] +pub enum Export { + LiftedFunction { + ty: TypeFuncIndex, + func: CoreDef, + options: CanonicalOptions, + }, + ModuleStatic(StaticModuleIndex), + ModuleImport(RuntimeImportIndex), + Instance(IndexMap), +} + +/// Same as `info::CoreDef`, except has an extra `Adapter` variant. +#[derive(Debug, Clone, Hash, Eq, PartialEq)] +#[allow(missing_docs)] +pub enum CoreDef { + Export(CoreExport), + Lowered(LowerImportId), + AlwaysTrap(AlwaysTrapId), + InstanceFlags(RuntimeComponentInstanceIndex), + + /// This is a special variant not present in `info::CoreDef` which + /// represents that this definition refers to a fused adapter function. This + /// adapter is fully processed after the initial translation and + /// identification of adapters. + /// + /// During translation into `info::CoreDef` this variant is erased and + /// replaced by `info::CoreDef::Export` since adapters are always + /// represented as the exports of a core wasm instance. 
+ Adapter(AdapterId), +} + +impl From> for CoreDef +where + EntityIndex: From, +{ + fn from(export: CoreExport) -> CoreDef { + CoreDef::Export(export.map_index(|i| i.into())) + } +} + +/// Same as `info::CoreExport` +#[derive(Debug, Clone, Hash, Eq, PartialEq)] +#[allow(missing_docs)] +pub struct CoreExport { + pub instance: InstanceId, + pub item: ExportItem, +} + +impl CoreExport { + #[allow(missing_docs)] + pub fn map_index(self, f: impl FnOnce(T) -> U) -> CoreExport { + CoreExport { + instance: self.instance, + item: match self.item { + ExportItem::Index(i) => ExportItem::Index(f(i)), + ExportItem::Name(s) => ExportItem::Name(s), + }, + } + } +} + +/// Same as `info::LowerImport` +#[derive(Hash, Eq, PartialEq, Clone)] +#[allow(missing_docs)] +pub struct LowerImport { + pub import: RuntimeImportIndex, + pub canonical_abi: SignatureIndex, + pub options: CanonicalOptions, +} + +/// Same as `info::CanonicalOptions` +#[derive(Clone, Hash, Eq, PartialEq)] +#[allow(missing_docs)] +pub struct CanonicalOptions { + pub instance: RuntimeComponentInstanceIndex, + pub string_encoding: StringEncoding, + pub memory: Option, + pub realloc: Option, + pub post_return: Option, +} + +/// A helper structure to "intern" and deduplicate values of type `V` with an +/// identifying key `K`. +/// +/// Note that this can also be used where `V` can't be intern'd to represent a +/// flat list of items. +pub struct Intern { + intern_map: HashMap, + key_map: PrimaryMap, +} + +impl Intern +where + K: EntityRef, +{ + /// Pushes a new `value` into this list without interning, assigning a new + /// unique key `K` to the value. + pub fn push(&mut self, value: V) -> K { + self.key_map.push(value) + } + + /// Inserts the `value` specified into this set, returning either a fresh + /// key `K` if this value hasn't been seen before or otherwise returning the + /// previous `K` used to represent value. 
+ /// + /// Note that this should only be used for component model items where the + /// creation of `value` is not side-effectful. + pub fn push_uniq(&mut self, value: V) -> K + where + V: Hash + Eq + Clone, + { + *self + .intern_map + .entry(value.clone()) + .or_insert_with(|| self.key_map.push(value)) + } + + /// Returns an iterator of all the values contained within this set. + pub fn iter(&self) -> impl Iterator { + self.key_map.iter() + } +} + +impl Index for Intern { + type Output = V; + fn index(&self, key: K) -> &V { + &self.key_map[key] + } +} + +impl Default for Intern { + fn default() -> Intern { + Intern { + intern_map: HashMap::new(), + key_map: PrimaryMap::new(), + } + } +} + +impl ComponentDfg { + /// Consumes the intermediate `ComponentDfg` to produce a final `Component` + /// with a linear initializer list. + pub fn finish(self) -> Component { + let mut linearize = LinearizeDfg { + dfg: &self, + initializers: Vec::new(), + num_runtime_modules: 0, + runtime_memories: Default::default(), + runtime_post_return: Default::default(), + runtime_reallocs: Default::default(), + runtime_instances: Default::default(), + runtime_always_trap: Default::default(), + runtime_lowerings: Default::default(), + }; + + // First the instances are all processed for instantiation. This will, + // recursively, handle any arguments necessary for each instance such as + // instantiation of adapter modules. + for (id, instance) in linearize.dfg.instances.key_map.iter() { + linearize.instantiate(id, instance); + } + + // Second the exports of the instance are handled which will likely end + // up creating some lowered imports, perhaps some saved modules, etc. + let exports = self + .exports + .iter() + .map(|(name, export)| (name.clone(), linearize.export(export))) + .collect(); + + // With all those pieces done the results of the dataflow-based + // linearization are recorded into the `Component`. 
The number of + // runtime values used for each index space is used from the `linearize` + // result. + Component { + exports, + initializers: linearize.initializers, + + num_runtime_modules: linearize.num_runtime_modules, + num_runtime_memories: linearize.runtime_memories.len() as u32, + num_runtime_post_returns: linearize.runtime_post_return.len() as u32, + num_runtime_reallocs: linearize.runtime_reallocs.len() as u32, + num_runtime_instances: linearize.runtime_instances.len() as u32, + num_always_trap: linearize.runtime_always_trap.len() as u32, + num_lowerings: linearize.runtime_lowerings.len() as u32, + + imports: self.imports, + import_types: self.import_types, + num_runtime_component_instances: self.num_runtime_component_instances, + } + } +} + +struct LinearizeDfg<'a> { + dfg: &'a ComponentDfg, + initializers: Vec, + num_runtime_modules: u32, + runtime_memories: HashMap, + runtime_reallocs: HashMap, + runtime_post_return: HashMap, + runtime_instances: HashMap, + runtime_always_trap: HashMap, + runtime_lowerings: HashMap, +} + +#[derive(Copy, Clone, Hash, Eq, PartialEq)] +enum RuntimeInstance { + Normal(InstanceId), + Adapter(AdapterModuleId), +} + +impl LinearizeDfg<'_> { + fn instantiate(&mut self, instance: InstanceId, args: &Instance) { + let instantiation = match args { + Instance::Static(index, args) => InstantiateModule::Static( + *index, + args.iter().map(|def| self.core_def(def)).collect(), + ), + Instance::Import(index, args) => InstantiateModule::Import( + *index, + args.iter() + .map(|(module, values)| { + let values = values + .iter() + .map(|(name, def)| (name.clone(), self.core_def(def))) + .collect(); + (module.clone(), values) + }) + .collect(), + ), + }; + let index = RuntimeInstanceIndex::new(self.runtime_instances.len()); + self.initializers + .push(GlobalInitializer::InstantiateModule(instantiation)); + let prev = self + .runtime_instances + .insert(RuntimeInstance::Normal(instance), index); + assert!(prev.is_none()); + } + + fn 
export(&mut self, export: &Export) -> info::Export { + match export { + Export::LiftedFunction { ty, func, options } => { + let func = self.core_def(func); + let options = self.options(options); + info::Export::LiftedFunction { + ty: *ty, + func, + options, + } + } + Export::ModuleStatic(i) => { + let index = RuntimeModuleIndex::from_u32(self.num_runtime_modules); + self.num_runtime_modules += 1; + self.initializers + .push(GlobalInitializer::SaveStaticModule(*i)); + info::Export::Module(index) + } + Export::ModuleImport(i) => { + let index = RuntimeModuleIndex::from_u32(self.num_runtime_modules); + self.num_runtime_modules += 1; + self.initializers + .push(GlobalInitializer::SaveModuleImport(*i)); + info::Export::Module(index) + } + Export::Instance(map) => info::Export::Instance( + map.iter() + .map(|(name, export)| (name.clone(), self.export(export))) + .collect(), + ), + } + } + + fn options(&mut self, options: &CanonicalOptions) -> info::CanonicalOptions { + let memory = options.memory.map(|mem| self.runtime_memory(mem)); + let realloc = options.realloc.map(|mem| self.runtime_realloc(mem)); + let post_return = options.post_return.map(|mem| self.runtime_post_return(mem)); + info::CanonicalOptions { + instance: options.instance, + string_encoding: options.string_encoding, + memory, + realloc, + post_return, + } + } + + fn runtime_memory(&mut self, mem: MemoryId) -> RuntimeMemoryIndex { + self.intern( + mem, + |me| &mut me.runtime_memories, + |me, mem| me.core_export(&me.dfg.memories[mem]), + |index, export| GlobalInitializer::ExtractMemory(ExtractMemory { index, export }), + ) + } + + fn runtime_realloc(&mut self, realloc: ReallocId) -> RuntimeReallocIndex { + self.intern( + realloc, + |me| &mut me.runtime_reallocs, + |me, realloc| me.core_def(&me.dfg.reallocs[realloc]), + |index, def| GlobalInitializer::ExtractRealloc(ExtractRealloc { index, def }), + ) + } + + fn runtime_post_return(&mut self, post_return: PostReturnId) -> RuntimePostReturnIndex { + 
self.intern( + post_return, + |me| &mut me.runtime_post_return, + |me, post_return| me.core_def(&me.dfg.post_returns[post_return]), + |index, def| GlobalInitializer::ExtractPostReturn(ExtractPostReturn { index, def }), + ) + } + + fn core_def(&mut self, def: &CoreDef) -> info::CoreDef { + match def { + CoreDef::Export(e) => info::CoreDef::Export(self.core_export(e)), + CoreDef::AlwaysTrap(id) => info::CoreDef::AlwaysTrap(self.runtime_always_trap(*id)), + CoreDef::Lowered(id) => info::CoreDef::Lowered(self.runtime_lowering(*id)), + CoreDef::InstanceFlags(i) => info::CoreDef::InstanceFlags(*i), + CoreDef::Adapter(id) => info::CoreDef::Export(self.adapter(*id)), + } + } + + fn runtime_always_trap(&mut self, id: AlwaysTrapId) -> RuntimeAlwaysTrapIndex { + self.intern( + id, + |me| &mut me.runtime_always_trap, + |me, id| me.dfg.always_trap[id], + |index, canonical_abi| { + GlobalInitializer::AlwaysTrap(AlwaysTrap { + index, + canonical_abi, + }) + }, + ) + } + + fn runtime_lowering(&mut self, id: LowerImportId) -> LoweredIndex { + self.intern( + id, + |me| &mut me.runtime_lowerings, + |me, id| { + let info = &me.dfg.lowerings[id]; + let options = me.options(&info.options); + (info.import, info.canonical_abi, options) + }, + |index, (import, canonical_abi, options)| { + GlobalInitializer::LowerImport(info::LowerImport { + index, + import, + canonical_abi, + options, + }) + }, + ) + } + + fn core_export(&mut self, export: &CoreExport) -> info::CoreExport + where + T: Clone, + { + info::CoreExport { + instance: self.runtime_instances[&RuntimeInstance::Normal(export.instance)], + item: export.item.clone(), + } + } + + fn adapter(&mut self, adapter: AdapterId) -> info::CoreExport { + let (adapter_module, entity_index) = self.dfg.adapter_paritionings[adapter]; + + // Instantiates the adapter module if it hasn't already been + // instantiated or otherwise returns the index that the module was + // already instantiated at. 
+ let instance = self.adapter_module(adapter_module); + + // This adapter is always an export of the instance. + info::CoreExport { + instance, + item: ExportItem::Index(entity_index), + } + } + + fn adapter_module(&mut self, adapter_module: AdapterModuleId) -> RuntimeInstanceIndex { + self.intern( + RuntimeInstance::Adapter(adapter_module), + |me| &mut me.runtime_instances, + |me, _| { + log::debug!("instantiating {adapter_module:?}"); + let (module_index, args) = &me.dfg.adapter_modules[adapter_module]; + let args = args.iter().map(|arg| me.core_def(arg)).collect(); + let instantiate = InstantiateModule::Static(*module_index, args); + GlobalInitializer::InstantiateModule(instantiate) + }, + |_, init| init, + ) + } + + /// Helper function to manage interning of results to avoid duplicate + /// initializers being inserted into the final list. + /// + /// * `key` - the key being referenced which is used to deduplicate. + /// * `map` - a closure to access the interning map on `Self` + /// * `gen` - a closure to generate an intermediate value with `Self` from + /// `K`. This is only used if `key` hasn't previously been seen. This + /// closure can recursively intern other values possibly. + /// * `init` - a closure to use the result of `gen` to create the final + /// initializer now that the index `V` of the runtime item is known. + /// + /// This is used by all the other interning methods above to lazily append + /// initializers on-demand and avoid pushing more than one initializer at a + /// time. 
+ fn intern( + &mut self, + key: K, + map: impl Fn(&mut Self) -> &mut HashMap, + gen: impl FnOnce(&mut Self, K) -> T, + init: impl FnOnce(V, T) -> GlobalInitializer, + ) -> V + where + K: Hash + Eq + Copy, + V: EntityRef, + { + if let Some(val) = map(self).get(&key) { + return val.clone(); + } + let tmp = gen(self, key); + let index = V::new(map(self).len()); + self.initializers.push(init(index, tmp)); + let prev = map(self).insert(key, index); + assert!(prev.is_none()); + index + } +} diff --git a/crates/environ/src/component/info.rs b/crates/environ/src/component/info.rs index 85eac6814a3e..63f8545b825a 100644 --- a/crates/environ/src/component/info.rs +++ b/crates/environ/src/component/info.rs @@ -313,17 +313,6 @@ pub enum CoreDef { /// function is immediately `canon lower`'d in the same instance. Such a /// function always traps at runtime. AlwaysTrap(RuntimeAlwaysTrapIndex), - /// This refers to a core wasm function which is a synthesized fused adapter - /// between two other core wasm functions. - /// - /// The adapter's information is identified by `AdapterIndex` which is - /// available through an auxiliary map created during compilation of a - /// component. For more information see `adapt.rs`. - /// - /// Note that this is an intermediate variant which is replaced by the time - /// a component is fully compiled. This will be replaced with the `Export` - /// variant which refers to the export of an adapter module. - Adapter(AdapterIndex), /// This is a reference to a wasm global which represents the /// runtime-managed flags for a wasm instance. InstanceFlags(RuntimeComponentInstanceIndex), @@ -436,7 +425,7 @@ pub struct CanonicalOptions { // Note that the `repr(u8)` is load-bearing here since this is used in an // `extern "C" fn()` function argument which is called from cranelift-compiled // code so we must know the representation of this. 
-#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] #[allow(missing_docs)] #[repr(u8)] pub enum StringEncoding { diff --git a/crates/environ/src/component/translate.rs b/crates/environ/src/component/translate.rs index a2ba465b8813..1bc559b38ef4 100644 --- a/crates/environ/src/component/translate.rs +++ b/crates/environ/src/component/translate.rs @@ -361,14 +361,14 @@ impl<'a, 'data> Translator<'a, 'data> { // much simpler than the original component and more efficient for // Wasmtime to process at runtime as well (e.g. no string lookups as // most everything is done through indices instead). - let (mut component, mut adapters) = inline::run( + let mut component = inline::run( &self.types, &self.result, &self.static_modules, &self.static_components, )?; - self.insert_adapter_module_initializers(&mut component, &mut adapters); - Ok((component, self.static_modules)) + self.partition_adapter_modules(&mut component); + Ok((component.finish(), self.static_modules)) } fn translate_payload( diff --git a/crates/environ/src/component/translate/adapt.rs b/crates/environ/src/component/translate/adapt.rs index b8afc0557a8d..ab5b7e4034ea 100644 --- a/crates/environ/src/component/translate/adapt.rs +++ b/crates/environ/src/component/translate/adapt.rs @@ -101,33 +101,27 @@ //! algorithm is a one-pass approach to partitioning everything into adapter //! modules. //! -//! As the `GlobalInitializer` list is iterated over the last adapter module -//! created is recorded. Each adapter module, when created, records the index -//! space limits at the time of its creation. If a new adapter is found which -//! depends on an item after the original adapter module was created then the -//! prior adapter module is finished and a new one is started. Adapters only -//! ever attempt to get inserted into the most recent adapter module, no -//! 
searching is currently done to try to fit adapters into a prior adapter -//! module. +//! Adapters were identified in-order as part of the inlining phase of +//! translation where we're guaranteed that once an adapter is identified +//! it can't depend on anything identified later. The pass implemented here is +//! to visit all transitive dependencies of an adapter. If one of the +//! dependencies of an adapter is an adapter in the current adapter module +//! being built then the current module is finished and a new adapter module is +//! started. This should quickly partition adapters into contiguous chunks of +//! their index space which can be in adapter modules together. //! -//! During this remapping process the `RuntimeInstanceIndex` for all instances -//! is also updated. Insertion of an adapter module will increase all further -//! instance indices by one so this must be accounted for in various -//! references. +//! There are probably more general algorithms for this but for now this should be +//! fast enough as it's "just" a linear pass. As we get more components over +//! time this may want to be revisited if too many adapter modules are being +//! created. use crate::component::translate::*; use crate::fact::Module; +use std::collections::HashSet; use wasmparser::WasmFeatures; -/// Information about fused adapters within a component. -#[derive(Default)] -pub struct Adapters { - /// List of all fused adapters identified which are assigned an index and - /// contain various metadata about them as well. - pub adapters: PrimaryMap, -} - /// Metadata information about a fused adapter. +#[derive(Debug, Clone, Hash, Eq, PartialEq)] pub struct Adapter { /// The type used when the original core wasm function was lifted. /// @@ -145,12 +139,12 @@ pub struct Adapter { /// Canonical ABI options used when the function was lowered. pub lower_options: AdapterOptions, /// The original core wasm function which was lifted. 
- pub func: CoreDef, + pub func: dfg::CoreDef, } /// Configuration options which can be specified as part of the canonical ABI /// in the component model. -#[derive(Clone)] +#[derive(Debug, Clone, Hash, Eq, PartialEq)] pub struct AdapterOptions { /// The Wasmtime-assigned component instance index where the options were /// originally specified. @@ -158,503 +152,230 @@ pub struct AdapterOptions { /// How strings are encoded. pub string_encoding: StringEncoding, /// An optional memory definition supplied. - pub memory: Option>, + pub memory: Option>, /// If `memory` is specified, whether it's a 64-bit memory. pub memory64: bool, /// An optional definition of `realloc` to used. - pub realloc: Option, + pub realloc: Option, /// An optional definition of a `post-return` to use. - pub post_return: Option, + pub post_return: Option, } impl<'data> Translator<'_, 'data> { - /// Modifies the list of `GlobalInitializer` entries within a - /// `Component`with `InstantiateModule::Adapter` entries where necessary. - /// /// This is the entrypoint of functionality within this module which /// performs all the work of identifying adapter usages and organizing /// everything into adapter modules. - pub(super) fn insert_adapter_module_initializers( - &mut self, - component: &mut Component, - adapters: &mut Adapters, - ) { - let mut state = PartitionAdapterModules { - to_process: Vec::new(), - cur_idx: 0, - adapter_modules: PrimaryMap::new(), - items: DefinedItems::default(), - instance_map: PrimaryMap::with_capacity(component.num_runtime_instances as usize), - }; - state.run(component, adapters); - - // Next, in reverse, insert all of the adapter modules into the actual - // initializer list. Note that the iteration order is important here to - // ensure that all the `at_initializer_index` listed is valid for each - // entry. 
- let mut adapter_map = PrimaryMap::with_capacity(adapters.adapters.len()); - for _ in adapters.adapters.iter() { - adapter_map.push(None); - } - for (_, module) in state.adapter_modules.into_iter().rev() { - let index = module.at_initializer_index; - let instantiate = self.compile_adapter_module(module, adapters, &mut adapter_map); - let init = GlobalInitializer::InstantiateModule(instantiate); - component.initializers.insert(index, init); - } - - // Finally all references to `CoreDef::Adapter` are rewritten to their - // corresponding `CoreDef::Export` as identified within `adapter_map`. - for init in component.initializers.iter_mut() { - map_adapter_references(init, &adapter_map); - } - } - - fn compile_adapter_module( - &mut self, - module_parts: AdapterModuleParts, - adapters: &Adapters, - adapter_map: &mut PrimaryMap>>, - ) -> InstantiateModule { - // Use the `fact::Module` builder to create a new wasm module which - // represents all of the adapters specified here. - let mut module = Module::new( - self.types.component_types(), - self.tunables.debug_adapter_modules, - ); - let mut names = Vec::with_capacity(module_parts.adapters.len()); - for adapter in module_parts.adapters.iter() { - let name = format!("adapter{}", adapter.as_u32()); - module.adapt(&name, &adapters.adapters[*adapter]); - names.push(name); + /// + /// This will mutate the provided `component` in-place and fill out the dfg + /// metadata for adapter modules. + pub(super) fn partition_adapter_modules(&mut self, component: &mut dfg::ComponentDfg) { + // Visit each adapter, in order of its original definition, during the + // partitioning. This allows for the guarantee that dependencies are + // visited in a topological fashion ideally. 
+ let mut state = PartitionAdapterModules::default(); + for (id, adapter) in component.adapters.iter() { + state.adapter(component, id, adapter); } - let wasm = module.encode(); - let args = module.imports().to_vec(); - - // Extend the lifetime of the owned `wasm: Vec` on the stack to a - // higher scope defined by our original caller. That allows to transform - // `wasm` into `&'data [u8]` which is much easier to work with here. - let wasm = &*self.scope_vec.push(wasm); - if log::log_enabled!(log::Level::Trace) { - match wasmprinter::print_bytes(wasm) { - Ok(s) => log::trace!("generated adapter module:\n{}", s), - Err(e) => log::trace!("failed to print adapter module: {}", e), + state.finish_adapter_module(); + + // Now that all adapters have been partitioned into modules this loop + // generates a core wasm module for each adapter module, translates + // the module using standard core wasm translation, and then fills out + // the dfg metadata for each adapter. + for (module_id, adapter_module) in state.adapter_modules.iter() { + let mut module = Module::new( + self.types.component_types(), + self.tunables.debug_adapter_modules, + ); + let mut names = Vec::with_capacity(adapter_module.adapters.len()); + for adapter in adapter_module.adapters.iter() { + let name = format!("adapter{}", adapter.as_u32()); + module.adapt(&name, &component.adapters[*adapter]); + names.push(name); + } + let wasm = module.encode(); + let args = module.imports().to_vec(); + + // Extend the lifetime of the owned `wasm: Vec` on the stack to + // a higher scope defined by our original caller. That allows to + // transform `wasm` into `&'data [u8]` which is much easier to work + // with here. 
+ let wasm = &*self.scope_vec.push(wasm); + if log::log_enabled!(log::Level::Trace) { + match wasmprinter::print_bytes(wasm) { + Ok(s) => log::trace!("generated adapter module:\n{}", s), + Err(e) => log::trace!("failed to print adapter module: {}", e), + } } - } - - // With the wasm binary this is then pushed through general translation, - // validation, etc. Note that multi-memory is specifically enabled here - // since the adapter module is highly likely to use that if anything is - // actually indirected through memory. - let mut validator = Validator::new_with_features(WasmFeatures { - multi_memory: true, - ..*self.validator.features() - }); - let translation = ModuleEnvironment::new( - self.tunables, - &mut validator, - self.types.module_types_builder(), - ) - .translate(Parser::new(0), wasm) - .expect("invalid adapter module generated"); - // And with all metadata available about the generated module a map can - // be built from adapter index to the precise export in the module that - // was generated. - for (adapter, name) in module_parts.adapters.iter().zip(&names) { - assert!(adapter_map[*adapter].is_none()); - let index = translation.module.exports[name]; - adapter_map[*adapter] = Some(CoreExport { - instance: module_parts.index, - item: ExportItem::Index(index), + // With the wasm binary this is then pushed through general + // translation, validation, etc. Note that multi-memory is + // specifically enabled here since the adapter module is highly + // likely to use that if anything is actually indirected through + // memory. 
+ let mut validator = Validator::new_with_features(WasmFeatures { + multi_memory: true, + ..*self.validator.features() }); - } + let translation = ModuleEnvironment::new( + self.tunables, + &mut validator, + self.types.module_types_builder(), + ) + .translate(Parser::new(0), wasm) + .expect("invalid adapter module generated"); + + // Record, for each adapter in this adapter module, the module that + // the adapter was placed within as well as the function index of + // the adapter in the wasm module generated. Note that adapters are + // partitioned in-order so we're guaranteed to push the adapters + // in-order here as well. (with an assert to double-check) + for (adapter, name) in adapter_module.adapters.iter().zip(&names) { + let index = translation.module.exports[name]; + let i = component.adapter_paritionings.push((module_id, index)); + assert_eq!(i, *adapter); + } - // Finally the module translation is saved in the list of static - // modules to get fully compiled later and the `InstantiateModule` - // representation of this adapter module is returned. - let static_index = self.static_modules.push(translation); - InstantiateModule::Static(static_index, args.into()) + // Finally the metadata necessary to instantiate this adapter + // module is also recorded in the dfg. This metadata will be used + // to generate `GlobalInitializer` entries during the linearization + // final phase. + let static_index = self.static_modules.push(translation); + let id = component.adapter_modules.push((static_index, args.into())); + assert_eq!(id, module_id); + } } } +#[derive(Default)] struct PartitionAdapterModules { - /// Stack of remaining elements to process - to_process: Vec, - - /// Index of the current `GlobalInitializer` being processed. - cur_idx: usize, + /// The next adapter module that's being created. This may be empty. + next_module: AdapterModuleInProgress, - /// Information about all fused adapter modules that have been created so - /// far. 
- /// - /// This is modified whenever a fused adapter is used. - adapter_modules: PrimaryMap, + /// The set of items which are known to be defined which the adapter module + /// in progress is allowed to depend on. + defined_items: HashSet, - /// Map from "old runtime instance index" to "new runtime instance index". + /// Finished adapter modules that won't be added to. /// - /// This map is populated when instances are created to account for prior - /// adapter modules having been created. This effectively tracks an offset - /// for each index. - instance_map: PrimaryMap, - - /// Current limits of index spaces. - items: DefinedItems, -} - -/// Entries in the `PartitionAdapterModules::to_process` array. -enum ToProcess { - /// An adapter needs its own dependencies processed. This will map the - /// fields of `Adapter` above for the specified index. - Adapter(AdapterIndex), - /// An adapter has had its dependencies fully processed (transitively) and - /// the adapter now needs to be inserted into a module. - AddAdapterToModule(AdapterIndex), - /// A global initializer needs to be remapped. - GlobalInitializer(usize), - /// An export needs to be remapped. - Export(usize), - /// A global initializer which creates an instance has had all of its - /// arguments processed and now the instance number needs to be recorded. - PushInstance, + /// In theory items could be added to preexisting modules here but to keep + /// this pass linear this is never modified after insertion. + adapter_modules: PrimaryMap, } -/// Custom index type used exclusively for the `adapter_modules` map above. -#[derive(Copy, Clone, PartialEq, Eq)] -struct AdapterModuleIndex(u32); -cranelift_entity::entity_impl!(AdapterModuleIndex); - -struct AdapterModuleParts { - /// The runtime index that will be assigned to this adapter module when it's - /// instantiated. - index: RuntimeInstanceIndex, - /// The index in the `GlobalInitializer` list that this adapter module will - /// get inserted at. 
- at_initializer_index: usize, - /// Items that were available when this adapter module was created. - items_at_initializer: DefinedItems, - /// Adapters that have been inserted into this module, guaranteed to be - /// non-empty. - adapters: Vec, +#[derive(Default)] +struct AdapterModuleInProgress { + /// The adapters which have been placed into this module. + adapters: Vec, } -#[derive(Default, Clone)] -struct DefinedItems { - /// Number of core wasm instances created so far. - /// - /// Note that this does not count adapter modules created, only the - /// instance index space before adapter modules were inserted. - instances: u32, - /// Number of host-lowered functions seen so far. - lowerings: u32, - /// Number of "always trap" functions seen so far. - always_trap: u32, - /// Map of whether adapters have been inserted into an adapter module yet. - adapter_to_module: PrimaryMap>, +/// Items that adapters can depend on. +/// +/// Note that this is somewhat of a flat list and is intended to mostly model +/// core wasm instances which are side-effectful unlike other host items like +/// lowerings or always-trapping functions. +#[derive(Copy, Clone, Hash, Eq, PartialEq)] +enum Def { + Adapter(dfg::AdapterId), + Instance(dfg::InstanceId), } impl PartitionAdapterModules { - /// Process the list of global `initializers` and partitions adapters into - /// adapter modules which will get inserted into the provided list in a - /// later pass. - fn run(&mut self, component: &mut Component, adapters: &mut Adapters) { - // This function is designed to be an iterative loop which models - // recursion in the `self.to_process` array instead of on the host call - // stack. The reason for this is that adapters need recursive processing - // since the argument to an adapter can hypothetically be an adapter - // itself (albeit silly but still valid). 
This recursive nature of - // adapters means that a component could be crafted to have an - // arbitrarily deep recursive dependeny chain for any one adapter. To - // avoid consuming host stack space the storage for this dependency - // chain is placed on the heap. + fn adapter(&mut self, dfg: &dfg::ComponentDfg, id: dfg::AdapterId, adapter: &Adapter) { + // Visit all dependencies of this adapter and if anything depends on + // the current adapter module in progress then a new adapter module is + // started. + self.adapter_options(dfg, &adapter.lift_options); + self.adapter_options(dfg, &adapter.lower_options); + self.core_def(dfg, &adapter.func); + + // With all dependencies visited this adapter is added to the next + // module. // - // The `self.to_process` list is a FIFO queue of what to process next. - // Initially seeded with all the global initializer indexes this is - // pushed to during processing to recursively handle adapters and - // similar. - assert!(self.to_process.is_empty()); - assert!(self.items.adapter_to_module.is_empty()); - - // Initially record all adapters as having no module which will get - // filled in over time. - for _ in adapters.adapters.iter() { - self.items.adapter_to_module.push(None); - } - - // Seed the worklist of what to process with the list of global - // initializers and exports, but in reverse order since this is a LIFO - // queue. Afterwards all of the items to process are handled in a loop. 
- for i in (0..component.exports.len()).rev() { - self.to_process.push(ToProcess::Export(i)); - } - for i in (0..component.initializers.len()).rev() { - self.to_process.push(ToProcess::GlobalInitializer(i)); - } - - while let Some(to_process) = self.to_process.pop() { - match to_process { - ToProcess::GlobalInitializer(i) => { - assert!(i <= self.cur_idx + 1); - self.cur_idx = i; - self.global_initializer(&mut component.initializers[i]); - } - - ToProcess::Export(i) => { - self.cur_idx = component.initializers.len(); - self.export(&mut component.exports[i]); - } - - ToProcess::PushInstance => { - // A new runtime instance is being created here so insert an - // entry into the remapping map for instance indexes. This - // instance's index is offset by the number of adapter modules - // created prior. - self.instance_map - .push(RuntimeInstanceIndex::from_u32(self.items.instances)); - self.items.instances += 1; - } - - ToProcess::Adapter(idx) => { - let info = &mut adapters.adapters[idx]; - self.process_core_def(&mut info.func); - self.process_options(&mut info.lift_options); - self.process_options(&mut info.lower_options); - } - - ToProcess::AddAdapterToModule(idx) => { - // If this adapter has already been assigned to a module - // then there's no need to do anything else here. - // - // This can happen when a core wasm instance is created with - // an adapter as the argument multiple times for example. - if self.items.adapter_to_module[idx].is_some() { - continue; - } - - // If an adapter module is already in progress and - // everything this adapter depends on was available at the - // time of creation of that adapter module, then this - // adapter can go in that module. - if let Some((module_idx, module)) = self.adapter_modules.last_mut() { - let info = &adapters.adapters[idx]; - if module.items_at_initializer.contains(info) { - self.items.adapter_to_module[idx] = Some(module_idx); - module.adapters.push(idx); - continue; - } - } - - // ... 
otherwise a new adapter module is started. Note that - // the instance count is bumped here to model the - // instantiation of the adapter module. - let module = AdapterModuleParts { - index: RuntimeInstanceIndex::from_u32(self.items.instances), - at_initializer_index: self.cur_idx, - items_at_initializer: self.items.clone(), - adapters: vec![idx], - }; - let index = self.adapter_modules.push(module); - self.items.adapter_to_module[idx] = Some(index); - self.items.instances += 1; - } - } - } - } - - fn global_initializer(&mut self, init: &mut GlobalInitializer) { - match init { - GlobalInitializer::InstantiateModule(module) => { - // Enqueue a bump of the instance count, but this only happens - // after all the arguments have been processed below. Given the - // LIFO nature of `self.to_process` this will be handled after - // all arguments are recursively processed. - self.to_process.push(ToProcess::PushInstance); - - match module { - InstantiateModule::Static(_, args) => { - for def in args.iter_mut() { - self.process_core_def(def); - } - } - InstantiateModule::Import(_, args) => { - for (_, map) in args { - for (_, def) in map { - self.process_core_def(def); - } - } - } - } - } - - GlobalInitializer::ExtractRealloc(e) => self.process_core_def(&mut e.def), - GlobalInitializer::ExtractPostReturn(e) => self.process_core_def(&mut e.def), - - // Update items available as they're defined - GlobalInitializer::LowerImport(_) => self.items.lowerings += 1, - GlobalInitializer::AlwaysTrap(_) => self.items.always_trap += 1, - - // Nothing is defined or referenced by these initializers that we - // need to worry about here. - GlobalInitializer::ExtractMemory(_) => {} - GlobalInitializer::SaveStaticModule(_) => {} - GlobalInitializer::SaveModuleImport(_) => {} - } - } - - fn export(&mut self, export: &mut Export) { - match export { - Export::LiftedFunction { func, .. 
} => { - self.process_core_def(func); - } - Export::Instance(exports) => { - for (_, export) in exports { - self.export(export); - } - } - Export::Module(_) => {} - } + // This will either get added to the preexisting module if this adapter + // didn't depend on anything in that module itself or it will be added + // to a fresh module if this adapter depended on something that the + // current adapter module created. + log::debug!("adding {id:?} to adapter module {adapter:#?}"); + self.next_module.adapters.push(id); } - fn process_options(&mut self, opts: &mut AdapterOptions) { - if let Some(memory) = &mut opts.memory { - self.process_core_export(memory); + fn adapter_options(&mut self, dfg: &dfg::ComponentDfg, options: &AdapterOptions) { + if let Some(memory) = &options.memory { + self.core_export(dfg, memory); } - if let Some(def) = &mut opts.realloc { - self.process_core_def(def); + if let Some(def) = &options.realloc { + self.core_def(dfg, def); } - if let Some(def) = &mut opts.post_return { - self.process_core_def(def); + if let Some(def) = &options.post_return { + self.core_def(dfg, def); } } - fn process_core_def(&mut self, def: &mut CoreDef) { + fn core_def(&mut self, dfg: &dfg::ComponentDfg, def: &dfg::CoreDef) { match def { - CoreDef::Adapter(idx) => { - // The `to_process` queue is a LIFO queue so first enqueue the - // addition of this adapter into a module followed by the - // processing of the adapter itself. This means that the - // adapter's own dependencies will be processed before the - // adapter is added to a module. - self.to_process.push(ToProcess::AddAdapterToModule(*idx)); - self.to_process.push(ToProcess::Adapter(*idx)); - } - - CoreDef::Export(e) => self.process_core_export(e), - - // These are ignored since they don't contain a reference to an - // adapter which may need to be inserted into a module. 
- CoreDef::Lowered(_) | CoreDef::AlwaysTrap(_) | CoreDef::InstanceFlags(_) => {} - } - } - - fn process_core_export(&mut self, export: &mut CoreExport) { - // Remap the instance index referenced here as necessary to account - // for any adapter modules that needed creating in the meantime. - export.instance = self.instance_map[export.instance]; - } -} - -impl DefinedItems { - fn contains(&self, info: &Adapter) -> bool { - self.contains_options(&info.lift_options) - && self.contains_options(&info.lower_options) - && self.contains_def(&info.func) - } - - fn contains_options(&self, options: &AdapterOptions) -> bool { - let AdapterOptions { - instance: _, - string_encoding: _, - memory64: _, - memory, - realloc, - post_return, - } = options; - - if let Some(mem) = memory { - if !self.contains_export(mem) { - return false; - } - } + dfg::CoreDef::Export(e) => self.core_export(dfg, e), + dfg::CoreDef::Adapter(id) => { + // If this adapter is already defined then we can safely depend + // on it with no consequences. + if self.defined_items.contains(&Def::Adapter(*id)) { + return; + } - if let Some(def) = realloc { - if !self.contains_def(def) { - return false; + // .. otherwise we found a case of an adapter depending on an + // adapter-module-in-progress meaning that the current adapter + // module must be completed and then a new one is started. 
+ self.finish_adapter_module(); + assert!(self.defined_items.contains(&Def::Adapter(*id))); } - } - if let Some(def) = post_return { - if !self.contains_def(def) { - return false; - } + // These items can't transitively depend on an adapter + dfg::CoreDef::Lowered(_) + | dfg::CoreDef::AlwaysTrap(_) + | dfg::CoreDef::InstanceFlags(_) => {} } - - true } - fn contains_def(&self, options: &CoreDef) -> bool { - match options { - CoreDef::Export(e) => self.contains_export(e), - CoreDef::AlwaysTrap(i) => i.as_u32() < self.always_trap, - CoreDef::Lowered(i) => i.as_u32() < self.lowerings, - CoreDef::Adapter(idx) => self.adapter_to_module[*idx].is_some(), - CoreDef::InstanceFlags(_) => true, + fn core_export(&mut self, dfg: &dfg::ComponentDfg, export: &dfg::CoreExport) { + // If this instance has already been visited that means it can already + // be defined for this adapter module, so nothing else needs to be done. + if !self.defined_items.insert(Def::Instance(export.instance)) { + return; } - } - - fn contains_export(&self, export: &CoreExport) -> bool { - // This `DefinedItems` index space will contain `export` if the - // instance referenced has already been instantiated. The actual item - // that `export` points to doesn't need to be tested since it comes - // from the instance regardless. - export.instance.as_u32() < self.instances - } -} -/// Rewrites all instances of `CoreDef::Adapter` within the `init` initializer -/// provided to `CoreExport` according to the `map` provided. -/// -/// This is called after all adapter modules have been constructed and the -/// core wasm function for each adapter has been identified. 
-fn map_adapter_references( - init: &mut GlobalInitializer, - map: &PrimaryMap>>, -) { - let map_core_def = |def: &mut CoreDef| { - let adapter = match def { - CoreDef::Adapter(idx) => *idx, - _ => return, - }; - *def = CoreDef::Export( - map[adapter] - .clone() - .expect("adapter should have been instantiated"), - ); - }; - match init { - GlobalInitializer::InstantiateModule(module) => match module { - InstantiateModule::Static(_, args) => { - for def in args.iter_mut() { - map_core_def(def); + // ... otherwise if this is the first time the instance has been seen + // then the instance's own arguments are recursively visited to find + // transitive dependencies on adapters. + match &dfg.instances[export.instance] { + dfg::Instance::Static(_, args) => { + for arg in args.iter() { + self.core_def(dfg, arg); } } - InstantiateModule::Import(_, args) => { - for (_, map) in args { - for (_, def) in map { - map_core_def(def); + dfg::Instance::Import(_, args) => { + for (_, values) in args { + for (_, def) in values { + self.core_def(dfg, def); } } } - }, + } + } - GlobalInitializer::ExtractRealloc(e) => map_core_def(&mut e.def), - GlobalInitializer::ExtractPostReturn(e) => map_core_def(&mut e.def), + fn finish_adapter_module(&mut self) { + if self.next_module.adapters.is_empty() { + return; + } - // Nothing to map here - GlobalInitializer::LowerImport(_) - | GlobalInitializer::AlwaysTrap(_) - | GlobalInitializer::ExtractMemory(_) => {} - GlobalInitializer::SaveStaticModule(_) => {} - GlobalInitializer::SaveModuleImport(_) => {} + // Reset the state of the current module-in-progress and then flag all + // pending adapters as now defined since the current module is being + // committed. 
+ let module = mem::take(&mut self.next_module); + for adapter in module.adapters.iter() { + let inserted = self.defined_items.insert(Def::Adapter(*adapter)); + assert!(inserted); + } + let idx = self.adapter_modules.push(module); + log::debug!("finishing adapter module {idx:?}"); } } diff --git a/crates/environ/src/component/translate/inline.rs b/crates/environ/src/component/translate/inline.rs index b7507cd36267..a1244eedcfde 100644 --- a/crates/environ/src/component/translate/inline.rs +++ b/crates/environ/src/component/translate/inline.rs @@ -45,9 +45,9 @@ //! side-effectful initializers are emitted to the `GlobalInitializer` list in the //! final `Component`. -use crate::component::translate::adapt::{Adapter, AdapterOptions, Adapters}; +use crate::component::translate::adapt::{Adapter, AdapterOptions}; use crate::component::translate::*; -use crate::{EntityType, PrimaryMap, SignatureIndex}; +use crate::{EntityType, PrimaryMap}; use indexmap::IndexMap; pub(super) fn run( @@ -55,18 +55,13 @@ pub(super) fn run( result: &Translation<'_>, nested_modules: &PrimaryMap>, nested_components: &PrimaryMap>, -) -> Result<(Component, Adapters)> { +) -> Result { let mut inliner = Inliner { types, nested_modules, nested_components, - result: Component::default(), - adapters: Adapters::default(), + result: Default::default(), import_path_interner: Default::default(), - runtime_realloc_interner: Default::default(), - runtime_post_return_interner: Default::default(), - runtime_memory_interner: Default::default(), - runtime_always_trap_interner: Default::default(), runtime_instances: PrimaryMap::default(), }; @@ -109,7 +104,7 @@ pub(super) fn run( } inliner.result.exports = export_map; - Ok((inliner.result, inliner.adapters)) + Ok(inliner.result) } struct Inliner<'a> { @@ -134,21 +129,14 @@ struct Inliner<'a> { /// The final `Component` that is being constructed and returned from this /// inliner. 
- result: Component, - - /// Metadata about fused adapters identified throughout inlining. - adapters: Adapters, + result: dfg::ComponentDfg, // Maps used to "intern" various runtime items to only save them once at // runtime instead of multiple times. import_path_interner: HashMap, RuntimeImportIndex>, - runtime_realloc_interner: HashMap, - runtime_post_return_interner: HashMap, - runtime_memory_interner: HashMap, RuntimeMemoryIndex>, - runtime_always_trap_interner: HashMap, /// Origin information about where each runtime instance came from - runtime_instances: PrimaryMap, + runtime_instances: PrimaryMap, } /// A "stack frame" as part of the inlining process, or the progress through @@ -180,10 +168,10 @@ struct InlinerFrame<'a> { args: HashMap<&'a str, ComponentItemDef<'a>>, // core wasm index spaces - funcs: PrimaryMap, - memories: PrimaryMap>, - tables: PrimaryMap>, - globals: PrimaryMap>, + funcs: PrimaryMap, + memories: PrimaryMap>, + tables: PrimaryMap>, + globals: PrimaryMap>, modules: PrimaryMap>, // component model index spaces @@ -261,7 +249,7 @@ enum ModuleInstanceDef<'a> { /// The `RuntimeInstanceIndex` was the index allocated as this was the /// `n`th instantiation and the `ModuleIndex` points into an /// `InlinerFrame`'s local index space. - Instantiated(RuntimeInstanceIndex, ModuleIndex), + Instantiated(dfg::InstanceId, ModuleIndex), /// A "synthetic" core wasm module which is just a bag of named indices. /// @@ -278,7 +266,7 @@ enum ComponentFuncDef<'a> { /// A core wasm function was lifted into a component function. Lifted { ty: TypeFuncIndex, - func: CoreDef, + func: dfg::CoreDef, options: AdapterOptions, }, } @@ -408,19 +396,14 @@ impl<'a> Inliner<'a> { // trampoline to enter WebAssembly. That's recorded here // with all relevant information. 
ComponentFuncDef::Import(path) => { - let index = LoweredIndex::from_u32(self.result.num_lowerings); - self.result.num_lowerings += 1; let import = self.runtime_import(path); let options = self.canonical_options(options_lower); - self.result - .initializers - .push(GlobalInitializer::LowerImport(LowerImport { - canonical_abi, - import, - index, - options, - })); - CoreDef::Lowered(index) + let index = self.result.lowerings.push_uniq(dfg::LowerImport { + canonical_abi, + import, + options, + }); + dfg::CoreDef::Lowered(index) } // This case handles when a lifted function is later @@ -452,22 +435,8 @@ impl<'a> Inliner<'a> { options: options_lift, .. } if options_lift.instance == options_lower.instance => { - let index = *self - .runtime_always_trap_interner - .entry(canonical_abi) - .or_insert_with(|| { - let index = - RuntimeAlwaysTrapIndex::from_u32(self.result.num_always_trap); - self.result.num_always_trap += 1; - self.result.initializers.push(GlobalInitializer::AlwaysTrap( - AlwaysTrap { - canonical_abi, - index, - }, - )); - index - }); - CoreDef::AlwaysTrap(index) + let index = self.result.always_trap.push_uniq(canonical_abi); + dfg::CoreDef::AlwaysTrap(index) } // Lowering a lifted function where the destination @@ -503,14 +472,14 @@ impl<'a> Inliner<'a> { func, options: options_lift, } => { - let adapter_idx = self.adapters.adapters.push(Adapter { + let adapter_idx = self.result.adapters.push_uniq(Adapter { lift_ty: *lift_ty, lift_options: options_lift.clone(), lower_ty, lower_options: options_lower, func: func.clone(), }); - CoreDef::Adapter(adapter_idx) + dfg::CoreDef::Adapter(adapter_idx) } }; frame.funcs.push(func); @@ -555,7 +524,7 @@ impl<'a> Inliner<'a> { ); } instance_module = InstanceModule::Static(*idx); - InstantiateModule::Static(*idx, defs.into()) + dfg::Instance::Static(*idx, defs.into()) } ModuleDef::Import(path, ty) => { let mut defs = IndexMap::new(); @@ -569,17 +538,13 @@ impl<'a> Inliner<'a> { } let index = self.runtime_import(path); 
instance_module = InstanceModule::Import(*ty); - InstantiateModule::Import(index, defs) + dfg::Instance::Import(index, defs) } }; - let idx = RuntimeInstanceIndex::from_u32(self.result.num_runtime_instances); - self.result.num_runtime_instances += 1; + let idx = self.result.instances.push(init); let idx2 = self.runtime_instances.push(instance_module); assert_eq!(idx, idx2); - self.result - .initializers - .push(GlobalInitializer::InstantiateModule(init)); frame .module_instances .push(ModuleInstanceDef::Instantiated(idx, *module)); @@ -663,7 +628,7 @@ impl<'a> Inliner<'a> { AliasExportTable(instance, name) => { frame.tables.push( match self.core_def_of_module_instance_export(frame, *instance, *name) { - CoreDef::Export(e) => e, + dfg::CoreDef::Export(e) => e, _ => unreachable!(), }, ); @@ -672,7 +637,7 @@ impl<'a> Inliner<'a> { AliasExportGlobal(instance, name) => { frame.globals.push( match self.core_def_of_module_instance_export(frame, *instance, *name) { - CoreDef::Export(e) => e, + dfg::CoreDef::Export(e) => e, _ => unreachable!(), }, ); @@ -681,7 +646,7 @@ impl<'a> Inliner<'a> { AliasExportMemory(instance, name) => { frame.memories.push( match self.core_def_of_module_instance_export(frame, *instance, *name) { - CoreDef::Export(e) => e, + dfg::CoreDef::Export(e) => e, _ => unreachable!(), }, ); @@ -783,7 +748,7 @@ impl<'a> Inliner<'a> { frame: &InlinerFrame<'a>, instance: ModuleInstanceIndex, name: &'a str, - ) -> CoreDef { + ) -> dfg::CoreDef { match &frame.module_instances[instance] { // Instantiations of a statically known module means that we can // refer to the exported item by a precise index, skipping name @@ -800,7 +765,7 @@ impl<'a> Inliner<'a> { } ModuleDef::Import(..) => ExportItem::Name(name.to_string()), }; - CoreExport { + dfg::CoreExport { instance: *instance, item, } @@ -866,57 +831,17 @@ impl<'a> Inliner<'a> { /// memories/functions are inserted into the global initializer list for /// use at runtime. 
This is only used for lowered host functions and lifted /// functions exported to the host. - fn canonical_options(&mut self, options: AdapterOptions) -> CanonicalOptions { - let memory = options.memory.map(|export| { - *self - .runtime_memory_interner - .entry(export.clone()) - .or_insert_with(|| { - let index = RuntimeMemoryIndex::from_u32(self.result.num_runtime_memories); - self.result.num_runtime_memories += 1; - self.result - .initializers - .push(GlobalInitializer::ExtractMemory(ExtractMemory { - index, - export, - })); - index - }) - }); - let realloc = options.realloc.map(|def| { - *self - .runtime_realloc_interner - .entry(def.clone()) - .or_insert_with(|| { - let index = RuntimeReallocIndex::from_u32(self.result.num_runtime_reallocs); - self.result.num_runtime_reallocs += 1; - self.result - .initializers - .push(GlobalInitializer::ExtractRealloc(ExtractRealloc { - index, - def, - })); - index - }) - }); - let post_return = options.post_return.map(|def| { - *self - .runtime_post_return_interner - .entry(def.clone()) - .or_insert_with(|| { - let index = - RuntimePostReturnIndex::from_u32(self.result.num_runtime_post_returns); - self.result.num_runtime_post_returns += 1; - self.result - .initializers - .push(GlobalInitializer::ExtractPostReturn(ExtractPostReturn { - index, - def, - })); - index - }) - }); - CanonicalOptions { + fn canonical_options(&mut self, options: AdapterOptions) -> dfg::CanonicalOptions { + let memory = options + .memory + .map(|export| self.result.memories.push_uniq(export)); + let realloc = options + .realloc + .map(|def| self.result.reallocs.push_uniq(def)); + let post_return = options + .post_return + .map(|def| self.result.post_returns.push_uniq(def)); + dfg::CanonicalOptions { instance: options.instance, string_encoding: options.string_encoding, memory, @@ -929,25 +854,17 @@ impl<'a> Inliner<'a> { &mut self, name: &str, def: ComponentItemDef<'a>, - map: &mut IndexMap, + map: &mut IndexMap, ) -> Result<()> { let export = match def 
{ // Exported modules are currently saved in a `PrimaryMap`, at // runtime, so an index (`RuntimeModuleIndex`) is assigned here and // then an initializer is recorded about where the module comes // from. - ComponentItemDef::Module(module) => { - let index = RuntimeModuleIndex::from_u32(self.result.num_runtime_modules); - self.result.num_runtime_modules += 1; - let init = match module { - ModuleDef::Static(idx) => GlobalInitializer::SaveStaticModule(idx), - ModuleDef::Import(path, _) => { - GlobalInitializer::SaveModuleImport(self.runtime_import(&path)) - } - }; - self.result.initializers.push(init); - Export::Module(index) - } + ComponentItemDef::Module(module) => match module { + ModuleDef::Static(idx) => dfg::Export::ModuleStatic(idx), + ModuleDef::Import(path, _) => dfg::Export::ModuleImport(self.runtime_import(&path)), + }, ComponentItemDef::Func(func) => match func { // If this is a lifted function from something lowered in this @@ -955,7 +872,7 @@ impl<'a> Inliner<'a> { // here. ComponentFuncDef::Lifted { ty, func, options } => { let options = self.canonical_options(options); - Export::LiftedFunction { ty, func, options } + dfg::Export::LiftedFunction { ty, func, options } } // Currently reexported functions from an import are not @@ -995,7 +912,7 @@ impl<'a> Inliner<'a> { } } } - Export::Instance(result) + dfg::Export::Instance(result) } // FIXME(#4283) should make an official decision on whether this is diff --git a/crates/environ/src/component/types.rs b/crates/environ/src/component/types.rs index 07b0ab9d7e0c..b3f964e55d1e 100644 --- a/crates/environ/src/component/types.rs +++ b/crates/environ/src/component/types.rs @@ -166,10 +166,6 @@ indices! { /// Index that represents an exported module from a component since that's /// currently the only use for saving the entire module state at runtime. pub struct RuntimeModuleIndex(u32); - - /// Index into the list of fused adapters identified during compilation. - /// Used in conjuction with the `Adapters` type. 
- pub struct AdapterIndex(u32); } // Reexport for convenience some core-wasm indices which are also used in the diff --git a/crates/environ/src/fact.rs b/crates/environ/src/fact.rs index b0c24aaf70c3..2c9f7b73dd80 100644 --- a/crates/environ/src/fact.rs +++ b/crates/environ/src/fact.rs @@ -18,9 +18,8 @@ //! their imports and then generating a core wasm module to implement all of //! that. -use crate::component::{ - Adapter, AdapterOptions, ComponentTypes, CoreDef, StringEncoding, TypeFuncIndex, -}; +use crate::component::dfg::CoreDef; +use crate::component::{Adapter, AdapterOptions, ComponentTypes, StringEncoding, TypeFuncIndex}; use crate::{FuncIndex, GlobalIndex, MemoryIndex}; use std::collections::HashMap; use std::mem; diff --git a/crates/wasmtime/src/component/instance.rs b/crates/wasmtime/src/component/instance.rs index 1d5f24588be0..1f836051d33c 100644 --- a/crates/wasmtime/src/component/instance.rs +++ b/crates/wasmtime/src/component/instance.rs @@ -142,8 +142,6 @@ impl InstanceData { }, }) } - // This should have been processed away during compilation. 
- CoreDef::Adapter(_) => unreachable!(), } } diff --git a/crates/wast/Cargo.toml b/crates/wast/Cargo.toml index 6356363d7a8f..e093d47fb0ce 100644 --- a/crates/wast/Cargo.toml +++ b/crates/wast/Cargo.toml @@ -13,6 +13,7 @@ edition = "2021" anyhow = "1.0.19" wasmtime = { path = "../wasmtime", version = "0.40.0", default-features = false, features = ['cranelift'] } wast = "45.0.0" +log = "0.4" [badges] maintenance = { status = "actively-developed" } diff --git a/crates/wast/src/wast.rs b/crates/wast/src/wast.rs index 102e4f7abd00..d9f885acff28 100644 --- a/crates/wast/src/wast.rs +++ b/crates/wast/src/wast.rs @@ -351,6 +351,10 @@ impl WastContext { for directive in ast.directives { let sp = directive.span(); + if log::log_enabled!(log::Level::Debug) { + let (line, col) = sp.linecol_in(wast); + log::debug!("failed directive on {}:{}:{}", filename, line + 1, col); + } self.run_directive(directive) .map_err(|e| match e.downcast() { Ok(err) => adjust_wast(err).into(),