diff --git a/crates/common/src/result.rs b/crates/common/src/result.rs index cc5928fe..175fb7ea 100644 --- a/crates/common/src/result.rs +++ b/crates/common/src/result.rs @@ -33,6 +33,10 @@ pub enum WasmErrorInner { /// Wasmer failed to build a Module from wasm byte code. /// With the feature `wasmer_sys` enabled, building a Module includes compiling the wasm. ModuleBuild(String), + /// Wasmer failed to serialize a Module to bytes. + ModuleSerialize(String), + /// Wasmer failed to deserialize a Module from bytes. + ModuleDeserialize(String), /// The host failed to call a function in the guest. CallError(String), } @@ -49,8 +53,10 @@ impl WasmErrorInner { | Self::ErrorWhileError // Bad memory is bad memory. | Self::Memory - // Failing to build the wasmer Module means we cannot use it. + // Failing to build, serialize, or deserialize the wasmer Module means we cannot use it. | Self::ModuleBuild(_) + | Self::ModuleSerialize(_) + | Self::ModuleDeserialize(_) // This is ambiguous so best to treat as potentially corrupt. | Self::CallError(_) => true, diff --git a/crates/host/src/module.rs b/crates/host/src/module.rs index c75b7b97..108ad923 100644 --- a/crates/host/src/module.rs +++ b/crates/host/src/module.rs @@ -192,52 +192,76 @@ impl ModuleCache { match self.get_from_filesystem(key) { // Filesystem cache hit, deserialize and save to cache Ok(Some(serialized_module)) => { - let module = self.builder.from_serialized_module(serialized_module)?; + let module_result = self.builder.from_serialized_module(serialized_module); + + // If deserialization fails, we assume the file is corrupt, + // so it is removed from the filesystem cache, + // and the wasm is re-added to the cache again. + let module = match module_result { + Ok(d) => Ok(d), + Err(_) => { + // Remove from filesystem + if let Err(e) = self.remove_from_filesystem(key) { + tracing::debug!("Failed to remove cached wasm from filesystem with cache key {:?}: {:?}", key, e); + } + + // Build wasm and save to both caches + self.add_to_cache_and_filesystem(key, wasm) + } + }?; + self.add_to_cache(key, module.clone()); Ok(module) } // Filesystem cache miss, build wasm and save to both caches - _ => { - // Each module needs to be compiled with a new engine because - // of middleware like metering. Middleware is compiled into the - // module once and available in all instances created from it. - let module = self.builder.from_binary(wasm)?; - - // Round trip the wasmer Module through serialization. - // - // A new middleware per module is required, hence a new engine - // per module is needed too. Serialization allows for uncoupling - // the module from the engine that was used for compilation. - // After that another engine can be used to deserialize the - // module again. The engine has to live as long as the module to - // prevent memory access out of bounds errors. - // - // This procedure facilitates caching of modules that can be - // instantiated with fresh stores free from state. Instance - // creation is highly performant which makes caching of instances - // and stores unnecessary. - // - // See https://github.com/wasmerio/wasmer/discussions/3829#discussioncomment-5790763 - let serialized_module = module - .serialize() - .map_err(|e| wasm_error!(WasmErrorInner::ModuleBuild(e.to_string())))?; - let module = self - .builder - .from_serialized_module(serialized_module.clone())?; - - // Save serialized module to filesystem cache - self.add_to_filesystem(key, serialized_module)?; - - // Save module to in-memory cache - self.add_to_cache(key, module.clone()); - - Ok(module) - } + _ => self.add_to_cache_and_filesystem(key, wasm), } } + /// Build a wasm, then save it to both the in-memory cache and filesystem + fn add_to_cache_and_filesystem( + &self, + key: CacheKey, + wasm: &[u8], + ) -> Result, wasmer::RuntimeError> { + // Each module needs to be compiled with a new engine because + // of middleware like metering. Middleware is compiled into the + // module once and available in all instances created from it. + let module = self.builder.from_binary(wasm)?; + + // Round trip the wasmer Module through serialization. + // + // A new middleware per module is required, hence a new engine + // per module is needed too. Serialization allows for uncoupling + // the module from the engine that was used for compilation. + // After that another engine can be used to deserialize the + // module again. The engine has to live as long as the module to + // prevent memory access out of bounds errors. + // + // This procedure facilitates caching of modules that can be + // instantiated with fresh stores free from state. Instance + // creation is highly performant which makes caching of instances + // and stores unnecessary. + // + // See https://github.com/wasmerio/wasmer/discussions/3829#discussioncomment-5790763 + let serialized_module = module + .serialize() + .map_err(|e| wasm_error!(WasmErrorInner::ModuleBuild(e.to_string())))?; + let module = self + .builder + .from_serialized_module(serialized_module.clone())?; + + // Save serialized module to filesystem cache + self.add_to_filesystem(key, serialized_module)?; + + // Save module to in-memory cache + self.add_to_cache(key, module.clone()); + + Ok(module) + } + /// Get serialized module from filesystem fn get_from_filesystem(&self, key: CacheKey) -> Result, wasmer::RuntimeError> { self.filesystem_module_path(key) @@ -297,6 +321,15 @@ impl ModuleCache { Ok(()) } + // Remove serialized module from filesystem cache + fn remove_from_filesystem(&self, key: CacheKey) -> Result<(), std::io::Error> { + if let Some(fs_path) = self.filesystem_module_path(key) { + std::fs::remove_file(fs_path)?; + } + + Ok(()) + } + /// Check cache for module fn get_from_cache(&self, key: CacheKey) -> Option> { let mut cache = self.cache.write(); diff --git a/crates/host/src/module/wasmer_sys.rs b/crates/host/src/module/wasmer_sys.rs index 98d86d94..0a9e9ba6 100644 --- a/crates/host/src/module/wasmer_sys.rs +++ b/crates/host/src/module/wasmer_sys.rs @@ -80,7 +80,7 @@ mod tests { use super::make_engine; use crate::module::{builder::ModuleBuilder, CacheKey, ModuleCache, PlruCache}; use std::io::Write; - use tempfile::tempdir; + use tempfile::TempDir; use wasmer::Module; #[test] @@ -95,9 +95,10 @@ mod tests { 0x61, 0x64, 0x64, 0x5f, 0x6f, 0x6e, 0x65, 0x02, 0x07, 0x01, 0x00, 0x01, 0x00, 0x02, 0x70, 0x30, ]; - let tmp_fs_cache_dir = tempdir().unwrap().into_path(); + let tmp_fs_cache_dir = TempDir::new().unwrap(); let module_builder = ModuleBuilder::new(make_engine); - let module_cache = ModuleCache::new(module_builder, Some(tmp_fs_cache_dir.clone())); + let module_cache = + ModuleCache::new(module_builder, Some(tmp_fs_cache_dir.path().to_owned())); assert!(module_cache .filesystem_path .clone() @@ -123,8 +124,6 @@ mod tests { module_cache.filesystem_path.unwrap().join(hex::encode(key)); assert!(std::fs::metadata(serialized_module_path).is_ok()); } - - std::fs::remove_dir_all(tmp_fs_cache_dir).unwrap(); } #[test] @@ -165,9 +164,10 @@ mod tests { 0x61, 0x64, 0x64, 0x5f, 0x6f, 0x6e, 0x65, 0x02, 0x07, 0x01, 0x00, 0x01, 0x00, 0x02, 0x70, 0x30, ]; - let tmp_fs_cache_dir = tempdir().unwrap().into_path(); + let tmp_fs_cache_dir = TempDir::new().unwrap(); let module_builder = ModuleBuilder::new(make_engine); - let module_cache = ModuleCache::new(module_builder, Some(tmp_fs_cache_dir.clone())); + let module_cache = + ModuleCache::new(module_builder, Some(tmp_fs_cache_dir.path().to_owned())); let key: CacheKey = [0u8; 32]; // Build module, serialize, save directly to filesystem @@ -175,7 +175,7 @@ mod tests { let module = std::sync::Arc::new(Module::from_binary(&compiler_engine, wasm.as_slice()).unwrap()); let serialized_module = module.serialize().unwrap(); - let serialized_module_path = tmp_fs_cache_dir.clone().join(hex::encode(key)); + let serialized_module_path = tmp_fs_cache_dir.path().join(hex::encode(key)); let mut file = std::fs::OpenOptions::new() .write(true) .create_new(true) @@ -198,7 +198,61 @@ mod tests { *module.serialize().unwrap() ); } + } - std::fs::remove_dir_all(tmp_fs_cache_dir).unwrap(); + #[test] + fn cache_get_from_fs_corrupt() { + // simple example wasm taken from wasmer docs + // https://docs.rs/wasmer/latest/wasmer/struct.Module.html#example + let wasm: Vec = vec![ + 0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x06, 0x01, 0x60, 0x01, 0x7f, + 0x01, 0x7f, 0x03, 0x02, 0x01, 0x00, 0x07, 0x0b, 0x01, 0x07, 0x61, 0x64, 0x64, 0x5f, + 0x6f, 0x6e, 0x65, 0x00, 0x00, 0x0a, 0x09, 0x01, 0x07, 0x00, 0x20, 0x00, 0x41, 0x01, + 0x6a, 0x0b, 0x00, 0x1a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x01, 0x0a, 0x01, 0x00, 0x07, + 0x61, 0x64, 0x64, 0x5f, 0x6f, 0x6e, 0x65, 0x02, 0x07, 0x01, 0x00, 0x01, 0x00, 0x02, + 0x70, 0x30, + ]; + + // Bad serialized_wasm + let bad_serialized_wasm = vec![0x00]; + + let tmp_fs_cache_dir = TempDir::new().unwrap(); + let module_builder = ModuleBuilder::new(make_engine); + let module_cache = + ModuleCache::new(module_builder, Some(tmp_fs_cache_dir.path().to_owned())); + let key: CacheKey = [0u8; 32]; + + // Build module, serialize, save directly to filesystem + let serialized_module_path = tmp_fs_cache_dir.path().join(hex::encode(key)); + let mut file = std::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&serialized_module_path) + .unwrap(); + file.write_all(&bad_serialized_wasm).unwrap(); + + // Module can still be retrieved from fs cache, as it has been deleted from the filesystem and re-added to the cache + let res = module_cache.get(key, &wasm); + assert!(res.is_ok()); + + let compiler_engine = make_engine(); + let module = + std::sync::Arc::new(Module::from_binary(&compiler_engine, wasm.as_slice()).unwrap()); + + // make sure module is stored in deserialized cache + { + let deserialized_cached_module = module_cache.cache.write().get_item(&key).unwrap(); + assert_eq!( + *deserialized_cached_module.serialize().unwrap(), + *module.serialize().unwrap() + ); + } + + // make sure module has been stored in serialized filesystem cache + { + let serialized_module_path = + module_cache.filesystem_path.unwrap().join(hex::encode(key)); + assert!(std::fs::metadata(serialized_module_path).is_ok()); + } } }