From 437d5c8fbe8396a2663dab4aa21d370da77eef8d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E7=9A=93?=
Date: Fri, 19 Jan 2024 17:50:14 -0500
Subject: [PATCH] Lazy path cache (#90)

* feat: path cache now evaluates lazily

* chore: remove redundant slab dependency

* chore: fill in missing contexts on some errors

* style: use `reserve` instead of `with_capacity` for allocating strings

* fix: fix cache clearing in `rename`, `remove_dir` and `remove_file`

* fix: fix `create_dir` implementation when some parent dirs already exist

* style: use `.iter().rev()` to reverse vector instead of `.reverse()`

* fix: keep the file name when `open_file` creates a file that is not cached yet

* fix: interpolate the path into the error context produced by `regen`

* fix: invalidate cache entries by their lowercased, extension-less trie key
---
 crates/filesystem/Cargo.toml        |   2 +-
 crates/filesystem/src/path_cache.rs | 399 ++++++++++++++++++++++------
 2 files changed, 316 insertions(+), 85 deletions(-)

diff --git a/crates/filesystem/Cargo.toml b/crates/filesystem/Cargo.toml
index 71aa3104..4188442e 100644
--- a/crates/filesystem/Cargo.toml
+++ b/crates/filesystem/Cargo.toml
@@ -48,6 +48,7 @@ iter-read = "1.0.1"
 async_io_stream = "0.3.3"
 
 qp-trie.workspace = true
+slab.workspace = true
 
 [target.'cfg(windows)'.dependencies]
 winreg = "0.51.0"
@@ -58,7 +59,6 @@ async-fs = "2.1.0"
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
 once_cell.workspace = true
-slab.workspace = true
 
 luminol-web = { version = "0.4.0", path = "../web/" }
 
diff --git a/crates/filesystem/src/path_cache.rs b/crates/filesystem/src/path_cache.rs
index a7facf7f..ec80d6ac 100644
--- a/crates/filesystem/src/path_cache.rs
+++ b/crates/filesystem/src/path_cache.rs
@@ -15,25 +15,182 @@
 // You should have received a copy of the GNU General Public License
 // along with Luminol.  If not, see <http://www.gnu.org/licenses/>.
 
-use crate::{DirEntry, Error, Metadata, OpenFlags, Result};
 use color_eyre::eyre::WrapErr;
+use itertools::Itertools;
+
+use crate::{DirEntry, Error, FileSystem as FileSystemTrait, Metadata, OpenFlags, Result};
+
+const TRIE_SUFFIX: &str = "\0";
 
 #[derive(Debug, Clone)]
+struct CactusNode {
+    /// The path component stored in this cactus stack node.
+    value: String,
+    /// The index of the next node within the cactus stack, or `None` if there is no next node.
+    next: Option<usize>,
+    /// One more than the number of times you need to follow `next` until you get `None`.
+    len: usize,
+}
+
+/// This cache stores the lowercased versions of paths and their corresponding original paths.
+/// Given a lowercased path, for example "data/mapinfos", you can find the original path by first
+/// appending a forward slash followed by `TRIE_SUFFIX` to the end of the path, then looking up the
+/// file at that path in `trie`. This gives you the index of a node in `cactus`, which stores the
+/// original path. To recover the original path, follow the chain of cactus stack nodes by
+/// following the `next` field on the nodes. This gives you the path components of the original
+/// path in reverse order.
+#[derive(Debug, Default, Clone)]
+struct Cache {
+    trie: crate::FileSystemTrie<usize>,
+    cactus: slab::Slab<CactusNode>,
+}
+
+#[derive(Debug)]
 pub struct FileSystem<F> {
     fs: F,
-    cache: dashmap::DashMap<camino::Utf8PathBuf, camino::Utf8PathBuf>,
+    cache: parking_lot::RwLock<Cache>,
+}
+
+impl Cache {
+    fn get_path_from_cactus_index(&self, index: usize) -> camino::Utf8PathBuf {
+        let Some(node) = self.cactus.get(index) else {
+            return Default::default();
+        };
+
+        let mut vec = Vec::with_capacity(node.len);
+
+        let mut node = Some(node);
+        while let Some(n) = node {
+            vec.push(&n.value);
+            node = n.next.and_then(|next| self.cactus.get(next));
+        }
+
+        vec.iter().rev().join("/").into()
+    }
+
+    /// Gets the original, case-sensitive version of the given case-insensitive path from the underlying
+    /// filesystem and puts it into the cache.
+    /// This method memoizes: if we want to insert "this/is/a/path" and the cache already contains
+    /// the case-sensitive version of the path "this/is", we will only scan the underlying filesystem
+    /// for the case-sensitive names of the remaining two components in the path.
+    fn regen(
+        &mut self,
+        fs: &impl FileSystemTrait,
+        path: impl AsRef<camino::Utf8Path>,
+    ) -> crate::Result<()> {
+        let mut path = to_lowercase(path);
+        path.set_extension("");
+        if self.trie.contains_dir(&path) {
+            return Ok(());
+        }
+
+        let prefix = self.trie.get_dir_prefix(&path);
+        let mut cactus_index = (!prefix.as_str().is_empty())
+            .then(|| *self.trie.get_file(with_trie_suffix(prefix)).unwrap());
+        let mut len = prefix.iter().count();
+
+        // Get the longest prefix of the path that is in the trie, convert it to lowercase and
+        // remove file extensions
+        let mut lower_string = prefix.to_string();
+        if let Some(additional) = path
+            .as_str()
+            .bytes()
+            .len()
+            .checked_sub(lower_string.bytes().len())
+        {
+            lower_string.reserve(additional);
+        }
+
+        // This is the same thing as `lower_string` except with the actual letter casing from the
+        // filesystem and without removing file extensions
+        let mut original_string = cactus_index.map_or_else(Default::default, |i| {
+            self.get_path_from_cactus_index(i).to_string()
+        });
+        if let Some(additional) = path
+            .as_str()
+            .bytes()
+            .len()
+            .checked_sub(original_string.bytes().len())
+        {
+            original_string.reserve(additional);
+        }
+
+        // Iterate over the remaining path components that aren't present in
+        // `lower_string`/`original_string`
+        for name in path.strip_prefix(prefix).unwrap().iter() {
+            let entries = fs
+                .read_dir(&original_string)
+                .wrap_err_with(|| format!("While regenerating cache for path {path:?}"))?;
+            len += 1;
+
+            let mut original_name = None;
+            let mut new_cactus_index = 0;
+            for entry in entries.into_iter() {
+                let entry_name = camino::Utf8Path::new(entry.file_name())
+                    .file_stem()
+                    .unwrap_or(entry.file_name())
+                    .to_lowercase();
+                let index = self.cactus.insert(CactusNode {
+                    value: entry.file_name().to_string(),
+                    next: cactus_index,
+                    len,
+                });
+                self.trie.create_file(
+                    if lower_string.is_empty() {
+                        with_trie_suffix(&entry_name)
+                    } else {
+                        format!("{lower_string}/{entry_name}/{TRIE_SUFFIX}").into()
+                    },
+                    index,
+                );
+                if entry_name == name {
+                    original_name = Some(entry.file_name().to_string());
+                    new_cactus_index = index;
+                }
+            }
+
+            let Some(original_name) = original_name else {
+                return Ok(());
+            };
+            if !lower_string.is_empty() {
+                lower_string.push('/');
+            }
+            lower_string.push_str(name);
+            if !original_string.is_empty() {
+                original_string.push('/');
+            }
+            original_string.push_str(&original_name);
+            cactus_index = Some(new_cactus_index);
+        }
+
+        Ok(())
+    }
+
+    /// Gets the case-sensitive version of the given case-insensitive path from the cache.
+    /// The path has to already exist in the cache; you need to use `.regen` to insert paths into
+    /// the cache before this can get them.
+    fn desensitize(&self, path: impl AsRef<camino::Utf8Path>) -> Option<camino::Utf8PathBuf> {
+        let path = path.as_ref();
+        if path.as_str().is_empty() {
+            return Some(Default::default());
+        }
+        let mut path = to_lowercase(path);
+        path.set_extension("");
+        self.trie
+            .get_file(with_trie_suffix(&path))
+            .map(|i| self.get_path_from_cactus_index(*i))
+    }
 }
 
 impl<F> FileSystem<F>
 where
-    F: crate::FileSystem,
+    F: FileSystemTrait,
 {
     pub fn new(fs: F) -> Result<Self> {
         let this = FileSystem {
             fs,
-            cache: dashmap::DashMap::new(),
+            cache: Default::default(),
         };
-        this.regen_cache()?;
         Ok(this)
     }
 
@@ -41,83 +198,54 @@ where
         &self.fs
     }
 
-    pub fn regen_cache(&self) -> Result<()> {
-        let c = "While regenerating path cache";
-
-        fn read_dir_recursive(
-            fs: &(impl crate::FileSystem + ?Sized),
-            path: impl AsRef<camino::Utf8Path>,
-            mut f: impl FnMut(&camino::Utf8Path),
-        ) -> Result<()> {
-            fn internal(
-                fs: &(impl crate::FileSystem + ?Sized),
-                path: impl AsRef<camino::Utf8Path>,
-                f: &mut impl FnMut(&camino::Utf8Path),
-            ) -> Result<()> {
-                // In web builds, RTPs are currently to be placed in the "RTP" subdirectory of
-                // the project root directory, so this is to avoid loading the contents of
-                // those directories twice
-                let skip = matches!(path.as_ref().iter().next_back(), Some("RTP"));
-
-                for entry in fs.read_dir(path)? {
-                    f(entry.path());
-                    if !skip && !entry.metadata().is_file {
-                        internal(fs, entry.path(), f)?;
-                    }
-                }
-                Ok(())
-            }
-            internal(fs, path, &mut f)
-        }
-
-        self.cache.clear();
-        read_dir_recursive(&self.fs, "", |path| {
-            let mut lowercase = to_lowercase(path);
-            lowercase.set_extension("");
-
-            self.cache.insert(lowercase, path.to_path_buf());
-        })
-        .wrap_err(c)
-    }
-
     pub fn debug_ui(&self, ui: &mut egui::Ui) {
+        let cache = self.cache.read();
+
         egui::ScrollArea::vertical()
             .id_source("luminol_path_cache_debug_ui")
             .show_rows(
                 ui,
                 ui.text_style_height(&egui::TextStyle::Body),
-                self.cache.len(),
+                cache.cactus.len(),
                 |ui, rows| {
-                    for (_, item) in self
-                        .cache
-                        .iter()
+                    for (_, (key, index)) in cache
+                        .trie
+                        .iter_prefix("")
+                        .unwrap()
                         .enumerate()
                         .filter(|(index, _)| rows.contains(index))
                     {
+                        let Some(key) = key.as_str().strip_suffix(&format!("/{TRIE_SUFFIX}"))
+                        else {
+                            continue;
+                        };
                         ui.horizontal(|ui| {
-                            ui.label(item.key().as_str());
+                            ui.label(key);
                             ui.label("➡");
-                            ui.label(item.value().as_str());
+                            ui.label(cache.get_path_from_cactus_index(*index).as_str());
                         });
                     }
                 },
             );
     }
-
-    pub fn desensitize(&self, path: impl AsRef<camino::Utf8Path>) -> Option<camino::Utf8PathBuf> {
-        let mut path = to_lowercase(path);
-        path.set_extension("");
-        self.cache.get(&path).as_deref().cloned()
-    }
 }
 
 pub fn to_lowercase(p: impl AsRef<camino::Utf8Path>) -> camino::Utf8PathBuf {
     p.as_ref().as_str().to_lowercase().into()
 }
 
-impl<F> crate::FileSystem for FileSystem<F>
+fn with_trie_suffix(path: impl AsRef<camino::Utf8Path>) -> camino::Utf8PathBuf {
+    let path = path.as_ref();
+    if path.as_str().is_empty() {
+        TRIE_SUFFIX.into()
+    } else {
+        format!("{path}/{TRIE_SUFFIX}").into()
+    }
+}
+
+impl<F> FileSystemTrait for FileSystem<F>
 where
-    F: crate::FileSystem,
+    F: FileSystemTrait,
 {
     type File = F::File;
 
@@ -126,24 +254,53 @@ where
         path: impl AsRef<camino::Utf8Path>,
         flags: OpenFlags,
     ) -> Result<Self::File> {
+        let mut cache = self.cache.write();
         let path = path.as_ref();
         let c = format!("While opening file {path:?} in a path cache");
-        if flags.contains(OpenFlags::Create) {
-            let mut lower_path = to_lowercase(path);
-            lower_path.set_extension("");
-            self.cache.insert(lower_path, path.to_path_buf());
+        cache.regen(&self.fs, path).wrap_err_with(|| c.clone())?;
+
+        if flags.contains(OpenFlags::Create) && cache.desensitize(path).is_none() {
+            // The file does not exist yet, so only its parent directory can be resolved through
+            // the cache; the new file itself keeps the name the caller asked for.
+            let file_name = path
+                .file_name()
+                .ok_or(Error::NotExist)
+                .wrap_err_with(|| c.clone())?;
+            let path = cache
+                .desensitize(
+                    path.parent()
+                        .ok_or(Error::NotExist)
+                        .wrap_err_with(|| c.clone())?,
+                )
+                .ok_or(Error::NotExist)
+                .wrap_err_with(|| c.clone())?
+                .join(file_name);
+            let file = self
+                .fs
+                .open_file(&path, flags)
+                .wrap_err_with(|| c.clone())?;
+            cache.regen(&self.fs, &path).wrap_err_with(|| c.clone())?;
+            Ok(file)
+        } else {
+            self.fs
+                .open_file(
+                    cache
+                        .desensitize(path)
+                        .ok_or(Error::NotExist)
+                        .wrap_err_with(|| c.clone())?,
+                    flags,
+                )
+                .wrap_err_with(|| c.clone())
         }
-        let path = self
-            .desensitize(path)
-            .ok_or(Error::NotExist)
-            .wrap_err_with(|| c.clone())?;
-        self.fs.open_file(path, flags).wrap_err_with(|| c.clone())
     }
 
     fn metadata(&self, path: impl AsRef<camino::Utf8Path>) -> Result<Metadata> {
+        let mut cache = self.cache.write();
         let path = path.as_ref();
         let c = format!("While getting metadata for {path:?} in a path cache");
-        let path = self
+        cache.regen(&self.fs, path).wrap_err_with(|| c.clone())?;
+
+        let path = cache
             .desensitize(path)
             .ok_or(Error::NotExist)
             .wrap_err_with(|| c.clone())?;
@@ -155,65 +312,139 @@ where
         from: impl AsRef<camino::Utf8Path>,
         to: impl AsRef<camino::Utf8Path>,
     ) -> Result<()> {
+        let mut cache = self.cache.write();
         let c = format!(
             "While renaming {:?} to {:?} in a path cache",
             from.as_ref(),
             to.as_ref()
         );
-        let from = self
+        cache
+            .regen(&self.fs, from.as_ref())
+            .wrap_err_with(|| c.clone())?;
+        let from = cache
             .desensitize(from)
             .ok_or(Error::NotExist)
             .wrap_err_with(|| c.clone())?;
-        let to = to.as_ref().to_path_buf();
 
-        self.fs.rename(&from, &to).wrap_err_with(|| c.clone())?;
+        self.fs.rename(&from, to).wrap_err_with(|| c.clone())?;
 
-        self.cache.remove(&from);
-        self.cache.insert(to_lowercase(&to), to);
+        // Cache keys are lowercased and extension-less, unlike the on-disk path used above.
+        let mut key = to_lowercase(&from);
+        key.set_extension("");
+        let stale = cache
+            .trie
+            .iter_prefix(&key)
+            .map(|entries| entries.map(|(_, i)| *i).collect_vec())
+            .unwrap_or_default();
+        for index in stale {
+            cache.cactus.remove(index);
+        }
+        cache.trie.remove_dir(&key);
 
         Ok(())
     }
 
     fn exists(&self, path: impl AsRef<camino::Utf8Path>) -> Result<bool> {
-        Ok(self.desensitize(path).is_some())
+        let mut cache = self.cache.write();
+        let path = path.as_ref();
+        let c = format!("While checking if {path:?} exists in a path cache");
+        cache.regen(&self.fs, path).wrap_err_with(|| c.clone())?;
+        Ok(cache.desensitize(path).is_some())
     }
 
     fn create_dir(&self, path: impl AsRef<camino::Utf8Path>) -> Result<()> {
-        let path = path.as_ref().to_path_buf();
+        let mut cache = self.cache.write();
+        let path = path.as_ref();
         let c = format!("While creating directory {path:?} in a path cache");
-
-        self.fs.create_dir(&path).wrap_err_with(|| c.clone())?;
-
-        self.cache.insert(to_lowercase(&path), path);
+        cache.regen(&self.fs, path).wrap_err_with(|| c.clone())?;
+
+        let mut lower_path = to_lowercase(path);
+        lower_path.set_extension("");
+        let prefix = cache.trie.get_dir_prefix(lower_path);
+        let cactus_index = (!prefix.as_str().is_empty())
+            .then(|| *cache.trie.get_file(with_trie_suffix(prefix)).unwrap());
+        let original_prefix =
+            cactus_index.map_or_else(Default::default, |i| cache.get_path_from_cactus_index(i));
+        let len = original_prefix.iter().count();
+
+        self.fs
+            .create_dir(if len == 0 {
+                path.to_path_buf()
+            } else if len == path.iter().count() {
+                original_prefix
+            } else {
+                format!("{original_prefix}/{}", path.iter().skip(len).join("/")).into()
+            })
+            .wrap_err_with(|| c.clone())?;
 
         Ok(())
     }
 
     fn remove_dir(&self, path: impl AsRef<camino::Utf8Path>) -> Result<()> {
-        let path = self.desensitize(path).ok_or(Error::NotExist)?;
+        let mut cache = self.cache.write();
+        let path = path.as_ref();
         let c = format!("While removing directory {path:?} in a path cache");
+        cache.regen(&self.fs, path).wrap_err_with(|| c.clone())?;
+        let path = cache
+            .desensitize(path)
+            .ok_or(Error::NotExist)
+            .wrap_err_with(|| c.clone())?;
 
         self.fs.remove_dir(&path).wrap_err_with(|| c.clone())?;
 
-        self.cache.remove(&to_lowercase(path));
+        // Cache keys are lowercased and extension-less, unlike the on-disk path used above.
+        let mut key = to_lowercase(&path);
+        key.set_extension("");
+        let stale = cache
+            .trie
+            .iter_prefix(&key)
+            .map(|entries| entries.map(|(_, i)| *i).collect_vec())
+            .unwrap_or_default();
+        for index in stale {
+            cache.cactus.remove(index);
+        }
+        cache.trie.remove_dir(&key);
 
         Ok(())
     }
 
     fn remove_file(&self, path: impl AsRef<camino::Utf8Path>) -> Result<()> {
-        let path = self.desensitize(path).ok_or(Error::NotExist)?;
+        let mut cache = self.cache.write();
+        let path = path.as_ref();
         let c = format!("While removing file {path:?} in a path cache");
+        cache.regen(&self.fs, path).wrap_err_with(|| c.clone())?;
+        let path = cache
+            .desensitize(path)
+            .ok_or(Error::NotExist)
+            .wrap_err_with(|| c.clone())?;
 
         self.fs.remove_file(&path).wrap_err_with(|| c.clone())?;
 
-        self.cache.remove(&to_lowercase(path));
+        // Cache keys are lowercased and extension-less, unlike the on-disk path used above.
+        let mut key = to_lowercase(&path);
+        key.set_extension("");
+        let stale = cache
+            .trie
+            .iter_prefix(&key)
+            .map(|entries| entries.map(|(_, i)| *i).collect_vec())
+            .unwrap_or_default();
+        for index in stale {
+            cache.cactus.remove(index);
+        }
+        cache.trie.remove_dir(&key);
 
         Ok(())
     }
 
     fn read_dir(&self, path: impl AsRef<camino::Utf8Path>) -> Result<Vec<DirEntry>> {
-        let path = self.desensitize(path).ok_or(Error::NotExist)?;
+        let mut cache = self.cache.write();
+        let path = path.as_ref();
         let c = format!("While reading the contents of the directory {path:?} in a path cache");
+        cache.regen(&self.fs, path).wrap_err_with(|| c.clone())?;
+        let path = cache
+            .desensitize(path)
+            .ok_or(Error::NotExist)
+            .wrap_err_with(|| c.clone())?;
         self.fs.read_dir(path).wrap_err_with(|| c.clone())
     }
 }