From 5a18ac83e3894b65a5244707f617c488d0802cf4 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Sun, 23 Jun 2024 19:18:16 +0100 Subject: [PATCH] Improve normalization of `VendoredPath`s --- Cargo.lock | 1 + .../red_knot_module_resolver/src/typeshed.rs | 3 +- crates/ruff_db/Cargo.toml | 1 + crates/ruff_db/src/vendored.rs | 74 +++++++++++++------ crates/ruff_db/src/vfs.rs | 47 ++++++++---- 5 files changed, 87 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 657c408977ea3..ba5873c903d26 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2210,6 +2210,7 @@ dependencies = [ "ruff_text_size", "rustc-hash", "salsa", + "thiserror", "tracing", "zip", ] diff --git a/crates/red_knot_module_resolver/src/typeshed.rs b/crates/red_knot_module_resolver/src/typeshed.rs index fa49261d5f814..8e23916a34941 100644 --- a/crates/red_knot_module_resolver/src/typeshed.rs +++ b/crates/red_knot_module_resolver/src/typeshed.rs @@ -64,9 +64,10 @@ mod tests { let vendored_path_kind = vendored_typeshed_stubs .metadata(vendored_path) - .unwrap_or_else(|| { + .unwrap_or_else(|err| { panic!( "Expected metadata for {vendored_path:?} to be retrievable from the `VendoredFileSystem! + Error encountered: {err} Vendored file system: diff --git a/crates/ruff_db/Cargo.toml b/crates/ruff_db/Cargo.toml index 2c56e1ce451ff..146ead0ed95a9 100644 --- a/crates/ruff_db/Cargo.toml +++ b/crates/ruff_db/Cargo.toml @@ -23,6 +23,7 @@ filetime = { workspace = true } salsa = { workspace = true } tracing = { workspace = true } rustc-hash = { workspace = true } +thiserror = { workspace = true } zip = { workspace = true } [dev-dependencies] diff --git a/crates/ruff_db/src/vendored.rs b/crates/ruff_db/src/vendored.rs index e5197091dea77..5764bcff9bccb 100644 --- a/crates/ruff_db/src/vendored.rs +++ b/crates/ruff_db/src/vendored.rs @@ -11,8 +11,6 @@ pub use path::{VendoredPath, VendoredPathBuf}; pub mod path; -type Result = io::Result; - /// File system that stores all content in a static zip archive /// bundled as part of the Ruff binary. /// @@ -23,14 +21,16 @@ pub struct VendoredFileSystem { } impl VendoredFileSystem { - pub fn new(raw_bytes: &'static [u8]) -> Result { + pub fn new(raw_bytes: &'static [u8]) -> io::Result { Ok(Self { inner: VendoredFileSystemInner::new(raw_bytes)?, }) } pub fn exists(&self, path: &VendoredPath) -> bool { - let normalized = normalize_vendored_path(path); + let Ok(normalized) = normalize_vendored_path(path) else { + return false; + }; let inner_locked = self.inner.lock(); let mut archive = inner_locked.borrow_mut(); @@ -44,8 +44,8 @@ impl VendoredFileSystem { .is_ok() } - pub fn metadata(&self, path: &VendoredPath) -> Option { - let normalized = normalize_vendored_path(path); + pub fn metadata(&self, path: &VendoredPath) -> Result { + let normalized = normalize_vendored_path(path)?; let inner_locked = self.inner.lock(); // Must probe the zipfile twice, as "stdlib" and "stdlib/" are considered @@ -54,13 +54,13 @@ impl VendoredFileSystem { // work the same as other paths in Ruff. let mut archive = inner_locked.borrow_mut(); if let Ok(zip_file) = archive.lookup_path(&normalized) { - return Some(Metadata::from_zip_file(zip_file)); + return Ok(Metadata::from_zip_file(zip_file)); } if let Ok(zip_file) = archive.lookup_path(&normalized.with_trailing_slash()) { - return Some(Metadata::from_zip_file(zip_file)); + return Ok(Metadata::from_zip_file(zip_file)); } - None + Err(MetadataLookupError::NonExistentPath(path.to_path_buf())) } /// Read the entire contents of the zip file at `path` into a string @@ -69,10 +69,10 @@ impl VendoredFileSystem { /// - The path does not exist in the underlying zip archive /// - The path exists in the underlying zip archive, but represents a directory /// - The contents of the zip file at `path` contain invalid UTF-8 - pub fn read(&self, path: &VendoredPath) -> Result { + pub fn read(&self, path: &VendoredPath) -> Result { let inner_locked = self.inner.lock(); let mut archive = inner_locked.borrow_mut(); - let mut zip_file = archive.lookup_path(&normalize_vendored_path(path))?; + let mut zip_file = archive.lookup_path(&normalize_vendored_path(path)?)?; let mut buffer = String::new(); zip_file.read_to_string(&mut buffer)?; Ok(buffer) @@ -114,6 +114,30 @@ impl fmt::Debug for VendoredFileSystem { } } +#[derive(Debug, thiserror::Error)] +pub enum InvalidVendoredPathError { + #[error("Unsupported component in a vendored path: {0}")] + UnsupportedComponent(String), + #[error("Path attempts to escape out of the zipfile using `..` parts")] + EscapeFromZipFile, +} + +#[derive(Debug, thiserror::Error)] +pub enum MetadataLookupError { + #[error("{0:?} does not exist in the zip archive")] + NonExistentPath(VendoredPathBuf), + #[error("{0}")] + InvalidPath(#[from] InvalidVendoredPathError), +} + +#[derive(Debug, thiserror::Error)] +pub enum ZipFileReadError { + #[error("{0}")] + InvalidPath(#[from] InvalidVendoredPathError), + #[error("Path could not be read due to {0}")] + UnreadablePath(#[from] io::Error), +} + /// Private struct only used in `Debug` implementations /// /// This could possibly be unified with the `Metadata` struct, @@ -200,7 +224,7 @@ struct VendoredFileSystemInner(Mutex>); type LockedZipArchive<'a> = MutexGuard<'a, RefCell>; impl VendoredFileSystemInner { - fn new(raw_bytes: &'static [u8]) -> Result { + fn new(raw_bytes: &'static [u8]) -> io::Result { Ok(Self(Mutex::new(RefCell::new(VendoredZipArchive::new( raw_bytes, )?)))) @@ -221,11 +245,11 @@ impl VendoredFileSystemInner { struct VendoredZipArchive(ZipArchive>); impl VendoredZipArchive { - fn new(data: &'static [u8]) -> Result { + fn new(data: &'static [u8]) -> io::Result { Ok(Self(ZipArchive::new(io::Cursor::new(data))?)) } - fn lookup_path(&mut self, path: &NormalizedVendoredPath) -> Result { + fn lookup_path(&mut self, path: &NormalizedVendoredPath) -> io::Result { Ok(self.0.by_name(path.as_str())?) } @@ -260,7 +284,9 @@ impl NormalizedVendoredPath { /// If a path with an unsupported component for vendored paths is passed. /// Unsupported components are path prefixes, /// and path root directories appearing anywhere except at the start of the path. -fn normalize_vendored_path(path: &VendoredPath) -> NormalizedVendoredPath { +fn normalize_vendored_path( + path: &VendoredPath, +) -> Result { // Allow the `RootDir` component, but only if it is at the very start of the string. let mut components = path.components().peekable(); if let Some(camino::Utf8Component::RootDir) = components.peek() { @@ -273,12 +299,18 @@ fn normalize_vendored_path(path: &VendoredPath) -> NormalizedVendoredPath { camino::Utf8Component::Normal(part) => normalized_parts.push(part), camino::Utf8Component::CurDir => continue, camino::Utf8Component::ParentDir => { - normalized_parts.pop(); + if normalized_parts.pop().is_none() { + return Err(InvalidVendoredPathError::EscapeFromZipFile); + } + } + unsupported => { + return Err(InvalidVendoredPathError::UnsupportedComponent( + unsupported.to_string(), + )) } - unsupported => panic!("Unsupported component in a vendored path: {unsupported}"), } } - NormalizedVendoredPath(normalized_parts.join("/")) + Ok(NormalizedVendoredPath(normalized_parts.join("/"))) } #[cfg(test)] @@ -426,10 +458,8 @@ mod tests { let mock_typeshed = mock_typeshed(); let path = VendoredPath::new(path); assert!(!mock_typeshed.exists(path)); - assert!(mock_typeshed.metadata(path).is_none()); - assert!(mock_typeshed - .read(path) - .is_err_and(|err| err.to_string().contains("file not found"))); + assert!(mock_typeshed.metadata(path).is_err()); + assert!(mock_typeshed.read(path).is_err()); } #[test] diff --git a/crates/ruff_db/src/vfs.rs b/crates/ruff_db/src/vfs.rs index f9ca06eb6f74b..e802a13fbc53a 100644 --- a/crates/ruff_db/src/vfs.rs +++ b/crates/ruff_db/src/vfs.rs @@ -1,3 +1,4 @@ +use std::io; use std::sync::Arc; use countme::Count; @@ -8,7 +9,7 @@ pub use path::VfsPath; use crate::file_revision::FileRevision; use crate::file_system::FileSystemPath; -use crate::vendored::VendoredFileSystem; +use crate::vendored::{MetadataLookupError, VendoredFileSystem, ZipFileReadError}; use crate::vfs::private::FileStatus; use crate::{Db, FxDashMap}; @@ -31,9 +32,15 @@ pub fn system_path_to_file(db: &dyn Db, path: impl AsRef) -> Opt } } -/// Interns a vendored file path. Returns `Some` if the vendored file for `path` exists and `None` otherwise. +/// Interns a vendored file path. +/// +/// - Returns `Ok(VfsFile)` if `path` is valid and points to a file that exists +/// - Otherwise, returns `Err` #[inline] -pub fn vendored_path_to_file(db: &dyn Db, path: impl AsRef) -> Option { +pub fn vendored_path_to_file( + db: &dyn Db, + path: impl AsRef, +) -> Result { db.vfs().vendored(db, path.as_ref()) } @@ -46,7 +53,7 @@ pub fn vendored_path_to_file(db: &dyn Db, path: impl AsRef) -> Opt pub fn vfs_path_to_file(db: &dyn Db, path: &VfsPath) -> Option { match path { VfsPath::FileSystem(path) => system_path_to_file(db, path), - VfsPath::Vendored(path) => vendored_path_to_file(db, path), + VfsPath::Vendored(path) => vendored_path_to_file(db, path).ok(), } } @@ -135,7 +142,7 @@ impl Vfs { /// Looks up a vendored file by its path. Returns `Some` if a vendored file for the given path /// exists and `None` otherwise. - fn vendored(&self, db: &dyn Db, path: &VendoredPath) -> Option { + fn vendored(&self, db: &dyn Db, path: &VendoredPath) -> Result { let file = match self .inner .files_by_path @@ -160,7 +167,7 @@ impl Vfs { } }; - Some(file) + Ok(file) } /// Stubs out the vendored files with the given content. @@ -312,28 +319,32 @@ impl Default for VendoredVfs { } impl VendoredVfs { - fn revision(&self, path: &VendoredPath) -> Option { + fn revision(&self, path: &VendoredPath) -> Result { match self { VendoredVfs::Real(file_system) => file_system .metadata(path) .map(|metadata| metadata.revision()), - VendoredVfs::Stubbed(stubbed) => stubbed - .contains_key(&path.to_path_buf()) - .then_some(FileRevision::new(1)), + VendoredVfs::Stubbed(stubbed) => { + if stubbed.contains_key(&path.to_path_buf()) { + Ok(FileRevision::new(1)) + } else { + Err(MetadataLookupError::NonExistentPath(path.to_path_buf())) + } + } } } - fn read(&self, path: &VendoredPath) -> std::io::Result { + fn read(&self, path: &VendoredPath) -> Result { match self { VendoredVfs::Real(file_system) => file_system.read(path), VendoredVfs::Stubbed(stubbed) => { if let Some(contents) = stubbed.get(&path.to_path_buf()).as_deref().cloned() { Ok(contents) } else { - Err(std::io::Error::new( - std::io::ErrorKind::NotFound, - format!("Could not find file {path:?}"), - )) + Err(ZipFileReadError::UnreadablePath(io::Error::new( + io::ErrorKind::NotFound, + format!("Could not read {path:?}"), + ))) } } } @@ -357,6 +368,7 @@ mod private { mod tests { use crate::file_revision::FileRevision; use crate::tests::TestDb; + use crate::vendored::MetadataLookupError; use crate::vfs::{system_path_to_file, vendored_path_to_file}; #[test] @@ -402,6 +414,9 @@ mod tests { fn stubbed_vendored_file_non_existing() { let db = TestDb::new(); - assert_eq!(vendored_path_to_file(&db, "test.py"), None); + assert!(matches!( + vendored_path_to_file(&db, "test.py"), + Err(MetadataLookupError::NonExistentPath(_)) + )); } }