Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve normalization of VendoredPaths #11991

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion crates/red_knot_module_resolver/src/typeshed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,10 @@ mod tests {

let vendored_path_kind = vendored_typeshed_stubs
.metadata(vendored_path)
.unwrap_or_else(|| {
.unwrap_or_else(|err| {
panic!(
"Expected metadata for {vendored_path:?} to be retrievable from the `VendoredFileSystem!
Error encountered: {err}

Vendored file system:

Expand Down
1 change: 1 addition & 0 deletions crates/ruff_db/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ filetime = { workspace = true }
salsa = { workspace = true }
tracing = { workspace = true }
rustc-hash = { workspace = true }
thiserror = { workspace = true }
zip = { workspace = true }

[dev-dependencies]
Expand Down
74 changes: 52 additions & 22 deletions crates/ruff_db/src/vendored.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ pub use path::{VendoredPath, VendoredPathBuf};

pub mod path;

type Result<T> = io::Result<T>;

/// File system that stores all content in a static zip archive
/// bundled as part of the Ruff binary.
///
Expand All @@ -23,14 +21,16 @@ pub struct VendoredFileSystem {
}

impl VendoredFileSystem {
pub fn new(raw_bytes: &'static [u8]) -> Result<Self> {
pub fn new(raw_bytes: &'static [u8]) -> io::Result<Self> {
Ok(Self {
inner: VendoredFileSystemInner::new(raw_bytes)?,
})
}

pub fn exists(&self, path: &VendoredPath) -> bool {
let normalized = normalize_vendored_path(path);
let Ok(normalized) = normalize_vendored_path(path) else {
return false;
};
let inner_locked = self.inner.lock();
let mut archive = inner_locked.borrow_mut();

Expand All @@ -44,8 +44,8 @@ impl VendoredFileSystem {
.is_ok()
}

pub fn metadata(&self, path: &VendoredPath) -> Option<Metadata> {
let normalized = normalize_vendored_path(path);
pub fn metadata(&self, path: &VendoredPath) -> Result<Metadata, MetadataLookupError> {
let normalized = normalize_vendored_path(path)?;
let inner_locked = self.inner.lock();

// Must probe the zipfile twice, as "stdlib" and "stdlib/" are considered
Expand All @@ -54,13 +54,13 @@ impl VendoredFileSystem {
// work the same as other paths in Ruff.
let mut archive = inner_locked.borrow_mut();
if let Ok(zip_file) = archive.lookup_path(&normalized) {
return Some(Metadata::from_zip_file(zip_file));
return Ok(Metadata::from_zip_file(zip_file));
}
if let Ok(zip_file) = archive.lookup_path(&normalized.with_trailing_slash()) {
return Some(Metadata::from_zip_file(zip_file));
return Ok(Metadata::from_zip_file(zip_file));
}

None
Err(MetadataLookupError::NonExistentPath(path.to_path_buf()))
}

/// Read the entire contents of the zip file at `path` into a string
Expand All @@ -69,10 +69,10 @@ impl VendoredFileSystem {
/// - The path does not exist in the underlying zip archive
/// - The path exists in the underlying zip archive, but represents a directory
/// - The contents of the zip file at `path` contain invalid UTF-8
pub fn read(&self, path: &VendoredPath) -> Result<String> {
pub fn read(&self, path: &VendoredPath) -> Result<String, ZipFileReadError> {
let inner_locked = self.inner.lock();
let mut archive = inner_locked.borrow_mut();
let mut zip_file = archive.lookup_path(&normalize_vendored_path(path))?;
let mut zip_file = archive.lookup_path(&normalize_vendored_path(path)?)?;
let mut buffer = String::new();
zip_file.read_to_string(&mut buffer)?;
Ok(buffer)
Expand Down Expand Up @@ -114,6 +114,30 @@ impl fmt::Debug for VendoredFileSystem {
}
}

#[derive(Debug, thiserror::Error)]
pub enum InvalidVendoredPathError {
#[error("Unsupported component in a vendored path: {0}")]
UnsupportedComponent(String),
#[error("Path attempts to escape out of the zipfile using `..` parts")]
EscapeFromZipFile,
}

#[derive(Debug, thiserror::Error)]
pub enum MetadataLookupError {
#[error("{0:?} does not exist in the zip archive")]
NonExistentPath(VendoredPathBuf),
#[error("{0}")]
InvalidPath(#[from] InvalidVendoredPathError),
}
Comment on lines +125 to +131
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dealing with different errors can often times be more difficult than just having one error, even if it contains more variant than are actually possible.

For example, someone calling metdata and read now needs to handle both MetadataLookupError and ZipFileReadError but there's no way to convert one error to the other. It forces the caller to define its own error type that is a union over the two.


#[derive(Debug, thiserror::Error)]
pub enum ZipFileReadError {
#[error("{0}")]
InvalidPath(#[from] InvalidVendoredPathError),
#[error("Path could not be read due to {0}")]
UnreadablePath(#[from] io::Error),
}

/// Private struct only used in `Debug` implementations
///
/// This could possibly be unified with the `Metadata` struct,
Expand Down Expand Up @@ -200,7 +224,7 @@ struct VendoredFileSystemInner(Mutex<RefCell<VendoredZipArchive>>);
type LockedZipArchive<'a> = MutexGuard<'a, RefCell<VendoredZipArchive>>;

impl VendoredFileSystemInner {
fn new(raw_bytes: &'static [u8]) -> Result<Self> {
fn new(raw_bytes: &'static [u8]) -> io::Result<Self> {
Ok(Self(Mutex::new(RefCell::new(VendoredZipArchive::new(
raw_bytes,
)?))))
Expand All @@ -221,11 +245,11 @@ impl VendoredFileSystemInner {
struct VendoredZipArchive(ZipArchive<io::Cursor<&'static [u8]>>);

impl VendoredZipArchive {
fn new(data: &'static [u8]) -> Result<Self> {
fn new(data: &'static [u8]) -> io::Result<Self> {
Ok(Self(ZipArchive::new(io::Cursor::new(data))?))
}

fn lookup_path(&mut self, path: &NormalizedVendoredPath) -> Result<ZipFile> {
fn lookup_path(&mut self, path: &NormalizedVendoredPath) -> io::Result<ZipFile> {
Ok(self.0.by_name(path.as_str())?)
}

Expand Down Expand Up @@ -260,7 +284,9 @@ impl NormalizedVendoredPath {
/// If a path with an unsupported component for vendored paths is passed.
/// Unsupported components are path prefixes,
/// and path root directories appearing anywhere except at the start of the path.
fn normalize_vendored_path(path: &VendoredPath) -> NormalizedVendoredPath {
fn normalize_vendored_path(
path: &VendoredPath,
) -> Result<NormalizedVendoredPath, InvalidVendoredPathError> {
// Allow the `RootDir` component, but only if it is at the very start of the string.
let mut components = path.components().peekable();
if let Some(camino::Utf8Component::RootDir) = components.peek() {
Expand All @@ -273,12 +299,18 @@ fn normalize_vendored_path(path: &VendoredPath) -> NormalizedVendoredPath {
camino::Utf8Component::Normal(part) => normalized_parts.push(part),
camino::Utf8Component::CurDir => continue,
camino::Utf8Component::ParentDir => {
normalized_parts.pop();
if normalized_parts.pop().is_none() {
return Err(InvalidVendoredPathError::EscapeFromZipFile);
}
Comment on lines +302 to +304
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I quickly checked what the normal FS operations return when you navigate outside the cwd. They just return a NotFound error, which is probably also enough for us.

}
unsupported => {
return Err(InvalidVendoredPathError::UnsupportedComponent(
unsupported.to_string(),
))
Comment on lines +306 to +309
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Am I correct that this is only to handle prefixes? I guess the problem we run into here is that camino::Utf8Path path parsing is platform-dependent. I'm not sure how to solve this best but I'm also not sure if this special case is worth returning an Error (we control the path creation, it's not that they're provided from externally)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or a camino::Utf8Component::RootDir... which I don't think is possible in the middle of a file, but that can't be validated using the type system

}
unsupported => panic!("Unsupported component in a vendored path: {unsupported}"),
}
}
NormalizedVendoredPath(normalized_parts.join("/"))
Ok(NormalizedVendoredPath(normalized_parts.join("/")))
}

#[cfg(test)]
Expand Down Expand Up @@ -426,10 +458,8 @@ mod tests {
let mock_typeshed = mock_typeshed();
let path = VendoredPath::new(path);
assert!(!mock_typeshed.exists(path));
assert!(mock_typeshed.metadata(path).is_none());
assert!(mock_typeshed
.read(path)
.is_err_and(|err| err.to_string().contains("file not found")));
assert!(mock_typeshed.metadata(path).is_err());
assert!(mock_typeshed.read(path).is_err());
}

#[test]
Expand Down
47 changes: 31 additions & 16 deletions crates/ruff_db/src/vfs.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::io;
use std::sync::Arc;

use countme::Count;
Expand All @@ -8,7 +9,7 @@ pub use path::VfsPath;

use crate::file_revision::FileRevision;
use crate::file_system::FileSystemPath;
use crate::vendored::VendoredFileSystem;
use crate::vendored::{MetadataLookupError, VendoredFileSystem, ZipFileReadError};
use crate::vfs::private::FileStatus;
use crate::{Db, FxDashMap};

Expand All @@ -31,9 +32,15 @@ pub fn system_path_to_file(db: &dyn Db, path: impl AsRef<FileSystemPath>) -> Opt
}
}

/// Interns a vendored file path. Returns `Some` if the vendored file for `path` exists and `None` otherwise.
/// Interns a vendored file path.
///
/// - Returns `Ok(VfsFile)` if `path` is valid and points to a file that exists
/// - Otherwise, returns `Err`
#[inline]
pub fn vendored_path_to_file(db: &dyn Db, path: impl AsRef<VendoredPath>) -> Option<VfsFile> {
pub fn vendored_path_to_file(
db: &dyn Db,
path: impl AsRef<VendoredPath>,
) -> Result<VfsFile, MetadataLookupError> {
db.vfs().vendored(db, path.as_ref())
}

Expand All @@ -46,7 +53,7 @@ pub fn vendored_path_to_file(db: &dyn Db, path: impl AsRef<VendoredPath>) -> Opt
pub fn vfs_path_to_file(db: &dyn Db, path: &VfsPath) -> Option<VfsFile> {
match path {
VfsPath::FileSystem(path) => system_path_to_file(db, path),
VfsPath::Vendored(path) => vendored_path_to_file(db, path),
VfsPath::Vendored(path) => vendored_path_to_file(db, path).ok(),
}
}

Expand Down Expand Up @@ -135,7 +142,7 @@ impl Vfs {

/// Looks up a vendored file by its path. Returns `Some` if a vendored file for the given path
/// exists and `None` otherwise.
fn vendored(&self, db: &dyn Db, path: &VendoredPath) -> Option<VfsFile> {
fn vendored(&self, db: &dyn Db, path: &VendoredPath) -> Result<VfsFile, MetadataLookupError> {
let file = match self
.inner
.files_by_path
Expand All @@ -160,7 +167,7 @@ impl Vfs {
}
};

Some(file)
Ok(file)
}

/// Stubs out the vendored files with the given content.
Expand Down Expand Up @@ -312,28 +319,32 @@ impl Default for VendoredVfs {
}

impl VendoredVfs {
fn revision(&self, path: &VendoredPath) -> Option<FileRevision> {
fn revision(&self, path: &VendoredPath) -> Result<FileRevision, MetadataLookupError> {
match self {
VendoredVfs::Real(file_system) => file_system
.metadata(path)
.map(|metadata| metadata.revision()),
VendoredVfs::Stubbed(stubbed) => stubbed
.contains_key(&path.to_path_buf())
.then_some(FileRevision::new(1)),
VendoredVfs::Stubbed(stubbed) => {
if stubbed.contains_key(&path.to_path_buf()) {
Ok(FileRevision::new(1))
} else {
Err(MetadataLookupError::NonExistentPath(path.to_path_buf()))
}
}
}
}

fn read(&self, path: &VendoredPath) -> std::io::Result<String> {
fn read(&self, path: &VendoredPath) -> Result<String, ZipFileReadError> {
match self {
VendoredVfs::Real(file_system) => file_system.read(path),
VendoredVfs::Stubbed(stubbed) => {
if let Some(contents) = stubbed.get(&path.to_path_buf()).as_deref().cloned() {
Ok(contents)
} else {
Err(std::io::Error::new(
std::io::ErrorKind::NotFound,
format!("Could not find file {path:?}"),
))
Err(ZipFileReadError::UnreadablePath(io::Error::new(
io::ErrorKind::NotFound,
format!("Could not read {path:?}"),
)))
}
}
}
Expand All @@ -357,6 +368,7 @@ mod private {
mod tests {
use crate::file_revision::FileRevision;
use crate::tests::TestDb;
use crate::vendored::MetadataLookupError;
use crate::vfs::{system_path_to_file, vendored_path_to_file};

#[test]
Expand Down Expand Up @@ -402,6 +414,9 @@ mod tests {
fn stubbed_vendored_file_non_existing() {
let db = TestDb::new();

assert_eq!(vendored_path_to_file(&db, "test.py"), None);
assert!(matches!(
vendored_path_to_file(&db, "test.py"),
Err(MetadataLookupError::NonExistentPath(_))
));
}
}
Loading