Skip to content
This repository has been archived by the owner on Jul 27, 2022. It is now read-only.

feat: path handling UX improvements #21

Merged
merged 3 commits into from
Jun 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 8 additions & 50 deletions src/local.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
//! An object store implementation for a local filesystem
use crate::{
maybe_spawn_blocking, path::Path, GetResult, ListResult, ObjectMeta, ObjectStore, Result,
maybe_spawn_blocking,
path::{filesystem_path_to_url, Path},
GetResult, ListResult, ObjectMeta, ObjectStore, Result,
};
use async_trait::async_trait;
use bytes::Bytes;
use futures::{stream::BoxStream, StreamExt};
use percent_encoding::percent_decode;
use snafu::{ensure, OptionExt, ResultExt, Snafu};
use std::collections::VecDeque;
use std::fs::File;
Expand Down Expand Up @@ -91,29 +92,12 @@ pub(crate) enum Error {
source: io::Error,
},

#[snafu(display("Path \"{}\" contained non-unicode characters: {}", path, source))]
NonUnicode {
path: String,
source: std::str::Utf8Error,
},

#[snafu(display("Unable to canonicalize path {}: {}", path.display(), source))]
UnableToCanonicalize {
source: io::Error,
path: std::path::PathBuf,
},

#[snafu(display("Error seeking file {}: {}", path.display(), source))]
Seek {
source: io::Error,
path: std::path::PathBuf,
},

#[snafu(display("Unable to convert path \"{}\" to URL", path.display()))]
InvalidPath {
path: std::path::PathBuf,
},

#[snafu(display("Unable to convert URL \"{}\" to filesystem path", url))]
InvalidUrl {
url: Url,
Expand Down Expand Up @@ -200,7 +184,7 @@ impl LocalFileSystem {
pub fn new_with_prefix(prefix: impl AsRef<std::path::Path>) -> Result<Self> {
Ok(Self {
config: Arc::new(Config {
root: path_to_url(prefix, true)?,
root: filesystem_path_to_url(prefix)?,
}),
})
}
Expand All @@ -219,15 +203,10 @@ impl Config {
}

fn filesystem_to_path(&self, location: &std::path::Path) -> Result<Path> {
let url = path_to_url(location, false)?;
let relative = self.root.make_relative(&url).expect("relative path");

// Reverse any percent encoding performed by conversion to URL
let decoded = percent_decode(relative.as_bytes())
.decode_utf8()
.context(NonUnicodeSnafu { path: &relative })?;

Ok(Path::parse(decoded)?)
Ok(Path::from_filesystem_path_with_base(
location,
Some(&self.root),
)?)
}
}

Expand Down Expand Up @@ -491,27 +470,6 @@ fn open_file(path: &std::path::PathBuf) -> Result<File> {
Ok(file)
}

/// Encode a path as a URL
///
/// Note: The returned URL is percent encoded
fn path_to_url(path: impl AsRef<std::path::Path>, is_dir: bool) -> Result<Url, Error> {
// Convert to canonical, i.e. absolute representation
let canonical = path
.as_ref()
.canonicalize()
.context(UnableToCanonicalizeSnafu {
path: path.as_ref(),
})?;

// Convert to file URL
let result = match is_dir {
true => Url::from_directory_path(&canonical),
false => Url::from_file_path(&canonical),
};

result.map_err(|_| Error::InvalidPath { path: canonical })
}

fn convert_entry(entry: DirEntry, location: Path) -> Result<ObjectMeta> {
let metadata = entry
.metadata()
Expand Down
115 changes: 101 additions & 14 deletions src/path/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
//! Path abstraction for Object Storage

use itertools::Itertools;
use percent_encoding::percent_decode;
use snafu::{ensure, ResultExt, Snafu};
use std::fmt::Formatter;
use url::Url;

/// The delimiter to separate object namespaces, creating a directory structure.
pub const DELIMITER: &str = "/";
Expand All @@ -21,8 +23,26 @@ pub enum Error {
#[snafu(display("Path \"{}\" contained empty path segment", path))]
EmptySegment { path: String },

#[snafu(display("Error parsing Path \"{}\": \"{}\"", path, source))]
#[snafu(display("Error parsing Path \"{}\": {}", path, source))]
BadSegment { path: String, source: InvalidPart },

#[snafu(display("Failed to canonicalize path \"{}\": {}", path.display(), source))]
Canonicalize {
path: std::path::PathBuf,
source: std::io::Error,
},

#[snafu(display("Unable to convert path \"{}\" to URL", path.display()))]
InvalidPath { path: std::path::PathBuf },

#[snafu(display("Path \"{}\" contained non-unicode characters: {}", path, source))]
NonUnicode {
path: String,
source: std::str::Utf8Error,
},

#[snafu(display("Path {} does not start with prefix {}", path, prefix))]
PrefixMismatch { path: String, prefix: String },
}

/// A parsed path representation that can be safely written to object storage
Expand Down Expand Up @@ -108,6 +128,10 @@ impl Path {
let path = path.as_ref();

let stripped = path.strip_prefix(DELIMITER).unwrap_or(path);
if stripped.is_empty() {
return Ok(Default::default());
}

let stripped = stripped.strip_suffix(DELIMITER).unwrap_or(stripped);

for segment in stripped.split(DELIMITER) {
Expand All @@ -120,6 +144,44 @@ impl Path {
})
}

/// Convert a filesystem path to a [`Path`] relative to the filesystem root
///
/// This will return an error if the path does not exist, or contains illegal
/// character sequences as defined by [`Path::parse`]
pub fn from_filesystem_path(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
Self::from_filesystem_path_with_base(path, None)
}

/// Convert a filesystem path to a [`Path`] relative to the provided base
///
/// This will return an error if the path does not exist on the local filesystem,
/// contains illegal character sequences as defined by [`Path::parse`], or `base`
/// does not refer to a parent path of `path`
pub(crate) fn from_filesystem_path_with_base(
path: impl AsRef<std::path::Path>,
base: Option<&Url>,
) -> Result<Self, Error> {
let url = filesystem_path_to_url(path)?;
let path = match base {
Some(prefix) => {
url.path()
.strip_prefix(prefix.path())
.ok_or_else(|| Error::PrefixMismatch {
path: url.path().to_string(),
prefix: prefix.to_string(),
})?
}
None => url.path(),
};

// Reverse any percent encoding performed by conversion to URL
let decoded = percent_decode(path.as_bytes())
.decode_utf8()
.context(NonUnicodeSnafu { path })?;

Self::parse(decoded)
}

/// Returns the [`PathPart`] of this [`Path`]
pub fn parts(&self) -> impl Iterator<Item = PathPart<'_>> {
match self.raw.is_empty() {
Expand All @@ -132,29 +194,24 @@ impl Path {
}
}

pub(crate) fn prefix_match(
&self,
prefix: &Self,
) -> Option<impl Iterator<Item = PathPart<'_>> + '_> {
let diff = itertools::diff_with(
self.raw.split(DELIMITER).filter(|x| !x.is_empty()),
prefix.raw.split(DELIMITER).filter(|x| !x.is_empty()),
|a, b| a == b,
);
/// Returns an iterator of the [`PathPart`] of this [`Path`] after `prefix`
///
/// Returns `None` if the prefix does not match
pub fn prefix_match(&self, prefix: &Self) -> Option<impl Iterator<Item = PathPart<'_>> + '_> {
let diff = itertools::diff_with(self.parts(), prefix.parts(), |a, b| a == b);

match diff {
// Both were equal
None => Some(itertools::Either::Left(std::iter::empty())),
// Mismatch or prefix was longer => None
Some(itertools::Diff::FirstMismatch(_, _, _) | itertools::Diff::Longer(_, _)) => None,
// Match with remaining
Some(itertools::Diff::Shorter(_, back)) => Some(itertools::Either::Right(
back.map(|s| PathPart { raw: s.into() }),
)),
Some(itertools::Diff::Shorter(_, back)) => Some(itertools::Either::Right(back)),
}
}

pub(crate) fn prefix_matches(&self, prefix: &Self) -> bool {
/// Returns true if this [`Path`] starts with `prefix`
pub fn prefix_matches(&self, prefix: &Self) -> bool {
self.prefix_match(prefix).is_some()
}

Expand Down Expand Up @@ -214,6 +271,20 @@ where
}
}

/// Given a filesystem path, convert it to its canonical URL representation,
/// returning an error if the file doesn't exist on the local filesystem
pub(crate) fn filesystem_path_to_url(path: impl AsRef<std::path::Path>) -> Result<Url, Error> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps a docstring explaining what this does would be useful as well

let path = path.as_ref().canonicalize().context(CanonicalizeSnafu {
path: path.as_ref(),
})?;

match path.is_dir() {
true => Url::from_directory_path(&path),
false => Url::from_file_path(&path),
}
.map_err(|_| Error::InvalidPath { path })
}

#[cfg(test)]
mod tests {
use super::*;
Expand All @@ -233,6 +304,22 @@ mod tests {
assert_eq!(location.as_ref(), "foo%2Fbar/baz%252Ftest");
}

#[test]
fn test_parse() {
assert_eq!(Path::parse("/").unwrap().as_ref(), "");
assert_eq!(Path::parse("").unwrap().as_ref(), "");

let err = Path::parse("//").unwrap_err();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

assert!(matches!(err, Error::EmptySegment { .. }));

assert_eq!(Path::parse("/foo/bar/").unwrap().as_ref(), "foo/bar");
assert_eq!(Path::parse("foo/bar/").unwrap().as_ref(), "foo/bar");
assert_eq!(Path::parse("foo/bar").unwrap().as_ref(), "foo/bar");

let err = Path::parse("foo///bar").unwrap_err();
assert!(matches!(err, Error::EmptySegment { .. }));
}

#[test]
fn convert_raw_before_partial_eq() {
// dir and file_name
Expand Down