Skip to content
This repository has been archived by the owner on Jul 27, 2022. It is now read-only.

Commit

Permalink
feat: path handling UX improvements (#21)
Browse files Browse the repository at this point in the history
* feat: path handling UX improvements

* chore: review feedback
  • Loading branch information
tustvold authored Jun 6, 2022
1 parent febf83e commit 3997aa2
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 64 deletions.
58 changes: 8 additions & 50 deletions src/local.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
//! An object store implementation for a local filesystem
use crate::{
maybe_spawn_blocking, path::Path, GetResult, ListResult, ObjectMeta, ObjectStore, Result,
maybe_spawn_blocking,
path::{filesystem_path_to_url, Path},
GetResult, ListResult, ObjectMeta, ObjectStore, Result,
};
use async_trait::async_trait;
use bytes::Bytes;
use futures::{stream::BoxStream, StreamExt};
use percent_encoding::percent_decode;
use snafu::{ensure, OptionExt, ResultExt, Snafu};
use std::collections::VecDeque;
use std::fs::File;
Expand Down Expand Up @@ -91,29 +92,12 @@ pub(crate) enum Error {
source: io::Error,
},

#[snafu(display("Path \"{}\" contained non-unicode characters: {}", path, source))]
NonUnicode {
path: String,
source: std::str::Utf8Error,
},

#[snafu(display("Unable to canonicalize path {}: {}", path.display(), source))]
UnableToCanonicalize {
source: io::Error,
path: std::path::PathBuf,
},

#[snafu(display("Error seeking file {}: {}", path.display(), source))]
Seek {
source: io::Error,
path: std::path::PathBuf,
},

#[snafu(display("Unable to convert path \"{}\" to URL", path.display()))]
InvalidPath {
path: std::path::PathBuf,
},

#[snafu(display("Unable to convert URL \"{}\" to filesystem path", url))]
InvalidUrl {
url: Url,
Expand Down Expand Up @@ -200,7 +184,7 @@ impl LocalFileSystem {
pub fn new_with_prefix(prefix: impl AsRef<std::path::Path>) -> Result<Self> {
Ok(Self {
config: Arc::new(Config {
root: path_to_url(prefix, true)?,
root: filesystem_path_to_url(prefix)?,
}),
})
}
Expand All @@ -219,15 +203,10 @@ impl Config {
}

fn filesystem_to_path(&self, location: &std::path::Path) -> Result<Path> {
let url = path_to_url(location, false)?;
let relative = self.root.make_relative(&url).expect("relative path");

// Reverse any percent encoding performed by conversion to URL
let decoded = percent_decode(relative.as_bytes())
.decode_utf8()
.context(NonUnicodeSnafu { path: &relative })?;

Ok(Path::parse(decoded)?)
Ok(Path::from_filesystem_path_with_base(
location,
Some(&self.root),
)?)
}
}

Expand Down Expand Up @@ -491,27 +470,6 @@ fn open_file(path: &std::path::PathBuf) -> Result<File> {
Ok(file)
}

/// Encode a path as a URL
///
/// Note: The returned URL is percent encoded
fn path_to_url(path: impl AsRef<std::path::Path>, is_dir: bool) -> Result<Url, Error> {
// Convert to canonical, i.e. absolute representation
let canonical = path
.as_ref()
.canonicalize()
.context(UnableToCanonicalizeSnafu {
path: path.as_ref(),
})?;

// Convert to file URL
let result = match is_dir {
true => Url::from_directory_path(&canonical),
false => Url::from_file_path(&canonical),
};

result.map_err(|_| Error::InvalidPath { path: canonical })
}

fn convert_entry(entry: DirEntry, location: Path) -> Result<ObjectMeta> {
let metadata = entry
.metadata()
Expand Down
115 changes: 101 additions & 14 deletions src/path/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
//! Path abstraction for Object Storage
use itertools::Itertools;
use percent_encoding::percent_decode;
use snafu::{ensure, ResultExt, Snafu};
use std::fmt::Formatter;
use url::Url;

/// The delimiter to separate object namespaces, creating a directory structure.
pub const DELIMITER: &str = "/";
Expand All @@ -21,8 +23,26 @@ pub enum Error {
#[snafu(display("Path \"{}\" contained empty path segment", path))]
EmptySegment { path: String },

#[snafu(display("Error parsing Path \"{}\": \"{}\"", path, source))]
#[snafu(display("Error parsing Path \"{}\": {}", path, source))]
BadSegment { path: String, source: InvalidPart },

#[snafu(display("Failed to canonicalize path \"{}\": {}", path.display(), source))]
Canonicalize {
path: std::path::PathBuf,
source: std::io::Error,
},

#[snafu(display("Unable to convert path \"{}\" to URL", path.display()))]
InvalidPath { path: std::path::PathBuf },

#[snafu(display("Path \"{}\" contained non-unicode characters: {}", path, source))]
NonUnicode {
path: String,
source: std::str::Utf8Error,
},

#[snafu(display("Path {} does not start with prefix {}", path, prefix))]
PrefixMismatch { path: String, prefix: String },
}

/// A parsed path representation that can be safely written to object storage
Expand Down Expand Up @@ -108,6 +128,10 @@ impl Path {
let path = path.as_ref();

let stripped = path.strip_prefix(DELIMITER).unwrap_or(path);
if stripped.is_empty() {
return Ok(Default::default());
}

let stripped = stripped.strip_suffix(DELIMITER).unwrap_or(stripped);

for segment in stripped.split(DELIMITER) {
Expand All @@ -120,6 +144,44 @@ impl Path {
})
}

/// Convert a filesystem path to a [`Path`] relative to the filesystem root
///
/// This will return an error if the path does not exist, or contains illegal
/// character sequences as defined by [`Path::parse`]
pub fn from_filesystem_path(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
Self::from_filesystem_path_with_base(path, None)
}

/// Convert a filesystem path to a [`Path`] relative to the provided base
///
/// This will return an error if the path does not exist on the local filesystem,
/// contains illegal character sequences as defined by [`Path::parse`], or `base`
/// does not refer to a parent path of `path`
pub(crate) fn from_filesystem_path_with_base(
path: impl AsRef<std::path::Path>,
base: Option<&Url>,
) -> Result<Self, Error> {
let url = filesystem_path_to_url(path)?;
let path = match base {
Some(prefix) => {
url.path()
.strip_prefix(prefix.path())
.ok_or_else(|| Error::PrefixMismatch {
path: url.path().to_string(),
prefix: prefix.to_string(),
})?
}
None => url.path(),
};

// Reverse any percent encoding performed by conversion to URL
let decoded = percent_decode(path.as_bytes())
.decode_utf8()
.context(NonUnicodeSnafu { path })?;

Self::parse(decoded)
}

/// Returns the [`PathPart`] of this [`Path`]
pub fn parts(&self) -> impl Iterator<Item = PathPart<'_>> {
match self.raw.is_empty() {
Expand All @@ -132,29 +194,24 @@ impl Path {
}
}

pub(crate) fn prefix_match(
&self,
prefix: &Self,
) -> Option<impl Iterator<Item = PathPart<'_>> + '_> {
let diff = itertools::diff_with(
self.raw.split(DELIMITER).filter(|x| !x.is_empty()),
prefix.raw.split(DELIMITER).filter(|x| !x.is_empty()),
|a, b| a == b,
);
/// Returns an iterator of the [`PathPart`] of this [`Path`] after `prefix`
///
/// Returns `None` if the prefix does not match
pub fn prefix_match(&self, prefix: &Self) -> Option<impl Iterator<Item = PathPart<'_>> + '_> {
let diff = itertools::diff_with(self.parts(), prefix.parts(), |a, b| a == b);

match diff {
// Both were equal
None => Some(itertools::Either::Left(std::iter::empty())),
// Mismatch or prefix was longer => None
Some(itertools::Diff::FirstMismatch(_, _, _) | itertools::Diff::Longer(_, _)) => None,
// Match with remaining
Some(itertools::Diff::Shorter(_, back)) => Some(itertools::Either::Right(
back.map(|s| PathPart { raw: s.into() }),
)),
Some(itertools::Diff::Shorter(_, back)) => Some(itertools::Either::Right(back)),
}
}

pub(crate) fn prefix_matches(&self, prefix: &Self) -> bool {
/// Returns true if this [`Path`] starts with `prefix`
pub fn prefix_matches(&self, prefix: &Self) -> bool {
self.prefix_match(prefix).is_some()
}

Expand Down Expand Up @@ -214,6 +271,20 @@ where
}
}

/// Given a filesystem path, convert it to its canonical URL representation,
/// returning an error if the file doesn't exist on the local filesystem
pub(crate) fn filesystem_path_to_url(path: impl AsRef<std::path::Path>) -> Result<Url, Error> {
let path = path.as_ref().canonicalize().context(CanonicalizeSnafu {
path: path.as_ref(),
})?;

match path.is_dir() {
true => Url::from_directory_path(&path),
false => Url::from_file_path(&path),
}
.map_err(|_| Error::InvalidPath { path })
}

#[cfg(test)]
mod tests {
use super::*;
Expand All @@ -233,6 +304,22 @@ mod tests {
assert_eq!(location.as_ref(), "foo%2Fbar/baz%252Ftest");
}

#[test]
fn test_parse() {
assert_eq!(Path::parse("/").unwrap().as_ref(), "");
assert_eq!(Path::parse("").unwrap().as_ref(), "");

let err = Path::parse("//").unwrap_err();
assert!(matches!(err, Error::EmptySegment { .. }));

assert_eq!(Path::parse("/foo/bar/").unwrap().as_ref(), "foo/bar");
assert_eq!(Path::parse("foo/bar/").unwrap().as_ref(), "foo/bar");
assert_eq!(Path::parse("foo/bar").unwrap().as_ref(), "foo/bar");

let err = Path::parse("foo///bar").unwrap_err();
assert!(matches!(err, Error::EmptySegment { .. }));
}

#[test]
fn convert_raw_before_partial_eq() {
// dir and file_name
Expand Down

0 comments on commit 3997aa2

Please sign in to comment.