Skip to content

Commit

Permalink
Extend Storage support to relative local FS paths
Browse files Browse the repository at this point in the history
  • Loading branch information
gruuya committed Nov 22, 2024
1 parent 697a200 commit 10fe25c
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 5 deletions.
12 changes: 11 additions & 1 deletion crates/iceberg/src/io/file_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,23 @@ impl FileIO {
///
/// - If it's a valid url, for example `s3://bucket/a`, url scheme will be used, and the rest of the url will be ignored.
/// - If it's not a valid url, will try to detect if it's a file path.
/// - If it's a relative file path the full path must exist.
///
/// Otherwise will return parsing error.
pub fn from_path(path: impl AsRef<str>) -> crate::Result<FileIOBuilder> {
let url = Url::parse(path.as_ref())
.map_err(Error::from)
.or_else(|e| {
Url::from_file_path(path.as_ref()).map_err(|_| {
let absolute =
if path.as_ref().starts_with("file://") || path.as_ref().starts_with("/") {
path.as_ref().to_string()
} else {
// If it's not an absolute path try to canonicalize it (this requires the
// full path to actually exist)
std::fs::canonicalize(path.as_ref())?.display().to_string()
};

Url::from_file_path(absolute).map_err(|_| {
Error::new(
ErrorKind::DataInvalid,
"Input is neither a valid url nor path",
Expand Down
43 changes: 39 additions & 4 deletions crates/iceberg/src/io/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

use std::sync::Arc;

use opendal::services::FsConfig;
#[cfg(feature = "storage-gcs")]
use opendal::services::GcsConfig;
#[cfg(feature = "storage-s3")]
Expand Down Expand Up @@ -105,12 +106,26 @@ impl Storage {
}
#[cfg(feature = "storage-fs")]
Storage::LocalFs => {
let op = super::fs_config_build()?;

if let Some(stripped) = path.strip_prefix("file:/") {
if let Some(stripped) = path.strip_prefix("file:/").or(path.strip_prefix("/")) {
let op = super::fs_config_build()?;
Ok((op, stripped))
} else {
Ok((op, &path[1..]))
// Compute the full path and extract the root portion of it.
// For now support only ./path/to/loc and path/to/loc.
let relative = path.strip_prefix("./").unwrap_or(path);
let root = std::fs::canonicalize(path)?
.to_str()
.ok_or(Error::new(
ErrorKind::DataInvalid,
format!("invalid relative path format {path}"),
))?
.trim_end_matches(relative)
.to_string();

let mut cfg = FsConfig::default();
cfg.root = Some(root);
let op = Operator::from_config(cfg)?.finish();
Ok((op, relative))
}
}
#[cfg(feature = "storage-s3")]
Expand Down Expand Up @@ -169,3 +184,23 @@ impl Storage {
}
}
}

#[cfg(test)]
mod tests {
use crate::io::storage::Storage;

#[tokio::test]
async fn test_local_fs_storage_relative_paths() {
let rel_manifest_path = "testdata/manifests_lists/manifest-list-v2-1.avro".to_string();

// Check both dot-based and folder-based relative paths
for rel_prefix in ["", "./"] {
let path = format!("{rel_prefix}{rel_manifest_path}");
let (op, relative) = Storage::LocalFs.create_operator(&path).expect("");

assert_eq!(relative, rel_manifest_path);

op.check().await.expect("Relative path operator functional");
}
}
}

0 comments on commit 10fe25c

Please sign in to comment.