Skip to content

Commit dcd2f4f

Browse files
authored
fix: Preserve percent-encoding in PartitionedFile paths during deserialization (#18346)
## Which issue does this PR close? - Closes #18345 ## Rationale for this change ## What changes are included in this PR? This PR changes the implementation to use Path::parse(proto.path). As per the object_store crate's documentation, Path::parse is the correct method for constructing a Path from a raw, already-encoded string, as it preserves the encoding. ## Are these changes tested? Yes, with unit tests. ## Are there any user-facing changes? No.
1 parent 52894db commit dcd2f4f

File tree

1 file changed

+55
-1
lines changed

1 file changed

+55
-1
lines changed

datafusion/proto/src/physical_plan/from_proto.rs

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,9 @@ impl TryFrom<&protobuf::PartitionedFile> for PartitionedFile {
572572
fn try_from(val: &protobuf::PartitionedFile) -> Result<Self, Self::Error> {
573573
Ok(PartitionedFile {
574574
object_meta: ObjectMeta {
575-
location: Path::from(val.path.as_str()),
575+
location: Path::parse(val.path.as_str()).map_err(|e| {
576+
proto_error(format!("Invalid object_store path: {e}"))
577+
})?,
576578
last_modified: Utc.timestamp_nanos(val.last_modified_ns as i64),
577579
size: val.size,
578580
e_tag: None,
@@ -694,3 +696,55 @@ impl TryFrom<&protobuf::FileSinkConfig> for FileSinkConfig {
694696
})
695697
}
696698
}
699+
700+
#[cfg(test)]
701+
mod tests {
702+
use super::*;
703+
use chrono::{TimeZone, Utc};
704+
use datafusion_datasource::PartitionedFile;
705+
use object_store::path::Path;
706+
use object_store::ObjectMeta;
707+
708+
#[test]
709+
fn partitioned_file_path_roundtrip_percent_encoded() {
710+
let path_str = "foo/foo%2Fbar/baz%252Fqux";
711+
let pf = PartitionedFile {
712+
object_meta: ObjectMeta {
713+
location: Path::parse(path_str).unwrap(),
714+
last_modified: Utc.timestamp_nanos(1_000),
715+
size: 42,
716+
e_tag: None,
717+
version: None,
718+
},
719+
partition_values: vec![],
720+
range: None,
721+
statistics: None,
722+
extensions: None,
723+
metadata_size_hint: None,
724+
};
725+
726+
let proto = protobuf::PartitionedFile::try_from(&pf).unwrap();
727+
assert_eq!(proto.path, path_str);
728+
729+
let pf2 = PartitionedFile::try_from(&proto).unwrap();
730+
assert_eq!(pf2.object_meta.location.as_ref(), path_str);
731+
assert_eq!(pf2.object_meta.location, pf.object_meta.location);
732+
assert_eq!(pf2.object_meta.size, pf.object_meta.size);
733+
assert_eq!(pf2.object_meta.last_modified, pf.object_meta.last_modified);
734+
}
735+
736+
#[test]
737+
fn partitioned_file_from_proto_invalid_path() {
738+
let proto = protobuf::PartitionedFile {
739+
path: "foo//bar".to_string(),
740+
size: 1,
741+
last_modified_ns: 0,
742+
partition_values: vec![],
743+
range: None,
744+
statistics: None,
745+
};
746+
747+
let err = PartitionedFile::try_from(&proto).unwrap_err();
748+
assert!(err.to_string().contains("Invalid object_store path"));
749+
}
750+
}

0 commit comments

Comments
 (0)