Skip to content

Commit

Permalink
refactor!: remove a layer of lifetimes from PartitionFilter (#1725)
Browse files Browse the repository at this point in the history
# Description
This commit removes a bunch of lifetime restrictions on the
`PartitionFilter` and `PartitionFilterValue` classes to make them easier
to use. While the original discussion in Slack and #1501 made mention of
using a reference type, there doesn't seem to a need for it. A
particular instance of a `PartitionFilter` is created once and just
borrowed and read for the remainder of its life.

Functions, when necessary continue to accept the non-container types
(i.e, `&str` and `&[&str]`), allowing their containerized counterparts
to continue working with them without needing to borrow or clone the
containers (i.e, `String` and `Vec<String>`).

# Related Issue(s)
- resolves #1501 

# Documentation
  • Loading branch information
cmackenzie1 authored Oct 14, 2023
1 parent 04576f4 commit 187a58c
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 124 deletions.
31 changes: 16 additions & 15 deletions python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,18 +193,17 @@ impl RawDeltaTable {
&self,
partitions_filters: Vec<(&str, &str, PartitionFilterValue)>,
) -> PyResult<Vec<String>> {
let partition_filters: Result<Vec<PartitionFilter<&str>>, DeltaTableError> =
partitions_filters
.into_iter()
.map(|filter| match filter {
(key, op, PartitionFilterValue::Single(v)) => {
PartitionFilter::try_from((key, op, v))
}
(key, op, PartitionFilterValue::Multiple(v)) => {
PartitionFilter::try_from((key, op, v))
}
})
.collect();
let partition_filters: Result<Vec<PartitionFilter>, DeltaTableError> = partitions_filters
.into_iter()
.map(|filter| match filter {
(key, op, PartitionFilterValue::Single(v)) => {
PartitionFilter::try_from((key, op, v))
}
(key, op, PartitionFilterValue::Multiple(v)) => {
PartitionFilter::try_from((key, op, v.as_slice()))
}
})
.collect();
match partition_filters {
Ok(filters) => Ok(self
._table
Expand Down Expand Up @@ -673,13 +672,15 @@ impl RawDeltaTable {
}

fn convert_partition_filters<'a>(
partitions_filters: Vec<(&'a str, &'a str, PartitionFilterValue<'a>)>,
) -> Result<Vec<PartitionFilter<&'a str>>, DeltaTableError> {
partitions_filters: Vec<(&'a str, &'a str, PartitionFilterValue)>,
) -> Result<Vec<PartitionFilter>, DeltaTableError> {
partitions_filters
.into_iter()
.map(|filter| match filter {
(key, op, PartitionFilterValue::Single(v)) => PartitionFilter::try_from((key, op, v)),
(key, op, PartitionFilterValue::Multiple(v)) => PartitionFilter::try_from((key, op, v)),
(key, op, PartitionFilterValue::Multiple(v)) => {
PartitionFilter::try_from((key, op, v.as_slice()))
}
})
.collect()
}
Expand Down
44 changes: 22 additions & 22 deletions rust/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
//! async {
//! let table = deltalake::open_table_with_version("./tests/data/simple_table", 0).await.unwrap();
//! let files = table.get_files_by_partitions(&[deltalake::PartitionFilter {
//! key: "month",
//! value: deltalake::PartitionValue::Equal("12"),
//! key: "month".to_string(),
//! value: deltalake::PartitionValue::Equal("12".to_string()),
//! }]);
//! };
//! ```
Expand Down Expand Up @@ -348,12 +348,12 @@ mod tests {

let filters = vec![
crate::PartitionFilter {
key: "month",
value: crate::PartitionValue::Equal("2"),
key: "month".to_string(),
value: crate::PartitionValue::Equal("2".to_string()),
},
crate::PartitionFilter {
key: "year",
value: crate::PartitionValue::Equal("2020"),
key: "year".to_string(),
value: crate::PartitionValue::Equal("2020".to_string()),
},
];

Expand Down Expand Up @@ -383,8 +383,8 @@ mod tests {
);

let filters = vec![crate::PartitionFilter {
key: "month",
value: crate::PartitionValue::NotEqual("2"),
key: "month".to_string(),
value: crate::PartitionValue::NotEqual("2".to_string()),
}];
assert_eq!(
table.get_files_by_partitions(&filters).unwrap(),
Expand All @@ -397,8 +397,8 @@ mod tests {
);

let filters = vec![crate::PartitionFilter {
key: "month",
value: crate::PartitionValue::In(vec!["2", "12"]),
key: "month".to_string(),
value: crate::PartitionValue::In(vec!["2".to_string(), "12".to_string()]),
}];
assert_eq!(
table.get_files_by_partitions(&filters).unwrap(),
Expand All @@ -411,8 +411,8 @@ mod tests {
);

let filters = vec![crate::PartitionFilter {
key: "month",
value: crate::PartitionValue::NotIn(vec!["2", "12"]),
key: "month".to_string(),
value: crate::PartitionValue::NotIn(vec!["2".to_string(), "12".to_string()]),
}];
assert_eq!(
table.get_files_by_partitions(&filters).unwrap(),
Expand All @@ -430,8 +430,8 @@ mod tests {
.unwrap();

let filters = vec![crate::PartitionFilter {
key: "k",
value: crate::PartitionValue::Equal("A"),
key: "k".to_string(),
value: crate::PartitionValue::Equal("A".to_string()),
}];
assert_eq!(
table.get_files_by_partitions(&filters).unwrap(),
Expand All @@ -441,8 +441,8 @@ mod tests {
);

let filters = vec![crate::PartitionFilter {
key: "k",
value: crate::PartitionValue::Equal(""),
key: "k".to_string(),
value: crate::PartitionValue::Equal("".to_string()),
}];
assert_eq!(
table.get_files_by_partitions(&filters).unwrap(),
Expand Down Expand Up @@ -473,8 +473,8 @@ mod tests {
);

let filters = vec![crate::PartitionFilter {
key: "x",
value: crate::PartitionValue::Equal("A/A"),
key: "x".to_string(),
value: crate::PartitionValue::Equal("A/A".to_string()),
}];
assert_eq!(
table.get_files_by_partitions(&filters).unwrap(),
Expand All @@ -492,8 +492,8 @@ mod tests {
.unwrap();

let filters = vec![crate::PartitionFilter {
key: "x",
value: crate::PartitionValue::LessThanOrEqual("9"),
key: "x".to_string(),
value: crate::PartitionValue::LessThanOrEqual("9".to_string()),
}];
assert_eq!(
table.get_files_by_partitions(&filters).unwrap(),
Expand All @@ -503,8 +503,8 @@ mod tests {
);

let filters = vec![crate::PartitionFilter {
key: "y",
value: crate::PartitionValue::LessThan("10.0"),
key: "y".to_string(),
value: crate::PartitionValue::LessThan("10.0".to_string()),
}];
assert_eq!(
table.get_files_by_partitions(&filters).unwrap(),
Expand Down
10 changes: 5 additions & 5 deletions rust/src/operations/optimize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ pub struct OptimizeBuilder<'a> {
/// Delta object store for handling data files
store: ObjectStoreRef,
/// Filters to select specific table partitions to be optimized
filters: &'a [PartitionFilter<'a, &'a str>],
filters: &'a [PartitionFilter],
/// Desired file size after bin-packing files
target_size: Option<i64>,
/// Properties passed to underlying parquet writer
Expand Down Expand Up @@ -200,7 +200,7 @@ impl<'a> OptimizeBuilder<'a> {
}

/// Only optimize files that return true for the specified partition filter
pub fn with_filters(mut self, filters: &'a [PartitionFilter<'a, &'a str>]) -> Self {
pub fn with_filters(mut self, filters: &'a [PartitionFilter]) -> Self {
self.filters = filters;
self
}
Expand Down Expand Up @@ -769,7 +769,7 @@ impl PartitionTuples {
pub fn create_merge_plan(
optimize_type: OptimizeType,
snapshot: &DeltaTableState,
filters: &[PartitionFilter<'_, &str>],
filters: &[PartitionFilter],
target_size: Option<i64>,
writer_properties: WriterProperties,
) -> Result<MergePlan, DeltaTableError> {
Expand Down Expand Up @@ -860,7 +860,7 @@ impl IntoIterator for MergeBin {
fn build_compaction_plan(
snapshot: &DeltaTableState,
partition_keys: &[String],
filters: &[PartitionFilter<'_, &str>],
filters: &[PartitionFilter],
target_size: i64,
) -> Result<(OptimizeOperations, Metrics), DeltaTableError> {
let mut metrics = Metrics::default();
Expand Down Expand Up @@ -923,7 +923,7 @@ fn build_zorder_plan(
zorder_columns: Vec<String>,
snapshot: &DeltaTableState,
partition_keys: &[String],
filters: &[PartitionFilter<'_, &str>],
filters: &[PartitionFilter],
) -> Result<(OptimizeOperations, Metrics), DeltaTableError> {
if zorder_columns.is_empty() {
return Err(DeltaTableError::Generic(
Expand Down
Loading

0 comments on commit 187a58c

Please sign in to comment.