Skip to content

Commit c02ab00

Browse files
ZENOTMECopilot
authored andcommitted
feat: add process delete enrty in snapshot produce (#33)
* support spec id in data file * support proccess delete entry * fullfill partition spec id * fix * fix spelling mistake Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: ZENOTME <st810918843@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent d785038 commit c02ab00

22 files changed

+227
-65
lines changed

crates/iceberg/src/arrow/record_batch_partition_spliter.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ use parquet::arrow::PARQUET_FIELD_ID_META_KEY;
2727

2828
use super::record_batch_projector::RecordBatchProjector;
2929
use crate::arrow::{arrow_struct_to_literal, type_to_arrow_type};
30-
use crate::spec::{Literal, PartitionSpecRef, SchemaRef, Struct, StructType, Type};
30+
use crate::spec::{Literal, PartitionSpec, PartitionSpecRef, SchemaRef, Struct, StructType, Type};
3131
use crate::transform::{create_transform_function, BoxedTransformFunction};
3232
use crate::{Error, ErrorKind, Result};
3333

@@ -186,6 +186,10 @@ impl RecordBatchPartitionSpliter {
186186
})
187187
}
188188

189+
pub(crate) fn partition_spec(&self) -> &PartitionSpec {
190+
self.partition_spec.as_ref()
191+
}
192+
189193
/// Split the record batch into multiple record batches by the partition spec.
190194
pub(crate) fn split(&self, batch: &RecordBatch) -> Result<Vec<(OwnedRow, RecordBatch)>> {
191195
// get array using partition spec

crates/iceberg/src/expr/visitors/expression_evaluator.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,7 @@ mod tests {
346346
split_offsets: vec![],
347347
equality_ids: vec![],
348348
sort_order_id: None,
349+
partition_spec_id: 0,
349350
}
350351
}
351352

@@ -369,6 +370,7 @@ mod tests {
369370
split_offsets: vec![],
370371
equality_ids: vec![],
371372
sort_order_id: None,
373+
partition_spec_id: 0,
372374
}
373375
}
374376

crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1996,6 +1996,7 @@ mod test {
19961996
split_offsets: vec![],
19971997
equality_ids: vec![],
19981998
sort_order_id: None,
1999+
partition_spec_id: 0,
19992000
}
20002001
}
20012002

@@ -2017,6 +2018,7 @@ mod test {
20172018
split_offsets: vec![],
20182019
equality_ids: vec![],
20192020
sort_order_id: None,
2021+
partition_spec_id: 0,
20202022
}
20212023
}
20222024

@@ -2074,6 +2076,7 @@ mod test {
20742076
split_offsets: vec![],
20752077
equality_ids: vec![],
20762078
sort_order_id: None,
2079+
partition_spec_id: 0,
20772080
}
20782081
}
20792082
fn get_test_file_2() -> DataFile {
@@ -2100,6 +2103,7 @@ mod test {
21002103
split_offsets: vec![],
21012104
equality_ids: vec![],
21022105
sort_order_id: None,
2106+
partition_spec_id: 0,
21032107
}
21042108
}
21052109

@@ -2127,6 +2131,7 @@ mod test {
21272131
split_offsets: vec![],
21282132
equality_ids: vec![],
21292133
sort_order_id: None,
2134+
partition_spec_id: 0,
21302135
}
21312136
}
21322137

@@ -2154,6 +2159,7 @@ mod test {
21542159
split_offsets: vec![],
21552160
equality_ids: vec![],
21562161
sort_order_id: None,
2162+
partition_spec_id: 0,
21572163
}
21582164
}
21592165
}

crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,7 @@ mod test {
581581
split_offsets: vec![],
582582
equality_ids: vec![],
583583
sort_order_id: None,
584+
partition_spec_id: 0,
584585
}
585586
}
586587

@@ -602,6 +603,7 @@ mod test {
602603
split_offsets: vec![],
603604
equality_ids: vec![],
604605
sort_order_id: None,
606+
partition_spec_id: 0,
605607
}
606608
}
607609

@@ -623,6 +625,7 @@ mod test {
623625
split_offsets: vec![],
624626
equality_ids: vec![],
625627
sort_order_id: None,
628+
partition_spec_id: 0,
626629
}
627630
}
628631

@@ -645,6 +648,7 @@ mod test {
645648
split_offsets: vec![],
646649
equality_ids: vec![],
647650
sort_order_id: None,
651+
partition_spec_id: 0,
648652
}
649653
}
650654

crates/iceberg/src/io/object_cache.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ mod tests {
277277
.status(ManifestStatus::Added)
278278
.data_file(
279279
DataFileBuilder::default()
280+
.partition_spec_id(0)
280281
.content(DataContentType::Data)
281282
.file_path(format!("{}/1.parquet", &self.table_location))
282283
.file_format(DataFileFormat::Parquet)

crates/iceberg/src/scan.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1480,6 +1480,7 @@ pub mod tests {
14801480
.status(ManifestStatus::Added)
14811481
.data_file(
14821482
DataFileBuilder::default()
1483+
.partition_spec_id(0)
14831484
.content(DataContentType::Data)
14841485
.file_path(format!("{}/1.parquet", &self.table_location))
14851486
.file_format(DataFileFormat::Parquet)
@@ -1502,6 +1503,7 @@ pub mod tests {
15021503
.file_sequence_number(parent_snapshot.sequence_number())
15031504
.data_file(
15041505
DataFileBuilder::default()
1506+
.partition_spec_id(0)
15051507
.content(DataContentType::Data)
15061508
.file_path(format!("{}/2.parquet", &self.table_location))
15071509
.file_format(DataFileFormat::Parquet)
@@ -1523,6 +1525,7 @@ pub mod tests {
15231525
.file_sequence_number(parent_snapshot.sequence_number())
15241526
.data_file(
15251527
DataFileBuilder::default()
1528+
.partition_spec_id(0)
15261529
.content(DataContentType::Data)
15271530
.file_path(format!("{}/3.parquet", &self.table_location))
15281531
.file_format(DataFileFormat::Parquet)
@@ -1693,6 +1696,7 @@ pub mod tests {
16931696
.status(ManifestStatus::Added)
16941697
.data_file(
16951698
DataFileBuilder::default()
1699+
.partition_spec_id(0)
16961700
.content(DataContentType::Data)
16971701
.file_path(format!("{}/1.parquet", &self.table_location))
16981702
.file_format(DataFileFormat::Parquet)
@@ -1716,6 +1720,7 @@ pub mod tests {
17161720
.file_sequence_number(parent_snapshot.sequence_number())
17171721
.data_file(
17181722
DataFileBuilder::default()
1723+
.partition_spec_id(0)
17191724
.content(DataContentType::Data)
17201725
.file_path(format!("{}/2.parquet", &self.table_location))
17211726
.file_format(DataFileFormat::Parquet)
@@ -1738,6 +1743,7 @@ pub mod tests {
17381743
.file_sequence_number(parent_snapshot.sequence_number())
17391744
.data_file(
17401745
DataFileBuilder::default()
1746+
.partition_spec_id(0)
17411747
.content(DataContentType::Data)
17421748
.file_path(format!("{}/3.parquet", &self.table_location))
17431749
.file_format(DataFileFormat::Parquet)

0 commit comments

Comments
 (0)