From 0c6035b2954b1253c675336ec9d22b1463240225 Mon Sep 17 00:00:00 2001 From: Atef Sawaed Date: Wed, 21 Jun 2023 11:46:48 +0300 Subject: [PATCH 1/7] Make create_add public --- rust/src/writer/mod.rs | 7 +++++-- rust/src/writer/stats.rs | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/rust/src/writer/mod.rs b/rust/src/writer/mod.rs index e7dc5a38c5..0614e728ef 100644 --- a/rust/src/writer/mod.rs +++ b/rust/src/writer/mod.rs @@ -16,7 +16,7 @@ pub use record_batch::RecordBatchWriter; pub mod json; pub mod record_batch; -pub(crate) mod stats; +pub mod stats; pub mod utils; #[cfg(test)] @@ -24,7 +24,7 @@ pub mod test_utils; /// Enum representing an error when calling [`DeltaWriter`]. #[derive(thiserror::Error, Debug)] -pub(crate) enum DeltaWriterError { +pub enum DeltaWriterError { /// Partition column is missing in a record written to delta. #[error("Missing partition column: {0}")] MissingPartitionColumn(String), @@ -58,13 +58,16 @@ pub(crate) enum DeltaWriterError { /// Serialization of delta log statistics failed. #[error("Failed to write statistics value {debug_value} with logical type {logical_type:?}")] StatsParsingFailed { + /// The debug value for this statistics value. debug_value: String, + /// The logical type of this statistics value. logical_type: Option, }, /// JSON serialization failed #[error("Failed to serialize data to JSON: {source}")] JSONSerializationFailed { + /// The wrapped [`serde_json::Error`] #[from] source: serde_json::Error, }, diff --git a/rust/src/writer/stats.rs b/rust/src/writer/stats.rs index 2e0878c6d6..06a25b50b6 100644 --- a/rust/src/writer/stats.rs +++ b/rust/src/writer/stats.rs @@ -1,3 +1,4 @@ +//! Statistics for Delta Table columns. use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; use std::{collections::HashMap, ops::AddAssign}; @@ -13,6 +14,7 @@ use parquet::{ use super::*; use crate::action::{Add, ColumnValueStat, Stats}; +/// Creates an [`Add`] log action struct. pub(crate) fn create_add( partition_values: &HashMap>, path: String, From dcfb0dcf689dc94272e36ea7c2d90ffd105f76fd Mon Sep 17 00:00:00 2001 From: Atef Sawaed Date: Wed, 21 Jun 2023 14:36:49 +0300 Subject: [PATCH 2/7] make create_add public --- rust/src/writer/stats.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/src/writer/stats.rs b/rust/src/writer/stats.rs index 06a25b50b6..e8bbd0ab6e 100644 --- a/rust/src/writer/stats.rs +++ b/rust/src/writer/stats.rs @@ -15,7 +15,7 @@ use super::*; use crate::action::{Add, ColumnValueStat, Stats}; /// Creates an [`Add`] log action struct. -pub(crate) fn create_add( +pub fn create_add( partition_values: &HashMap>, path: String, size: i64, From ecdb50cc69a29dd01b3e1963995eba3632c86f40 Mon Sep 17 00:00:00 2001 From: Atef Sawaed Date: Wed, 21 Jun 2023 17:51:55 +0300 Subject: [PATCH 3/7] use DeltaTableError instead of DeltaWriteError --- rust/src/errors.rs | 6 ++++++ rust/src/writer/stats.rs | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/rust/src/errors.rs b/rust/src/errors.rs index 7b982b3cc3..0186cbd84f 100644 --- a/rust/src/errors.rs +++ b/rust/src/errors.rs @@ -228,6 +228,12 @@ impl From for DeltaTableError { } } +impl From for DeltaTableError { + fn from(value: serde_json::Error) -> Self { + DeltaTableError::InvalidStatsJson { json_err: value } + } +} + impl DeltaTableError { /// Crate a NotATable Error with message for given path. pub fn not_a_table(path: impl AsRef) -> Self { diff --git a/rust/src/writer/stats.rs b/rust/src/writer/stats.rs index e8bbd0ab6e..26912e7bd0 100644 --- a/rust/src/writer/stats.rs +++ b/rust/src/writer/stats.rs @@ -20,7 +20,7 @@ pub fn create_add( path: String, size: i64, file_metadata: &FileMetaData, -) -> Result { +) -> Result { let stats = stats_from_file_metadata(partition_values, file_metadata)?; let stats_string = serde_json::to_string(&stats)?; From ecd7de57c98a258f6d13210b9bdee2bfd3fa030d Mon Sep 17 00:00:00 2001 From: Atef Sawaed Date: Wed, 21 Jun 2023 17:56:01 +0300 Subject: [PATCH 4/7] undo changes in DeltaWriteError --- rust/src/writer/mod.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/rust/src/writer/mod.rs b/rust/src/writer/mod.rs index 0614e728ef..34ebb55cc6 100644 --- a/rust/src/writer/mod.rs +++ b/rust/src/writer/mod.rs @@ -24,7 +24,7 @@ pub mod test_utils; /// Enum representing an error when calling [`DeltaWriter`]. #[derive(thiserror::Error, Debug)] -pub enum DeltaWriterError { +pub(crate) enum DeltaWriterError { /// Partition column is missing in a record written to delta. #[error("Missing partition column: {0}")] MissingPartitionColumn(String), @@ -58,16 +58,13 @@ pub enum DeltaWriterError { /// Serialization of delta log statistics failed. #[error("Failed to write statistics value {debug_value} with logical type {logical_type:?}")] StatsParsingFailed { - /// The debug value for this statistics value. debug_value: String, - /// The logical type of this statistics value. logical_type: Option, }, /// JSON serialization failed #[error("Failed to serialize data to JSON: {source}")] JSONSerializationFailed { - /// The wrapped [`serde_json::Error`] #[from] source: serde_json::Error, }, From f7625dffddf308d7d8f14eef5d8c4c7dae7f13ea Mon Sep 17 00:00:00 2001 From: Atef Sawaed Date: Wed, 21 Jun 2023 23:04:51 +0300 Subject: [PATCH 5/7] cargo fmt --- rust/src/errors.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/src/errors.rs b/rust/src/errors.rs index 0186cbd84f..fed0e823f8 100644 --- a/rust/src/errors.rs +++ b/rust/src/errors.rs @@ -230,7 +230,7 @@ impl From for DeltaTableError { impl From for DeltaTableError { fn from(value: serde_json::Error) -> Self { - DeltaTableError::InvalidStatsJson { json_err: value } + DeltaTableError::InvalidStatsJson { json_err: value } } } From 48a9783ea913284f9c2079040061920619d9fa03 Mon Sep 17 00:00:00 2001 From: Atef Sawaed Date: Thu, 22 Jun 2023 17:05:42 +0300 Subject: [PATCH 6/7] make the stats module private --- rust/src/writer/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rust/src/writer/mod.rs b/rust/src/writer/mod.rs index 34ebb55cc6..74ea353181 100644 --- a/rust/src/writer/mod.rs +++ b/rust/src/writer/mod.rs @@ -13,10 +13,11 @@ use crate::DeltaTable; pub use json::JsonWriter; pub use record_batch::RecordBatchWriter; +pub use stats::create_add; pub mod json; pub mod record_batch; -pub mod stats; +pub(crate) mod stats; pub mod utils; #[cfg(test)] From 355345d011f45b89e19cce89246ec8a26ed6c611 Mon Sep 17 00:00:00 2001 From: Atef Sawaed Date: Sun, 25 Jun 2023 10:54:39 +0300 Subject: [PATCH 7/7] remove file doc --- rust/src/writer/stats.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/rust/src/writer/stats.rs b/rust/src/writer/stats.rs index 26912e7bd0..5db22c7936 100644 --- a/rust/src/writer/stats.rs +++ b/rust/src/writer/stats.rs @@ -1,4 +1,3 @@ -//! Statistics for Delta Table columns. use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; use std::{collections::HashMap, ops::AddAssign};