Skip to content

Commit

Permalink
fix: use table config target file size
Browse files Browse the repository at this point in the history
  • Loading branch information
ion-elgreco committed Sep 3, 2024
1 parent 4ee4e54 commit b2fc0fb
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 2 deletions.
16 changes: 16 additions & 0 deletions crates/core/src/operations/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,22 @@ pub fn get_num_idx_cols_and_stats_columns(
)
}

/// Get the target_file_size from the table configuration in the sates
/// If table_config does not exist (only can occur in the first write action) it takes
/// the configuration that was passed to the writerBuilder.
pub fn get_target_file_size(
config: &Option<crate::table::config::TableConfig<'_>>,
configuration: &HashMap<String, Option<String>>,
) -> i64 {
match &config {
Some(conf) => conf.target_file_size(),
_ => configuration
.get("delta.targetFileSize")
.and_then(|v| v.clone().map(|v| v.parse::<i64>().unwrap()))
.unwrap_or(crate::table::config::DEFAULT_TARGET_FILE_SIZE),
}
}

#[cfg(feature = "datafusion")]
mod datafusion_utils {
use datafusion::execution::context::SessionState;
Expand Down
7 changes: 5 additions & 2 deletions crates/core/src/operations/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,6 @@ async fn write_execution_plan_with_predicate(
}
_ => checker,
};

// Write data to disk
let mut tasks = vec![];
for i in 0..plan.properties().output_partitioning().partition_count() {
Expand Down Expand Up @@ -977,13 +976,17 @@ impl std::future::IntoFuture for WriteBuilder {
.as_ref()
.map(|snapshot| snapshot.table_config());

let target_file_size = this.target_file_size.or_else(|| {
Some(super::get_target_file_size(&config, &this.configuration) as usize)
});
let (num_indexed_cols, stats_columns) =
super::get_num_idx_cols_and_stats_columns(config, this.configuration);

let writer_stats_config = WriterStatsConfig {
num_indexed_cols,
stats_columns,
};

// Here we need to validate if the new data conforms to a predicate if one is provided
let add_actions = write_execution_plan_with_predicate(
predicate.clone(),
Expand All @@ -992,7 +995,7 @@ impl std::future::IntoFuture for WriteBuilder {
plan.clone(),
partition_columns.clone(),
this.log_store.object_store().clone(),
this.target_file_size,
target_file_size,
this.write_batch_size,
this.writer_properties.clone(),
writer_stats_config.clone(),
Expand Down
2 changes: 2 additions & 0 deletions crates/core/src/table/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,8 @@ pub struct TableConfig<'a>(pub(crate) &'a HashMap<String, Option<String>>);

/// Default num index cols
pub const DEFAULT_NUM_INDEX_COLS: i32 = 32;
/// Default target file size
pub const DEFAULT_TARGET_FILE_SIZE: i64 = 104857600;

impl<'a> TableConfig<'a> {
table_config!(
Expand Down

0 comments on commit b2fc0fb

Please sign in to comment.