diff --git a/python/deltalake/table.py b/python/deltalake/table.py index dc7e7a1214..b57cad0213 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -678,6 +678,7 @@ def z_order( partition_filters: Optional[FilterType] = None, target_size: Optional[int] = None, max_concurrent_tasks: Optional[int] = None, + max_spill_size: int = 20 * 1024 * 1024 * 1024, ) -> Dict[str, Any]: """ Reorders the data using a Z-order curve to improve data skipping. @@ -692,10 +693,15 @@ def z_order( :param max_concurrent_tasks: the maximum number of concurrent tasks to use for file compaction. Defaults to number of CPUs. More concurrent tasks can make compaction faster, but will also use more memory. + :param max_spill_size: the maximum number of bytes to spill to disk. Defaults to 20GB. :return: the metrics from optimize """ metrics = self.table._table.z_order_optimize( - list(columns), partition_filters, target_size, max_concurrent_tasks + list(columns), + partition_filters, + target_size, + max_concurrent_tasks, + max_spill_size, ) self.table.update_incremental() return json.loads(metrics) diff --git a/python/src/lib.rs b/python/src/lib.rs index 4b63567f26..ca7247c365 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -291,16 +291,18 @@ impl RawDeltaTable { } /// Run z-order variation of optimize - #[pyo3(signature = (z_order_columns, partition_filters = None, target_size = None, max_concurrent_tasks = None))] + #[pyo3(signature = (z_order_columns, partition_filters = None, target_size = None, max_concurrent_tasks = None, max_spill_size = 20 * 1024 * 1024 * 1024))] pub fn z_order_optimize( &mut self, z_order_columns: Vec, partition_filters: Option>, target_size: Option, max_concurrent_tasks: Option, + max_spill_size: usize, ) -> PyResult { let mut cmd = OptimizeBuilder::new(self._table.object_store(), self._table.state.clone()) .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get)) + .with_max_spill_size(max_spill_size) .with_type(OptimizeType::ZOrder(z_order_columns)); if let Some(size) = target_size { cmd = cmd.with_target_size(size);