Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: early stop if all values in arr are null #2764

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions crates/core/src/kernel/snapshot/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ use crate::{DeltaResult, DeltaTableError};

pub(super) fn read_metadata(batch: &dyn ProvidesColumnByName) -> DeltaResult<Option<Metadata>> {
if let Some(arr) = ex::extract_and_cast_opt::<StructArray>(batch, "metaData") {
// Stop early if all values are null
if arr.null_count() == arr.len() {
return Ok(None);
}

let id = ex::extract_and_cast::<StringArray>(arr, "id")?;
let name = ex::extract_and_cast::<StringArray>(arr, "name")?;
let description = ex::extract_and_cast::<StringArray>(arr, "description")?;
Expand Down Expand Up @@ -43,6 +48,11 @@ pub(super) fn read_metadata(batch: &dyn ProvidesColumnByName) -> DeltaResult<Opt

pub(super) fn read_protocol(batch: &dyn ProvidesColumnByName) -> DeltaResult<Option<Protocol>> {
if let Some(arr) = ex::extract_and_cast_opt::<StructArray>(batch, "protocol") {
// Stop early if all values are null
if arr.null_count() == arr.len() {
return Ok(None);
}

let min_reader_version = ex::extract_and_cast::<Int32Array>(arr, "minReaderVersion")?;
let min_writer_version = ex::extract_and_cast::<Int32Array>(arr, "minWriterVersion")?;
let maybe_reader_features = ex::extract_and_cast_opt::<ListArray>(arr, "readerFeatures");
Expand Down Expand Up @@ -138,6 +148,11 @@ pub(super) fn read_cdf_adds(array: &dyn ProvidesColumnByName) -> DeltaResult<Vec
let mut result = Vec::new();

if let Some(arr) = ex::extract_and_cast_opt::<StructArray>(array, "cdc") {
// Stop early if all values are null
if arr.null_count() == arr.len() {
return Ok(result);
}

let path = ex::extract_and_cast::<StringArray>(arr, "path")?;
let pvs = ex::extract_and_cast_opt::<MapArray>(arr, "partitionValues");
let size = ex::extract_and_cast::<Int64Array>(arr, "size")?;
Expand Down Expand Up @@ -171,6 +186,11 @@ pub(super) fn read_removes(array: &dyn ProvidesColumnByName) -> DeltaResult<Vec<
let mut result = Vec::new();

if let Some(arr) = ex::extract_and_cast_opt::<StructArray>(array, "remove") {
// Stop early if all values are null
if arr.null_count() == arr.len() {
return Ok(result);
}

let path = ex::extract_and_cast::<StringArray>(arr, "path")?;
let data_change = ex::extract_and_cast::<BooleanArray>(arr, "dataChange")?;
let deletion_timestamp = ex::extract_and_cast::<Int64Array>(arr, "deletionTimestamp")?;
Expand Down
Loading