From f873d77bc77847b95921374aa66ba1d38e9cebf8 Mon Sep 17 00:00:00 2001
From: Wakahisa
Date: Wed, 14 Jul 2021 22:15:14 +0200
Subject: [PATCH] make slice work for nested types (#389)

revert changes made in ARROW-11394

See commit https://github.com/apache/arrow-rs/commit/9f965612626cdf31187ce07ba5dbecc3503077b8

Only slice into structs
---
 arrow/src/array/array_struct.rs        |  7 +----
 arrow/src/array/data.rs                | 39 ++++++++++++++++++++------
 arrow/src/array/transform/structure.rs | 13 ++++-----
 3 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/arrow/src/array/array_struct.rs b/arrow/src/array/array_struct.rs
index 0e7304e80a46..7dc2b2d957cd 100644
--- a/arrow/src/array/array_struct.rs
+++ b/arrow/src/array/array_struct.rs
@@ -84,12 +84,7 @@ impl From<ArrayData> for StructArray {
     fn from(data: ArrayData) -> Self {
         let mut boxed_fields = vec![];
         for cd in data.child_data() {
-            let child_data = if data.offset() != 0 || data.len() != cd.len() {
-                cd.slice(data.offset(), data.len())
-            } else {
-                cd.clone()
-            };
-            boxed_fields.push(make_array(child_data));
+            boxed_fields.push(make_array(cd.clone()));
         }
         Self { data, boxed_fields }
     }
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 8528f7aeda87..228f0221933f 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -383,15 +383,36 @@ impl ArrayData {
     pub fn slice(&self, offset: usize, length: usize) -> ArrayData {
         assert!((offset + length) <= self.len());
 
-        let mut new_data = self.clone();
-
-        new_data.len = length;
-        new_data.offset = offset + self.offset;
-
-        new_data.null_count =
-            count_nulls(new_data.null_buffer(), new_data.offset, new_data.len);
-
-        new_data
+        if let DataType::Struct(_) = self.data_type() {
+            // Slice into children
+            let new_offset = self.offset + offset;
+            let new_data = ArrayData {
+                data_type: self.data_type().clone(),
+                len: length,
+                null_count: count_nulls(self.null_buffer(), new_offset, length),
+                offset: new_offset,
+                buffers: self.buffers.clone(),
+                // Slice child data, to propagate offsets down to them
+                child_data: self
+                    .child_data()
+                    .iter()
+                    .map(|data| data.slice(offset, length))
+                    .collect(),
+                null_bitmap: self.null_bitmap().clone(),
+            };
+
+            new_data
+        } else {
+            let mut new_data = self.clone();
+
+            new_data.len = length;
+            new_data.offset = offset + self.offset;
+
+            new_data.null_count =
+                count_nulls(new_data.null_buffer(), new_data.offset, new_data.len);
+
+            new_data
+        }
     }
 
     /// Returns the `buffer` as a slice of type `T` starting at self.offset
diff --git a/arrow/src/array/transform/structure.rs b/arrow/src/array/transform/structure.rs
index c019f5ac6a93..5c41d76a7f1c 100644
--- a/arrow/src/array/transform/structure.rs
+++ b/arrow/src/array/transform/structure.rs
@@ -26,13 +26,10 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend {
                   index: usize,
                   start: usize,
                   len: usize| {
-                mutable.child_data.iter_mut().for_each(|child| {
-                    child.extend(
-                        index,
-                        array.offset() + start,
-                        array.offset() + start + len,
-                    )
-                })
+                mutable
+                    .child_data
+                    .iter_mut()
+                    .for_each(|child| child.extend(index, start, start + len))
             },
         )
     } else {
@@ -41,7 +38,7 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend {
                   index: usize,
                   start: usize,
                   len: usize| {
-                (array.offset() + start..array.offset() + start + len).for_each(|i| {
+                (start..start + len).for_each(|i| {
                     if array.is_valid(i) {
                         mutable
                             .child_data