diff --git a/arrow/src/array/array_struct.rs b/arrow/src/array/array_struct.rs
index 59ee527e5f8c..031a1f842ba8 100644
--- a/arrow/src/array/array_struct.rs
+++ b/arrow/src/array/array_struct.rs
@@ -85,12 +85,9 @@ impl From<ArrayData> for StructArray {
     fn from(data: ArrayData) -> Self {
         let mut boxed_fields = vec![];
         for cd in data.child_data() {
-            let child_data = if data.offset() != 0 || data.len() != cd.len() {
-                cd.slice(data.offset(), data.len())
-            } else {
-                cd.clone()
-            };
-            boxed_fields.push(make_array(child_data));
+            // Children are used as-is: this patch moves child slicing into
+            // `ArrayData::slice`, so no offset/length adjustment happens here.
+            boxed_fields.push(make_array(cd.clone()));
         }
         Self { data, boxed_fields }
     }
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 172bdaac9eb6..09d2379257e8 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -21,14 +21,14 @@
 use std::mem;
 use std::sync::Arc;
 
-use crate::datatypes::{DataType, IntervalUnit};
+use crate::{array::raw_pointer::RawPtrBox, datatypes::{DataType, IntervalUnit}};
 use crate::{bitmap::Bitmap, datatypes::ArrowNativeType};
 use crate::{
     buffer::{Buffer, MutableBuffer},
     util::bit_util,
 };
 
-use super::equal::equal;
+use super::{OffsetSizeTrait, equal::equal};
 
 #[inline]
 pub(crate) fn count_nulls(
@@ -385,15 +385,59 @@ impl ArrayData {
     pub fn slice(&self, offset: usize, length: usize) -> ArrayData {
         assert!((offset + length) <= self.len());
 
-        let mut new_data = self.clone();
-
-        new_data.len = length;
-        new_data.offset = offset + self.offset;
-
-        new_data.null_count =
-            count_nulls(new_data.null_buffer(), new_data.offset, new_data.len);
-
-        new_data
+        // Without children only the logical window changes: clone and adjust
+        // `offset`, `len` and the null count.
+        if self.child_data().is_empty() {
+            let mut new_data = self.clone();
+
+            new_data.len = length;
+            new_data.offset = offset + self.offset;
+
+            new_data.null_count =
+                count_nulls(new_data.null_buffer(), new_data.offset, new_data.len);
+
+            new_data
+        } else {
+            // Nested type: slice the children along with the parent.
+            // NOTE(review): the list branches index the offsets buffer with the
+            // caller-relative `offset`, not `new_offset`; confirm this is intended
+            // when `self.offset` is non-zero.
+            let new_offset = self.offset + offset;
+            let new_data = ArrayData {
+                data_type: self.data_type().clone(),
+                len: length,
+                null_count: count_nulls(self.null_buffer(), new_offset, length),
+                offset: new_offset,
+                buffers: self.buffers.clone(),
+                child_data: self.child_data().iter().map(|data| {
+                    match self.data_type() {
+                        DataType::List(_) => {
+                            let (start, end) = get_list_child_slice::<i32>(
+                                self.buffers.get(0).unwrap(),
+                                offset,
+                                length
+                            );
+                            data.slice(start, end - start)
+                        }
+                        DataType::LargeList(_) => {
+                            let (start, end) = get_list_child_slice::<i64>(
+                                self.buffers.get(0).unwrap(),
+                                offset,
+                                length
+                            );
+                            data.slice(start, end - start)
+                        }
+                        _ => {
+                            // All other types don't require computing offsets
+                            data.slice(offset, length)
+                        }
+                    }
+                }).collect(),
+                null_bitmap: self.null_bitmap().clone(),
+            };
+
+            new_data
+        }
     }
 
     /// Returns the `buffer` as a slice of type `T` starting at self.offset
@@ -467,6 +511,37 @@ impl ArrayData {
     }
 }
 
+/// Returns the `(start, end)` range of child values covered by `length` list
+/// slots starting at slot `offset`, read from the list's offsets `buffer`.
+///
+/// `end` is exclusive: it is the offset entry one past the last slot, so an
+/// empty slice (`length == 0`) yields `start == end`.
+///
+/// # Panics
+///
+/// Panics if either offset value cannot be converted to `usize`.
+#[inline]
+fn get_list_child_slice<OffsetSize: OffsetSizeTrait>(
+    buffer: &Buffer,
+    offset: usize,
+    length: usize
+) -> (usize, usize) {
+    let raw_buffer = buffer.as_ptr();
+    // SAFETY (assumed, TODO confirm): `buffer` holds at least
+    // `offset + length + 1` properly aligned `OffsetSize` values.
+    let value_offsets: &[OffsetSize] = unsafe {
+        let value_offsets = RawPtrBox::<OffsetSize>::new(raw_buffer);
+        std::slice::from_raw_parts(
+            value_offsets.as_ptr().add(offset),
+            length + 1,
+        )
+    };
+    let start = value_offsets[0];
+    // The slice has `length + 1` entries; the last one closes the final slot.
+    let end = value_offsets[length];
+    (start.to_usize().unwrap(), end.to_usize().unwrap())
+}
+
 impl PartialEq for ArrayData {
     fn eq(&self, other: &Self) -> bool {
         equal(self, other)