diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs index 2a5e2275d415..228f0221933f 100644 --- a/arrow/src/array/data.rs +++ b/arrow/src/array/data.rs @@ -21,14 +21,14 @@ use std::mem; use std::sync::Arc; -use crate::{array::raw_pointer::RawPtrBox, datatypes::{DataType, IntervalUnit}}; +use crate::datatypes::{DataType, IntervalUnit}; use crate::{bitmap::Bitmap, datatypes::ArrowNativeType}; use crate::{ buffer::{Buffer, MutableBuffer}, util::bit_util, }; -use super::{OffsetSizeTrait, equal::equal}; +use super::equal::equal; #[inline] pub(crate) fn count_nulls( @@ -383,18 +383,7 @@ impl ArrayData { pub fn slice(&self, offset: usize, length: usize) -> ArrayData { assert!((offset + length) <= self.len()); - // If data type is primitive, it's quick to clone array - if self.child_data().is_empty() { - let mut new_data = self.clone(); - - new_data.len = length; - new_data.offset = offset + self.offset; - - new_data.null_count = - count_nulls(new_data.null_buffer(), new_data.offset, new_data.len); - - new_data - } else { + if let DataType::Struct(_) = self.data_type() { // Slice into children let new_offset = self.offset + offset; let new_data = ArrayData { @@ -403,33 +392,25 @@ impl ArrayData { null_count: count_nulls(self.null_buffer(), new_offset, length), offset: new_offset, buffers: self.buffers.clone(), - child_data: self.child_data().iter().map(|data| { - match self.data_type() { - DataType::List(_) => { - let (start, end) = get_list_child_slice::( - self.buffers.get(0).unwrap(), - offset, - length - ); - data.slice(start, end - start) - } - DataType::LargeList(_) => { - let (start, end) = get_list_child_slice::( - self.buffers.get(0).unwrap(), - offset, - length - ); - data.slice(start, end - start) - } - _ => { - // All other types don't require computing offsets - data.slice(offset, length) - } - } - }).collect(), + // Slice child data, to propagate offsets down to them + child_data: self + .child_data() + .iter() + .map(|data| data.slice(offset, length)) + .collect(), null_bitmap: self.null_bitmap().clone(), }; + new_data + } else { + let mut new_data = self.clone(); + + new_data.len = length; + new_data.offset = offset + self.offset; + + new_data.null_count = + count_nulls(new_data.null_buffer(), new_data.offset, new_data.len); + new_data } } @@ -505,25 +486,6 @@ impl ArrayData { } } -#[inline] -fn get_list_child_slice( - buffer: &Buffer, - offset: usize, - length: usize -) -> (usize, usize) { - let raw_buffer = buffer.as_ptr(); - let value_offsets: &[OffsetSize] = unsafe { - let value_offsets = RawPtrBox::::new(raw_buffer); - std::slice::from_raw_parts( - value_offsets.as_ptr().add(offset), - length + 1, - ) - }; - let start = value_offsets[0]; - let end = value_offsets[length - 1]; - (start.to_usize().unwrap(), end.to_usize().unwrap()) -} - impl PartialEq for ArrayData { fn eq(&self, other: &Self) -> bool { equal(self, other)