diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index e924824e75ea..89fdca507b00 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -18,12 +18,14 @@ use crate::array::print_long_array; use crate::builder::BooleanBuilder; use crate::iterator::BooleanIter; -use crate::{Array, ArrayAccessor}; +use crate::{Array, ArrayAccessor, ArrayRef}; +use arrow_buffer::buffer::NullBuffer; use arrow_buffer::{bit_util, Buffer, MutableBuffer}; use arrow_data::bit_mask::combine_option_bitmap; use arrow_data::ArrayData; use arrow_schema::DataType; use std::any::Any; +use std::sync::Arc; /// Array of bools /// @@ -265,9 +267,22 @@ impl Array for BooleanArray { &self.data } + fn to_data(&self) -> ArrayData { + self.data.clone() + } + fn into_data(self) -> ArrayData { self.into() } + + fn slice(&self, offset: usize, length: usize) -> ArrayRef { + // TODO: Slice buffers directly (#3880) + Arc::new(Self::from(self.data.slice(offset, length))) + } + + fn nulls(&self) -> Option<&NullBuffer> { + self.data.nulls() + } } impl<'a> ArrayAccessor for &'a BooleanArray { diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs index 442e795cec52..81c5824a5e04 100644 --- a/arrow-array/src/array/byte_array.rs +++ b/arrow-array/src/array/byte_array.rs @@ -20,12 +20,13 @@ use crate::builder::GenericByteBuilder; use crate::iterator::ArrayIter; use crate::types::bytes::ByteArrayNativeType; use crate::types::ByteArrayType; -use crate::{Array, ArrayAccessor, OffsetSizeTrait}; -use arrow_buffer::buffer::OffsetBuffer; +use crate::{Array, ArrayAccessor, ArrayRef, OffsetSizeTrait}; +use arrow_buffer::buffer::{NullBuffer, OffsetBuffer}; use arrow_buffer::{ArrowNativeType, Buffer}; use arrow_data::ArrayData; use arrow_schema::DataType; use std::any::Any; +use std::sync::Arc; /// Generic struct for variable-size byte arrays /// @@ -237,9 +238,22 @@ impl Array for GenericByteArray { &self.data } + fn to_data(&self) -> ArrayData { + self.data.clone() + } + fn into_data(self) -> ArrayData { self.into() } + + fn slice(&self, offset: usize, length: usize) -> ArrayRef { + // TODO: Slice buffers directly (#3880) + Arc::new(Self::from(self.data.slice(offset, length))) + } + + fn nulls(&self) -> Option<&NullBuffer> { + self.data.nulls() + } } impl<'a, T: ByteArrayType> ArrayAccessor for &'a GenericByteArray { diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs index f9a40c6f3400..ee58a485c71c 100644 --- a/arrow-array/src/array/dictionary_array.rs +++ b/arrow-array/src/array/dictionary_array.rs @@ -23,10 +23,12 @@ use crate::{ make_array, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, PrimitiveArray, StringArray, }; +use arrow_buffer::buffer::NullBuffer; use arrow_buffer::ArrowNativeType; use arrow_data::ArrayData; use arrow_schema::{ArrowError, DataType}; use std::any::Any; +use std::sync::Arc; /// /// A dictionary array where each element is a single value indexed by an integer key. @@ -590,9 +592,22 @@ impl Array for DictionaryArray { &self.data } + fn to_data(&self) -> ArrayData { + self.data.clone() + } + fn into_data(self) -> ArrayData { self.into() } + + fn slice(&self, offset: usize, length: usize) -> ArrayRef { + // TODO: Slice buffers directly (#3880) + Arc::new(Self::from(self.data.slice(offset, length))) + } + + fn nulls(&self) -> Option<&NullBuffer> { + self.data.nulls() + } } impl std::fmt::Debug for DictionaryArray { @@ -669,9 +684,21 @@ impl<'a, K: ArrowDictionaryKeyType, V: Sync> Array for TypedDictionaryArray<'a, &self.dictionary.data } + fn to_data(&self) -> ArrayData { + self.dictionary.to_data() + } + fn into_data(self) -> ArrayData { self.dictionary.into_data() } + + fn slice(&self, offset: usize, length: usize) -> ArrayRef { + self.dictionary.slice(offset, length) + } + + fn nulls(&self) -> Option<&NullBuffer> { + self.dictionary.nulls() + } } impl<'a, K, V> IntoIterator for TypedDictionaryArray<'a, K, V> diff --git a/arrow-array/src/array/fixed_size_binary_array.rs b/arrow-array/src/array/fixed_size_binary_array.rs index 87f1b955723d..062961a20abb 100644 --- a/arrow-array/src/array/fixed_size_binary_array.rs +++ b/arrow-array/src/array/fixed_size_binary_array.rs @@ -17,11 +17,13 @@ use crate::array::print_long_array; use crate::iterator::FixedSizeBinaryIter; -use crate::{Array, ArrayAccessor, FixedSizeListArray}; +use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray}; +use arrow_buffer::buffer::NullBuffer; use arrow_buffer::{bit_util, Buffer, MutableBuffer}; use arrow_data::ArrayData; use arrow_schema::{ArrowError, DataType}; use std::any::Any; +use std::sync::Arc; /// An array where each element is a fixed-size sequence of bytes. /// @@ -462,9 +464,22 @@ impl Array for FixedSizeBinaryArray { &self.data } + fn to_data(&self) -> ArrayData { + self.data.clone() + } + fn into_data(self) -> ArrayData { self.into() } + + fn slice(&self, offset: usize, length: usize) -> ArrayRef { + // TODO: Slice buffers directly (#3880) + Arc::new(Self::from(self.data.slice(offset, length))) + } + + fn nulls(&self) -> Option<&NullBuffer> { + self.data.nulls() + } } impl<'a> ArrayAccessor for &'a FixedSizeBinaryArray { diff --git a/arrow-array/src/array/fixed_size_list_array.rs b/arrow-array/src/array/fixed_size_list_array.rs index 6e228ba3c770..7d65927cdeec 100644 --- a/arrow-array/src/array/fixed_size_list_array.rs +++ b/arrow-array/src/array/fixed_size_list_array.rs @@ -18,9 +18,11 @@ use crate::array::print_long_array; use crate::builder::{FixedSizeListBuilder, PrimitiveBuilder}; use crate::{make_array, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType}; +use arrow_buffer::buffer::NullBuffer; use arrow_data::ArrayData; use arrow_schema::DataType; use std::any::Any; +use std::sync::Arc; /// A list array where each element is a fixed-size sequence of values with the same /// type whose maximum length is represented by a i32. @@ -205,9 +207,22 @@ impl Array for FixedSizeListArray { &self.data } + fn to_data(&self) -> ArrayData { + self.data.clone() + } + fn into_data(self) -> ArrayData { self.into() } + + fn slice(&self, offset: usize, length: usize) -> ArrayRef { + // TODO: Slice buffers directly (#3880) + Arc::new(Self::from(self.data.slice(offset, length))) + } + + fn nulls(&self) -> Option<&NullBuffer> { + self.data.nulls() + } } impl ArrayAccessor for FixedSizeListArray { diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 178139f810e7..203b98d2fca5 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -20,12 +20,13 @@ use crate::builder::{GenericListBuilder, PrimitiveBuilder}; use crate::{ iterator::GenericListArrayIter, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, }; -use arrow_buffer::buffer::OffsetBuffer; +use arrow_buffer::buffer::{NullBuffer, OffsetBuffer}; use arrow_buffer::ArrowNativeType; use arrow_data::ArrayData; use arrow_schema::{ArrowError, DataType, Field}; use num::Integer; use std::any::Any; +use std::sync::Arc; /// trait declaring an offset size, relevant for i32 vs i64 array types. pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer { @@ -244,9 +245,22 @@ impl Array for GenericListArray { &self.data } + fn to_data(&self) -> ArrayData { + self.data.clone() + } + fn into_data(self) -> ArrayData { self.into() } + + fn slice(&self, offset: usize, length: usize) -> ArrayRef { + // TODO: Slice buffers directly (#3880) + Arc::new(Self::from(self.data.slice(offset, length))) + } + + fn nulls(&self) -> Option<&NullBuffer> { + self.data.nulls() + } } impl<'a, OffsetSize: OffsetSizeTrait> ArrayAccessor for &'a GenericListArray { diff --git a/arrow-array/src/array/map_array.rs b/arrow-array/src/array/map_array.rs index 8c9b02921781..de9e3a87396c 100644 --- a/arrow-array/src/array/map_array.rs +++ b/arrow-array/src/array/map_array.rs @@ -17,7 +17,7 @@ use crate::array::{get_offsets, print_long_array}; use crate::{make_array, Array, ArrayRef, StringArray, StructArray}; -use arrow_buffer::buffer::OffsetBuffer; +use arrow_buffer::buffer::{NullBuffer, OffsetBuffer}; use arrow_buffer::{ArrowNativeType, Buffer, ToByteSlice}; use arrow_data::ArrayData; use arrow_schema::{ArrowError, DataType, Field}; @@ -214,10 +214,23 @@ impl Array for MapArray { &self.data } + fn to_data(&self) -> ArrayData { + self.data.clone() + } + fn into_data(self) -> ArrayData { self.into() } + fn slice(&self, offset: usize, length: usize) -> ArrayRef { + // TODO: Slice buffers directly (#3880) + Arc::new(Self::from(self.data.slice(offset, length))) + } + + fn nulls(&self) -> Option<&NullBuffer> { + self.data.nulls() + } + /// Returns the total number of bytes of memory occupied by the buffers owned by this [MapArray]. fn get_buffer_memory_size(&self) -> usize { self.data.get_buffer_memory_size() diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index dfdaac85bf85..048f41b73a85 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -20,7 +20,7 @@ mod binary_array; use crate::types::*; -use arrow_buffer::buffer::{OffsetBuffer, ScalarBuffer}; +use arrow_buffer::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer}; use arrow_buffer::ArrowNativeType; use arrow_data::ArrayData; use arrow_schema::{DataType, IntervalUnit, TimeUnit}; @@ -96,12 +96,19 @@ pub trait Array: std::fmt::Debug + Send + Sync { fn as_any(&self) -> &dyn Any; /// Returns a reference to the underlying data of this array. + /// + /// This will be deprecated in a future release [(#3880)](https://github.com/apache/arrow-rs/issues/3880) fn data(&self) -> &ArrayData; + /// Returns the underlying data of this array. + fn to_data(&self) -> ArrayData; + /// Returns the underlying data of this array. fn into_data(self) -> ArrayData; /// Returns a reference-counted pointer to the underlying data of this array. + /// + /// This will be deprecated in a future release [(#3880)](https://github.com/apache/arrow-rs/issues/3880) fn data_ref(&self) -> &ArrayData { self.data() } @@ -135,9 +142,7 @@ pub trait Array: std::fmt::Debug + Send + Sync { /// /// assert_eq!(array_slice.as_ref(), &Int32Array::from(vec![2, 3, 4])); /// ``` - fn slice(&self, offset: usize, length: usize) -> ArrayRef { - make_array(self.data_ref().slice(offset, length)) - } + fn slice(&self, offset: usize, length: usize) -> ArrayRef; /// Returns the length (i.e., number of elements) of this array. /// @@ -189,6 +194,9 @@ pub trait Array: std::fmt::Debug + Send + Sync { self.data_ref().offset() } + /// Returns the null buffers of this array if any + fn nulls(&self) -> Option<&NullBuffer>; + /// Returns whether the element at `index` is null. /// When using this function on a slice, the index is relative to the slice. /// @@ -203,7 +211,7 @@ pub trait Array: std::fmt::Debug + Send + Sync { /// assert_eq!(array.is_null(1), true); /// ``` fn is_null(&self, index: usize) -> bool { - self.data_ref().is_null(index) + self.nulls().map(|n| n.is_null(index)).unwrap_or_default() } /// Returns whether the element at `index` is not null. @@ -220,7 +228,7 @@ pub trait Array: std::fmt::Debug + Send + Sync { /// assert_eq!(array.is_valid(1), false); /// ``` fn is_valid(&self, index: usize) -> bool { - self.data_ref().is_valid(index) + !self.is_null(index) } /// Returns the total number of null values in this array. @@ -236,7 +244,7 @@ pub trait Array: std::fmt::Debug + Send + Sync { /// assert_eq!(array.null_count(), 2); /// ``` fn null_count(&self) -> usize { - self.data_ref().null_count() + self.nulls().map(|n| n.null_count()).unwrap_or_default() } /// Returns the total number of bytes of memory pointed to by this array. @@ -269,6 +277,10 @@ impl Array for ArrayRef { self.as_ref().data() } + fn to_data(&self) -> ArrayData { + self.as_ref().to_data() + } + fn into_data(self) -> ArrayData { self.data().clone() } @@ -297,6 +309,10 @@ impl Array for ArrayRef { self.as_ref().offset() } + fn nulls(&self) -> Option<&NullBuffer> { + self.as_ref().nulls() + } + fn is_null(&self, index: usize) -> bool { self.as_ref().is_null(index) } @@ -327,6 +343,10 @@ impl<'a, T: Array> Array for &'a T { T::data(self) } + fn to_data(&self) -> ArrayData { + T::to_data(self) + } + fn into_data(self) -> ArrayData { self.data().clone() } @@ -355,6 +375,10 @@ impl<'a, T: Array> Array for &'a T { T::offset(self) } + fn nulls(&self) -> Option<&NullBuffer> { + T::nulls(self) + } + fn is_null(&self, index: usize) -> bool { T::is_null(self, index) } diff --git a/arrow-array/src/array/null_array.rs b/arrow-array/src/array/null_array.rs index 8eb8e64b0eda..fba6e41e871d 100644 --- a/arrow-array/src/array/null_array.rs +++ b/arrow-array/src/array/null_array.rs @@ -17,10 +17,12 @@ //! Contains the `NullArray` type. -use crate::Array; +use crate::{Array, ArrayRef}; +use arrow_buffer::buffer::NullBuffer; use arrow_data::ArrayData; use arrow_schema::DataType; use std::any::Any; +use std::sync::Arc; /// An Array where all elements are nulls /// @@ -63,10 +65,23 @@ impl Array for NullArray { &self.data } + fn to_data(&self) -> ArrayData { + self.data.clone() + } + fn into_data(self) -> ArrayData { self.into() } + fn slice(&self, offset: usize, length: usize) -> ArrayRef { + // TODO: Slice buffers directly (#3880) + Arc::new(Self::from(self.data.slice(offset, length))) + } + + fn nulls(&self) -> Option<&NullBuffer> { + None + } + /// Returns whether the element at `index` is null. /// All elements of a `NullArray` are always null. fn is_null(&self, _index: usize) -> bool { diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 408f0c4ae96a..dd1ce4f1473f 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -23,9 +23,9 @@ use crate::temporal_conversions::{ }; use crate::timezone::Tz; use crate::trusted_len::trusted_len_unzip; -use crate::{types::*, ArrowNativeTypeOp}; +use crate::{types::*, ArrayRef, ArrowNativeTypeOp}; use crate::{Array, ArrayAccessor}; -use arrow_buffer::buffer::ScalarBuffer; +use arrow_buffer::buffer::{NullBuffer, ScalarBuffer}; use arrow_buffer::{i256, ArrowNativeType, Buffer}; use arrow_data::bit_iterator::try_for_each_valid_idx; use arrow_data::ArrayData; @@ -33,6 +33,7 @@ use arrow_schema::{ArrowError, DataType}; use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime}; use half::f16; use std::any::Any; +use std::sync::Arc; /// /// # Example: Using `collect` @@ -697,9 +698,22 @@ impl Array for PrimitiveArray { &self.data } + fn to_data(&self) -> ArrayData { + self.data.clone() + } + fn into_data(self) -> ArrayData { self.into() } + + fn slice(&self, offset: usize, length: usize) -> ArrayRef { + // TODO: Slice buffers directly (#3880) + Arc::new(Self::from(self.data.slice(offset, length))) + } + + fn nulls(&self) -> Option<&NullBuffer> { + self.data.nulls() + } } impl<'a, T: ArrowPrimitiveType> ArrayAccessor for &'a PrimitiveArray { diff --git a/arrow-array/src/array/run_array.rs b/arrow-array/src/array/run_array.rs index e50903f30f9b..2f69e5a2472a 100644 --- a/arrow-array/src/array/run_array.rs +++ b/arrow-array/src/array/run_array.rs @@ -16,8 +16,9 @@ // under the License. use std::any::Any; +use std::sync::Arc; -use arrow_buffer::buffer::RunEndBuffer; +use arrow_buffer::buffer::{NullBuffer, RunEndBuffer}; use arrow_buffer::ArrowNativeType; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{ArrowError, DataType, Field}; @@ -288,9 +289,22 @@ impl Array for RunArray { &self.data } + fn to_data(&self) -> ArrayData { + self.data.clone() + } + fn into_data(self) -> ArrayData { self.into() } + + fn slice(&self, offset: usize, length: usize) -> ArrayRef { + // TODO: Slice buffers directly (#3880) + Arc::new(Self::from(self.data.slice(offset, length))) + } + + fn nulls(&self) -> Option<&NullBuffer> { + None + } } impl std::fmt::Debug for RunArray { @@ -473,9 +487,21 @@ impl<'a, R: RunEndIndexType, V: Sync> Array for TypedRunArray<'a, R, V> { &self.run_array.data } + fn to_data(&self) -> ArrayData { + self.run_array.to_data() + } + fn into_data(self) -> ArrayData { self.run_array.into_data() } + + fn slice(&self, offset: usize, length: usize) -> ArrayRef { + self.run_array.slice(offset, length) + } + + fn nulls(&self) -> Option<&NullBuffer> { + self.run_array.nulls() + } } // Array accessor converts the index of logical array to the index of the physical array diff --git a/arrow-array/src/array/struct_array.rs b/arrow-array/src/array/struct_array.rs index 35d4444e0117..34d9d0db5117 100644 --- a/arrow-array/src/array/struct_array.rs +++ b/arrow-array/src/array/struct_array.rs @@ -16,10 +16,11 @@ // under the License. use crate::{make_array, Array, ArrayRef}; -use arrow_buffer::buffer::buffer_bin_or; +use arrow_buffer::buffer::{buffer_bin_or, NullBuffer}; use arrow_buffer::Buffer; use arrow_data::ArrayData; use arrow_schema::{ArrowError, DataType, Field}; +use std::sync::Arc; use std::{any::Any, ops::Index}; /// A nested array type where each child (called *field*) is represented by a separate @@ -196,13 +197,21 @@ impl Array for StructArray { &self.data } + fn to_data(&self) -> ArrayData { + self.data.clone() + } + fn into_data(self) -> ArrayData { self.into() } - /// Returns the length (i.e., number of elements) of this array - fn len(&self) -> usize { - self.data_ref().len() + fn slice(&self, offset: usize, length: usize) -> ArrayRef { + // TODO: Slice buffers directly (#3880) + Arc::new(Self::from(self.data.slice(offset, length))) + } + + fn nulls(&self) -> Option<&NullBuffer> { + self.data.nulls() } } diff --git a/arrow-array/src/array/union_array.rs b/arrow-array/src/array/union_array.rs index 867eb8d59fde..5a4d2af7ca45 100644 --- a/arrow-array/src/array/union_array.rs +++ b/arrow-array/src/array/union_array.rs @@ -16,12 +16,14 @@ // under the License. use crate::{make_array, Array, ArrayRef}; +use arrow_buffer::buffer::NullBuffer; use arrow_buffer::Buffer; use arrow_data::ArrayData; use arrow_schema::{ArrowError, DataType, Field, UnionMode}; /// Contains the `UnionArray` type. /// use std::any::Any; +use std::sync::Arc; /// An Array that can represent slots of varying types. /// @@ -317,10 +319,22 @@ impl Array for UnionArray { &self.data } + fn to_data(&self) -> ArrayData { + self.data.clone() + } + fn into_data(self) -> ArrayData { self.into() } + fn slice(&self, offset: usize, length: usize) -> ArrayRef { + Arc::new(Self::from(self.data.slice(offset, length))) + } + + fn nulls(&self) -> Option<&NullBuffer> { + None + } + /// Union types always return non null as there is no validity buffer. /// To check validity correctly you must check the underlying vector. fn is_null(&self, _index: usize) -> bool {