Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Array::to_data and Array::nulls (apache#3880)
Browse files Browse the repository at this point in the history
tustvold committed Mar 17, 2023
1 parent 0df2188 commit 03e83c8
Showing 13 changed files with 236 additions and 21 deletions.
17 changes: 16 additions & 1 deletion arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
@@ -18,12 +18,14 @@
use crate::array::print_long_array;
use crate::builder::BooleanBuilder;
use crate::iterator::BooleanIter;
use crate::{Array, ArrayAccessor};
use crate::{Array, ArrayAccessor, ArrayRef};
use arrow_buffer::buffer::NullBuffer;
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::bit_mask::combine_option_bitmap;
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;

/// Array of bools
///
@@ -265,9 +267,22 @@ impl Array for BooleanArray {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

/// Consumes the array and converts it into [`ArrayData`] through this type's
/// `Into<ArrayData>` conversion.
fn into_data(self) -> ArrayData {
    Into::<ArrayData>::into(self)
}

/// Slices the underlying [`ArrayData`] with `offset` and `length`, rebuilds the
/// array from the result and returns it as an [`ArrayRef`].
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
    // TODO: Slice buffers directly (#3880)
    let sliced = self.data.slice(offset, length);
    Arc::new(Self::from(sliced))
}

fn nulls(&self) -> Option<&NullBuffer> {
self.data.nulls()
}
}

impl<'a> ArrayAccessor for &'a BooleanArray {
18 changes: 16 additions & 2 deletions arrow-array/src/array/byte_array.rs
Original file line number Diff line number Diff line change
@@ -20,12 +20,13 @@ use crate::builder::GenericByteBuilder;
use crate::iterator::ArrayIter;
use crate::types::bytes::ByteArrayNativeType;
use crate::types::ByteArrayType;
use crate::{Array, ArrayAccessor, OffsetSizeTrait};
use arrow_buffer::buffer::OffsetBuffer;
use crate::{Array, ArrayAccessor, ArrayRef, OffsetSizeTrait};
use arrow_buffer::buffer::{NullBuffer, OffsetBuffer};
use arrow_buffer::{ArrowNativeType, Buffer};
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;

/// Generic struct for variable-size byte arrays
///
@@ -237,9 +238,22 @@ impl<T: ByteArrayType> Array for GenericByteArray<T> {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

/// Consumes the byte array and converts it into [`ArrayData`] via its
/// `Into<ArrayData>` conversion.
fn into_data(self) -> ArrayData {
    Into::<ArrayData>::into(self)
}

/// Slices the underlying [`ArrayData`] with `offset` and `length` and returns
/// the array rebuilt from it as an [`ArrayRef`].
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
    // TODO: Slice buffers directly (#3880)
    let window = self.data.slice(offset, length);
    Arc::new(Self::from(window))
}

fn nulls(&self) -> Option<&NullBuffer> {
self.data.nulls()
}
}

impl<'a, T: ByteArrayType> ArrayAccessor for &'a GenericByteArray<T> {
27 changes: 27 additions & 0 deletions arrow-array/src/array/dictionary_array.rs
Original file line number Diff line number Diff line change
@@ -23,10 +23,12 @@ use crate::{
make_array, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, PrimitiveArray,
StringArray,
};
use arrow_buffer::buffer::NullBuffer;
use arrow_buffer::ArrowNativeType;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::sync::Arc;

///
/// A dictionary array where each element is a single value indexed by an integer key.
@@ -590,9 +592,22 @@ impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

/// Consumes the dictionary array and converts it into [`ArrayData`] via its
/// `Into<ArrayData>` conversion.
fn into_data(self) -> ArrayData {
    Into::<ArrayData>::into(self)
}

/// Slices the underlying [`ArrayData`] with `offset` and `length` and returns
/// the dictionary array rebuilt from it as an [`ArrayRef`].
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
    // TODO: Slice buffers directly (#3880)
    let sliced = self.data.slice(offset, length);
    Arc::new(Self::from(sliced))
}

fn nulls(&self) -> Option<&NullBuffer> {
self.data.nulls()
}
}

impl<T: ArrowDictionaryKeyType> std::fmt::Debug for DictionaryArray<T> {
@@ -669,9 +684,21 @@ impl<'a, K: ArrowDictionaryKeyType, V: Sync> Array for TypedDictionaryArray<'a,
&self.dictionary.data
}

/// Forwards to `to_data` on the wrapped dictionary array.
fn to_data(&self) -> ArrayData {
self.dictionary.to_data()
}

/// Consumes this typed view and forwards to `into_data` on the wrapped
/// dictionary array.
fn into_data(self) -> ArrayData {
self.dictionary.into_data()
}

/// Forwards to `slice` on the wrapped dictionary array, passing `offset` and
/// `length` through unchanged.
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
self.dictionary.slice(offset, length)
}

/// Forwards to `nulls` on the wrapped dictionary array.
fn nulls(&self) -> Option<&NullBuffer> {
self.dictionary.nulls()
}
}

impl<'a, K, V> IntoIterator for TypedDictionaryArray<'a, K, V>
17 changes: 16 additions & 1 deletion arrow-array/src/array/fixed_size_binary_array.rs
Original file line number Diff line number Diff line change
@@ -17,11 +17,13 @@

use crate::array::print_long_array;
use crate::iterator::FixedSizeBinaryIter;
use crate::{Array, ArrayAccessor, FixedSizeListArray};
use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray};
use arrow_buffer::buffer::NullBuffer;
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::sync::Arc;

/// An array where each element is a fixed-size sequence of bytes.
///
@@ -462,9 +464,22 @@ impl Array for FixedSizeBinaryArray {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

/// Consumes the array and converts it into [`ArrayData`] via its
/// `Into<ArrayData>` conversion.
fn into_data(self) -> ArrayData {
    Into::<ArrayData>::into(self)
}

/// Slices the underlying [`ArrayData`] with `offset` and `length` and returns
/// the array rebuilt from it as an [`ArrayRef`].
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
    // TODO: Slice buffers directly (#3880)
    let window = self.data.slice(offset, length);
    Arc::new(Self::from(window))
}

fn nulls(&self) -> Option<&NullBuffer> {
self.data.nulls()
}
}

impl<'a> ArrayAccessor for &'a FixedSizeBinaryArray {
15 changes: 15 additions & 0 deletions arrow-array/src/array/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
@@ -18,9 +18,11 @@
use crate::array::print_long_array;
use crate::builder::{FixedSizeListBuilder, PrimitiveBuilder};
use crate::{make_array, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType};
use arrow_buffer::buffer::NullBuffer;
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;

/// A list array where each element is a fixed-size sequence of values with the same
/// type whose maximum length is represented by an i32.
@@ -205,9 +207,22 @@ impl Array for FixedSizeListArray {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

/// Consumes the array and converts it into [`ArrayData`] via its
/// `Into<ArrayData>` conversion.
fn into_data(self) -> ArrayData {
    Into::<ArrayData>::into(self)
}

/// Slices the underlying [`ArrayData`] with `offset` and `length` and returns
/// the array rebuilt from it as an [`ArrayRef`].
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
    // TODO: Slice buffers directly (#3880)
    let sliced = self.data.slice(offset, length);
    Arc::new(Self::from(sliced))
}

fn nulls(&self) -> Option<&NullBuffer> {
self.data.nulls()
}
}

impl ArrayAccessor for FixedSizeListArray {
16 changes: 15 additions & 1 deletion arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
@@ -20,12 +20,13 @@ use crate::builder::{GenericListBuilder, PrimitiveBuilder};
use crate::{
iterator::GenericListArrayIter, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType,
};
use arrow_buffer::buffer::OffsetBuffer;
use arrow_buffer::buffer::{NullBuffer, OffsetBuffer};
use arrow_buffer::ArrowNativeType;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType, Field};
use num::Integer;
use std::any::Any;
use std::sync::Arc;

/// trait declaring an offset size, relevant for i32 vs i64 array types.
pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
@@ -244,9 +245,22 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

/// Consumes the list array and converts it into [`ArrayData`] via its
/// `Into<ArrayData>` conversion.
fn into_data(self) -> ArrayData {
    Into::<ArrayData>::into(self)
}

/// Slices the underlying [`ArrayData`] with `offset` and `length` and returns
/// the list array rebuilt from it as an [`ArrayRef`].
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
    // TODO: Slice buffers directly (#3880)
    let window = self.data.slice(offset, length);
    Arc::new(Self::from(window))
}

fn nulls(&self) -> Option<&NullBuffer> {
self.data.nulls()
}
}

impl<'a, OffsetSize: OffsetSizeTrait> ArrayAccessor for &'a GenericListArray<OffsetSize> {
15 changes: 14 additions & 1 deletion arrow-array/src/array/map_array.rs
Original file line number Diff line number Diff line change
@@ -17,7 +17,7 @@

use crate::array::{get_offsets, print_long_array};
use crate::{make_array, Array, ArrayRef, StringArray, StructArray};
use arrow_buffer::buffer::OffsetBuffer;
use arrow_buffer::buffer::{NullBuffer, OffsetBuffer};
use arrow_buffer::{ArrowNativeType, Buffer, ToByteSlice};
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType, Field};
@@ -214,10 +214,23 @@ impl Array for MapArray {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

/// Consumes the map array and converts it into [`ArrayData`] via its
/// `Into<ArrayData>` conversion.
fn into_data(self) -> ArrayData {
    Into::<ArrayData>::into(self)
}

/// Slices the underlying [`ArrayData`] with `offset` and `length` and returns
/// the map array rebuilt from it as an [`ArrayRef`].
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
    // TODO: Slice buffers directly (#3880)
    let sliced = self.data.slice(offset, length);
    Arc::new(Self::from(sliced))
}

fn nulls(&self) -> Option<&NullBuffer> {
self.data.nulls()
}

/// Returns the total number of bytes of memory occupied by the buffers owned by this [MapArray].
fn get_buffer_memory_size(&self) -> usize {
self.data.get_buffer_memory_size()
38 changes: 31 additions & 7 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
@@ -20,7 +20,7 @@
mod binary_array;

use crate::types::*;
use arrow_buffer::buffer::{OffsetBuffer, ScalarBuffer};
use arrow_buffer::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
use arrow_buffer::ArrowNativeType;
use arrow_data::ArrayData;
use arrow_schema::{DataType, IntervalUnit, TimeUnit};
@@ -96,12 +96,19 @@ pub trait Array: std::fmt::Debug + Send + Sync {
fn as_any(&self) -> &dyn Any;

/// Returns a reference to the underlying data of this array.
///
/// Use [`Array::to_data`] when an owned [`ArrayData`] is needed.
///
/// This will be deprecated in a future release [(#3880)](https://github.com/apache/arrow-rs/issues/3880)
fn data(&self) -> &ArrayData;

/// Returns the underlying data of this array as an owned [`ArrayData`].
///
/// Takes `&self`, so the array remains usable afterwards.
fn to_data(&self) -> ArrayData;

/// Consumes this array and returns its underlying data as an owned
/// [`ArrayData`].
fn into_data(self) -> ArrayData;

/// Returns a reference-counted pointer to the underlying data of this array.
///
/// This will be deprecated in a future release [(#3880)](https://github.com/apache/arrow-rs/issues/3880)
fn data_ref(&self) -> &ArrayData {
self.data()
}
@@ -135,9 +142,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
///
/// assert_eq!(array_slice.as_ref(), &Int32Array::from(vec![2, 3, 4]));
/// ```
// NOTE(review): two forms of `slice` appear back to back here — a provided
// method built on `make_array`, followed by a bodiless required declaration.
// A trait cannot contain both; this looks like the removed and added sides of
// a diff shown together. Confirm which form is current before relying on it.
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
make_array(self.data_ref().slice(offset, length))
}
fn slice(&self, offset: usize, length: usize) -> ArrayRef;

/// Returns the length (i.e., number of elements) of this array.
///
@@ -189,6 +194,9 @@ pub trait Array: std::fmt::Debug + Send + Sync {
self.data_ref().offset()
}

/// Returns the null buffer of this array, if any.
fn nulls(&self) -> Option<&NullBuffer>;

/// Returns whether the element at `index` is null.
/// When using this function on a slice, the index is relative to the slice.
///
@@ -203,7 +211,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// assert_eq!(array.is_null(1), true);
/// ```
fn is_null(&self, index: usize) -> bool {
self.data_ref().is_null(index)
self.nulls().map(|n| n.is_null(index)).unwrap_or_default()
}

/// Returns whether the element at `index` is not null.
@@ -220,7 +228,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// assert_eq!(array.is_valid(1), false);
/// ```
fn is_valid(&self, index: usize) -> bool {
self.data_ref().is_valid(index)
!self.is_null(index)
}

/// Returns the total number of null values in this array.
@@ -236,7 +244,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// assert_eq!(array.null_count(), 2);
/// ```
fn null_count(&self) -> usize {
self.data_ref().null_count()
self.nulls().map(|n| n.null_count()).unwrap_or_default()
}

/// Returns the total number of bytes of memory pointed to by this array.
@@ -269,6 +277,10 @@ impl Array for ArrayRef {
self.as_ref().data()
}

/// Forwards to `to_data` on the array this reference points to.
fn to_data(&self) -> ArrayData {
    let inner = self.as_ref();
    inner.to_data()
}

/// Returns the underlying data of the referenced array.
///
/// Goes through `to_data` instead of cloning the result of `data()`, since
/// `data()` is marked for deprecation on the `Array` trait.
fn into_data(self) -> ArrayData {
    self.to_data()
}
@@ -297,6 +309,10 @@ impl Array for ArrayRef {
self.as_ref().offset()
}

/// Forwards to `nulls` on the array this reference points to.
fn nulls(&self) -> Option<&NullBuffer> {
    let inner = self.as_ref();
    inner.nulls()
}

/// Forwards to `is_null` on the array this reference points to, so any
/// override on the concrete array type is honoured.
fn is_null(&self, index: usize) -> bool {
    let inner = self.as_ref();
    inner.is_null(index)
}
@@ -327,6 +343,10 @@ impl<'a, T: Array> Array for &'a T {
T::data(self)
}

fn to_data(&self) -> ArrayData {
T::to_data(self)
}

fn into_data(self) -> ArrayData {
self.data().clone()
}
@@ -355,6 +375,10 @@ impl<'a, T: Array> Array for &'a T {
T::offset(self)
}

fn nulls(&self) -> Option<&NullBuffer> {
T::nulls(self)
}

fn is_null(&self, index: usize) -> bool {
T::is_null(self, index)
}
17 changes: 16 additions & 1 deletion arrow-array/src/array/null_array.rs
Original file line number Diff line number Diff line change
@@ -17,10 +17,12 @@

//! Contains the `NullArray` type.
use crate::Array;
use crate::{Array, ArrayRef};
use arrow_buffer::buffer::NullBuffer;
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;

/// An Array where all elements are nulls
///
@@ -63,10 +65,23 @@ impl Array for NullArray {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

/// Consumes the null array and converts it into [`ArrayData`] via its
/// `Into<ArrayData>` conversion.
fn into_data(self) -> ArrayData {
    Into::<ArrayData>::into(self)
}

/// Slices the underlying [`ArrayData`] with `offset` and `length` and returns
/// the null array rebuilt from it as an [`ArrayRef`].
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
    // TODO: Slice buffers directly (#3880)
    let window = self.data.slice(offset, length);
    Arc::new(Self::from(window))
}

/// Always returns `None`; nullness for this type is reported by the
/// `is_null` override on this impl instead of a null buffer.
// NOTE(review): the trait's provided `null_count` is derived from `nulls()`
// and would yield zero here — confirm this impl overrides `null_count`.
fn nulls(&self) -> Option<&NullBuffer> {
None
}

/// Returns whether the element at `index` is null.
/// All elements of a `NullArray` are always null.
fn is_null(&self, _index: usize) -> bool {
18 changes: 16 additions & 2 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
@@ -23,16 +23,17 @@ use crate::temporal_conversions::{
};
use crate::timezone::Tz;
use crate::trusted_len::trusted_len_unzip;
use crate::{types::*, ArrowNativeTypeOp};
use crate::{types::*, ArrayRef, ArrowNativeTypeOp};
use crate::{Array, ArrayAccessor};
use arrow_buffer::buffer::ScalarBuffer;
use arrow_buffer::buffer::{NullBuffer, ScalarBuffer};
use arrow_buffer::{i256, ArrowNativeType, Buffer};
use arrow_data::bit_iterator::try_for_each_valid_idx;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime};
use half::f16;
use std::any::Any;
use std::sync::Arc;

///
/// # Example: Using `collect`
@@ -697,9 +698,22 @@ impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

/// Consumes the primitive array and converts it into [`ArrayData`] via its
/// `Into<ArrayData>` conversion.
fn into_data(self) -> ArrayData {
    Into::<ArrayData>::into(self)
}

/// Slices the underlying [`ArrayData`] with `offset` and `length` and returns
/// the primitive array rebuilt from it as an [`ArrayRef`].
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
    // TODO: Slice buffers directly (#3880)
    let sliced = self.data.slice(offset, length);
    Arc::new(Self::from(sliced))
}

fn nulls(&self) -> Option<&NullBuffer> {
self.data.nulls()
}
}

impl<'a, T: ArrowPrimitiveType> ArrayAccessor for &'a PrimitiveArray<T> {
28 changes: 27 additions & 1 deletion arrow-array/src/array/run_array.rs
Original file line number Diff line number Diff line change
@@ -16,8 +16,9 @@
// under the License.

use std::any::Any;
use std::sync::Arc;

use arrow_buffer::buffer::RunEndBuffer;
use arrow_buffer::buffer::{NullBuffer, RunEndBuffer};
use arrow_buffer::ArrowNativeType;
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType, Field};
@@ -288,9 +289,22 @@ impl<T: RunEndIndexType> Array for RunArray<T> {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

/// Consumes the run array and converts it into [`ArrayData`] via its
/// `Into<ArrayData>` conversion.
fn into_data(self) -> ArrayData {
    Into::<ArrayData>::into(self)
}

/// Slices the underlying [`ArrayData`] with `offset` and `length` and returns
/// the run array rebuilt from it as an [`ArrayRef`].
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
    // TODO: Slice buffers directly (#3880)
    let window = self.data.slice(offset, length);
    Arc::new(Self::from(window))
}

/// Always returns `None`; this impl exposes no null buffer for the run array.
// NOTE(review): presumably nullness is carried by the run values rather than
// by the run array itself — confirm. The trait's provided `is_null` and
// `null_count` are derived from `nulls()`, so they report no nulls here
// unless overridden.
fn nulls(&self) -> Option<&NullBuffer> {
None
}
}

impl<R: RunEndIndexType> std::fmt::Debug for RunArray<R> {
@@ -473,9 +487,21 @@ impl<'a, R: RunEndIndexType, V: Sync> Array for TypedRunArray<'a, R, V> {
&self.run_array.data
}

/// Forwards to `to_data` on the wrapped run array.
fn to_data(&self) -> ArrayData {
self.run_array.to_data()
}

/// Consumes this typed view and forwards to `into_data` on the wrapped run
/// array.
fn into_data(self) -> ArrayData {
self.run_array.into_data()
}

/// Forwards to `slice` on the wrapped run array, passing `offset` and
/// `length` through unchanged.
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
self.run_array.slice(offset, length)
}

/// Forwards to `nulls` on the wrapped run array.
fn nulls(&self) -> Option<&NullBuffer> {
self.run_array.nulls()
}
}

// Array accessor converts the index of logical array to the index of the physical array
17 changes: 13 additions & 4 deletions arrow-array/src/array/struct_array.rs
Original file line number Diff line number Diff line change
@@ -16,10 +16,11 @@
// under the License.

use crate::{make_array, Array, ArrayRef};
use arrow_buffer::buffer::buffer_bin_or;
use arrow_buffer::buffer::{buffer_bin_or, NullBuffer};
use arrow_buffer::Buffer;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType, Field};
use std::sync::Arc;
use std::{any::Any, ops::Index};

/// A nested array type where each child (called *field*) is represented by a separate
@@ -196,13 +197,21 @@ impl Array for StructArray {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

/// Consumes the struct array and converts it into [`ArrayData`] via its
/// `Into<ArrayData>` conversion.
fn into_data(self) -> ArrayData {
    Into::<ArrayData>::into(self)
}

/// Returns the length (i.e., number of elements) of this array
fn len(&self) -> usize {
self.data_ref().len()
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
// TODO: Slice buffers directly (#3880)
Arc::new(Self::from(self.data.slice(offset, length)))
}

fn nulls(&self) -> Option<&NullBuffer> {
self.data.nulls()
}
}

14 changes: 14 additions & 0 deletions arrow-array/src/array/union_array.rs
Original file line number Diff line number Diff line change
@@ -16,12 +16,14 @@
// under the License.

use crate::{make_array, Array, ArrayRef};
use arrow_buffer::buffer::NullBuffer;
use arrow_buffer::Buffer;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType, Field, UnionMode};
/// Contains the `UnionArray` type.
///
use std::any::Any;
use std::sync::Arc;

/// An Array that can represent slots of varying types.
///
@@ -317,10 +319,22 @@ impl Array for UnionArray {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

/// Consumes the union array and converts it into [`ArrayData`] via its
/// `Into<ArrayData>` conversion.
fn into_data(self) -> ArrayData {
    Into::<ArrayData>::into(self)
}

/// Slices the underlying [`ArrayData`] with `offset` and `length` and returns
/// the union array rebuilt from it as an [`ArrayRef`].
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
    let sliced = self.data.slice(offset, length);
    Arc::new(Self::from(sliced))
}

/// Always returns `None`; as the `is_null` documentation on this impl notes,
/// union types have no validity buffer.
fn nulls(&self) -> Option<&NullBuffer> {
None
}

/// Union types always return non null as there is no validity buffer.
/// To check validity correctly you must check the underlying vector.
fn is_null(&self, _index: usize) -> bool {

0 comments on commit 03e83c8

Please sign in to comment.