Skip to content

Commit

Permalink
Deprecate Array::data_ref (apache#3880)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Mar 23, 2023
1 parent de9a90b commit 71be2e7
Show file tree
Hide file tree
Showing 8 changed files with 62 additions and 144 deletions.
7 changes: 4 additions & 3 deletions arrow-array/src/array/binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,14 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
}

fn from_list(v: GenericListArray<OffsetSize>) -> Self {
let v = v.into_data();
assert_eq!(
v.data_ref().child_data().len(),
v.child_data().len(),
1,
"BinaryArray can only be created from list array of u8 values \
(i.e. List<PrimitiveArray<u8>>)."
);
let child_data = &v.data_ref().child_data()[0];
let child_data = &v.child_data()[0];

assert_eq!(
child_data.child_data().len(),
Expand All @@ -75,7 +76,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
let builder = ArrayData::builder(Self::DATA_TYPE)
.len(v.len())
.offset(v.offset())
.add_buffer(v.data_ref().buffers()[0].clone())
.add_buffer(v.buffers()[0].clone())
.add_buffer(child_data.buffers()[0].slice(child_data.offset()))
.nulls(v.nulls().cloned());

Expand Down
8 changes: 5 additions & 3 deletions arrow-array/src/array/fixed_size_binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -396,13 +396,15 @@ impl From<FixedSizeBinaryArray> for ArrayData {
/// Creates a `FixedSizeBinaryArray` from `FixedSizeList<u8>` array
impl From<FixedSizeListArray> for FixedSizeBinaryArray {
fn from(v: FixedSizeListArray) -> Self {
let value_len = v.value_length();
let v = v.into_data();
assert_eq!(
v.data_ref().child_data().len(),
v.child_data().len(),
1,
"FixedSizeBinaryArray can only be created from list array of u8 values \
(i.e. FixedSizeList<PrimitiveArray<u8>>)."
);
let child_data = &v.data_ref().child_data()[0];
let child_data = &v.child_data()[0];

assert_eq!(
child_data.child_data().len(),
Expand All @@ -421,7 +423,7 @@ impl From<FixedSizeListArray> for FixedSizeBinaryArray {
"The child array cannot contain null values."
);

let builder = ArrayData::builder(DataType::FixedSizeBinary(v.value_length()))
let builder = ArrayData::builder(DataType::FixedSizeBinary(value_len))
.len(v.len())
.offset(v.offset())
.add_buffer(child_data.buffers()[0].slice(child_data.offset()))
Expand Down
27 changes: 17 additions & 10 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// Returns a reference-counted pointer to the underlying data of this array.
///
/// This will be deprecated in a future release [(#3880)](https://github.com/apache/arrow-rs/issues/3880)
#[deprecated(note = "Use Array::to_data or Array::into_data")]
fn data_ref(&self) -> &ArrayData {
self.data()
}
Expand All @@ -126,6 +127,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
///
/// assert_eq!(*array.data_type(), DataType::Int32);
/// ```
#[allow(deprecated)] // (#3880)
fn data_type(&self) -> &DataType {
self.data_ref().data_type()
}
Expand Down Expand Up @@ -156,6 +158,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
///
/// assert_eq!(array.len(), 5);
/// ```
#[allow(deprecated)] // (#3880)
fn len(&self) -> usize {
self.data_ref().len()
}
Expand All @@ -171,6 +174,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
///
/// assert_eq!(array.is_empty(), false);
/// ```
#[allow(deprecated)] // (#3880)
fn is_empty(&self) -> bool {
self.data_ref().is_empty()
}
Expand All @@ -191,6 +195,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// assert_eq!(array.offset(), 0);
/// assert_eq!(array_slice.offset(), 1);
/// ```
#[allow(deprecated)] // (#3880)
fn offset(&self) -> usize {
self.data_ref().offset()
}
Expand Down Expand Up @@ -250,13 +255,15 @@ pub trait Array: std::fmt::Debug + Send + Sync {

/// Returns the total number of bytes of memory pointed to by this array.
/// The buffers store bytes in the Arrow memory format, and include the data as well as the validity map.
#[allow(deprecated)] // (#3880)
fn get_buffer_memory_size(&self) -> usize {
self.data_ref().get_buffer_memory_size()
}

/// Returns the total number of bytes of memory occupied physically by this array.
/// This value will always be greater than returned by `get_buffer_memory_size()` and
/// includes the overhead of the data structures that contain the pointers to the various buffers.
#[allow(deprecated)] // (#3880)
fn get_array_memory_size(&self) -> usize {
// both data.get_array_memory_size and size_of_val(self) include ArrayData fields,
// to only count additional fields of this array substract size_of(ArrayData)
Expand Down Expand Up @@ -286,6 +293,7 @@ impl Array for ArrayRef {
self.data().clone()
}

#[allow(deprecated)]
fn data_ref(&self) -> &ArrayData {
self.as_ref().data_ref()
}
Expand Down Expand Up @@ -352,6 +360,7 @@ impl<'a, T: Array> Array for &'a T {
self.data().clone()
}

#[allow(deprecated)]
fn data_ref(&self) -> &ArrayData {
T::data_ref(self)
}
Expand Down Expand Up @@ -997,19 +1006,17 @@ mod tests {
(0..256).map(|i| (i % values.len()) as i16),
);

let dict_data = ArrayData::builder(DataType::Dictionary(
let dict_data_type = DataType::Dictionary(
Box::new(keys.data_type().clone()),
Box::new(values.data_type().clone()),
))
.len(keys.len())
.buffers(keys.data_ref().buffers().to_vec())
.child_data(vec![ArrayData::builder(DataType::Int64)
.len(values.len())
.buffers(values.data_ref().buffers().to_vec())
);
let dict_data = keys
.into_data()
.into_builder()
.data_type(dict_data_type)
.child_data(vec![values.into_data()])
.build()
.unwrap()])
.build()
.unwrap();
.unwrap();

let empty_data = ArrayData::new_empty(&DataType::Dictionary(
Box::new(DataType::Int16),
Expand Down
24 changes: 8 additions & 16 deletions arrow-cast/src/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3448,9 +3448,9 @@ where
OffsetSizeFrom: OffsetSizeTrait + ToPrimitive,
OffsetSizeTo: OffsetSizeTrait + NumCast,
{
let data = array.data_ref();
let list = array.as_list::<OffsetSizeFrom>();
// the value data stored by the list
let value_data = data.child_data()[0].clone();
let values = list.values();

let out_dtype = match array.data_type() {
DataType::List(value_type) => {
Expand All @@ -3473,7 +3473,7 @@ where
std::mem::size_of::<OffsetSizeTo>(),
std::mem::size_of::<i32>()
);
if value_data.len() > i32::MAX as usize {
if values.len() > i32::MAX as usize {
return Err(ArrowError::ComputeError(
"LargeList too large to cast to List".into(),
));
Expand All @@ -3484,14 +3484,7 @@ where
_ => unreachable!(),
};

// Safety:
// The first buffer is the offsets and they are aligned to OffSetSizeFrom: (i64 or i32)
// Justification:
// The safe variant data.buffer::<OffsetSizeFrom> take the offset into account and we
// cannot create a list array with offsets starting at non zero.
let offsets = unsafe { data.buffers()[0].as_slice().align_to::<OffsetSizeFrom>() }.1;

let iter = offsets.iter().map(|idx| {
let iter = list.value_offsets().iter().map(|idx| {
let idx: OffsetSizeTo = NumCast::from(*idx).unwrap();
idx
});
Expand All @@ -3502,14 +3495,13 @@ where

// wrap up
let builder = ArrayData::builder(out_dtype)
.offset(array.offset())
.len(array.len())
.len(list.len())
.add_buffer(offset_buffer)
.add_child_data(value_data)
.nulls(data.nulls().cloned());
.add_child_data(values.to_data())
.nulls(list.nulls().cloned());

let array_data = unsafe { builder.build_unchecked() };
Ok(make_array(array_data))
Ok(Arc::new(GenericListArray::<OffsetSizeTo>::from(array_data)))
}

#[cfg(test)]
Expand Down
83 changes: 13 additions & 70 deletions arrow-ord/src/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@
use arrow_array::cast::*;
use arrow_array::types::*;
use arrow_array::*;
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::bit_mask::combine_option_bitmap;
use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer, NullBuffer};
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType, IntervalUnit, TimeUnit};
use arrow_select::take::take;
Expand Down Expand Up @@ -1214,8 +1213,7 @@ where
));
}

let null_bit_buffer =
combine_option_bitmap(&[left.data_ref(), right.data_ref()], len);
let nulls = NullBuffer::union(left.nulls(), right.nulls());

// we process the data in chunks so that each iteration results in one u64 of comparison result bits
const CHUNK_SIZE: usize = 64;
Expand Down Expand Up @@ -1276,18 +1274,8 @@ where
result_remainder.copy_from_slice(remainder_mask_as_bytes);
}

let data = unsafe {
ArrayData::new_unchecked(
DataType::Boolean,
len,
None,
null_bit_buffer,
0,
vec![result.into()],
vec![],
)
};
Ok(BooleanArray::from(data))
let values = BooleanBuffer::new(result.into(), 0, len);
Ok(BooleanArray::new(values, nulls))
}

/// Helper function to perform boolean lambda function on values from an array and a scalar value using
Expand Down Expand Up @@ -2709,19 +2697,13 @@ where

let num_bytes = bit_util::ceil(left_len, 8);

let not_both_null_bit_buffer =
match combine_option_bitmap(&[left.data_ref(), right.data_ref()], left_len) {
Some(buff) => buff,
None => new_all_set_buffer(num_bytes),
};
let not_both_null_bitmap = not_both_null_bit_buffer.as_slice();

let nulls = NullBuffer::union(left.nulls(), right.nulls());
let mut bool_buf = MutableBuffer::from_len_zeroed(num_bytes);
let bool_slice = bool_buf.as_slice_mut();

// if both array slots are valid, check if list contains primitive
for i in 0..left_len {
if bit_util::get_bit(not_both_null_bitmap, i) {
if nulls.as_ref().map(|n| n.is_valid(i)).unwrap_or(true) {
let list = right.value(i);
let list = list.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();

Expand All @@ -2734,18 +2716,8 @@ where
}
}

let data = unsafe {
ArrayData::new_unchecked(
DataType::Boolean,
left.len(),
None,
None,
0,
vec![bool_buf.into()],
vec![],
)
};
Ok(BooleanArray::from(data))
let values = BooleanBuffer::new(bool_buf.into(), 0, left_len);
Ok(BooleanArray::new(values, None))
}

/// Checks if a [`GenericListArray`] contains a value in the [`GenericStringArray`]
Expand All @@ -2766,24 +2738,15 @@ where

let num_bytes = bit_util::ceil(left_len, 8);

let not_both_null_bit_buffer =
match combine_option_bitmap(&[left.data_ref(), right.data_ref()], left_len) {
Some(buff) => buff,
None => new_all_set_buffer(num_bytes),
};
let not_both_null_bitmap = not_both_null_bit_buffer.as_slice();

let nulls = NullBuffer::union(left.nulls(), right.nulls());
let mut bool_buf = MutableBuffer::from_len_zeroed(num_bytes);
let bool_slice = &mut bool_buf;

for i in 0..left_len {
// contains(null, null) = false
if bit_util::get_bit(not_both_null_bitmap, i) {
if nulls.as_ref().map(|n| n.is_valid(i)).unwrap_or(true) {
let list = right.value(i);
let list = list
.as_any()
.downcast_ref::<GenericStringArray<OffsetSize>>()
.unwrap();
let list = list.as_string::<OffsetSize>();

for j in 0..list.len() {
if list.is_valid(j) && (left.value(i) == list.value(j)) {
Expand All @@ -2793,28 +2756,8 @@ where
}
}
}

let data = unsafe {
ArrayData::new_unchecked(
DataType::Boolean,
left.len(),
None,
None,
0,
vec![bool_buf.into()],
vec![],
)
};
Ok(BooleanArray::from(data))
}

// create a buffer and fill it with valid bits
#[inline]
fn new_all_set_buffer(len: usize) -> Buffer {
let buffer = MutableBuffer::new(len);
let buffer = buffer.with_bitset(len, true);

buffer.into()
let values = BooleanBuffer::new(bool_buf.into(), 0, left_len);
Ok(BooleanArray::new(values, None))
}

// disable wrapping inside literal vectors used for test data and assertions
Expand Down
26 changes: 10 additions & 16 deletions arrow-select/src/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -735,13 +735,13 @@ where
IndexType: ArrowPrimitiveType,
IndexType::Native: ToPrimitive,
{
let data_ref = values.data_ref();
let nulls = values.nulls();
let array_iter = indices
.values()
.iter()
.map(|idx| {
let idx = maybe_usize::<IndexType::Native>(*idx)?;
if data_ref.is_valid(idx) {
if nulls.map(|n| n.is_valid(idx)).unwrap_or(true) {
Ok(Some(values.value(idx)))
} else {
Ok(None)
Expand All @@ -768,20 +768,14 @@ where
I::Native: ToPrimitive,
{
let new_keys = take_primitive::<T, I>(values.keys(), indices)?;
let new_keys_data = new_keys.data_ref();

let data = unsafe {
ArrayData::new_unchecked(
values.data_type().clone(),
new_keys.len(),
Some(new_keys_data.null_count()),
new_keys_data.nulls().map(|b| b.inner().sliced()),
0,
new_keys_data.buffers().to_vec(),
values.data().child_data().to_vec(),
)
};

let builder = new_keys
.into_data()
.into_builder()
.data_type(values.data_type().clone())
.child_data(vec![values.values().to_data()]);

// Safety: Indices were valid before
let data = unsafe { builder.build_unchecked() };
Ok(DictionaryArray::<T>::from(data))
}

Expand Down
Loading

0 comments on commit 71be2e7

Please sign in to comment.