Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate Array::data (#3880) #4019

Merged
merged 2 commits into from
Apr 5, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions arrow-array/src/array/binary_array.rs
Original file line number Diff line number Diff line change
@@ -467,9 +467,6 @@ mod tests {
let list_array = GenericListArray::<O>::from(array_data2);
let binary_array2 = GenericBinaryArray::<O>::from(list_array);

assert_eq!(2, binary_array2.data().buffers().len());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is no longer needed because the ArrayData builder will check the buffer count (is that correct)?

assert_eq!(0, binary_array2.data().child_data().len());

assert_eq!(binary_array1.len(), binary_array2.len());
assert_eq!(binary_array1.null_count(), binary_array2.null_count());
assert_eq!(binary_array1.value_offsets(), binary_array2.value_offsets());
25 changes: 19 additions & 6 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
@@ -95,8 +95,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
fn as_any(&self) -> &dyn Any;

/// Returns a reference to the underlying data of this array
///
/// This will be deprecated in a future release [(#3880)](https://github.com/apache/arrow-rs/issues/3880)
#[deprecated(note = "Use Array::to_data or Array::into_data")]
fn data(&self) -> &ArrayData;

/// Returns the underlying data of this array
@@ -108,9 +107,8 @@ pub trait Array: std::fmt::Debug + Send + Sync {
fn into_data(self) -> ArrayData;

/// Returns a reference-counted pointer to the underlying data of this array.
///
/// This will be deprecated in a future release [(#3880)](https://github.com/apache/arrow-rs/issues/3880)
#[deprecated(note = "Use Array::to_data or Array::into_data")]
#[allow(deprecated)]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Part of #3880 will be to port these to not use Array::data

fn data_ref(&self) -> &ArrayData {
self.data()
}
@@ -281,6 +279,7 @@ impl Array for ArrayRef {
self.as_ref().as_any()
}

#[allow(deprecated)]
fn data(&self) -> &ArrayData {
self.as_ref().data()
}
@@ -348,6 +347,7 @@ impl<'a, T: Array> Array for &'a T {
T::as_any(self)
}

#[allow(deprecated)]
fn data(&self) -> &ArrayData {
T::data(self)
}
@@ -435,78 +435,91 @@ pub trait ArrayAccessor: Array {
}

impl PartialEq for dyn Array + '_ {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}

impl<T: Array> PartialEq<T> for dyn Array + '_ {
#[allow(deprecated)]
fn eq(&self, other: &T) -> bool {
self.data().eq(other.data())
}
}

impl PartialEq for NullArray {
#[allow(deprecated)]
fn eq(&self, other: &NullArray) -> bool {
self.data().eq(other.data())
}
}

impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
#[allow(deprecated)]
fn eq(&self, other: &PrimitiveArray<T>) -> bool {
self.data().eq(other.data())
}
}

impl<K: ArrowDictionaryKeyType> PartialEq for DictionaryArray<K> {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}

impl PartialEq for BooleanArray {
#[allow(deprecated)]
fn eq(&self, other: &BooleanArray) -> bool {
self.data().eq(other.data())
}
}

impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}

impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}

impl PartialEq for FixedSizeBinaryArray {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}

impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}

impl PartialEq for MapArray {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}

impl PartialEq for FixedSizeListArray {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}

impl PartialEq for StructArray {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
@@ -865,8 +878,8 @@ mod tests {
let null_array = new_null_array(array.data_type(), 9);
assert_eq!(&array, &null_array);
assert_eq!(
array.data().buffers()[0].len(),
null_array.data().buffers()[0].len()
array.to_data().buffers()[0].len(),
null_array.to_data().buffers()[0].len()
);
}

3 changes: 1 addition & 2 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
@@ -519,10 +519,9 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
O: ArrowPrimitiveType,
F: Fn(T::Native) -> Result<O::Native, E>,
{
let data = self.data();
let len = self.len();

let nulls = data.nulls().cloned();
let nulls = self.nulls().cloned();
let mut buffer = BufferBuilder::<O::Native>::new(len);
buffer.append_n_zeroed(len);
let slice = buffer.as_slice_mut();
18 changes: 8 additions & 10 deletions arrow-array/src/array/struct_array.rs
Original file line number Diff line number Diff line change
@@ -174,7 +174,7 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
// null: the null mask of the arrays.
let mut null: Option<Buffer> = None;
for (field_name, array) in values {
let child_datum = array.data();
let child_datum = array.to_data();
let child_datum_len = child_datum.len();
if let Some(len) = len {
if len != child_datum_len {
@@ -186,7 +186,6 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
} else {
len = Some(child_datum_len)
}
child_data.push(child_datum.clone());
fields.push(Arc::new(Field::new(
field_name,
array.data_type().clone(),
@@ -209,6 +208,7 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
// when one of the fields has no nulls, then there is no null in the array
null = None;
}
child_data.push(child_datum);
}
let len = len.unwrap();

@@ -385,25 +385,23 @@ mod tests {

#[test]
fn test_struct_array_builder() {
let array = BooleanArray::from(vec![false, false, true, true]);
let boolean_data = array.data();
let array = Int64Array::from(vec![42, 28, 19, 31]);
let int_data = array.data();
let boolean_array = BooleanArray::from(vec![false, false, true, true]);
let int_array = Int64Array::from(vec![42, 28, 19, 31]);

let fields = vec![
Field::new("a", DataType::Boolean, false),
Field::new("b", DataType::Int64, false),
];
let struct_array_data = ArrayData::builder(DataType::Struct(fields.into()))
.len(4)
.add_child_data(boolean_data.clone())
.add_child_data(int_data.clone())
.add_child_data(boolean_array.to_data())
.add_child_data(int_array.to_data())
.build()
.unwrap();
let struct_array = StructArray::from(struct_array_data);

assert_eq!(boolean_data, struct_array.column(0).data());
assert_eq!(int_data, struct_array.column(1).data());
assert_eq!(struct_array.column(0).as_ref(), &boolean_array);
assert_eq!(struct_array.column(1).as_ref(), &int_array);
}

#[test]
38 changes: 13 additions & 25 deletions arrow-cast/src/cast.rs
Original file line number Diff line number Diff line change
@@ -2217,9 +2217,9 @@ fn value_to_string<O: OffsetSizeTrait>(
let mut builder = GenericStringBuilder::<O>::new();
let options = FormatOptions::default();
let formatter = ArrayFormatter::try_new(array, &options)?;
let data = array.data();
for i in 0..data.len() {
match data.is_null(i) {
let nulls = array.nulls();
for i in 0..array.len() {
match nulls.map(|x| x.is_null(i)).unwrap_or_default() {
true => builder.append_null(),
false => {
formatter.value(i).write(&mut builder)?;
@@ -3500,7 +3500,7 @@ where
FROM::Offset: OffsetSizeTrait + ToPrimitive,
TO::Offset: OffsetSizeTrait + NumCast,
{
let data = array.data();
let data = array.to_data();
assert_eq!(data.data_type(), &FROM::DATA_TYPE);
let str_values_buf = data.buffers()[1].clone();
let offsets = data.buffers()[0].typed_data::<FROM::Offset>();
@@ -4844,9 +4844,8 @@ mod tests {

#[test]
fn test_cast_list_i32_to_list_u16() {
let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 100000000])
.data()
.clone();
let value_data =
Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 100000000]).into_data();

let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);

@@ -4875,15 +4874,9 @@ mod tests {
assert_eq!(0, cast_array.null_count());

// offsets should be the same
assert_eq!(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the equality comparison still covered?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

List arrays only contain a single buffer, and so assert_eq!(list_array.value_offsets(), array.value_offsets()); is equivalent

list_array.data().buffers().to_vec(),
cast_array.data().buffers().to_vec()
);
let array = cast_array
.as_ref()
.as_any()
.downcast_ref::<ListArray>()
.unwrap();
let array = cast_array.as_list::<i32>();
assert_eq!(list_array.value_offsets(), array.value_offsets());

assert_eq!(DataType::UInt16, array.value_type());
assert_eq!(3, array.value_length(0));
assert_eq!(3, array.value_length(1));
@@ -4908,9 +4901,8 @@ mod tests {
)]
fn test_cast_list_i32_to_list_timestamp() {
// Construct a value array
let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 8, 100000000])
.data()
.clone();
let value_data =
Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 8, 100000000]).into_data();

let value_offsets = Buffer::from_slice_ref([0, 3, 6, 9]);

@@ -7355,11 +7347,7 @@ mod tests {
fn test_list_to_string() {
let str_array = StringArray::from(vec!["a", "b", "c", "d", "e", "f", "g", "h"]);
let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
let value_data = ArrayData::builder(DataType::Utf8)
.len(str_array.len())
.buffers(str_array.data().buffers().to_vec())
.build()
.unwrap();
let value_data = str_array.into_data();

let list_data_type =
DataType::List(Arc::new(Field::new("item", DataType::Utf8, true)));
@@ -8123,7 +8111,7 @@ mod tests {
let options = CastOptions { safe: true };
let array = cast_with_options(&s, &DataType::Utf8, &options).unwrap();
let a = array.as_string::<i32>();
a.data().validate_full().unwrap();
a.to_data().validate_full().unwrap();

assert_eq!(a.null_count(), 1);
assert_eq!(a.len(), 2);
2 changes: 1 addition & 1 deletion arrow-integration-test/src/lib.rs
Original file line number Diff line number Diff line change
@@ -938,7 +938,7 @@ pub fn dictionary_array_from_json(
// convert key and value to dictionary data
let dict_data = ArrayData::builder(field.data_type().clone())
.len(keys.len())
.add_buffer(keys.data().buffers()[0].clone())
.add_buffer(keys.to_data().buffers()[0].clone())
.null_bit_buffer(Some(null_buf))
.add_child_data(values.into_data())
.build()
Original file line number Diff line number Diff line change
@@ -200,8 +200,8 @@ fn validate(arrow_name: &str, json_name: &str, verbose: bool) -> Result<()> {

for i in 0..num_columns {
assert_eq!(
arrow_batch.column(i).data(),
json_batch.column(i).data(),
arrow_batch.column(i).as_ref(),
json_batch.column(i).as_ref(),
"Arrow and JSON batch columns not the same"
);
}
Original file line number Diff line number Diff line change
@@ -232,8 +232,8 @@ async fn consume_flight_location(
let field = schema.field(i);
let field_name = field.name();

let expected_data = expected_batch.column(i).data();
let actual_data = actual_batch.column(i).data();
let expected_data = expected_batch.column(i).as_ref();
let actual_data = actual_batch.column(i).as_ref();

assert_eq!(expected_data, actual_data, "Data for field {field_name}");
}
Loading