Skip to content

Deprecate Array::data (#3880) #4019

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions arrow-array/src/array/binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -467,9 +467,6 @@ mod tests {
let list_array = GenericListArray::<O>::from(array_data2);
let binary_array2 = GenericBinaryArray::<O>::from(list_array);

assert_eq!(2, binary_array2.data().buffers().len());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is no longer needed because the ArrayData builder will check the buffer count (is that correct)?

assert_eq!(0, binary_array2.data().child_data().len());

assert_eq!(binary_array1.len(), binary_array2.len());
assert_eq!(binary_array1.null_count(), binary_array2.null_count());
assert_eq!(binary_array1.value_offsets(), binary_array2.value_offsets());
Expand Down
25 changes: 19 additions & 6 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
fn as_any(&self) -> &dyn Any;

/// Returns a reference to the underlying data of this array
///
/// This will be deprecated in a future release [(#3880)](https://github.com/apache/arrow-rs/issues/3880)
#[deprecated(note = "Use Array::to_data or Array::into_data")]
fn data(&self) -> &ArrayData;

/// Returns the underlying data of this array
Expand All @@ -108,9 +107,8 @@ pub trait Array: std::fmt::Debug + Send + Sync {
fn into_data(self) -> ArrayData;

/// Returns a reference-counted pointer to the underlying data of this array.
///
/// This will be deprecated in a future release [(#3880)](https://github.com/apache/arrow-rs/issues/3880)
#[deprecated(note = "Use Array::to_data or Array::into_data")]
#[allow(deprecated)]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Part of #3880 will be to port these to not use Array::data

fn data_ref(&self) -> &ArrayData {
self.data()
}
Expand Down Expand Up @@ -281,6 +279,7 @@ impl Array for ArrayRef {
self.as_ref().as_any()
}

#[allow(deprecated)]
fn data(&self) -> &ArrayData {
self.as_ref().data()
}
Expand Down Expand Up @@ -348,6 +347,7 @@ impl<'a, T: Array> Array for &'a T {
T::as_any(self)
}

#[allow(deprecated)]
fn data(&self) -> &ArrayData {
T::data(self)
}
Expand Down Expand Up @@ -435,78 +435,91 @@ pub trait ArrayAccessor: Array {
}

impl PartialEq for dyn Array + '_ {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}

impl<T: Array> PartialEq<T> for dyn Array + '_ {
#[allow(deprecated)]
fn eq(&self, other: &T) -> bool {
self.data().eq(other.data())
}
}

impl PartialEq for NullArray {
#[allow(deprecated)]
fn eq(&self, other: &NullArray) -> bool {
self.data().eq(other.data())
}
}

impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
#[allow(deprecated)]
fn eq(&self, other: &PrimitiveArray<T>) -> bool {
self.data().eq(other.data())
}
}

impl<K: ArrowDictionaryKeyType> PartialEq for DictionaryArray<K> {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}

impl PartialEq for BooleanArray {
#[allow(deprecated)]
fn eq(&self, other: &BooleanArray) -> bool {
self.data().eq(other.data())
}
}

impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}

impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}

impl PartialEq for FixedSizeBinaryArray {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}

impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}

impl PartialEq for MapArray {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}

impl PartialEq for FixedSizeListArray {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}

impl PartialEq for StructArray {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
Expand Down Expand Up @@ -865,8 +878,8 @@ mod tests {
let null_array = new_null_array(array.data_type(), 9);
assert_eq!(&array, &null_array);
assert_eq!(
array.data().buffers()[0].len(),
null_array.data().buffers()[0].len()
array.to_data().buffers()[0].len(),
null_array.to_data().buffers()[0].len()
);
}

Expand Down
3 changes: 1 addition & 2 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -519,10 +519,9 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
O: ArrowPrimitiveType,
F: Fn(T::Native) -> Result<O::Native, E>,
{
let data = self.data();
let len = self.len();

let nulls = data.nulls().cloned();
let nulls = self.nulls().cloned();
let mut buffer = BufferBuilder::<O::Native>::new(len);
buffer.append_n_zeroed(len);
let slice = buffer.as_slice_mut();
Expand Down
18 changes: 8 additions & 10 deletions arrow-array/src/array/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
// null: the null mask of the arrays.
let mut null: Option<Buffer> = None;
for (field_name, array) in values {
let child_datum = array.data();
let child_datum = array.to_data();
let child_datum_len = child_datum.len();
if let Some(len) = len {
if len != child_datum_len {
Expand All @@ -186,7 +186,6 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
} else {
len = Some(child_datum_len)
}
child_data.push(child_datum.clone());
fields.push(Arc::new(Field::new(
field_name,
array.data_type().clone(),
Expand All @@ -209,6 +208,7 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
// when one of the fields has no nulls, then there is no null in the array
null = None;
}
child_data.push(child_datum);
}
let len = len.unwrap();

Expand Down Expand Up @@ -385,25 +385,23 @@ mod tests {

#[test]
fn test_struct_array_builder() {
let array = BooleanArray::from(vec![false, false, true, true]);
let boolean_data = array.data();
let array = Int64Array::from(vec![42, 28, 19, 31]);
let int_data = array.data();
let boolean_array = BooleanArray::from(vec![false, false, true, true]);
let int_array = Int64Array::from(vec![42, 28, 19, 31]);

let fields = vec![
Field::new("a", DataType::Boolean, false),
Field::new("b", DataType::Int64, false),
];
let struct_array_data = ArrayData::builder(DataType::Struct(fields.into()))
.len(4)
.add_child_data(boolean_data.clone())
.add_child_data(int_data.clone())
.add_child_data(boolean_array.to_data())
.add_child_data(int_array.to_data())
.build()
.unwrap();
let struct_array = StructArray::from(struct_array_data);

assert_eq!(boolean_data, struct_array.column(0).data());
assert_eq!(int_data, struct_array.column(1).data());
assert_eq!(struct_array.column(0).as_ref(), &boolean_array);
assert_eq!(struct_array.column(1).as_ref(), &int_array);
}

#[test]
Expand Down
38 changes: 13 additions & 25 deletions arrow-cast/src/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2217,9 +2217,9 @@ fn value_to_string<O: OffsetSizeTrait>(
let mut builder = GenericStringBuilder::<O>::new();
let options = FormatOptions::default();
let formatter = ArrayFormatter::try_new(array, &options)?;
let data = array.data();
for i in 0..data.len() {
match data.is_null(i) {
let nulls = array.nulls();
for i in 0..array.len() {
match nulls.map(|x| x.is_null(i)).unwrap_or_default() {
true => builder.append_null(),
false => {
formatter.value(i).write(&mut builder)?;
Expand Down Expand Up @@ -3500,7 +3500,7 @@ where
FROM::Offset: OffsetSizeTrait + ToPrimitive,
TO::Offset: OffsetSizeTrait + NumCast,
{
let data = array.data();
let data = array.to_data();
assert_eq!(data.data_type(), &FROM::DATA_TYPE);
let str_values_buf = data.buffers()[1].clone();
let offsets = data.buffers()[0].typed_data::<FROM::Offset>();
Expand Down Expand Up @@ -4844,9 +4844,8 @@ mod tests {

#[test]
fn test_cast_list_i32_to_list_u16() {
let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 100000000])
.data()
.clone();
let value_data =
Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 100000000]).into_data();

let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);

Expand Down Expand Up @@ -4875,15 +4874,9 @@ mod tests {
assert_eq!(0, cast_array.null_count());

// offsets should be the same
assert_eq!(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the equality comparison still covered?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

List arrays only contain a single buffer, and so assert_eq!(list_array.value_offsets(), array.value_offsets()); is equivalent

list_array.data().buffers().to_vec(),
cast_array.data().buffers().to_vec()
);
let array = cast_array
.as_ref()
.as_any()
.downcast_ref::<ListArray>()
.unwrap();
let array = cast_array.as_list::<i32>();
assert_eq!(list_array.value_offsets(), array.value_offsets());

assert_eq!(DataType::UInt16, array.value_type());
assert_eq!(3, array.value_length(0));
assert_eq!(3, array.value_length(1));
Expand All @@ -4908,9 +4901,8 @@ mod tests {
)]
fn test_cast_list_i32_to_list_timestamp() {
// Construct a value array
let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 8, 100000000])
.data()
.clone();
let value_data =
Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 8, 100000000]).into_data();

let value_offsets = Buffer::from_slice_ref([0, 3, 6, 9]);

Expand Down Expand Up @@ -7355,11 +7347,7 @@ mod tests {
fn test_list_to_string() {
let str_array = StringArray::from(vec!["a", "b", "c", "d", "e", "f", "g", "h"]);
let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
let value_data = ArrayData::builder(DataType::Utf8)
.len(str_array.len())
.buffers(str_array.data().buffers().to_vec())
.build()
.unwrap();
let value_data = str_array.into_data();

let list_data_type =
DataType::List(Arc::new(Field::new("item", DataType::Utf8, true)));
Expand Down Expand Up @@ -8123,7 +8111,7 @@ mod tests {
let options = CastOptions { safe: true };
let array = cast_with_options(&s, &DataType::Utf8, &options).unwrap();
let a = array.as_string::<i32>();
a.data().validate_full().unwrap();
a.to_data().validate_full().unwrap();

assert_eq!(a.null_count(), 1);
assert_eq!(a.len(), 2);
Expand Down
2 changes: 1 addition & 1 deletion arrow-integration-test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -938,7 +938,7 @@ pub fn dictionary_array_from_json(
// convert key and value to dictionary data
let dict_data = ArrayData::builder(field.data_type().clone())
.len(keys.len())
.add_buffer(keys.data().buffers()[0].clone())
.add_buffer(keys.to_data().buffers()[0].clone())
.null_bit_buffer(Some(null_buf))
.add_child_data(values.into_data())
.build()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,8 @@ fn validate(arrow_name: &str, json_name: &str, verbose: bool) -> Result<()> {

for i in 0..num_columns {
assert_eq!(
arrow_batch.column(i).data(),
json_batch.column(i).data(),
arrow_batch.column(i).as_ref(),
json_batch.column(i).as_ref(),
"Arrow and JSON batch columns not the same"
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,8 @@ async fn consume_flight_location(
let field = schema.field(i);
let field_name = field.name();

let expected_data = expected_batch.column(i).data();
let actual_data = actual_batch.column(i).data();
let expected_data = expected_batch.column(i).as_ref();
let actual_data = actual_batch.column(i).as_ref();

assert_eq!(expected_data, actual_data, "Data for field {field_name}");
}
Expand Down
Loading