From c4ea6ead7eaa59052fbbc89131559116de0148f7 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 4 Apr 2023 19:38:21 +0100 Subject: [PATCH 1/2] Deprecate Array::data (#3880) --- arrow-array/src/array/binary_array.rs | 3 -- arrow-array/src/array/mod.rs | 23 +++++++++-- arrow-array/src/array/primitive_array.rs | 3 +- arrow-array/src/array/struct_array.rs | 18 ++++----- arrow-cast/src/cast.rs | 38 +++++++------------ arrow-integration-test/src/lib.rs | 2 +- .../src/bin/arrow-json-integration-test.rs | 4 +- .../integration_test.rs | 4 +- arrow-ipc/src/writer.rs | 25 ++++++------ arrow-row/src/lib.rs | 2 +- arrow-select/src/concat.rs | 32 +++++++++------- arrow-select/src/filter.rs | 4 +- arrow-select/src/interleave.rs | 3 +- arrow-select/src/take.rs | 24 ++++-------- arrow-select/src/zip.rs | 6 +-- arrow-string/src/length.rs | 4 +- arrow/src/ffi_stream.rs | 8 ++-- arrow/src/pyarrow.rs | 2 +- arrow/tests/array_transform.rs | 4 +- 19 files changed, 102 insertions(+), 107 deletions(-) diff --git a/arrow-array/src/array/binary_array.rs b/arrow-array/src/array/binary_array.rs index be861474f659..3b13a513f646 100644 --- a/arrow-array/src/array/binary_array.rs +++ b/arrow-array/src/array/binary_array.rs @@ -467,9 +467,6 @@ mod tests { let list_array = GenericListArray::::from(array_data2); let binary_array2 = GenericBinaryArray::::from(list_array); - assert_eq!(2, binary_array2.data().buffers().len()); - assert_eq!(0, binary_array2.data().child_data().len()); - assert_eq!(binary_array1.len(), binary_array2.len()); assert_eq!(binary_array1.null_count(), binary_array2.null_count()); assert_eq!(binary_array1.value_offsets(), binary_array2.value_offsets()); diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index 41d5c8bebe29..90d76b7871fa 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -95,8 +95,7 @@ pub trait Array: std::fmt::Debug + Send + Sync { fn as_any(&self) -> &dyn Any; /// Returns a reference to the underlying data of this array - /// - /// This will be deprecated in a future release [(#3880)](https://github.com/apache/arrow-rs/issues/3880) + #[deprecated(note = "Use Array::to_data or Array::into_data")] fn data(&self) -> &ArrayData; /// Returns the underlying data of this array @@ -111,6 +110,7 @@ pub trait Array: std::fmt::Debug + Send + Sync { /// /// This will be deprecated in a future release [(#3880)](https://github.com/apache/arrow-rs/issues/3880) #[deprecated(note = "Use Array::to_data or Array::into_data")] + #[allow(deprecated)] fn data_ref(&self) -> &ArrayData { self.data() } @@ -281,6 +281,7 @@ impl Array for ArrayRef { self.as_ref().as_any() } + #[allow(deprecated)] fn data(&self) -> &ArrayData { self.as_ref().data() } @@ -348,6 +349,7 @@ impl<'a, T: Array> Array for &'a T { T::as_any(self) } + #[allow(deprecated)] fn data(&self) -> &ArrayData { T::data(self) } @@ -435,78 +437,91 @@ pub trait ArrayAccessor: Array { } impl PartialEq for dyn Array + '_ { + #[allow(deprecated)] fn eq(&self, other: &Self) -> bool { self.data().eq(other.data()) } } impl PartialEq for dyn Array + '_ { + #[allow(deprecated)] fn eq(&self, other: &T) -> bool { self.data().eq(other.data()) } } impl PartialEq for NullArray { + #[allow(deprecated)] fn eq(&self, other: &NullArray) -> bool { self.data().eq(other.data()) } } impl PartialEq for PrimitiveArray { + #[allow(deprecated)] fn eq(&self, other: &PrimitiveArray) -> bool { self.data().eq(other.data()) } } impl PartialEq for DictionaryArray { + #[allow(deprecated)] fn eq(&self, other: &Self) 
-> bool { self.data().eq(other.data()) } } impl PartialEq for BooleanArray { + #[allow(deprecated)] fn eq(&self, other: &BooleanArray) -> bool { self.data().eq(other.data()) } } impl PartialEq for GenericStringArray { + #[allow(deprecated)] fn eq(&self, other: &Self) -> bool { self.data().eq(other.data()) } } impl PartialEq for GenericBinaryArray { + #[allow(deprecated)] fn eq(&self, other: &Self) -> bool { self.data().eq(other.data()) } } impl PartialEq for FixedSizeBinaryArray { + #[allow(deprecated)] fn eq(&self, other: &Self) -> bool { self.data().eq(other.data()) } } impl PartialEq for GenericListArray { + #[allow(deprecated)] fn eq(&self, other: &Self) -> bool { self.data().eq(other.data()) } } impl PartialEq for MapArray { + #[allow(deprecated)] fn eq(&self, other: &Self) -> bool { self.data().eq(other.data()) } } impl PartialEq for FixedSizeListArray { + #[allow(deprecated)] fn eq(&self, other: &Self) -> bool { self.data().eq(other.data()) } } impl PartialEq for StructArray { + #[allow(deprecated)] fn eq(&self, other: &Self) -> bool { self.data().eq(other.data()) } @@ -865,8 +880,8 @@ mod tests { let null_array = new_null_array(array.data_type(), 9); assert_eq!(&array, &null_array); assert_eq!( - array.data().buffers()[0].len(), - null_array.data().buffers()[0].len() + array.to_data().buffers()[0].len(), + null_array.to_data().buffers()[0].len() ); } diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 5dfcb4da4d16..75bf85b3f2a0 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -519,10 +519,9 @@ impl PrimitiveArray { O: ArrowPrimitiveType, F: Fn(T::Native) -> Result, { - let data = self.data(); let len = self.len(); - let nulls = data.nulls().cloned(); + let nulls = self.nulls().cloned(); let mut buffer = BufferBuilder::::new(len); buffer.append_n_zeroed(len); let slice = buffer.as_slice_mut(); diff --git a/arrow-array/src/array/struct_array.rs b/arrow-array/src/array/struct_array.rs index 27e10a31fd00..1dccfc7d4ef3 100644 --- a/arrow-array/src/array/struct_array.rs +++ b/arrow-array/src/array/struct_array.rs @@ -174,7 +174,7 @@ impl TryFrom> for StructArray { // null: the null mask of the arrays. 
let mut null: Option = None; for (field_name, array) in values { - let child_datum = array.data(); + let child_datum = array.to_data(); let child_datum_len = child_datum.len(); if let Some(len) = len { if len != child_datum_len { @@ -186,7 +186,6 @@ impl TryFrom> for StructArray { } else { len = Some(child_datum_len) } - child_data.push(child_datum.clone()); fields.push(Arc::new(Field::new( field_name, array.data_type().clone(), @@ -209,6 +208,7 @@ impl TryFrom> for StructArray { // when one of the fields has no nulls, then there is no null in the array null = None; } + child_data.push(child_datum); } let len = len.unwrap(); @@ -385,10 +385,8 @@ mod tests { #[test] fn test_struct_array_builder() { - let array = BooleanArray::from(vec![false, false, true, true]); - let boolean_data = array.data(); - let array = Int64Array::from(vec![42, 28, 19, 31]); - let int_data = array.data(); + let boolean_array = BooleanArray::from(vec![false, false, true, true]); + let int_array = Int64Array::from(vec![42, 28, 19, 31]); let fields = vec![ Field::new("a", DataType::Boolean, false), @@ -396,14 +394,14 @@ mod tests { ]; let struct_array_data = ArrayData::builder(DataType::Struct(fields.into())) .len(4) - .add_child_data(boolean_data.clone()) - .add_child_data(int_data.clone()) + .add_child_data(boolean_array.to_data()) + .add_child_data(int_array.to_data()) .build() .unwrap(); let struct_array = StructArray::from(struct_array_data); - assert_eq!(boolean_data, struct_array.column(0).data()); - assert_eq!(int_data, struct_array.column(1).data()); + assert_eq!(struct_array.column(0).as_ref(), &boolean_array); + assert_eq!(struct_array.column(1).as_ref(), &int_array); } #[test] diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 0ea6332a7ea5..372fcc1a3132 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -2217,9 +2217,9 @@ fn value_to_string( let mut builder = GenericStringBuilder::::new(); let options = FormatOptions::default(); let formatter = ArrayFormatter::try_new(array, &options)?; - let data = array.data(); - for i in 0..data.len() { - match data.is_null(i) { + let nulls = array.nulls(); + for i in 0..array.len() { + match nulls.map(|x| x.is_null(i)).unwrap_or_default() { true => builder.append_null(), false => { formatter.value(i).write(&mut builder)?; @@ -3500,7 +3500,7 @@ where FROM::Offset: OffsetSizeTrait + ToPrimitive, TO::Offset: OffsetSizeTrait + NumCast, { - let data = array.data(); + let data = array.to_data(); assert_eq!(data.data_type(), &FROM::DATA_TYPE); let str_values_buf = data.buffers()[1].clone(); let offsets = data.buffers()[0].typed_data::(); @@ -4844,9 +4844,8 @@ mod tests { #[test] fn test_cast_list_i32_to_list_u16() { - let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 100000000]) - .data() - .clone(); + let value_data = + Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 100000000]).into_data(); let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); @@ -4875,15 +4874,9 @@ mod tests { assert_eq!(0, cast_array.null_count()); // offsets should be the same - assert_eq!( - list_array.data().buffers().to_vec(), - cast_array.data().buffers().to_vec() - ); - let array = cast_array - .as_ref() - .as_any() - .downcast_ref::() - .unwrap(); + let array = cast_array.as_list::(); + assert_eq!(list_array.value_offsets(), array.value_offsets()); + assert_eq!(DataType::UInt16, array.value_type()); assert_eq!(3, array.value_length(0)); assert_eq!(3, array.value_length(1)); @@ -4908,9 +4901,8 @@ mod tests { )] fn test_cast_list_i32_to_list_timestamp() 
{ // Construct a value array - let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 8, 100000000]) - .data() - .clone(); + let value_data = + Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 8, 100000000]).into_data(); let value_offsets = Buffer::from_slice_ref([0, 3, 6, 9]); @@ -7355,11 +7347,7 @@ mod tests { fn test_list_to_string() { let str_array = StringArray::from(vec!["a", "b", "c", "d", "e", "f", "g", "h"]); let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); - let value_data = ArrayData::builder(DataType::Utf8) - .len(str_array.len()) - .buffers(str_array.data().buffers().to_vec()) - .build() - .unwrap(); + let value_data = str_array.into_data(); let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))); @@ -8123,7 +8111,7 @@ mod tests { let options = CastOptions { safe: true }; let array = cast_with_options(&s, &DataType::Utf8, &options).unwrap(); let a = array.as_string::(); - a.data().validate_full().unwrap(); + a.to_data().validate_full().unwrap(); assert_eq!(a.null_count(), 1); assert_eq!(a.len(), 2); diff --git a/arrow-integration-test/src/lib.rs b/arrow-integration-test/src/lib.rs index 8ee7bc60085e..04bbcf3f6f23 100644 --- a/arrow-integration-test/src/lib.rs +++ b/arrow-integration-test/src/lib.rs @@ -938,7 +938,7 @@ pub fn dictionary_array_from_json( // convert key and value to dictionary data let dict_data = ArrayData::builder(field.data_type().clone()) .len(keys.len()) - .add_buffer(keys.data().buffers()[0].clone()) + .add_buffer(keys.to_data().buffers()[0].clone()) .null_bit_buffer(Some(null_buf)) .add_child_data(values.into_data()) .build() diff --git a/arrow-integration-testing/src/bin/arrow-json-integration-test.rs b/arrow-integration-testing/src/bin/arrow-json-integration-test.rs index 90a2d171d347..2c36e8d9b8ae 100644 --- a/arrow-integration-testing/src/bin/arrow-json-integration-test.rs +++ b/arrow-integration-testing/src/bin/arrow-json-integration-test.rs @@ -200,8 +200,8 @@ fn validate(arrow_name: &str, json_name: &str, verbose: bool) -> Result<()> { for i in 0..num_columns { assert_eq!( - arrow_batch.column(i).data(), - json_batch.column(i).data(), + arrow_batch.column(i).as_ref(), + json_batch.column(i).as_ref(), "Arrow and JSON batch columns not the same" ); } diff --git a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs index 3c537c5f61d8..a55c2dec0580 100644 --- a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs +++ b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs @@ -232,8 +232,8 @@ async fn consume_flight_location( let field = schema.field(i); let field_name = field.name(); - let expected_data = expected_batch.column(i).data(); - let actual_data = actual_batch.column(i).data(); + let expected_data = expected_batch.column(i).as_ref(); + let actual_data = actual_batch.column(i).as_ref(); assert_eq!(expected_data, actual_data, "Data for field {field_name}"); } diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs index 7d29f048a762..7d44d8f24030 100644 --- a/arrow-ipc/src/writer.rs +++ b/arrow-ipc/src/writer.rs @@ -220,15 +220,16 @@ impl IpcDataGenerator { } } DataType::RunEndEncoded(_, values) => { - if column.data().child_data().len() != 2 { + let data = column.to_data(); + if data.child_data().len() != 2 { return Err(ArrowError::InvalidArgumentError(format!( "The run encoded array should have exactly two child arrays. 
Found {}", - column.data().child_data().len() + data.child_data().len() ))); } - // The run_ends array is not expected to be dictionoary encoded. Hence encode dictionaries + // The run_ends array is not expected to be dictionary encoded. Hence encode dictionaries // only for values array. - let values_array = make_array(column.data().child_data()[1].clone()); + let values_array = make_array(data.child_data()[1].clone()); self.encode_dictionaries( values, &values_array, @@ -330,7 +331,7 @@ impl IpcDataGenerator { let dict_id = field .dict_id() .expect("All Dictionary types have `dict_id`"); - let dict_data = column.data(); + let dict_data = column.to_data(); let dict_values = &dict_data.child_data()[0]; let values = make_array(dict_data.child_data()[0].clone()); @@ -418,9 +419,9 @@ impl IpcDataGenerator { batch_compression_type.map(TryInto::try_into).transpose()?; for array in batch.columns() { - let array_data = array.data(); + let array_data = array.to_data(); offset = write_array_data( - array_data, + &array_data, &mut buffers, &mut arrow_data, &mut nodes, @@ -631,7 +632,7 @@ fn into_zero_offset_run_array( /// multiple times. Can optionally error if an update to an existing dictionary is attempted, which /// isn't allowed in the `FileWriter`. pub struct DictionaryTracker { - written: HashMap, + written: HashMap, error_on_replacement: bool, } @@ -660,18 +661,18 @@ impl DictionaryTracker { dict_id: i64, column: &ArrayRef, ) -> Result { - let dict_data = column.data(); + let dict_data = column.to_data(); let dict_values = &dict_data.child_data()[0]; // If a dictionary with this id was already emitted, check if it was the same. if let Some(last) = self.written.get(&dict_id) { - if ArrayData::ptr_eq(&last.data().child_data()[0], dict_values) { + if ArrayData::ptr_eq(&last.child_data()[0], dict_values) { // Same dictionary values => no need to emit it again return Ok(false); } if self.error_on_replacement { // If error on replacement perform a logical comparison - if last.data().child_data()[0] == *dict_values { + if last.child_data()[0] == *dict_values { // Same dictionary values => no need to emit it again return Ok(false); } @@ -684,7 +685,7 @@ impl DictionaryTracker { } } - self.written.insert(dict_id, column.clone()); + self.written.insert(dict_id, dict_data); Ok(true) } } diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index 9cc7b4f301cb..71e1de416617 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -2308,7 +2308,7 @@ mod tests { let back = converter.convert_rows(&rows).unwrap(); for ((actual, expected), preserve) in back.iter().zip(&arrays).zip(preserve) { - actual.data().validate_full().unwrap(); + actual.to_data().validate_full().unwrap(); dictionary_eq(preserve, actual, expected) } } diff --git a/arrow-select/src/concat.rs b/arrow-select/src/concat.rs index e34cc9edb884..ed27520cc61d 100644 --- a/arrow-select/src/concat.rs +++ b/arrow-select/src/concat.rs @@ -81,7 +81,8 @@ pub fn concat(arrays: &[&dyn Array]) -> Result { _ => Capacities::Array(arrays.iter().map(|a| a.len()).sum()), }; - let array_data = arrays.iter().map(|a| a.data()).collect::>(); + let array_data: Vec<_> = arrays.iter().map(|a| a.to_data()).collect::>(); + let array_data = array_data.iter().collect(); let mut mutable = MutableArrayData::with_capacities(array_data, false, capacity); for (i, a) in arrays.iter().enumerate() { @@ -131,6 +132,7 @@ pub fn concat_batches<'a>( #[cfg(test)] mod tests { use super::*; + use arrow_array::cast::AsArray; use arrow_schema::{Field, Schema}; use 
std::sync::Arc; @@ -527,7 +529,7 @@ mod tests { let arr = concat(&[&a, &b, &c]).unwrap(); // this would have been 1280 if we did not precompute the value lengths. - assert_eq!(arr.data().buffers()[1].capacity(), 960); + assert_eq!(arr.to_data().buffers()[1].capacity(), 960); } #[test] @@ -563,16 +565,20 @@ mod tests { ); // Should have reused the dictionary - assert!(array.data().child_data()[0].ptr_eq(&combined.data().child_data()[0])); - assert!(copy.data().child_data()[0].ptr_eq(&combined.data().child_data()[0])); + assert!(array + .values() + .to_data() + .ptr_eq(&combined.values().to_data())); + assert!(copy.values().to_data().ptr_eq(&combined.values().to_data())); let new: DictionaryArray = vec!["d"].into_iter().collect(); let combined = concat(&[© as _, &array as _, &new as _]).unwrap(); + let com = combined.as_dictionary::(); // Should not have reused the dictionary - assert!(!array.data().child_data()[0].ptr_eq(&combined.data().child_data()[0])); - assert!(!copy.data().child_data()[0].ptr_eq(&combined.data().child_data()[0])); - assert!(!new.data().child_data()[0].ptr_eq(&combined.data().child_data()[0])); + assert!(!array.values().to_data().ptr_eq(&com.values().to_data())); + assert!(!copy.values().to_data().ptr_eq(&com.values().to_data())); + assert!(!new.values().to_data().ptr_eq(&com.values().to_data())); } #[test] @@ -656,12 +662,12 @@ mod tests { let a = Int32Array::from_iter_values(0..100); let b = Int32Array::from_iter_values(10..20); let a = concat(&[&a, &b]).unwrap(); - let data = a.data(); + let data = a.to_data(); assert_eq!(data.buffers()[0].len(), 440); assert_eq!(data.buffers()[0].capacity(), 448); // Nearest multiple of 64 let a = concat(&[&a.slice(10, 20), &b]).unwrap(); - let data = a.data(); + let data = a.to_data(); assert_eq!(data.buffers()[0].len(), 120); assert_eq!(data.buffers()[0].capacity(), 128); // Nearest multiple of 64 @@ -669,7 +675,7 @@ mod tests { let b = StringArray::from(vec!["bingo", "bongo", "lorem", ""]); let a = concat(&[&a, &b]).unwrap(); - let data = a.data(); + let data = a.to_data(); // (100 + 4 + 1) * size_of() assert_eq!(data.buffers()[0].len(), 420); assert_eq!(data.buffers()[0].capacity(), 448); // Nearest multiple of 64 @@ -679,7 +685,7 @@ mod tests { assert_eq!(data.buffers()[1].capacity(), 320); // Nearest multiple of 64 let a = concat(&[&a.slice(10, 40), &b]).unwrap(); - let data = a.data(); + let data = a.to_data(); // (40 + 4 + 5) * size_of() assert_eq!(data.buffers()[0].len(), 180); assert_eq!(data.buffers()[0].capacity(), 192); // Nearest multiple of 64 @@ -693,7 +699,7 @@ mod tests { LargeBinaryArray::from_iter_values(std::iter::repeat(b"cupcakes").take(10)); let a = concat(&[&a, &b]).unwrap(); - let data = a.data(); + let data = a.to_data(); // (100 + 10 + 1) * size_of() assert_eq!(data.buffers()[0].len(), 888); assert_eq!(data.buffers()[0].capacity(), 896); // Nearest multiple of 64 @@ -703,7 +709,7 @@ mod tests { assert_eq!(data.buffers()[1].capacity(), 384); // Nearest multiple of 64 let a = concat(&[&a.slice(10, 40), &b]).unwrap(); - let data = a.data(); + let data = a.to_data(); // (40 + 10 + 1) * size_of() assert_eq!(data.buffers()[0].len(), 408); assert_eq!(data.buffers()[0].capacity(), 448); // Nearest multiple of 64 diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs index ba8fc4a2cc1a..06f0833561d6 100644 --- a/arrow-select/src/filter.rs +++ b/arrow-select/src/filter.rs @@ -650,7 +650,7 @@ where .into_data() .into_builder() .data_type(array.data_type().clone()) - 
.child_data(array.data().child_data().to_vec()); + .child_data(vec![array.values().to_data()]); // SAFETY: // Keys were valid before, filtered subset is therefore still valid @@ -1433,7 +1433,7 @@ mod tests { builder.append::("A", 3).unwrap(); let expected = builder.build().unwrap(); - assert_eq!(filtered.data(), expected.data()); + assert_eq!(filtered.to_data(), expected.to_data()); } #[test] diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index f274a3ebc30f..491395d1cc1a 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -193,7 +193,8 @@ fn interleave_fallback( values: &[&dyn Array], indices: &[(usize, usize)], ) -> Result { - let arrays: Vec<_> = values.iter().map(|x| x.data()).collect(); + let arrays: Vec<_> = values.iter().map(|x| x.to_data()).collect(); + let arrays: Vec<_> = arrays.iter().collect(); let mut array_data = MutableArrayData::new(arrays, false, indices.len()); let mut cur_array = indices[0].0; diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index 01d6148132bd..3e7432530743 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -1531,9 +1531,8 @@ mod tests { macro_rules! test_take_list { ($offset_type:ty, $list_data_type:ident, $list_array_type:ident) => {{ // Construct a value array, [[0,0,0], [-1,-2,-1], [], [2,3]] - let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 3]) - .data() - .clone(); + let value_data = + Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 3]).into_data(); // Construct offsets let value_offsets: [$offset_type; 5] = [0, 3, 6, 6, 8]; let value_offsets = Buffer::from_slice_ref(&value_offsets); @@ -1570,8 +1569,7 @@ mod tests { Some(0), Some(0), ]) - .data() - .clone(); + .into_data(); // construct offsets let expected_offsets: [$offset_type; 6] = [0, 2, 2, 5, 5, 8]; let expected_offsets = Buffer::from_slice_ref(&expected_offsets); @@ -1604,8 +1602,7 @@ mod tests { Some(5), None, ]) - .data() - .clone(); + .into_data(); // Construct offsets let value_offsets: [$offset_type; 5] = [0, 3, 6, 7, 9]; let value_offsets = Buffer::from_slice_ref(&value_offsets); @@ -1644,8 +1641,7 @@ mod tests { None, Some(0), ]) - .data() - .clone(); + .into_data(); // construct offsets let expected_offsets: [$offset_type; 6] = [0, 1, 1, 4, 6, 9]; let expected_offsets = Buffer::from_slice_ref(&expected_offsets); @@ -1677,8 +1673,7 @@ mod tests { Some(5), None, ]) - .data() - .clone(); + .into_data(); // Construct offsets let value_offsets: [$offset_type; 5] = [0, 3, 6, 6, 8]; let value_offsets = Buffer::from_slice_ref(&value_offsets); @@ -1716,8 +1711,7 @@ mod tests { None, Some(0), ]) - .data() - .clone(); + .into_data(); // construct offsets let expected_offsets: [$offset_type; 6] = [0, 0, 0, 3, 5, 8]; let expected_offsets = Buffer::from_slice_ref(&expected_offsets); @@ -1852,9 +1846,7 @@ mod tests { #[should_panic(expected = "index out of bounds: the len is 4 but the index is 1000")] fn test_take_list_out_of_bounds() { // Construct a value array, [[0,0,0], [-1,-2,-1], [2,3]] - let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 3]) - .data() - .clone(); + let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 3]).into_data(); // Construct offsets let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); // Construct a list array from the above two diff --git a/arrow-select/src/zip.rs b/arrow-select/src/zip.rs index e5d0f25e8fdb..b5df891544a8 100644 --- a/arrow-select/src/zip.rs +++ b/arrow-select/src/zip.rs @@ -42,10 +42,10 @@ pub fn zip( "all arrays 
should have the same length".into(), )); } - let falsy = falsy.data(); - let truthy = truthy.data(); + let falsy = falsy.to_data(); + let truthy = truthy.to_data(); - let mut mutable = MutableArrayData::new(vec![truthy, falsy], false, truthy.len()); + let mut mutable = MutableArrayData::new(vec![&truthy, &falsy], false, truthy.len()); // the SlicesIterator slices only the true values. So the gaps left by this iterator we need to // fill with falsy values diff --git a/arrow-string/src/length.rs b/arrow-string/src/length.rs index c206fffb9166..bd022532d6e1 100644 --- a/arrow-string/src/length.rs +++ b/arrow-string/src/length.rs @@ -243,7 +243,7 @@ mod tests { let result = $kernel(&array).unwrap(); let result = result.as_any().downcast_ref::<$result_ty>().unwrap(); let expected: $result_ty = $expected.into(); - assert_eq!(expected.data(), result.data()); + assert_eq!(&expected, result); }}; } @@ -256,7 +256,7 @@ mod tests { let result = length(&array).unwrap(); let result = result.as_any().downcast_ref::<$result_ty>().unwrap(); let expected: $result_ty = $expected.into(); - assert_eq!(expected.data(), result.data()); + assert_eq!(&expected, result); }}; } diff --git a/arrow/src/ffi_stream.rs b/arrow/src/ffi_stream.rs index 6b3067ab7d75..0e358c36a0dc 100644 --- a/arrow/src/ffi_stream.rs +++ b/arrow/src/ffi_stream.rs @@ -220,7 +220,7 @@ impl ExportedArrayStream { let mut private_data = self.get_private_data(); let reader = &mut private_data.batch_reader; - let ret_code = match reader.next() { + match reader.next() { None => { // Marks ArrowArray released to indicate reaching the end of stream. unsafe { std::ptr::write(out, FFI_ArrowArray::empty()) } @@ -229,7 +229,7 @@ impl ExportedArrayStream { Some(next_batch) => { if let Ok(batch) = next_batch { let struct_array = StructArray::from(batch); - let array = FFI_ArrowArray::new(struct_array.data()); + let array = FFI_ArrowArray::new(&struct_array.to_data()); unsafe { std::ptr::copy(addr_of!(array), out, 1) }; std::mem::forget(array); @@ -240,9 +240,7 @@ impl ExportedArrayStream { get_error_code(err) } } - }; - - ret_code + } } pub fn get_last_error(&mut self) -> &String { diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 8cc08988cbe6..081cc8063366 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -187,7 +187,7 @@ impl PyArrowConvert for RecordBatch { let columns = self.columns().iter(); for array in columns { - py_arrays.push(array.data().to_pyarrow(py)?); + py_arrays.push(array.to_data().to_pyarrow(py)?); } let py_schema = schema.to_pyarrow(py)?; diff --git a/arrow/tests/array_transform.rs b/arrow/tests/array_transform.rs index 30a8bad60368..7cd0007cce75 100644 --- a/arrow/tests/array_transform.rs +++ b/arrow/tests/array_transform.rs @@ -44,8 +44,8 @@ fn create_decimal_array( #[cfg(not(feature = "force_validate"))] fn test_decimal() { let decimal_array = - create_decimal_array(vec![Some(1), Some(2), None, Some(3)], 10, 3); - let arrays = vec![Array::data(&decimal_array)]; + create_decimal_array(vec![Some(1), Some(2), None, Some(3)], 10, 3).into_data(); + let arrays = vec![&decimal_array]; let mut a = MutableArrayData::new(arrays, true, 3); a.extend(0, 0, 3); a.extend(0, 2, 3); From 10eb575e1dce1fe7370d0b122eb4ff42b2134dea Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 5 Apr 2023 10:32:57 +0100 Subject: [PATCH 2/2] Review feedback --- arrow-array/src/array/mod.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index 
90d76b7871fa..fa6e970b497a 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -107,8 +107,6 @@ pub trait Array: std::fmt::Debug + Send + Sync { fn into_data(self) -> ArrayData; /// Returns a reference-counted pointer to the underlying data of this array. - /// - /// This will be deprecated in a future release [(#3880)](https://github.com/apache/arrow-rs/issues/3880) #[deprecated(note = "Use Array::to_data or Array::into_data")] #[allow(deprecated)] fn data_ref(&self) -> &ArrayData {
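
As a quick illustration of the replacement pattern applied throughout this patch, the sketch below shows how a caller of the now-deprecated `Array::data` might migrate to `Array::to_data`, `Array::into_data`, or more targeted accessors such as `Array::nulls`. It is only a rough sketch, not part of the patch itself: the `describe` helper and the `main` function are hypothetical, and the `arrow_array`/`arrow_data` import paths are assumed from the crates touched above.

    use arrow_array::{Array, Int32Array};
    use arrow_data::ArrayData;

    // Hypothetical helper: inspect an array without the deprecated `data()`.
    fn describe(array: &dyn Array) {
        // Before: `let data = array.data();` borrowed an &ArrayData (deprecated).
        // After: materialise an owned ArrayData only where one is really needed;
        // this clones reference-counted buffers, not the underlying values.
        let data: ArrayData = array.to_data();
        println!("buffers: {}", data.buffers().len());

        // Null information no longer needs to go through ArrayData at all.
        let null_count = array.nulls().map(|n| n.null_count()).unwrap_or(0);
        println!("nulls: {null_count}");
    }

    fn main() {
        let array = Int32Array::from(vec![Some(1), None, Some(3)]);
        describe(&array);

        // When the array itself is no longer needed, `into_data` consumes it
        // and hands back its ArrayData without an extra clone.
        let _owned: ArrayData = array.into_data();
    }

The general rule the diff follows is the same: prefer `to_data()` where an `ArrayData` is still genuinely required, `into_data()` where the array can be consumed, and dedicated accessors (`nulls()`, `value_offsets()`, `values()`) everywhere else.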