From 2bf7015988a010d23c540649b0bf8c94fa47cbd9 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Wed, 9 Mar 2022 06:16:25 +0000 Subject: [PATCH] More idiomatic code --- src/buffer/immutable.rs | 4 +- src/compute/arithmetics/decimal/add.rs | 60 +++++++++------- src/compute/arithmetics/decimal/div.rs | 58 ++++++++------- src/compute/arithmetics/decimal/mul.rs | 58 ++++++++------- src/compute/arithmetics/decimal/sub.rs | 60 +++++++++------- src/compute/arity.rs | 97 +++++++++++++++----------- src/compute/cast/binary_to.rs | 19 +++-- src/compute/cast/boolean_to.rs | 12 ++-- src/compute/hash.rs | 31 +++++--- src/compute/length.rs | 8 +-- src/compute/take/generic_binary.rs | 28 +++++--- src/io/parquet/write/mod.rs | 19 +++-- 12 files changed, 259 insertions(+), 195 deletions(-) diff --git a/src/buffer/immutable.rs b/src/buffer/immutable.rs index fd98d562f8c..1f6774f0a81 100644 --- a/src/buffer/immutable.rs +++ b/src/buffer/immutable.rs @@ -169,9 +169,7 @@ impl Buffer { iterator.collect::>().into() } - /// # Safety - /// This method assumes that the iterator's size is correct and is undefined behavior - /// to use it on an iterator that reports an incorrect length. + /// Creates a [`Buffer`] from an fallible [`Iterator`] with a trusted length. #[inline] pub fn try_from_trusted_len_iter>>( iterator: I, diff --git a/src/compute/arithmetics/decimal/add.rs b/src/compute/arithmetics/decimal/add.rs index f3b6a0b4a1a..84bf8f4aead 100644 --- a/src/compute/arithmetics/decimal/add.rs +++ b/src/compute/arithmetics/decimal/add.rs @@ -1,7 +1,6 @@ //! Defines the addition arithmetic kernels for [`PrimitiveArray`] representing decimals. use crate::{ array::PrimitiveArray, - buffer::Buffer, compute::{ arithmetics::{ArrayAdd, ArrayCheckedAdd, ArraySaturatingAdd}, arity::{binary, binary_checked}, @@ -188,17 +187,29 @@ pub fn adaptive_add( ) -> Result> { check_same_len(lhs, rhs)?; - if let (DataType::Decimal(lhs_p, lhs_s), DataType::Decimal(rhs_p, rhs_s)) = - (lhs.data_type(), rhs.data_type()) - { - // The resulting precision is mutable because it could change while - // looping through the iterator - let (mut res_p, res_s, diff) = adjusted_precision_scale(*lhs_p, *lhs_s, *rhs_p, *rhs_s); - - let shift = 10i128.pow(diff as u32); - let mut max = max_value(res_p); - - let iter = lhs.values().iter().zip(rhs.values().iter()).map(|(l, r)| { + let (lhs_p, lhs_s, rhs_p, rhs_s) = + if let (DataType::Decimal(lhs_p, lhs_s), DataType::Decimal(rhs_p, rhs_s)) = + (lhs.data_type(), rhs.data_type()) + { + (*lhs_p, *lhs_s, *rhs_p, *rhs_s) + } else { + return Err(ArrowError::InvalidArgumentError( + "Incorrect data type for the array".to_string(), + )); + }; + + // The resulting precision is mutable because it could change while + // looping through the iterator + let (mut res_p, res_s, diff) = adjusted_precision_scale(lhs_p, lhs_s, rhs_p, rhs_s); + + let shift = 10i128.pow(diff as u32); + let mut max = max_value(res_p); + + let values = lhs + .values() + .iter() + .zip(rhs.values().iter()) + .map(|(l, r)| { // Based on the array's scales one of the arguments in the sum has to be shifted // to the left to match the final scale let res = if lhs_s > rhs_s { @@ -220,19 +231,14 @@ pub fn adaptive_add( max = max_value(res_p); } res - }); - let values = Buffer::from_trusted_len_iter(iter); - - let validity = combine_validities(lhs.validity(), rhs.validity()); - - Ok(PrimitiveArray::::new( - DataType::Decimal(res_p, res_s), - values, - validity, - )) - } else { - Err(ArrowError::InvalidArgumentError( - "Incorrect data type for the array".to_string(), - )) - } + }) + .collect::>(); + + let validity = combine_validities(lhs.validity(), rhs.validity()); + + Ok(PrimitiveArray::::new( + DataType::Decimal(res_p, res_s), + values.into(), + validity, + )) } diff --git a/src/compute/arithmetics/decimal/div.rs b/src/compute/arithmetics/decimal/div.rs index 1085b1ab285..224209084d5 100644 --- a/src/compute/arithmetics/decimal/div.rs +++ b/src/compute/arithmetics/decimal/div.rs @@ -3,7 +3,6 @@ use crate::{ array::PrimitiveArray, - buffer::Buffer, compute::{ arithmetics::{ArrayCheckedDiv, ArrayDiv}, arity::{binary, binary_checked, unary}, @@ -248,18 +247,30 @@ pub fn adaptive_div( ) -> Result> { check_same_len(lhs, rhs)?; - if let (DataType::Decimal(lhs_p, lhs_s), DataType::Decimal(rhs_p, rhs_s)) = - (lhs.data_type(), rhs.data_type()) - { - // The resulting precision is mutable because it could change while - // looping through the iterator - let (mut res_p, res_s, diff) = adjusted_precision_scale(*lhs_p, *lhs_s, *rhs_p, *rhs_s); - - let shift = 10i128.pow(diff as u32); - let shift_1 = 10i128.pow(res_s as u32); - let mut max = max_value(res_p); - - let iter = lhs.values().iter().zip(rhs.values().iter()).map(|(l, r)| { + let (lhs_p, lhs_s, rhs_p, rhs_s) = + if let (DataType::Decimal(lhs_p, lhs_s), DataType::Decimal(rhs_p, rhs_s)) = + (lhs.data_type(), rhs.data_type()) + { + (*lhs_p, *lhs_s, *rhs_p, *rhs_s) + } else { + return Err(ArrowError::InvalidArgumentError( + "Incorrect data type for the array".to_string(), + )); + }; + + // The resulting precision is mutable because it could change while + // looping through the iterator + let (mut res_p, res_s, diff) = adjusted_precision_scale(lhs_p, lhs_s, rhs_p, rhs_s); + + let shift = 10i128.pow(diff as u32); + let shift_1 = 10i128.pow(res_s as u32); + let mut max = max_value(res_p); + + let values = lhs + .values() + .iter() + .zip(rhs.values().iter()) + .map(|(l, r)| { let numeral: i128 = l * shift_1; // Based on the array's scales one of the arguments in the sum has to be shifted @@ -285,19 +296,14 @@ pub fn adaptive_div( } res - }); - let values = Buffer::from_trusted_len_iter(iter); + }) + .collect::>(); - let validity = combine_validities(lhs.validity(), rhs.validity()); + let validity = combine_validities(lhs.validity(), rhs.validity()); - Ok(PrimitiveArray::::new( - DataType::Decimal(res_p, res_s), - values, - validity, - )) - } else { - Err(ArrowError::InvalidArgumentError( - "Incorrect data type for the array".to_string(), - )) - } + Ok(PrimitiveArray::::new( + DataType::Decimal(res_p, res_s), + values.into(), + validity, + )) } diff --git a/src/compute/arithmetics/decimal/mul.rs b/src/compute/arithmetics/decimal/mul.rs index 01c505349ef..42f21368bb8 100644 --- a/src/compute/arithmetics/decimal/mul.rs +++ b/src/compute/arithmetics/decimal/mul.rs @@ -3,7 +3,6 @@ use crate::{ array::PrimitiveArray, - buffer::Buffer, compute::{ arithmetics::{ArrayCheckedMul, ArrayMul, ArraySaturatingMul}, arity::{binary, binary_checked, unary}, @@ -260,18 +259,30 @@ pub fn adaptive_mul( ) -> Result> { check_same_len(lhs, rhs)?; - if let (DataType::Decimal(lhs_p, lhs_s), DataType::Decimal(rhs_p, rhs_s)) = - (lhs.data_type(), rhs.data_type()) - { - // The resulting precision is mutable because it could change while - // looping through the iterator - let (mut res_p, res_s, diff) = adjusted_precision_scale(*lhs_p, *lhs_s, *rhs_p, *rhs_s); - - let shift = 10i128.pow(diff as u32); - let shift_1 = 10i128.pow(res_s as u32); - let mut max = max_value(res_p); - - let iter = lhs.values().iter().zip(rhs.values().iter()).map(|(l, r)| { + let (lhs_p, lhs_s, rhs_p, rhs_s) = + if let (DataType::Decimal(lhs_p, lhs_s), DataType::Decimal(rhs_p, rhs_s)) = + (lhs.data_type(), rhs.data_type()) + { + (*lhs_p, *lhs_s, *rhs_p, *rhs_s) + } else { + return Err(ArrowError::InvalidArgumentError( + "Incorrect data type for the array".to_string(), + )); + }; + + // The resulting precision is mutable because it could change while + // looping through the iterator + let (mut res_p, res_s, diff) = adjusted_precision_scale(lhs_p, lhs_s, rhs_p, rhs_s); + + let shift = 10i128.pow(diff as u32); + let shift_1 = 10i128.pow(res_s as u32); + let mut max = max_value(res_p); + + let values = lhs + .values() + .iter() + .zip(rhs.values().iter()) + .map(|(l, r)| { // Based on the array's scales one of the arguments in the sum has to be shifted // to the left to match the final scale let res = if lhs_s > rhs_s { @@ -297,19 +308,14 @@ pub fn adaptive_mul( } res - }); - let values = Buffer::from_trusted_len_iter(iter); + }) + .collect::>(); - let validity = combine_validities(lhs.validity(), rhs.validity()); + let validity = combine_validities(lhs.validity(), rhs.validity()); - Ok(PrimitiveArray::::new( - DataType::Decimal(res_p, res_s), - values, - validity, - )) - } else { - Err(ArrowError::InvalidArgumentError( - "Incorrect data type for the array".to_string(), - )) - } + Ok(PrimitiveArray::::new( + DataType::Decimal(res_p, res_s), + values.into(), + validity, + )) } diff --git a/src/compute/arithmetics/decimal/sub.rs b/src/compute/arithmetics/decimal/sub.rs index 3c10eab87ce..c027dc0c51a 100644 --- a/src/compute/arithmetics/decimal/sub.rs +++ b/src/compute/arithmetics/decimal/sub.rs @@ -2,7 +2,6 @@ use crate::{ array::PrimitiveArray, - buffer::Buffer, compute::{ arithmetics::{ArrayCheckedSub, ArraySaturatingSub, ArraySub}, arity::{binary, binary_checked}, @@ -187,17 +186,29 @@ pub fn adaptive_sub( ) -> Result> { check_same_len(lhs, rhs)?; - if let (DataType::Decimal(lhs_p, lhs_s), DataType::Decimal(rhs_p, rhs_s)) = - (lhs.data_type(), rhs.data_type()) - { - // The resulting precision is mutable because it could change while - // looping through the iterator - let (mut res_p, res_s, diff) = adjusted_precision_scale(*lhs_p, *lhs_s, *rhs_p, *rhs_s); - - let shift = 10i128.pow(diff as u32); - let mut max = max_value(res_p); - - let iter = lhs.values().iter().zip(rhs.values().iter()).map(|(l, r)| { + let (lhs_p, lhs_s, rhs_p, rhs_s) = + if let (DataType::Decimal(lhs_p, lhs_s), DataType::Decimal(rhs_p, rhs_s)) = + (lhs.data_type(), rhs.data_type()) + { + (*lhs_p, *lhs_s, *rhs_p, *rhs_s) + } else { + return Err(ArrowError::InvalidArgumentError( + "Incorrect data type for the array".to_string(), + )); + }; + + // The resulting precision is mutable because it could change while + // looping through the iterator + let (mut res_p, res_s, diff) = adjusted_precision_scale(lhs_p, lhs_s, rhs_p, rhs_s); + + let shift = 10i128.pow(diff as u32); + let mut max = max_value(res_p); + + let values = lhs + .values() + .iter() + .zip(rhs.values().iter()) + .map(|(l, r)| { // Based on the array's scales one of the arguments in the sum has to be shifted // to the left to match the final scale let res: i128 = if lhs_s > rhs_s { @@ -220,19 +231,14 @@ pub fn adaptive_sub( } res - }); - let values = Buffer::from_trusted_len_iter(iter); - - let validity = combine_validities(lhs.validity(), rhs.validity()); - - Ok(PrimitiveArray::::new( - DataType::Decimal(res_p, res_s), - values, - validity, - )) - } else { - Err(ArrowError::InvalidArgumentError( - "Incorrect data type for the array".to_string(), - )) - } + }) + .collect::>(); + + let validity = combine_validities(lhs.validity(), rhs.validity()); + + Ok(PrimitiveArray::::new( + DataType::Decimal(res_p, res_s), + values.into(), + validity, + )) } diff --git a/src/compute/arity.rs b/src/compute/arity.rs index 68142bcec23..6ac651420a8 100644 --- a/src/compute/arity.rs +++ b/src/compute/arity.rs @@ -4,14 +4,13 @@ use super::utils::{check_same_len, combine_validities}; use crate::{ array::PrimitiveArray, bitmap::{Bitmap, MutableBitmap}, - buffer::Buffer, datatypes::DataType, error::Result, types::NativeType, }; -/// Applies an unary and infallible function to a primitive array. This is the -/// fastest way to perform an operation on a primitive array when the benefits +/// Applies an unary and infallible function to a [`PrimitiveArray`]. This is the +/// fastest way to perform an operation on a [`PrimitiveArray`] when the benefits /// of a vectorized operation outweighs the cost of branching nulls and /// non-nulls. /// @@ -26,10 +25,9 @@ where O: NativeType, F: Fn(I) -> O, { - let values = array.values().iter().map(|v| op(*v)); - let values = Buffer::from_trusted_len_iter(values); + let values = array.values().iter().map(|v| op(*v)).collect::>(); - PrimitiveArray::::new(data_type, values, array.validity().cloned()) + PrimitiveArray::::new(data_type, values.into(), array.validity().cloned()) } /// Version of unary that checks for errors in the closure used to create the @@ -44,8 +42,12 @@ where O: NativeType, F: Fn(I) -> Result, { - let values = array.values().iter().map(|v| op(*v)); - let values = Buffer::try_from_trusted_len_iter(values)?; + let values = array + .values() + .iter() + .map(|v| op(*v)) + .collect::>>()? + .into(); Ok(PrimitiveArray::::new( data_type, @@ -68,13 +70,16 @@ where { let mut mut_bitmap = MutableBitmap::with_capacity(array.len()); - let values = array.values().iter().map(|v| { - let (res, over) = op(*v); - mut_bitmap.push(over); - res - }); - - let values = Buffer::from_trusted_len_iter(values); + let values = array + .values() + .iter() + .map(|v| { + let (res, over) = op(*v); + mut_bitmap.push(over); + res + }) + .collect::>() + .into(); ( PrimitiveArray::::new(data_type, values, array.validity().cloned()), @@ -97,18 +102,21 @@ where { let mut mut_bitmap = MutableBitmap::with_capacity(array.len()); - let values = array.values().iter().map(|v| match op(*v) { - Some(val) => { - mut_bitmap.push(true); - val - } - None => { - mut_bitmap.push(false); - O::default() - } - }); - - let values = Buffer::from_trusted_len_iter(values); + let values = array + .values() + .iter() + .map(|v| match op(*v) { + Some(val) => { + mut_bitmap.push(true); + val + } + None => { + mut_bitmap.push(false); + O::default() + } + }) + .collect::>() + .into(); // The validity has to be checked against the bitmap created during the // creation of the values with the iterator. If an error was found during @@ -153,8 +161,9 @@ where .values() .iter() .zip(rhs.values().iter()) - .map(|(l, r)| op(*l, *r)); - let values = Buffer::from_trusted_len_iter(values); + .map(|(l, r)| op(*l, *r)) + .collect::>() + .into(); PrimitiveArray::::new(data_type, values, validity) } @@ -180,9 +189,9 @@ where .values() .iter() .zip(rhs.values().iter()) - .map(|(l, r)| op(*l, *r)); - - let values = Buffer::try_from_trusted_len_iter(values)?; + .map(|(l, r)| op(*l, *r)) + .collect::>>()? + .into(); Ok(PrimitiveArray::::new(data_type, values, validity)) } @@ -206,13 +215,17 @@ where let mut mut_bitmap = MutableBitmap::with_capacity(lhs.len()); - let values = lhs.values().iter().zip(rhs.values().iter()).map(|(l, r)| { - let (res, over) = op(*l, *r); - mut_bitmap.push(over); - res - }); - - let values = Buffer::from_trusted_len_iter(values); + let values = lhs + .values() + .iter() + .zip(rhs.values().iter()) + .map(|(l, r)| { + let (res, over) = op(*l, *r); + mut_bitmap.push(over); + res + }) + .collect::>() + .into(); ( PrimitiveArray::::new(data_type, values, validity), @@ -251,9 +264,9 @@ where mut_bitmap.push(false); T::default() } - }); - - let values = Buffer::from_trusted_len_iter(values); + }) + .collect::>() + .into(); let bitmap: Bitmap = mut_bitmap.into(); let validity = combine_validities(lhs.validity(), rhs.validity()); diff --git a/src/compute/cast/binary_to.rs b/src/compute/cast/binary_to.rs index faba71738dd..a351705f1aa 100644 --- a/src/compute/cast/binary_to.rs +++ b/src/compute/cast/binary_to.rs @@ -1,16 +1,21 @@ use std::convert::TryFrom; use crate::error::{ArrowError, Result}; -use crate::{array::*, buffer::Buffer, datatypes::DataType, types::NativeType}; +use crate::{array::*, datatypes::DataType, types::NativeType}; use super::CastOptions; /// Conversion of binary pub fn binary_to_large_binary(from: &BinaryArray, to_data_type: DataType) -> BinaryArray { let values = from.values().clone(); - let offsets = from.offsets().iter().map(|x| *x as i64); - let offsets = Buffer::from_trusted_len_iter(offsets); - BinaryArray::::new(to_data_type, offsets, values, from.validity().cloned()) + let offsets = from.offsets().iter().map(|x| *x as i64).collect::>(); + // todo: use `new_unchecked` since all invariants are preserved + BinaryArray::::new( + to_data_type, + offsets.into(), + values, + from.validity().cloned(), + ) } /// Conversion of binary @@ -22,11 +27,11 @@ pub fn binary_large_to_binary( let _ = i32::try_from(*from.offsets().last().unwrap()).map_err(ArrowError::from_external_error)?; - let offsets = from.offsets().iter().map(|x| *x as i32); - let offsets = Buffer::from_trusted_len_iter(offsets); + let offsets = from.offsets().iter().map(|x| *x as i32).collect::>(); + // todo: use `new_unchecked` since all invariants are preserved Ok(BinaryArray::::new( to_data_type, - offsets, + offsets.into(), values, from.validity().cloned(), )) diff --git a/src/compute/cast/boolean_to.rs b/src/compute/cast/boolean_to.rs index e8c3951dcb0..ef24e4b4dff 100644 --- a/src/compute/cast/boolean_to.rs +++ b/src/compute/cast/boolean_to.rs @@ -1,7 +1,7 @@ -use crate::{array::*, buffer::Buffer, types::NativeType}; use crate::{ - array::{BinaryArray, Offset, Utf8Array}, + array::{Array, BinaryArray, BooleanArray, Offset, PrimitiveArray, Utf8Array}, error::Result, + types::NativeType, }; pub(super) fn boolean_to_primitive_dyn(array: &dyn Array) -> Result> @@ -17,13 +17,13 @@ pub fn boolean_to_primitive(from: &BooleanArray) -> PrimitiveArray where T: NativeType + num_traits::One, { - let iter = from + let values = from .values() .iter() - .map(|x| if x { T::one() } else { T::default() }); - let values = Buffer::::from_trusted_len_iter(iter); + .map(|x| if x { T::one() } else { T::default() }) + .collect::>(); - PrimitiveArray::::new(T::PRIMITIVE.into(), values, from.validity().cloned()) + PrimitiveArray::::new(T::PRIMITIVE.into(), values.into(), from.validity().cloned()) } /// Casts the [`BooleanArray`] to a [`Utf8Array`], casting trues to `"1"` and falses to `"0"` diff --git a/src/compute/hash.rs b/src/compute/hash.rs index f21d0c82cac..bfb7ae24d58 100644 --- a/src/compute/hash.rs +++ b/src/compute/hash.rs @@ -1,5 +1,5 @@ //! Contains the [`hash`] and typed (e.g. [`hash_primitive`]) operators. -//! // multiversion does not copy documentation, causing a false positive +// multiversion does not copy documentation, causing a false positive #![allow(missing_docs)] use ahash::{CallHasher, RandomState}; use multiversion::multiversion; @@ -13,7 +13,6 @@ macro_rules! new_state { use crate::{ array::{Array, BinaryArray, BooleanArray, Offset, PrimitiveArray, Utf8Array}, - buffer::Buffer, datatypes::{DataType, PhysicalType, PrimitiveType}, error::{ArrowError, Result}, types::NativeType, @@ -36,8 +35,12 @@ pub fn hash_primitive(array: &PrimitiveArray) -> Primit pub fn hash_boolean(array: &BooleanArray) -> PrimitiveArray { let state = new_state!(); - let iter = array.values_iter().map(|x| u8::get_hash(&x, &state)); - let values = Buffer::from_trusted_len_iter(iter); + let values = array + .values_iter() + .map(|x| u8::get_hash(&x, &state)) + .collect::>() + .into(); + PrimitiveArray::::new(DataType::UInt64, values, array.validity().cloned()) } @@ -47,18 +50,24 @@ pub fn hash_boolean(array: &BooleanArray) -> PrimitiveArray { pub fn hash_utf8(array: &Utf8Array) -> PrimitiveArray { let state = new_state!(); - let iter = array + let values = array .values_iter() - .map(|x| <[u8]>::get_hash(&x.as_bytes(), &state)); - let values = Buffer::from_trusted_len_iter(iter); + .map(|x| <[u8]>::get_hash(&x.as_bytes(), &state)) + .collect::>() + .into(); + PrimitiveArray::::new(DataType::UInt64, values, array.validity().cloned()) } /// Element-wise hash of a [`BinaryArray`]. Validity is preserved. pub fn hash_binary(array: &BinaryArray) -> PrimitiveArray { let state = new_state!(); - let iter = array.values_iter().map(|x| <[u8]>::get_hash(&x, &state)); - let values = Buffer::from_trusted_len_iter(iter); + let values = array + .values_iter() + .map(|x| <[u8]>::get_hash(&x, &state)) + .collect::>() + .into(); + PrimitiveArray::::new(DataType::UInt64, values, array.validity().cloned()) } @@ -67,7 +76,7 @@ macro_rules! with_match_primitive_type {( ) => ({ macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )} use crate::datatypes::PrimitiveType::*; - use crate::types::days_ms; + use crate::types::{days_ms, months_days_ns}; match $key_type { Int8 => __with_ty__! { i8 }, Int16 => __with_ty__! { i16 }, @@ -75,6 +84,7 @@ macro_rules! with_match_primitive_type {( Int64 => __with_ty__! { i64 }, Int128 => __with_ty__! { i128 }, DaysMs => __with_ty__! { days_ms }, + MonthDayNano => __with_ty__! { months_days_ns }, UInt8 => __with_ty__! { u8 }, UInt16 => __with_ty__! { u16 }, UInt32 => __with_ty__! { u32 }, @@ -137,6 +147,7 @@ pub fn can_hash(data_type: &DataType) -> bool { | PhysicalType::Primitive(PrimitiveType::Int64) | PhysicalType::Primitive(PrimitiveType::Int128) | PhysicalType::Primitive(PrimitiveType::DaysMs) + | PhysicalType::Primitive(PrimitiveType::MonthDayNano) | PhysicalType::Primitive(PrimitiveType::UInt8) | PhysicalType::Primitive(PrimitiveType::UInt16) | PhysicalType::Primitive(PrimitiveType::UInt32) diff --git a/src/compute/length.rs b/src/compute/length.rs index 459215c7e32..cd445d8f43b 100644 --- a/src/compute/length.rs +++ b/src/compute/length.rs @@ -19,7 +19,6 @@ use crate::{ array::*, - buffer::Buffer, datatypes::DataType, error::{ArrowError, Result}, types::NativeType, @@ -33,9 +32,8 @@ where let values = array .offsets() .windows(2) - .map(|offset| op(offset[1] - offset[0])); - - let values = Buffer::from_trusted_len_iter(values); + .map(|offset| op(offset[1] - offset[0])) + .collect::>(); let data_type = if O::is_large() { DataType::Int64 @@ -43,7 +41,7 @@ where DataType::Int32 }; - PrimitiveArray::::new(data_type, values, array.validity().cloned()) + PrimitiveArray::::new(data_type, values.into(), array.validity().cloned()) } /// Returns an array of integers with the number of bytes on each string of the array. diff --git a/src/compute/take/generic_binary.rs b/src/compute/take/generic_binary.rs index 87b8f01fdfe..ef78184f6b8 100644 --- a/src/compute/take/generic_binary.rs +++ b/src/compute/take/generic_binary.rs @@ -39,8 +39,10 @@ pub fn take_no_validity( buffer.extend_from_slice(&values[_start..end]); length }); - let offsets = std::iter::once(O::default()).chain(offsets); - let offsets = Buffer::from_trusted_len_iter(offsets); + let offsets = std::iter::once(O::default()) + .chain(offsets) + .collect::>() + .into(); (offsets, buffer.into(), None) } @@ -69,12 +71,13 @@ pub fn take_values_validity>( starts.push(start); length }); - let offsets = std::iter::once(O::default()).chain(offsets); - let offsets = Buffer::from_trusted_len_iter(offsets); + let offsets = std::iter::once(O::default()) + .chain(offsets) + .collect::>(); let buffer = take_values(length, starts.as_slice(), offsets.as_slice(), values_values); - (offsets, buffer, validity.into()) + (offsets.into(), buffer, validity.into()) } // take implementation when only indices contain nulls @@ -98,13 +101,14 @@ pub fn take_indices_validity( }; length }); - let offsets = std::iter::once(O::default()).chain(offsets); - let offsets = Buffer::from_trusted_len_iter(offsets); + let offsets = std::iter::once(O::default()) + .chain(offsets) + .collect::>(); let starts: Buffer = starts.into(); let buffer = take_values(length, starts.as_slice(), offsets.as_slice(), values); - (offsets, buffer, indices.validity().cloned()) + (offsets.into(), buffer, indices.validity().cloned()) } // take implementation when both indices and values contain nulls @@ -140,11 +144,13 @@ pub fn take_values_indices_validity>(); + let starts: Buffer = starts.into(); let buffer = take_values(length, starts.as_slice(), offsets.as_slice(), values_values); - (offsets, buffer, validity.into()) + (offsets.into(), buffer, validity.into()) } diff --git a/src/io/parquet/write/mod.rs b/src/io/parquet/write/mod.rs index 7ddf3c19b75..7c31f27fc52 100644 --- a/src/io/parquet/write/mod.rs +++ b/src/io/parquet/write/mod.rs @@ -14,7 +14,6 @@ mod utils; use crate::array::*; use crate::bitmap::Bitmap; -use crate::buffer::Buffer; use crate::datatypes::*; use crate::error::{ArrowError, Result}; use crate::io::parquet::read::is_type_nullable; @@ -253,14 +252,24 @@ pub fn array_to_page( .downcast_ref::>() .unwrap(); if precision <= 9 { - let values = array.values().iter().map(|x| *x as i32); - let values = Buffer::from_trusted_len_iter(values); + let values = array + .values() + .iter() + .map(|x| *x as i32) + .collect::>() + .into(); + let array = PrimitiveArray::::new(DataType::Int32, values, array.validity().cloned()); primitive::array_to_page::(&array, options, descriptor) } else if precision <= 18 { - let values = array.values().iter().map(|x| *x as i64); - let values = Buffer::from_trusted_len_iter(values); + let values = array + .values() + .iter() + .map(|x| *x as i64) + .collect::>() + .into(); + let array = PrimitiveArray::::new(DataType::Int64, values, array.validity().cloned()); primitive::array_to_page::(&array, options, descriptor)