From bd46079cd78845dc3fba15d91d68b0b55d01dd3d Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Sun, 19 Jun 2022 21:25:05 +0000 Subject: [PATCH] Simplified code --- src/array/primitive/mod.rs | 22 +++++------ src/array/specification.rs | 8 ---- src/array/utf8/mod.rs | 78 ++++++++++++++++---------------------- src/array/utf8/mutable.rs | 57 +++++++++++++++++++++------- 4 files changed, 86 insertions(+), 79 deletions(-) diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index f358b12dbab..0a4aacf2a9a 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -309,7 +309,7 @@ impl PrimitiveArray { /// /// This function is primarily used to re-use memory regions. #[must_use] - pub fn into_mut(self) -> Either> { + pub fn into_mut(mut self) -> Either> { use Either::*; if let Some(bitmap) = self.validity { @@ -319,27 +319,27 @@ impl PrimitiveArray { self.values, Some(bitmap), )), - Right(mutable_bitmap) => match self.values.into_mut() { - Left(buffer) => Left(PrimitiveArray::new( - self.data_type, - buffer, - Some(mutable_bitmap.into()), - )), - Right(values) => Right(MutablePrimitiveArray::from_data( + Right(mutable_bitmap) => match self.values.get_mut().map(std::mem::take) { + Some(values) => Right(MutablePrimitiveArray::from_data( self.data_type, values, Some(mutable_bitmap), )), + None => Left(PrimitiveArray::new( + self.data_type, + self.values, + Some(mutable_bitmap.into()), + )), }, } } else { - match self.values.into_mut() { - Left(values) => Left(PrimitiveArray::new(self.data_type, values, None)), - Right(values) => Right(MutablePrimitiveArray::from_data( + match self.values.get_mut().map(std::mem::take) { + Some(values) => Right(MutablePrimitiveArray::from_data( self.data_type, values, None, )), + None => Left(PrimitiveArray::new(self.data_type, self.values, None)), } } } diff --git a/src/array/specification.rs b/src/array/specification.rs index f025cc3c8f7..0ede2582329 100644 --- a/src/array/specification.rs +++ b/src/array/specification.rs @@ -30,14 +30,6 @@ pub fn check_offsets_minimal(offsets: &[O], values_len: usize) -> usi len } -/// # Panics iff: -/// * the `offsets` is not monotonically increasing, or -/// * any slice of `values` between two consecutive pairs from `offsets` is invalid `utf8`, or -/// * any offset is larger or equal to `values_len`. -pub fn check_offsets_and_utf8(offsets: &[O], values: &[u8]) { - try_check_offsets_and_utf8(offsets, values).unwrap() -} - /// # Panics iff: /// * the `offsets` is not monotonically increasing, or /// * any slice of `values` between two consecutive pairs from `offsets` is invalid `utf8`, or diff --git a/src/array/utf8/mod.rs b/src/array/utf8/mod.rs index 047e8778c0b..8c9b9046f9e 100644 --- a/src/array/utf8/mod.rs +++ b/src/array/utf8/mod.rs @@ -243,7 +243,7 @@ impl Utf8Array { } /// Try to convert this `Utf8Array` to a `MutableUtf8Array` - pub fn into_mut(self) -> Either> { + pub fn into_mut(mut self) -> Either> { use Either::*; if let Some(bitmap) = self.validity { match bitmap.into_mut() { @@ -256,84 +256,70 @@ impl Utf8Array { Some(bitmap), ) }), - Right(mutable_bitmap) => match (self.values.into_mut(), self.offsets.into_mut()) { - (Left(immutable_values), Left(immutable_offsets)) => { + Right(mutable_bitmap) => match ( + self.values.get_mut().map(std::mem::take), + self.offsets.get_mut().map(std::mem::take), + ) { + (None, None) => { // Safety: invariants are preserved Left(unsafe { Utf8Array::new_unchecked( self.data_type, - immutable_offsets, - immutable_values, + self.offsets, + self.values, Some(mutable_bitmap.into()), ) }) } - (Left(immutable_values), Right(mutable_offsets)) => { + (None, Some(offsets)) => { // Safety: invariants are preserved Left(unsafe { Utf8Array::new_unchecked( self.data_type, - mutable_offsets.into(), - immutable_values, + offsets.into(), + self.values, Some(mutable_bitmap.into()), ) }) } - (Right(mutable_values), Left(immutable_offsets)) => { + (Some(mutable_values), None) => { // Safety: invariants are preserved Left(unsafe { Utf8Array::new_unchecked( self.data_type, - immutable_offsets, + self.offsets, mutable_values.into(), Some(mutable_bitmap.into()), ) }) } - (Right(mutable_values), Right(mutable_offsets)) => { - Right(MutableUtf8Array::from_data( + (Some(values), Some(offsets)) => Right(unsafe { + MutableUtf8Array::from_data_unchecked( self.data_type, - mutable_offsets, - mutable_values, + offsets, + values, Some(mutable_bitmap), - )) - } + ) + }), }, } } else { - match (self.values.into_mut(), self.offsets.into_mut()) { - (Left(immutable_values), Left(immutable_offsets)) => Left(unsafe { - Utf8Array::new_unchecked( - self.data_type, - immutable_offsets, - immutable_values, - None, - ) + match ( + self.values.get_mut().map(std::mem::take), + self.offsets.get_mut().map(std::mem::take), + ) { + (None, None) => Left(unsafe { + Utf8Array::new_unchecked(self.data_type, self.offsets, self.values, None) }), - (Left(immutable_values), Right(mutable_offsets)) => Left(unsafe { - Utf8Array::new_unchecked( - self.data_type, - mutable_offsets.into(), - immutable_values, - None, - ) + (None, Some(offsets)) => Left(unsafe { + Utf8Array::new_unchecked(self.data_type, offsets.into(), self.values, None) }), - (Right(mutable_values), Left(immutable_offsets)) => Left(unsafe { - Utf8Array::from_data( - self.data_type, - immutable_offsets, - mutable_values.into(), - None, - ) + (Some(values), None) => Left(unsafe { + Utf8Array::new_unchecked(self.data_type, self.offsets, values.into(), None) + }), + (Some(values), Some(offsets)) => Right(unsafe { + MutableUtf8Array::from_data_unchecked(self.data_type, offsets, values, None) }), - (Right(mutable_values), Right(mutable_offsets)) => { - Right(MutableUtf8Array::from_data( - self.data_type, - mutable_offsets, - mutable_values, - None, - )) - } } } } diff --git a/src/array/utf8/mutable.rs b/src/array/utf8/mutable.rs index 3d53e69bbba..b80c73e212c 100644 --- a/src/array/utf8/mutable.rs +++ b/src/array/utf8/mutable.rs @@ -2,7 +2,7 @@ use std::{iter::FromIterator, sync::Arc}; use crate::{ array::{ - specification::{check_offsets_and_utf8, check_offsets_minimal}, + specification::{check_offsets_minimal, try_check_offsets_and_utf8}, Array, MutableArray, Offset, TryExtend, TryPush, }, bitmap::MutableBitmap, @@ -75,6 +75,47 @@ impl MutableUtf8Array { } } + /// Returns a [`MutableUtf8Array`] created from its internal representation. + /// + /// # Errors + /// This function returns an error iff: + /// * the offsets are not monotonically increasing + /// * The last offset is not equal to the values' length. + /// * the validity's length is not equal to `offsets.len() - 1`. + /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Utf8` or `LargeUtf8`. + /// * The `values` between two consecutive `offsets` are not valid utf8 + /// # Implementation + /// This function is `O(N)` - checking monotinicity and utf8 is `O(N)` + pub fn try_new( + data_type: DataType, + offsets: Vec, + values: Vec, + validity: Option, + ) -> Result { + try_check_offsets_and_utf8(&offsets, &values)?; + if validity + .as_ref() + .map_or(false, |validity| validity.len() != offsets.len() - 1) + { + return Err(Error::oos( + "validity's length must be equal to the number of values", + )); + } + + if data_type.to_physical_type() != Self::default_data_type().to_physical_type() { + return Err(Error::oos( + "MutableUtf8Array can only be initialized with DataType::Utf8 or DataType::LargeUtf8", + )); + } + + Ok(Self { + data_type, + offsets, + values, + validity, + }) + } + /// The canonical method to create a [`MutableUtf8Array`] out of low-end APIs. /// # Panics /// This function panics iff: @@ -87,19 +128,7 @@ impl MutableUtf8Array { values: Vec, validity: Option, ) -> Self { - check_offsets_and_utf8(&offsets, &values); - if let Some(ref validity) = validity { - assert_eq!(offsets.len() - 1, validity.len()); - } - if data_type.to_physical_type() != Self::default_data_type().to_physical_type() { - panic!("MutableUtf8Array can only be initialized with DataType::Utf8 or DataType::LargeUtf8") - } - Self { - data_type, - offsets, - values, - validity, - } + Self::try_new(data_type, offsets, values, validity).unwrap() } /// Create a [`MutableUtf8Array`] out of low-end APIs.