Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Simplified code
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Jun 21, 2022
1 parent e3a005d commit bd46079
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 79 deletions.
22 changes: 11 additions & 11 deletions src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ impl<T: NativeType> PrimitiveArray<T> {
///
/// This function is primarily used to re-use memory regions.
#[must_use]
pub fn into_mut(self) -> Either<Self, MutablePrimitiveArray<T>> {
pub fn into_mut(mut self) -> Either<Self, MutablePrimitiveArray<T>> {
use Either::*;

if let Some(bitmap) = self.validity {
Expand All @@ -319,27 +319,27 @@ impl<T: NativeType> PrimitiveArray<T> {
self.values,
Some(bitmap),
)),
Right(mutable_bitmap) => match self.values.into_mut() {
Left(buffer) => Left(PrimitiveArray::new(
self.data_type,
buffer,
Some(mutable_bitmap.into()),
)),
Right(values) => Right(MutablePrimitiveArray::from_data(
Right(mutable_bitmap) => match self.values.get_mut().map(std::mem::take) {
Some(values) => Right(MutablePrimitiveArray::from_data(
self.data_type,
values,
Some(mutable_bitmap),
)),
None => Left(PrimitiveArray::new(
self.data_type,
self.values,
Some(mutable_bitmap.into()),
)),
},
}
} else {
match self.values.into_mut() {
Left(values) => Left(PrimitiveArray::new(self.data_type, values, None)),
Right(values) => Right(MutablePrimitiveArray::from_data(
match self.values.get_mut().map(std::mem::take) {
Some(values) => Right(MutablePrimitiveArray::from_data(
self.data_type,
values,
None,
)),
None => Left(PrimitiveArray::new(self.data_type, self.values, None)),
}
}
}
Expand Down
8 changes: 0 additions & 8 deletions src/array/specification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,6 @@ pub fn check_offsets_minimal<O: Offset>(offsets: &[O], values_len: usize) -> usi
len
}

/// # Panics iff:
/// * the `offsets` is not monotonically increasing, or
/// * any slice of `values` between two consecutive pairs from `offsets` is invalid `utf8`, or
/// * any offset is larger or equal to `values_len`.
pub fn check_offsets_and_utf8<O: Offset>(offsets: &[O], values: &[u8]) {
try_check_offsets_and_utf8(offsets, values).unwrap()
}

/// # Panics iff:
/// * the `offsets` is not monotonically increasing, or
/// * any slice of `values` between two consecutive pairs from `offsets` is invalid `utf8`, or
Expand Down
78 changes: 32 additions & 46 deletions src/array/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ impl<O: Offset> Utf8Array<O> {
}

/// Try to convert this `Utf8Array` to a `MutableUtf8Array`
pub fn into_mut(self) -> Either<Self, MutableUtf8Array<O>> {
pub fn into_mut(mut self) -> Either<Self, MutableUtf8Array<O>> {
use Either::*;
if let Some(bitmap) = self.validity {
match bitmap.into_mut() {
Expand All @@ -256,84 +256,70 @@ impl<O: Offset> Utf8Array<O> {
Some(bitmap),
)
}),
Right(mutable_bitmap) => match (self.values.into_mut(), self.offsets.into_mut()) {
(Left(immutable_values), Left(immutable_offsets)) => {
Right(mutable_bitmap) => match (
self.values.get_mut().map(std::mem::take),
self.offsets.get_mut().map(std::mem::take),
) {
(None, None) => {
// Safety: invariants are preserved
Left(unsafe {
Utf8Array::new_unchecked(
self.data_type,
immutable_offsets,
immutable_values,
self.offsets,
self.values,
Some(mutable_bitmap.into()),
)
})
}
(Left(immutable_values), Right(mutable_offsets)) => {
(None, Some(offsets)) => {
// Safety: invariants are preserved
Left(unsafe {
Utf8Array::new_unchecked(
self.data_type,
mutable_offsets.into(),
immutable_values,
offsets.into(),
self.values,
Some(mutable_bitmap.into()),
)
})
}
(Right(mutable_values), Left(immutable_offsets)) => {
(Some(mutable_values), None) => {
// Safety: invariants are preserved
Left(unsafe {
Utf8Array::new_unchecked(
self.data_type,
immutable_offsets,
self.offsets,
mutable_values.into(),
Some(mutable_bitmap.into()),
)
})
}
(Right(mutable_values), Right(mutable_offsets)) => {
Right(MutableUtf8Array::from_data(
(Some(values), Some(offsets)) => Right(unsafe {
MutableUtf8Array::from_data_unchecked(
self.data_type,
mutable_offsets,
mutable_values,
offsets,
values,
Some(mutable_bitmap),
))
}
)
}),
},
}
} else {
match (self.values.into_mut(), self.offsets.into_mut()) {
(Left(immutable_values), Left(immutable_offsets)) => Left(unsafe {
Utf8Array::new_unchecked(
self.data_type,
immutable_offsets,
immutable_values,
None,
)
match (
self.values.get_mut().map(std::mem::take),
self.offsets.get_mut().map(std::mem::take),
) {
(None, None) => Left(unsafe {
Utf8Array::new_unchecked(self.data_type, self.offsets, self.values, None)
}),
(Left(immutable_values), Right(mutable_offsets)) => Left(unsafe {
Utf8Array::new_unchecked(
self.data_type,
mutable_offsets.into(),
immutable_values,
None,
)
(None, Some(offsets)) => Left(unsafe {
Utf8Array::new_unchecked(self.data_type, offsets.into(), self.values, None)
}),
(Right(mutable_values), Left(immutable_offsets)) => Left(unsafe {
Utf8Array::from_data(
self.data_type,
immutable_offsets,
mutable_values.into(),
None,
)
(Some(values), None) => Left(unsafe {
Utf8Array::new_unchecked(self.data_type, self.offsets, values.into(), None)
}),
(Some(values), Some(offsets)) => Right(unsafe {
MutableUtf8Array::from_data_unchecked(self.data_type, offsets, values, None)
}),
(Right(mutable_values), Right(mutable_offsets)) => {
Right(MutableUtf8Array::from_data(
self.data_type,
mutable_offsets,
mutable_values,
None,
))
}
}
}
}
Expand Down
57 changes: 43 additions & 14 deletions src/array/utf8/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::{iter::FromIterator, sync::Arc};

use crate::{
array::{
specification::{check_offsets_and_utf8, check_offsets_minimal},
specification::{check_offsets_minimal, try_check_offsets_and_utf8},
Array, MutableArray, Offset, TryExtend, TryPush,
},
bitmap::MutableBitmap,
Expand Down Expand Up @@ -75,6 +75,47 @@ impl<O: Offset> MutableUtf8Array<O> {
}
}

/// Returns a [`MutableUtf8Array`] created from its internal representation.
///
/// # Errors
/// This function returns an error iff:
/// * the offsets are not monotonically increasing
/// * The last offset is not equal to the values' length.
/// * the validity's length is not equal to `offsets.len() - 1`.
/// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Utf8` or `LargeUtf8`.
/// * The `values` between two consecutive `offsets` are not valid utf8
/// # Implementation
/// This function is `O(N)` - checking monotinicity and utf8 is `O(N)`
pub fn try_new(
data_type: DataType,
offsets: Vec<O>,
values: Vec<u8>,
validity: Option<MutableBitmap>,
) -> Result<Self> {
try_check_offsets_and_utf8(&offsets, &values)?;
if validity
.as_ref()
.map_or(false, |validity| validity.len() != offsets.len() - 1)
{
return Err(Error::oos(
"validity's length must be equal to the number of values",
));
}

if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
return Err(Error::oos(
"MutableUtf8Array can only be initialized with DataType::Utf8 or DataType::LargeUtf8",
));
}

Ok(Self {
data_type,
offsets,
values,
validity,
})
}

/// The canonical method to create a [`MutableUtf8Array`] out of low-end APIs.
/// # Panics
/// This function panics iff:
Expand All @@ -87,19 +128,7 @@ impl<O: Offset> MutableUtf8Array<O> {
values: Vec<u8>,
validity: Option<MutableBitmap>,
) -> Self {
check_offsets_and_utf8(&offsets, &values);
if let Some(ref validity) = validity {
assert_eq!(offsets.len() - 1, validity.len());
}
if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
panic!("MutableUtf8Array can only be initialized with DataType::Utf8 or DataType::LargeUtf8")
}
Self {
data_type,
offsets,
values,
validity,
}
Self::try_new(data_type, offsets, values, validity).unwrap()
}

/// Create a [`MutableUtf8Array`] out of low-end APIs.
Expand Down

0 comments on commit bd46079

Please sign in to comment.