Skip to content

Commit

Permalink
RFC: Simplify decimal (#2440) (#2477)
Browse files Browse the repository at this point in the history
* Simplify decimal (#2440)

* Format

* Fix doc

* Review feedback

* Add docs

* Fix logical merge conflict
  • Loading branch information
tustvold authored Aug 18, 2022
1 parent e60eef3 commit 15f42b2
Show file tree
Hide file tree
Showing 6 changed files with 195 additions and 180 deletions.
112 changes: 55 additions & 57 deletions arrow/src/array/array_decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,30 @@
use crate::array::ArrayAccessor;
use std::convert::From;
use std::fmt;
use std::marker::PhantomData;
use std::{any::Any, iter::FromIterator};

use super::{
array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, FixedSizeListArray,
};
use super::{BasicDecimalIter, BooleanBufferBuilder, FixedSizeBinaryArray};
use super::{BooleanBufferBuilder, DecimalIter, FixedSizeBinaryArray};
#[allow(deprecated)]
pub use crate::array::DecimalIter;
use crate::buffer::{Buffer, MutableBuffer};
use crate::datatypes::validate_decimal_precision;
use crate::datatypes::{validate_decimal256_precision_with_lt_bytes, DataType};
use crate::datatypes::{
validate_decimal256_precision_with_lt_bytes, DataType, Decimal128Type,
Decimal256Type, DecimalType, NativeDecimalType,
};
use crate::error::{ArrowError, Result};
use crate::util::decimal::{BasicDecimal, Decimal256};
use crate::util::decimal::{Decimal, Decimal256};

/// `Decimal128Array` stores fixed width decimal numbers,
/// with a fixed precision and scale.
///
/// # Examples
///
/// ```
/// use arrow::array::{Array, BasicDecimalArray, Decimal128Array};
/// use arrow::array::{Array, DecimalArray, Decimal128Array};
/// use arrow::datatypes::DataType;
///
/// // Create a DecimalArray with the default precision and scale
Expand Down Expand Up @@ -66,24 +69,29 @@ use crate::util::decimal::{BasicDecimal, Decimal256};
/// assert_eq!(6, decimal_array.scale());
/// ```
///
pub type Decimal128Array = BasicDecimalArray<16>;
pub type Decimal128Array = DecimalArray<Decimal128Type>;

pub type Decimal256Array = BasicDecimalArray<32>;
/// `Decimal256Array` stores fixed width decimal numbers,
/// with a fixed precision and scale
pub type Decimal256Array = DecimalArray<Decimal256Type>;

pub struct BasicDecimalArray<const BYTE_WIDTH: usize> {
/// A generic [`Array`] for fixed width decimal numbers
///
/// See [`Decimal128Array`] and [`Decimal256Array`]
pub struct DecimalArray<T: DecimalType> {
data: ArrayData,
value_data: RawPtrBox<u8>,
precision: usize,
scale: usize,
_phantom: PhantomData<T>,
}

impl<const BYTE_WIDTH: usize> BasicDecimalArray<BYTE_WIDTH> {
pub const VALUE_LENGTH: i32 = BYTE_WIDTH as i32;
const DEFAULT_TYPE: DataType = BasicDecimal::<BYTE_WIDTH>::DEFAULT_TYPE;
pub const MAX_PRECISION: usize = BasicDecimal::<BYTE_WIDTH>::MAX_PRECISION;
pub const MAX_SCALE: usize = BasicDecimal::<BYTE_WIDTH>::MAX_SCALE;
const TYPE_CONSTRUCTOR: fn(usize, usize) -> DataType =
BasicDecimal::<BYTE_WIDTH>::TYPE_CONSTRUCTOR;
impl<T: DecimalType> DecimalArray<T> {
pub const VALUE_LENGTH: i32 = T::BYTE_LENGTH as i32;
const DEFAULT_TYPE: DataType = T::DEFAULT_TYPE;
pub const MAX_PRECISION: usize = T::MAX_PRECISION;
pub const MAX_SCALE: usize = T::MAX_SCALE;
const TYPE_CONSTRUCTOR: fn(usize, usize) -> DataType = T::TYPE_CONSTRUCTOR;

pub fn data(&self) -> &ArrayData {
&self.data
Expand All @@ -100,7 +108,7 @@ impl<const BYTE_WIDTH: usize> BasicDecimalArray<BYTE_WIDTH> {
}

/// Returns the element at index `i`.
pub fn value(&self, i: usize) -> BasicDecimal<BYTE_WIDTH> {
pub fn value(&self, i: usize) -> Decimal<T> {
assert!(i < self.data().len(), "Out of bounds access");

unsafe { self.value_unchecked(i) }
Expand All @@ -109,19 +117,17 @@ impl<const BYTE_WIDTH: usize> BasicDecimalArray<BYTE_WIDTH> {
/// Returns the element at index `i`.
/// # Safety
/// Caller is responsible for ensuring that the index is within the bounds of the array
pub unsafe fn value_unchecked(&self, i: usize) -> BasicDecimal<BYTE_WIDTH> {
pub unsafe fn value_unchecked(&self, i: usize) -> Decimal<T> {
let data = self.data();
let offset = i + data.offset();
let raw_val = {
let pos = self.value_offset_at(offset);
std::slice::from_raw_parts(
T::Native::from_slice(std::slice::from_raw_parts(
self.raw_value_data_ptr().offset(pos as isize),
Self::VALUE_LENGTH as usize,
)
.try_into()
.unwrap()
))
};
BasicDecimal::<BYTE_WIDTH>::new(self.precision(), self.scale(), raw_val)
Decimal::new(self.precision(), self.scale(), &raw_val)
}

/// Returns the offset for the element at index `i`.
Expand Down Expand Up @@ -304,7 +310,8 @@ impl<const BYTE_WIDTH: usize> BasicDecimalArray<BYTE_WIDTH> {

// validate all the data in the array are valid within the new precision or not
fn validate_data(&self, precision: usize) -> Result<()> {
match BYTE_WIDTH {
// TODO: Move into DecimalType
match Self::VALUE_LENGTH {
16 => self
.as_any()
.downcast_ref::<Decimal128Array>()
Expand Down Expand Up @@ -376,15 +383,15 @@ impl Decimal256Array {
}
}

impl<const BYTE_WIDTH: usize> From<ArrayData> for BasicDecimalArray<BYTE_WIDTH> {
impl<T: DecimalType> From<ArrayData> for DecimalArray<T> {
fn from(data: ArrayData) -> Self {
assert_eq!(
data.buffers().len(),
1,
"DecimalArray data should contain 1 buffer only (values)"
);
let values = data.buffers()[0].as_ptr();
let (precision, scale) = match (data.data_type(), BYTE_WIDTH) {
let (precision, scale) = match (data.data_type(), Self::VALUE_LENGTH) {
(DataType::Decimal128(precision, scale), 16)
| (DataType::Decimal256(precision, scale), 32) => (*precision, *scale),
_ => panic!("Expected data type to be Decimal"),
Expand All @@ -394,27 +401,18 @@ impl<const BYTE_WIDTH: usize> From<ArrayData> for BasicDecimalArray<BYTE_WIDTH>
value_data: unsafe { RawPtrBox::new(values) },
precision,
scale,
_phantom: Default::default(),
}
}
}

impl<'a> Decimal128Array {
/// Constructs a new iterator that iterates `Decimal128` values as i128 values.
/// This is kept mostly for back-compatibility purpose.
/// Suggests to use `iter()` that returns `Decimal128Iter`.
#[allow(deprecated)]
pub fn i128_iter(&'a self) -> DecimalIter<'a> {
DecimalIter::<'a>::new(self)
}
}

fn build_decimal_array_from<const BYTE_WIDTH: usize>(
fn build_decimal_array_from<T: DecimalType>(
null_buf: BooleanBufferBuilder,
buffer: Buffer,
) -> BasicDecimalArray<BYTE_WIDTH> {
) -> DecimalArray<T> {
let data = unsafe {
ArrayData::new_unchecked(
BasicDecimalArray::<BYTE_WIDTH>::default_type(),
DecimalArray::<T>::default_type(),
null_buf.len(),
None,
Some(null_buf.into()),
Expand All @@ -423,7 +421,7 @@ fn build_decimal_array_from<const BYTE_WIDTH: usize>(
vec![],
)
};
BasicDecimalArray::<BYTE_WIDTH>::from(data)
DecimalArray::from(data)
}

impl<Ptr: Into<Decimal256>> FromIterator<Option<Ptr>> for Decimal256Array {
Expand All @@ -446,7 +444,7 @@ impl<Ptr: Into<Decimal256>> FromIterator<Option<Ptr>> for Decimal256Array {
}
});

build_decimal_array_from::<32>(null_buf, buffer.into())
build_decimal_array_from(null_buf, buffer.into())
}
}

Expand All @@ -471,11 +469,11 @@ impl<Ptr: Into<i128>> FromIterator<Option<Ptr>> for Decimal128Array {
})
.collect();

build_decimal_array_from::<16>(null_buf, buffer)
build_decimal_array_from(null_buf, buffer)
}
}

impl<const BYTE_WIDTH: usize> Array for BasicDecimalArray<BYTE_WIDTH> {
impl<T: DecimalType> Array for DecimalArray<T> {
fn as_any(&self) -> &dyn Any {
self
}
Expand All @@ -489,18 +487,18 @@ impl<const BYTE_WIDTH: usize> Array for BasicDecimalArray<BYTE_WIDTH> {
}
}

impl<const BYTE_WIDTH: usize> From<BasicDecimalArray<BYTE_WIDTH>> for ArrayData {
fn from(array: BasicDecimalArray<BYTE_WIDTH>) -> Self {
impl<T: DecimalType> From<DecimalArray<T>> for ArrayData {
fn from(array: DecimalArray<T>) -> Self {
array.data
}
}

impl<const BYTE_WIDTH: usize> fmt::Debug for BasicDecimalArray<BYTE_WIDTH> {
impl<T: DecimalType> fmt::Debug for DecimalArray<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"Decimal{}Array<{}, {}>\n[\n",
BYTE_WIDTH * 8,
T::BYTE_LENGTH * 8,
self.precision,
self.scale
)?;
Expand All @@ -513,31 +511,31 @@ impl<const BYTE_WIDTH: usize> fmt::Debug for BasicDecimalArray<BYTE_WIDTH> {
}
}

impl<'a, const BYTE_WIDTH: usize> ArrayAccessor for &'a BasicDecimalArray<BYTE_WIDTH> {
type Item = BasicDecimal<BYTE_WIDTH>;
impl<'a, T: DecimalType> ArrayAccessor for &'a DecimalArray<T> {
type Item = Decimal<T>;

fn value(&self, index: usize) -> Self::Item {
BasicDecimalArray::<BYTE_WIDTH>::value(self, index)
DecimalArray::<T>::value(self, index)
}

unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
BasicDecimalArray::<BYTE_WIDTH>::value_unchecked(self, index)
DecimalArray::<T>::value_unchecked(self, index)
}
}

impl<'a, const BYTE_WIDTH: usize> IntoIterator for &'a BasicDecimalArray<BYTE_WIDTH> {
type Item = Option<BasicDecimal<BYTE_WIDTH>>;
type IntoIter = BasicDecimalIter<'a, BYTE_WIDTH>;
impl<'a, T: DecimalType> IntoIterator for &'a DecimalArray<T> {
type Item = Option<Decimal<T>>;
type IntoIter = DecimalIter<'a, T>;

fn into_iter(self) -> Self::IntoIter {
BasicDecimalIter::<'a, BYTE_WIDTH>::new(self)
DecimalIter::<'a, T>::new(self)
}
}

impl<'a, const BYTE_WIDTH: usize> BasicDecimalArray<BYTE_WIDTH> {
impl<'a, T: DecimalType> DecimalArray<T> {
/// constructs a new iterator
pub fn iter(&'a self) -> BasicDecimalIter<'a, BYTE_WIDTH> {
BasicDecimalIter::<'a, BYTE_WIDTH>::new(self)
pub fn iter(&'a self) -> DecimalIter<'a, T> {
DecimalIter::<'a, T>::new(self)
}
}

Expand Down
2 changes: 1 addition & 1 deletion arrow/src/array/builder/decimal_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ mod tests {
.expect("should not validate invalid value at builder");

let array = builder.finish();
let array_data = array_decimal::BasicDecimalArray::data(&array);
let array_data = array_decimal::DecimalArray::data(&array);
array_data.validate_values().unwrap();
}
}
70 changes: 8 additions & 62 deletions arrow/src/array/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
// under the License.

use crate::array::array::ArrayAccessor;
use crate::array::BasicDecimalArray;
use crate::array::DecimalArray;
use crate::datatypes::{Decimal128Type, Decimal256Type};

use super::{
Array, BooleanArray, Decimal128Array, GenericBinaryArray, GenericListArray,
GenericStringArray, PrimitiveArray,
BooleanArray, GenericBinaryArray, GenericListArray, GenericStringArray,
PrimitiveArray,
};

/// an iterator that returns Some(T) or None, that can be used on any [`ArrayAccessor`]
Expand Down Expand Up @@ -104,69 +105,14 @@ pub type GenericStringIter<'a, T> = ArrayIter<&'a GenericStringArray<T>>;
pub type GenericBinaryIter<'a, T> = ArrayIter<&'a GenericBinaryArray<T>>;
pub type GenericListArrayIter<'a, O> = ArrayIter<&'a GenericListArray<O>>;

pub type BasicDecimalIter<'a, const BYTE_WIDTH: usize> =
ArrayIter<&'a BasicDecimalArray<BYTE_WIDTH>>;
pub type DecimalIter<'a, T> = ArrayIter<&'a DecimalArray<T>>;
/// an iterator that returns `Some(Decimal128)` or `None`, that can be used on a
/// [`Decimal128Array`]
pub type Decimal128Iter<'a> = BasicDecimalIter<'a, 16>;
/// [`super::Decimal128Array`]
pub type Decimal128Iter<'a> = DecimalIter<'a, Decimal128Type>;

/// an iterator that returns `Some(Decimal256)` or `None`, that can be used on a
/// [`super::Decimal256Array`]
pub type Decimal256Iter<'a> = BasicDecimalIter<'a, 32>;
/// an iterator that returns `Some(i128)` or `None`, that can be used on a
/// [`Decimal128Array`]
#[derive(Debug)]
#[deprecated(note = "Please use `Decimal128Iter` instead. \
`DecimalIter` iterates `Decimal128` values as i128 values. \
This is kept mostly for back-compatibility purpose. Suggests to use `Decimal128Array.iter()` \
that returns `Decimal128Iter`.")]
pub struct DecimalIter<'a> {
array: &'a Decimal128Array,
current: usize,
current_end: usize,
}

#[allow(deprecated)]
impl<'a> DecimalIter<'a> {
pub fn new(array: &'a Decimal128Array) -> Self {
Self {
array,
current: 0,
current_end: array.len(),
}
}
}

#[allow(deprecated)]
impl<'a> std::iter::Iterator for DecimalIter<'a> {
type Item = Option<i128>;

fn next(&mut self) -> Option<Self::Item> {
if self.current == self.current_end {
None
} else {
let old = self.current;
self.current += 1;
// TODO: Improve performance by avoiding bounds check here
// (by using adding a `value_unchecked, for example)
if self.array.is_null(old) {
Some(None)
} else {
Some(Some(self.array.value(old).as_i128()))
}
}
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let remain = self.current_end - self.current;
(remain, Some(remain))
}
}

/// iterator has known size.
#[allow(deprecated)]
impl<'a> std::iter::ExactSizeIterator for DecimalIter<'a> {}
pub type Decimal256Iter<'a> = DecimalIter<'a, Decimal256Type>;

#[cfg(test)]
mod tests {
Expand Down
5 changes: 1 addition & 4 deletions arrow/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,15 +199,12 @@ pub(crate) use self::data::BufferSpec;
pub use self::array_binary::BinaryArray;
pub use self::array_binary::LargeBinaryArray;
pub use self::array_boolean::BooleanArray;
pub use self::array_decimal::BasicDecimalArray;
pub use self::array_decimal::Decimal128Array;
pub use self::array_decimal::Decimal256Array;
pub use self::array_decimal::DecimalArray;
pub use self::array_fixed_size_binary::FixedSizeBinaryArray;
pub use self::array_fixed_size_list::FixedSizeListArray;

#[deprecated(note = "Please use `Decimal128Array` instead")]
pub type DecimalArray = Decimal128Array;

pub use self::array_dictionary::{DictionaryArray, TypedDictionaryArray};
pub use self::array_list::LargeListArray;
pub use self::array_list::ListArray;
Expand Down
Loading

0 comments on commit 15f42b2

Please sign in to comment.