From e16580ca58904d05253660a27bf893118c16ba2d Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Tue, 18 May 2021 01:45:56 -0400 Subject: [PATCH 1/4] Add (simd) modulus op --- arrow/src/compute/kernels/arithmetic.rs | 532 +++++++++++++++++++++++- arrow/src/datatypes/numeric.rs | 8 +- arrow/src/error.rs | 2 + 3 files changed, 538 insertions(+), 4 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index d7aadf144d43..66644551357d 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -22,7 +22,7 @@ //! `RUSTFLAGS="-C target-feature=+avx2"` for example. See the documentation //! [here](https://doc.rust-lang.org/stable/core/arch/) for more information. -use std::ops::{Add, Div, Mul, Neg, Sub}; +use std::ops::{Add, Div, Mul, Neg, Rem, Sub}; use num::{One, Zero}; @@ -189,6 +189,74 @@ where Ok(PrimitiveArray::::from(data)) } +/// Helper function to modulus two arrays. +/// +/// # Errors +/// +/// This function errors if: +/// * the arrays have different lengths +/// * a division by zero is found +fn math_modulus( + left: &PrimitiveArray, + right: &PrimitiveArray, +) -> Result> +where + T: ArrowNumericType, + T::Native: Rem + Zero, +{ + if left.len() != right.len() { + return Err(ArrowError::ComputeError( + "Cannot perform math operation on arrays of different length".to_string(), + )); + } + + let null_bit_buffer = + combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?; + + let buffer = if let Some(b) = &null_bit_buffer { + let values = left.values().iter().zip(right.values()).enumerate().map( + |(i, (left, right))| { + let is_valid = unsafe { bit_util::get_bit_raw(b.as_ptr(), i) }; + if is_valid { + if right.is_zero() { + Err(ArrowError::ModulusByZero) + } else { + Ok(*left % *right) + } + } else { + Ok(T::default_value()) + } + }, + ); + unsafe { Buffer::try_from_trusted_len_iter(values) } + } else { + // no value is null + let values = left + .values() + .iter() + .zip(right.values()) + .map(|(left, right)| { + if right.is_zero() { + Err(ArrowError::ModulusByZero) + } else { + Ok(*left % *right) + } + }); + unsafe { Buffer::try_from_trusted_len_iter(values) } + }?; + + let data = ArrayData::new( + T::DATA_TYPE, + left.len(), + None, + null_bit_buffer, + 0, + vec![buffer], + vec![], + ); + Ok(PrimitiveArray::::from(data)) +} + /// Helper function to divide two arrays. /// /// # Errors @@ -257,6 +325,34 @@ where Ok(PrimitiveArray::::from(data)) } +/// Scalar-modulo version of `math_modulus`. +fn math_modulus_scalar( + array: &PrimitiveArray, + modulo: T::Native, +) -> Result> +where + T: ArrowNumericType, + T::Native: Rem + Zero, +{ + if modulo.is_zero() { + return Err(ArrowError::ModulusByZero); + } + + let values = array.values().iter().map(|value| *value % modulo); + let buffer = unsafe { Buffer::from_trusted_len_iter(values) }; + + let data = ArrayData::new( + T::DATA_TYPE, + array.len(), + None, + array.data_ref().null_buffer().cloned(), + 0, + vec![buffer], + vec![], + ); + Ok(PrimitiveArray::::from(data)) +} + /// Scalar-divisor version of `math_divide`. fn math_divide_scalar( array: &PrimitiveArray, @@ -348,6 +444,40 @@ where Ok(PrimitiveArray::::from(data)) } +/// SIMD vectorized implementation of `left % right`. +/// If any of the lanes marked as valid in `valid_mask` are `0` then an `ArrowError::DivideByZero` +/// is returned. The contents of no-valid lanes are undefined. +#[cfg(simd)] +#[inline] +fn simd_checked_modulus( + valid_mask: Option, + left: T::Simd, + right: T::Simd, +) -> Result +where + T::Native: One + Zero, +{ + let zero = T::init(T::Native::zero()); + let one = T::init(T::Native::one()); + + let right_no_invalid_zeros = match valid_mask { + Some(mask) => { + let simd_mask = T::mask_from_u64(mask); + // select `1` for invalid lanes, which will be a no-op during division later + T::mask_select(simd_mask, right, one) + } + None => right, + }; + + let zero_mask = T::eq(right_no_invalid_zeros, zero); + + if T::mask_any(zero_mask) { + Err(ArrowError::ModulusByZero) + } else { + Ok(T::bin_op(left, right_no_invalid_zeros, |a, b| a % b)) + } +} + /// SIMD vectorized implementation of `left / right`. /// If any of the lanes marked as valid in `valid_mask` are `0` then an `ArrowError::DivideByZero` /// is returned. The contents of no-valid lanes are undefined. @@ -382,6 +512,40 @@ where } } +/// Scalar implementation of `left % right` for the remainder elements after complete chunks have been processed using SIMD. +/// If any of the values marked as valid in `valid_mask` are `0` then an `ArrowError::DivideByZero` is returned. +#[cfg(simd)] +#[inline] +fn simd_checked_modulus_remainder( + valid_mask: Option, + left_chunks: ChunksExact, + right_chunks: ChunksExact, + result_chunks: ChunksExactMut, +) -> Result<()> +where + T::Native: Zero + Rem, +{ + let result_remainder = result_chunks.into_remainder(); + let left_remainder = left_chunks.remainder(); + let right_remainder = right_chunks.remainder(); + + result_remainder + .iter_mut() + .zip(left_remainder.iter().zip(right_remainder.iter())) + .enumerate() + .try_for_each(|(i, (result_scalar, (left_scalar, right_scalar)))| { + if valid_mask.map(|mask| mask & (1 << i) != 0).unwrap_or(true) { + if *right_scalar == T::Native::zero() { + return Err(ArrowError::ModulusByZero); + } + *result_scalar = *left_scalar % *right_scalar; + } + Ok(()) + })?; + + Ok(()) +} + /// Scalar implementation of `left / right` for the remainder elements after complete chunks have been processed using SIMD. /// If any of the values marked as valid in `valid_mask` are `0` then an `ArrowError::DivideByZero` is returned. #[cfg(simd)] @@ -416,6 +580,34 @@ where Ok(()) } +/// Scalar-modulo version of `simd_checked_modulus_remainder`. +#[cfg(simd)] +#[inline] +fn simd_checked_modulus_scalar_remainder( + array_chunks: ChunksExact, + modulo: T::Native, + result_chunks: ChunksExactMut, +) -> Result<()> +where + T::Native: Zero + Rem, +{ + if modulo.is_zero() { + return Err(ArrowError::ModulusByZero); + } + + let result_remainder = result_chunks.into_remainder(); + let array_remainder = array_chunks.remainder(); + + result_remainder + .iter_mut() + .zip(array_remainder.iter()) + .for_each(|(result_scalar, array_scalar)| { + *result_scalar = *array_scalar % modulo; + }); + + Ok(()) +} + /// Scalar-divisor version of `simd_checked_divide_remainder`. #[cfg(simd)] #[inline] @@ -444,6 +636,126 @@ where Ok(()) } +/// SIMD vectorized version of `modulus`. +/// +/// The modulus kernels need their own implementation as there is a need to handle situations +/// where a modulus by `0` occurs. This is complicated by `NULL` slots and padding. +#[cfg(simd)] +fn simd_modulus( + left: &PrimitiveArray, + right: &PrimitiveArray, +) -> Result> +where + T: ArrowNumericType, + T::Native: One + Zero + Rem, +{ + if left.len() != right.len() { + return Err(ArrowError::ComputeError( + "Cannot perform math operation on arrays of different length".to_string(), + )); + } + + // Create the combined `Bitmap` + let null_bit_buffer = + combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?; + + let lanes = T::lanes(); + let buffer_size = left.len() * std::mem::size_of::(); + let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); + + match &null_bit_buffer { + Some(b) => { + // combine_option_bitmap returns a slice or new buffer starting at 0 + let valid_chunks = b.bit_chunks(0, left.len()); + + // process data in chunks of 64 elements since we also get 64 bits of validity information at a time + let mut result_chunks = result.typed_data_mut().chunks_exact_mut(64); + let mut left_chunks = left.values().chunks_exact(64); + let mut right_chunks = right.values().chunks_exact(64); + + valid_chunks + .iter() + .zip( + result_chunks + .borrow_mut() + .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut())), + ) + .try_for_each( + |(mut mask, (result_slice, (left_slice, right_slice)))| { + // split chunks further into slices corresponding to the vector length + // the compiler is able to unroll this inner loop and remove bounds checks + // since the outer chunk size (64) is always a multiple of the number of lanes + result_slice + .chunks_exact_mut(lanes) + .zip(left_slice.chunks_exact(lanes).zip(right_slice.chunks_exact(lanes))) + .try_for_each(|(result_slice, (left_slice, right_slice))| -> Result<()> { + let simd_left = T::load(left_slice); + let simd_right = T::load(right_slice); + + let simd_result = simd_checked_modulus::(Some(mask), simd_left, simd_right)?; + + T::write(simd_result, result_slice); + + // skip the shift and avoid overflow for u8 type, which uses 64 lanes. + mask >>= T::lanes() % 64; + + Ok(()) + }) + }, + )?; + + let valid_remainder = valid_chunks.remainder_bits(); + + simd_checked_modulus_remainder::( + Some(valid_remainder), + left_chunks, + right_chunks, + result_chunks, + )?; + } + None => { + let mut result_chunks = result.typed_data_mut().chunks_exact_mut(lanes); + let mut left_chunks = left.values().chunks_exact(lanes); + let mut right_chunks = right.values().chunks_exact(lanes); + + result_chunks + .borrow_mut() + .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut())) + .try_for_each( + |(result_slice, (left_slice, right_slice))| -> Result<()> { + let simd_left = T::load(left_slice); + let simd_right = T::load(right_slice); + + let simd_result = + simd_checked_modulus::(None, simd_left, simd_right)?; + + T::write(simd_result, result_slice); + + Ok(()) + }, + )?; + + simd_checked_modulus_remainder::( + None, + left_chunks, + right_chunks, + result_chunks, + )?; + } + } + + let data = ArrayData::new( + T::DATA_TYPE, + left.len(), + None, + null_bit_buffer, + 0, + vec![result.into()], + vec![], + ); + Ok(PrimitiveArray::::from(data)) +} + /// SIMD vectorized version of `divide`. /// /// The divide kernels need their own implementation as there is a need to handle situations @@ -564,6 +876,52 @@ where Ok(PrimitiveArray::::from(data)) } +/// SIMD vectorized version of `modulus_scalar`. +#[cfg(simd)] +fn simd_modulus_scalar( + array: &PrimitiveArray, + modulo: T::Native, +) -> Result> +where + T: ArrowNumericType, + T::Native: One + Zero + Rem, +{ + if modulo.is_zero() { + return Err(ArrowError::ModulusByZero); + } + + let lanes = T::lanes(); + let buffer_size = array.len() * std::mem::size_of::(); + let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); + + let mut result_chunks = result.typed_data_mut().chunks_exact_mut(lanes); + let mut array_chunks = array.values().chunks_exact(lanes); + + result_chunks + .borrow_mut() + .zip(array_chunks.borrow_mut()) + .for_each(|(result_slice, array_slice)| { + let simd_left = T::load(array_slice); + let simd_right = T::init(modulo); + + let simd_result = T::bin_op(simd_left, simd_right, |a, b| a % b); + T::write(simd_result, result_slice); + }); + + simd_checked_modulus_scalar_remainder::(array_chunks, modulo, result_chunks)?; + + let data = ArrayData::new( + T::DATA_TYPE, + array.len(), + None, + array.data_ref().null_buffer().cloned(), + 0, + vec![result.into()], + vec![], + ); + Ok(PrimitiveArray::::from(data)) +} + /// SIMD vectorized version of `divide_scalar`. #[cfg(simd)] fn simd_divide_scalar( @@ -696,6 +1054,7 @@ where + Sub + Mul + Div + + Rem + Zero, { #[cfg(simd)] @@ -704,6 +1063,29 @@ where return math_op(left, right, |a, b| a * b); } +/// Perform `left % right` operation on two arrays. If either left or right value is null +/// then the result is also null. If any right hand value is zero then the result of this +/// operation will be `Err(ArrowError::DivideByZero)`. +pub fn modulus( + left: &PrimitiveArray, + right: &PrimitiveArray, +) -> Result> +where + T: datatypes::ArrowNumericType, + T::Native: Add + + Sub + + Mul + + Div + + Rem + + Zero + + One, +{ + #[cfg(simd)] + return simd_modulus(&left, &right); + #[cfg(not(simd))] + return math_modulus(&left, &right); +} + /// Perform `left / right` operation on two arrays. If either left or right value is null /// then the result is also null. If any right hand value is zero then the result of this /// operation will be `Err(ArrowError::DivideByZero)`. @@ -717,6 +1099,7 @@ where + Sub + Mul + Div + + Rem + Zero + One, { @@ -726,6 +1109,29 @@ where return math_divide(&left, &right); } +/// Modulus every value in an array by a scalar. If any value in the array is null then the +/// result is also null. If the scalar is zero then the result of this operation will be +/// `Err(ArrowError::ModulusByZero)`. +pub fn modulus_scalar( + array: &PrimitiveArray, + modulo: T::Native, +) -> Result> +where + T: datatypes::ArrowNumericType, + T::Native: Add + + Sub + + Mul + + Div + + Rem + + Zero + + One, +{ + #[cfg(simd)] + return simd_modulus_scalar(&array, modulo); + #[cfg(not(simd))] + return math_modulus_scalar(&array, modulo); +} + /// Divide every value in an array by a scalar. If any value in the array is null then the /// result is also null. If the scalar is zero then the result of this operation will be /// `Err(ArrowError::DivideByZero)`. @@ -739,6 +1145,7 @@ where + Sub + Mul + Div + + Rem + Zero + One, { @@ -835,6 +1242,18 @@ mod tests { assert_eq!(9, c.value(4)); } + #[test] + fn test_primitive_array_modulus() { + let a = Int32Array::from(vec![15, 15, 8, 1, 9]); + let b = Int32Array::from(vec![5, 6, 8, 9, 1]); + let c = modulus(&a, &b).unwrap(); + assert_eq!(0, c.value(0)); + assert_eq!(3, c.value(1)); + assert_eq!(0, c.value(2)); + assert_eq!(1, c.value(3)); + assert_eq!(0, c.value(4)); + } + #[test] fn test_primitive_array_divide_scalar() { let a = Int32Array::from(vec![15, 14, 9, 8, 1]); @@ -844,6 +1263,15 @@ mod tests { assert_eq!(c, expected); } + #[test] + fn test_primitive_array_modulus_scalar() { + let a = Int32Array::from(vec![15, 14, 9, 8, 1]); + let b = 3; + let c = modulus_scalar(&a, b).unwrap(); + let expected = Int32Array::from(vec![0, 2, 0, 2, 1]); + assert_eq!(c, expected); + } + #[test] fn test_primitive_array_divide_sliced() { let a = Int32Array::from(vec![0, 0, 0, 15, 15, 8, 1, 9, 0]); @@ -862,6 +1290,24 @@ mod tests { assert_eq!(9, c.value(4)); } + #[test] + fn test_primitive_array_modulus_sliced() { + let a = Int32Array::from(vec![0, 0, 0, 15, 15, 8, 1, 9, 0]); + let b = Int32Array::from(vec![0, 0, 0, 5, 6, 8, 9, 1, 0]); + let a = a.slice(3, 5); + let b = b.slice(3, 5); + let a = a.as_any().downcast_ref::().unwrap(); + let b = b.as_any().downcast_ref::().unwrap(); + + let c = modulus(&a, &b).unwrap(); + assert_eq!(5, c.len()); + assert_eq!(0, c.value(0)); + assert_eq!(3, c.value(1)); + assert_eq!(0, c.value(2)); + assert_eq!(1, c.value(3)); + assert_eq!(0, c.value(4)); + } + #[test] fn test_primitive_array_divide_with_nulls() { let a = Int32Array::from(vec![Some(15), None, Some(8), Some(1), Some(9), None]); @@ -875,6 +1321,19 @@ mod tests { assert_eq!(true, c.is_null(5)); } + #[test] + fn test_primitive_array_modulus_with_nulls() { + let a = Int32Array::from(vec![Some(15), None, Some(8), Some(1), Some(9), None]); + let b = Int32Array::from(vec![Some(5), Some(6), Some(8), Some(9), None, None]); + let c = modulus(&a, &b).unwrap(); + assert_eq!(0, c.value(0)); + assert_eq!(true, c.is_null(1)); + assert_eq!(0, c.value(2)); + assert_eq!(1, c.value(3)); + assert_eq!(true, c.is_null(4)); + assert_eq!(true, c.is_null(5)); + } + #[test] fn test_primitive_array_divide_scalar_with_nulls() { let a = Int32Array::from(vec![Some(15), None, Some(8), Some(1), Some(9), None]); @@ -885,6 +1344,16 @@ mod tests { assert_eq!(c, expected); } + #[test] + fn test_primitive_array_modulus_scalar_with_nulls() { + let a = Int32Array::from(vec![Some(15), None, Some(8), Some(1), Some(9), None]); + let b = 3; + let c = modulus_scalar(&a, b).unwrap(); + let expected = + Int32Array::from(vec![Some(0), None, Some(2), Some(1), Some(0), None]); + assert_eq!(c, expected); + } + #[test] fn test_primitive_array_divide_with_nulls_sliced() { let a = Int32Array::from(vec![ @@ -938,6 +1407,59 @@ mod tests { assert_eq!(true, c.is_null(5)); } + #[test] + fn test_primitive_array_modulus_with_nulls_sliced() { + let a = Int32Array::from(vec![ + None, + None, + None, + None, + None, + None, + None, + None, + Some(15), + None, + Some(8), + Some(1), + Some(9), + None, + None, + ]); + let b = Int32Array::from(vec![ + None, + None, + None, + None, + None, + None, + None, + None, + Some(5), + Some(6), + Some(8), + Some(9), + None, + None, + None, + ]); + + let a = a.slice(8, 6); + let a = a.as_any().downcast_ref::().unwrap(); + + let b = b.slice(8, 6); + let b = b.as_any().downcast_ref::().unwrap(); + + let c = modulus(&a, &b).unwrap(); + assert_eq!(6, c.len()); + assert_eq!(0, c.value(0)); + assert_eq!(true, c.is_null(1)); + assert_eq!(0, c.value(2)); + assert_eq!(1, c.value(3)); + assert_eq!(true, c.is_null(4)); + assert_eq!(true, c.is_null(5)); + } + #[test] #[should_panic(expected = "DivideByZero")] fn test_primitive_array_divide_by_zero() { @@ -946,6 +1468,14 @@ mod tests { divide(&a, &b).unwrap(); } + #[test] + #[should_panic(expected = "ModulusByZero")] + fn test_primitive_array_modulus_by_zero() { + let a = Int32Array::from(vec![15]); + let b = Int32Array::from(vec![0]); + modulus(&a, &b).unwrap(); + } + #[test] fn test_primitive_array_divide_f64() { let a = Float64Array::from(vec![15.0, 15.0, 8.0]); diff --git a/arrow/src/datatypes/numeric.rs b/arrow/src/datatypes/numeric.rs index 0046398122bb..daa25980adad 100644 --- a/arrow/src/datatypes/numeric.rs +++ b/arrow/src/datatypes/numeric.rs @@ -15,12 +15,13 @@ // specific language governing permissions and limitations // under the License. +use super::*; #[cfg(feature = "simd")] use packed_simd::*; #[cfg(feature = "simd")] -use std::ops::{Add, BitAnd, BitAndAssign, BitOr, BitOrAssign, Div, Mul, Neg, Not, Sub}; - -use super::*; +use std::ops::{ + Add, BitAnd, BitAndAssign, BitOr, BitOrAssign, Div, Mul, Neg, Not, Rem, Sub, +}; /// A subtype of primitive type that represents numeric values. /// @@ -32,6 +33,7 @@ where + Sub + Mul + Div + + Rem + Copy, Self::SimdMask: BitAnd + BitOr diff --git a/arrow/src/error.rs b/arrow/src/error.rs index 6bfa077f4abd..71823ad1c3de 100644 --- a/arrow/src/error.rs +++ b/arrow/src/error.rs @@ -34,6 +34,7 @@ pub enum ArrowError { SchemaError(String), ComputeError(String), DivideByZero, + ModulusByZero, CsvError(String), JsonError(String), IoError(String), @@ -110,6 +111,7 @@ impl Display for ArrowError { ArrowError::SchemaError(desc) => write!(f, "Schema error: {}", desc), ArrowError::ComputeError(desc) => write!(f, "Compute error: {}", desc), ArrowError::DivideByZero => write!(f, "Divide by zero error"), + ArrowError::ModulusByZero => write!(f, "Modulus by zero error"), ArrowError::CsvError(desc) => write!(f, "Csv error: {}", desc), ArrowError::JsonError(desc) => write!(f, "Json error: {}", desc), ArrowError::IoError(desc) => write!(f, "Io error: {}", desc), From 03b5fa76018207bba53393badc7d5ae13220d73b Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Tue, 18 May 2021 01:59:40 -0400 Subject: [PATCH 2/4] fix typo --- arrow/src/compute/kernels/arithmetic.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 66644551357d..8dfa0fe6e363 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -445,7 +445,7 @@ where } /// SIMD vectorized implementation of `left % right`. -/// If any of the lanes marked as valid in `valid_mask` are `0` then an `ArrowError::DivideByZero` +/// If any of the lanes marked as valid in `valid_mask` are `0` then an `ArrowError::ModulusByZero` /// is returned. The contents of no-valid lanes are undefined. #[cfg(simd)] #[inline] @@ -513,7 +513,7 @@ where } /// Scalar implementation of `left % right` for the remainder elements after complete chunks have been processed using SIMD. -/// If any of the values marked as valid in `valid_mask` are `0` then an `ArrowError::DivideByZero` is returned. +/// If any of the values marked as valid in `valid_mask` are `0` then an `ArrowError::ModulusByZero` is returned. #[cfg(simd)] #[inline] fn simd_checked_modulus_remainder( @@ -1065,7 +1065,7 @@ where /// Perform `left % right` operation on two arrays. If either left or right value is null /// then the result is also null. If any right hand value is zero then the result of this -/// operation will be `Err(ArrowError::DivideByZero)`. +/// operation will be `Err(ArrowError::ModulusByZero)`. pub fn modulus( left: &PrimitiveArray, right: &PrimitiveArray, From 5573178865565829b2c143e68fc287982d9d4f54 Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Mon, 24 May 2021 11:46:47 -0400 Subject: [PATCH 3/4] fix feature = "simd" --- arrow/src/compute/kernels/arithmetic.rs | 70 ++++++++++++------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 8dfa0fe6e363..b57db6108bd5 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -27,9 +27,9 @@ use std::ops::{Add, Div, Mul, Neg, Rem, Sub}; use num::{One, Zero}; use crate::buffer::Buffer; -#[cfg(simd)] +#[cfg(feature = "simd")] use crate::buffer::MutableBuffer; -#[cfg(not(simd))] +#[cfg(not(feature = "simd"))] use crate::compute::kernels::arity::unary; use crate::compute::util::combine_option_bitmap; use crate::datatypes; @@ -37,13 +37,13 @@ use crate::datatypes::ArrowNumericType; use crate::error::{ArrowError, Result}; use crate::{array::*, util::bit_util}; use num::traits::Pow; -#[cfg(simd)] +#[cfg(feature = "simd")] use std::borrow::BorrowMut; -#[cfg(simd)] +#[cfg(feature = "simd")] use std::slice::{ChunksExact, ChunksExactMut}; /// SIMD vectorized version of `unary_math_op` above specialized for signed numerical values. -#[cfg(simd)] +#[cfg(feature = "simd")] fn simd_signed_unary_math_op( array: &PrimitiveArray, simd_op: SIMD_OP, @@ -91,7 +91,7 @@ where Ok(PrimitiveArray::::from(data)) } -#[cfg(simd)] +#[cfg(feature = "simd")] fn simd_float_unary_math_op( array: &PrimitiveArray, simd_op: SIMD_OP, @@ -382,7 +382,7 @@ where } /// SIMD vectorized version of `math_op` above. -#[cfg(simd)] +#[cfg(feature = "simd")] fn simd_math_op( left: &PrimitiveArray, right: &PrimitiveArray, @@ -447,7 +447,7 @@ where /// SIMD vectorized implementation of `left % right`. /// If any of the lanes marked as valid in `valid_mask` are `0` then an `ArrowError::ModulusByZero` /// is returned. The contents of no-valid lanes are undefined. -#[cfg(simd)] +#[cfg(feature = "simd")] #[inline] fn simd_checked_modulus( valid_mask: Option, @@ -481,7 +481,7 @@ where /// SIMD vectorized implementation of `left / right`. /// If any of the lanes marked as valid in `valid_mask` are `0` then an `ArrowError::DivideByZero` /// is returned. The contents of no-valid lanes are undefined. -#[cfg(simd)] +#[cfg(feature = "simd")] #[inline] fn simd_checked_divide( valid_mask: Option, @@ -514,7 +514,7 @@ where /// Scalar implementation of `left % right` for the remainder elements after complete chunks have been processed using SIMD. /// If any of the values marked as valid in `valid_mask` are `0` then an `ArrowError::ModulusByZero` is returned. -#[cfg(simd)] +#[cfg(feature = "simd")] #[inline] fn simd_checked_modulus_remainder( valid_mask: Option, @@ -548,7 +548,7 @@ where /// Scalar implementation of `left / right` for the remainder elements after complete chunks have been processed using SIMD. /// If any of the values marked as valid in `valid_mask` are `0` then an `ArrowError::DivideByZero` is returned. -#[cfg(simd)] +#[cfg(feature = "simd")] #[inline] fn simd_checked_divide_remainder( valid_mask: Option, @@ -581,7 +581,7 @@ where } /// Scalar-modulo version of `simd_checked_modulus_remainder`. -#[cfg(simd)] +#[cfg(feature = "simd")] #[inline] fn simd_checked_modulus_scalar_remainder( array_chunks: ChunksExact, @@ -609,7 +609,7 @@ where } /// Scalar-divisor version of `simd_checked_divide_remainder`. -#[cfg(simd)] +#[cfg(feature = "simd")] #[inline] fn simd_checked_divide_scalar_remainder( array_chunks: ChunksExact, @@ -640,7 +640,7 @@ where /// /// The modulus kernels need their own implementation as there is a need to handle situations /// where a modulus by `0` occurs. This is complicated by `NULL` slots and padding. -#[cfg(simd)] +#[cfg(feature = "simd")] fn simd_modulus( left: &PrimitiveArray, right: &PrimitiveArray, @@ -760,7 +760,7 @@ where /// /// The divide kernels need their own implementation as there is a need to handle situations /// where a divide by `0` occurs. This is complicated by `NULL` slots and padding. -#[cfg(simd)] +#[cfg(feature = "simd")] fn simd_divide( left: &PrimitiveArray, right: &PrimitiveArray, @@ -877,7 +877,7 @@ where } /// SIMD vectorized version of `modulus_scalar`. -#[cfg(simd)] +#[cfg(feature = "simd")] fn simd_modulus_scalar( array: &PrimitiveArray, modulo: T::Native, @@ -923,7 +923,7 @@ where } /// SIMD vectorized version of `divide_scalar`. -#[cfg(simd)] +#[cfg(feature = "simd")] fn simd_divide_scalar( array: &PrimitiveArray, divisor: T::Native, @@ -982,9 +982,9 @@ where + Div + Zero, { - #[cfg(simd)] + #[cfg(feature = "simd")] return simd_math_op(&left, &right, |a, b| a + b, |a, b| a + b); - #[cfg(not(simd))] + #[cfg(not(feature = "simd"))] return math_op(left, right, |a, b| a + b); } @@ -1002,9 +1002,9 @@ where + Div + Zero, { - #[cfg(simd)] + #[cfg(feature = "simd")] return simd_math_op(&left, &right, |a, b| a - b, |a, b| a - b); - #[cfg(not(simd))] + #[cfg(not(feature = "simd"))] return math_op(left, right, |a, b| a - b); } @@ -1014,9 +1014,9 @@ where T: datatypes::ArrowSignedNumericType, T::Native: Neg, { - #[cfg(simd)] + #[cfg(feature = "simd")] return simd_signed_unary_math_op(array, |x| -x, |x| -x); - #[cfg(not(simd))] + #[cfg(not(feature = "simd"))] return Ok(unary(array, |x| -x)); } @@ -1029,7 +1029,7 @@ where T: datatypes::ArrowFloatNumericType, T::Native: Pow, { - #[cfg(simd)] + #[cfg(feature = "simd")] { let raise_vector = T::init(raise); return simd_float_unary_math_op( @@ -1038,7 +1038,7 @@ where |x| x.pow(raise), ); } - #[cfg(not(simd))] + #[cfg(not(feature = "simd"))] return Ok(unary(array, |x| x.pow(raise))); } @@ -1057,9 +1057,9 @@ where + Rem + Zero, { - #[cfg(simd)] + #[cfg(feature = "simd")] return simd_math_op(&left, &right, |a, b| a * b, |a, b| a * b); - #[cfg(not(simd))] + #[cfg(not(feature = "simd"))] return math_op(left, right, |a, b| a * b); } @@ -1080,9 +1080,9 @@ where + Zero + One, { - #[cfg(simd)] + #[cfg(feature = "simd")] return simd_modulus(&left, &right); - #[cfg(not(simd))] + #[cfg(not(feature = "simd"))] return math_modulus(&left, &right); } @@ -1103,9 +1103,9 @@ where + Zero + One, { - #[cfg(simd)] + #[cfg(feature = "simd")] return simd_divide(&left, &right); - #[cfg(not(simd))] + #[cfg(not(feature = "simd"))] return math_divide(&left, &right); } @@ -1126,9 +1126,9 @@ where + Zero + One, { - #[cfg(simd)] + #[cfg(feature = "simd")] return simd_modulus_scalar(&array, modulo); - #[cfg(not(simd))] + #[cfg(not(feature = "simd"))] return math_modulus_scalar(&array, modulo); } @@ -1149,9 +1149,9 @@ where + Zero + One, { - #[cfg(simd)] + #[cfg(feature = "simd")] return simd_divide_scalar(&array, divisor); - #[cfg(not(simd))] + #[cfg(not(feature = "simd"))] return math_divide_scalar(&array, divisor); } From 2d55a0d104cdfc35e2333cc43d7afbb054e6dbaf Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Wed, 26 May 2021 09:22:30 -0400 Subject: [PATCH 4/4] revert ModulusByZero --- arrow/src/compute/kernels/arithmetic.rs | 24 ++++++++++++------------ arrow/src/error.rs | 2 -- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index b57db6108bd5..50c06b03036f 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -219,7 +219,7 @@ where let is_valid = unsafe { bit_util::get_bit_raw(b.as_ptr(), i) }; if is_valid { if right.is_zero() { - Err(ArrowError::ModulusByZero) + Err(ArrowError::DivideByZero) } else { Ok(*left % *right) } @@ -237,7 +237,7 @@ where .zip(right.values()) .map(|(left, right)| { if right.is_zero() { - Err(ArrowError::ModulusByZero) + Err(ArrowError::DivideByZero) } else { Ok(*left % *right) } @@ -335,7 +335,7 @@ where T::Native: Rem + Zero, { if modulo.is_zero() { - return Err(ArrowError::ModulusByZero); + return Err(ArrowError::DivideByZero); } let values = array.values().iter().map(|value| *value % modulo); @@ -445,7 +445,7 @@ where } /// SIMD vectorized implementation of `left % right`. -/// If any of the lanes marked as valid in `valid_mask` are `0` then an `ArrowError::ModulusByZero` +/// If any of the lanes marked as valid in `valid_mask` are `0` then an `ArrowError::DivideByZero` /// is returned. The contents of no-valid lanes are undefined. #[cfg(feature = "simd")] #[inline] @@ -472,7 +472,7 @@ where let zero_mask = T::eq(right_no_invalid_zeros, zero); if T::mask_any(zero_mask) { - Err(ArrowError::ModulusByZero) + Err(ArrowError::DivideByZero) } else { Ok(T::bin_op(left, right_no_invalid_zeros, |a, b| a % b)) } @@ -513,7 +513,7 @@ where } /// Scalar implementation of `left % right` for the remainder elements after complete chunks have been processed using SIMD. -/// If any of the values marked as valid in `valid_mask` are `0` then an `ArrowError::ModulusByZero` is returned. +/// If any of the values marked as valid in `valid_mask` are `0` then an `ArrowError::DivideByZero` is returned. #[cfg(feature = "simd")] #[inline] fn simd_checked_modulus_remainder( @@ -536,7 +536,7 @@ where .try_for_each(|(i, (result_scalar, (left_scalar, right_scalar)))| { if valid_mask.map(|mask| mask & (1 << i) != 0).unwrap_or(true) { if *right_scalar == T::Native::zero() { - return Err(ArrowError::ModulusByZero); + return Err(ArrowError::DivideByZero); } *result_scalar = *left_scalar % *right_scalar; } @@ -592,7 +592,7 @@ where T::Native: Zero + Rem, { if modulo.is_zero() { - return Err(ArrowError::ModulusByZero); + return Err(ArrowError::DivideByZero); } let result_remainder = result_chunks.into_remainder(); @@ -887,7 +887,7 @@ where T::Native: One + Zero + Rem, { if modulo.is_zero() { - return Err(ArrowError::ModulusByZero); + return Err(ArrowError::DivideByZero); } let lanes = T::lanes(); @@ -1065,7 +1065,7 @@ where /// Perform `left % right` operation on two arrays. If either left or right value is null /// then the result is also null. If any right hand value is zero then the result of this -/// operation will be `Err(ArrowError::ModulusByZero)`. +/// operation will be `Err(ArrowError::DivideByZero)`. pub fn modulus( left: &PrimitiveArray, right: &PrimitiveArray, @@ -1111,7 +1111,7 @@ where /// Modulus every value in an array by a scalar. If any value in the array is null then the /// result is also null. If the scalar is zero then the result of this operation will be -/// `Err(ArrowError::ModulusByZero)`. +/// `Err(ArrowError::DivideByZero)`. pub fn modulus_scalar( array: &PrimitiveArray, modulo: T::Native, @@ -1469,7 +1469,7 @@ mod tests { } #[test] - #[should_panic(expected = "ModulusByZero")] + #[should_panic(expected = "DivideByZero")] fn test_primitive_array_modulus_by_zero() { let a = Int32Array::from(vec![15]); let b = Int32Array::from(vec![0]); diff --git a/arrow/src/error.rs b/arrow/src/error.rs index 71823ad1c3de..6bfa077f4abd 100644 --- a/arrow/src/error.rs +++ b/arrow/src/error.rs @@ -34,7 +34,6 @@ pub enum ArrowError { SchemaError(String), ComputeError(String), DivideByZero, - ModulusByZero, CsvError(String), JsonError(String), IoError(String), @@ -111,7 +110,6 @@ impl Display for ArrowError { ArrowError::SchemaError(desc) => write!(f, "Schema error: {}", desc), ArrowError::ComputeError(desc) => write!(f, "Compute error: {}", desc), ArrowError::DivideByZero => write!(f, "Divide by zero error"), - ArrowError::ModulusByZero => write!(f, "Modulus by zero error"), ArrowError::CsvError(desc) => write!(f, "Csv error: {}", desc), ArrowError::JsonError(desc) => write!(f, "Json error: {}", desc), ArrowError::IoError(desc) => write!(f, "Io error: {}", desc),