diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml index 1ebd4e6ba1054..41c7805e14758 100644 --- a/rust/arrow/Cargo.toml +++ b/rust/arrow/Cargo.toml @@ -45,6 +45,7 @@ csv = "1.0.0" num = "0.2" regex = "1.1" lazy_static = "1.2" +packed_simd = "0.3.1" [dev-dependencies] criterion = "0.2" @@ -56,4 +57,8 @@ harness = false [[bench]] name = "builder" -harness = false \ No newline at end of file +harness = false + +[[bench]] +name = "arithmetic_kernels" +harness = false diff --git a/rust/arrow/benches/arithmetic_kernels.rs b/rust/arrow/benches/arithmetic_kernels.rs new file mode 100644 index 0000000000000..11a18796f3b4f --- /dev/null +++ b/rust/arrow/benches/arithmetic_kernels.rs @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#[macro_use] +extern crate criterion; +use criterion::Criterion; + +extern crate arrow; + +use arrow::array::*; +use arrow::builder::*; +use arrow::compute::arithmetic_kernels::*; +use arrow::compute::array_ops::*; + +fn create_array(size: usize) -> Float32Array { + let mut builder = Float32Builder::new(size); + for _i in 0..size { + builder.append_value(1.0).unwrap(); + } + builder.finish() +} + +fn primitive_array_add(size: usize) { + let arr_a = create_array(size); + let arr_b = create_array(size); + criterion::black_box(math_op(&arr_a, &arr_b, |a, b| Ok(a + b)).unwrap()); +} + +fn primitive_array_add_simd(size: usize) { + let arr_a = create_array(size); + let arr_b = create_array(size); + criterion::black_box(add(&arr_a, &arr_b).unwrap()); +} + +fn add_benchmark(c: &mut Criterion) { + c.bench_function("add 128", |b| b.iter(|| primitive_array_add(128))); + c.bench_function("add 128 simd", |b| b.iter(|| primitive_array_add_simd(128))); + c.bench_function("add 256", |b| b.iter(|| primitive_array_add(256))); + c.bench_function("add 256 simd", |b| b.iter(|| primitive_array_add_simd(256))); + c.bench_function("add 512", |b| b.iter(|| primitive_array_add(512))); + c.bench_function("add 512 simd", |b| b.iter(|| primitive_array_add_simd(512))); + c.bench_function("add 1024", |b| b.iter(|| primitive_array_add(1024))); + c.bench_function("add 1024 simd", |b| { + b.iter(|| primitive_array_add_simd(1024)) + }); +} + +criterion_group!(benches, add_benchmark); +criterion_main!(benches); diff --git a/rust/arrow/src/array.rs b/rust/arrow/src/array.rs index 127edb9a9e350..08b53827930c2 100644 --- a/rust/arrow/src/array.rs +++ b/rust/arrow/src/array.rs @@ -236,8 +236,10 @@ impl PrimitiveArray { /// /// Note this doesn't do any bound checking, for performance reason. pub fn value_slice(&self, offset: usize, len: usize) -> &[T::Native] { - let raw = unsafe { std::slice::from_raw_parts(self.raw_values(), self.len()) }; - &raw[offset..offset + len] + let raw = unsafe { + std::slice::from_raw_parts(self.raw_values().offset(offset as isize), len) + }; + &raw[..] } // Returns a new primitive array builder diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index 6172445ec821e..322480b4ee90f 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -19,11 +19,16 @@ //! fixed size aligned at a 64-byte boundary. `MutableBuffer` is like `Buffer`, but it can //! be mutated and grown. +use packed_simd::u8x64; + use std::cmp; use std::io::{Error as IoError, ErrorKind, Result as IoResult, Write}; use std::mem; +use std::ops::BitAnd; +use std::slice::{from_raw_parts, from_raw_parts_mut}; use std::sync::Arc; +use crate::builder::{BufferBuilderTrait, UInt8BufferBuilder}; use crate::error::Result; use crate::memory; use crate::util::bit_util; @@ -140,7 +145,63 @@ impl> From for Buffer { } } +impl BitAnd for Buffer { + type Output = Self; + + fn bitand(self, rhs: Self) -> Self { + assert_eq!( + self.len(), + rhs.len(), + "Buffers must be the same size to apply Bitwise OR." + ); + + // SIMD implementation if available + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + let mut result = + MutableBuffer::new(self.len()).with_bitset(self.len(), false); + let lanes = u8x64::lanes(); + for i in (0..self.len()).step_by(lanes) { + let left_data = + unsafe { from_raw_parts(self.raw_data().offset(i as isize), lanes) }; + let left_simd = + unsafe { u8x64::from_slice_unaligned_unchecked(left_data) }; + let right_data = + unsafe { from_raw_parts(rhs.raw_data().offset(i as isize), lanes) }; + let right_simd = + unsafe { u8x64::from_slice_unaligned_unchecked(right_data) }; + let simd_result = left_simd & right_simd; + let result_slice: &mut [u8] = unsafe { + from_raw_parts_mut( + (result.data_mut().as_mut_ptr() as *mut u8).offset(i as isize), + lanes, + ) + }; + unsafe { simd_result.write_to_slice_unaligned_unchecked(result_slice) }; + } + return result.freeze(); + } + + // Default implementation + #[allow(unreachable_code)] + { + let mut builder = UInt8BufferBuilder::new(self.len()); + for i in 0..self.len() { + unsafe { + builder + .append( + self.data().get_unchecked(i) & rhs.data().get_unchecked(i), + ) + .unwrap(); + } + } + builder.finish() + } + } +} + unsafe impl Sync for Buffer {} + unsafe impl Send for Buffer {} /// Similar to `Buffer`, but is growable and can be mutated. A mutable buffer can be @@ -349,6 +410,22 @@ mod tests { assert_ne!(buf1, buf2); } + #[test] + fn test_buffer_bitand() { + let buf1 = Buffer::from([0b01101010]); + let buf2 = Buffer::from([0b01001110]); + let buf3 = buf1 & buf2; + assert_eq!(Buffer::from([0b01001010]), buf3); + } + + #[test] + #[should_panic(expected = "Buffers must be the same size to apply Bitwise OR.")] + fn test_buffer_bitand_different_sizes() { + let buf1 = Buffer::from([1_u8, 1_u8]); + let buf2 = Buffer::from([0b01001110]); + let _buf3 = buf1 & buf2; + } + #[test] fn test_from_raw_parts() { let buf = Buffer::from_raw_parts(null_mut(), 0); diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs new file mode 100644 index 0000000000000..72f00000d61e7 --- /dev/null +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -0,0 +1,154 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Defines basic arithmetic kernels for `PrimitiveArrays`. +//! +//! These kernels can leverage SIMD if available on your system. Currently no runtime +//! detection is provided, you should enable the specific SIMD intrinsics using +//! `RUSTFLAGS="-C target-feature=+avx2"` for example. See the +//! [here] (https://doc.rust-lang.org/stable/std/arch/) for more information. + +use std::mem; +use std::ops::{Add, Div, Mul, Sub}; +use std::slice::from_raw_parts_mut; + +use num::Zero; + +use crate::array::*; +use crate::buffer::MutableBuffer; +use crate::compute::array_ops::math_op; +use crate::datatypes; +use crate::error::{ArrowError, Result}; + +/// Vectorized version of add operation +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn simd_bin_op( + left: &PrimitiveArray, + right: &PrimitiveArray, + op: F, +) -> Result> +where + T: datatypes::ArrowNumericType + datatypes::ArrowSIMDType, + T::Simd: Add + + Sub + + Mul + + Div, + F: Fn(T::Simd, T::Simd) -> T::Simd, +{ + if left.len() != right.len() { + return Err(ArrowError::ComputeError( + "Cannot perform math operation on arrays of different length".to_string(), + )); + } + + let lanes = T::lanes(); + let buffer_size = left.len() * mem::size_of::(); + let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); + + for i in (0..left.len()).step_by(lanes) { + let simd_left = T::load(left.value_slice(i, lanes)); + let simd_right = T::load(right.value_slice(i, lanes)); + let simd_result = T::bin_op(simd_left, simd_right, &op); + + let result_slice: &mut [T::Native] = unsafe { + from_raw_parts_mut( + (result.data_mut().as_mut_ptr() as *mut T::Native).offset(i as isize), + lanes, + ) + }; + T::write(simd_result, result_slice); + } + + Ok(PrimitiveArray::::new(left.len(), result.freeze(), 0, 0)) +} + +/// Perform `left + right` operation on two arrays. If either left or right value is null +/// then the result is also null. +pub fn add( + left: &PrimitiveArray, + right: &PrimitiveArray, +) -> Result> +where + T: datatypes::ArrowNumericType + datatypes::ArrowSIMDType, + T::Native: Add + + Sub + + Mul + + Div + + Zero, +{ + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + return simd_bin_op(&left, &right, |a, b| a + b); + + #[allow(unreachable_code)] + math_op(left, right, |a, b| Ok(a + b)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::array::Int32Array; + + #[test] + fn test_primitive_array_add() { + let a = Int32Array::from(vec![5, 6, 7, 8, 9]); + let b = Int32Array::from(vec![6, 7, 8, 9, 8]); + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + let c = simd_bin_op(&a, &b, |x, y| x + y).unwrap(); + + assert_eq!(11, c.value(0)); + assert_eq!(13, c.value(1)); + assert_eq!(15, c.value(2)); + assert_eq!(17, c.value(3)); + assert_eq!(17, c.value(4)); + } + + let d = add(&a, &b).unwrap(); + assert_eq!(11, d.value(0)); + assert_eq!(13, d.value(1)); + assert_eq!(15, d.value(2)); + assert_eq!(17, d.value(3)); + assert_eq!(17, d.value(4)); + } + + #[test] + fn test_primitive_array_add_mismatched_length() { + let a = Int32Array::from(vec![5, 6, 7, 8, 9]); + let b = Int32Array::from(vec![6, 7, 8]); + let e = add(&a, &b) + .err() + .expect("should have failed due to different lengths"); + assert_eq!( + "ComputeError(\"Cannot perform math operation on arrays of different length\")", + format!("{:?}", e) + ); + } + + #[ignore] + #[test] + fn test_primitive_array_add_with_nulls() { + let a = Int32Array::from(vec![Some(5), None, Some(7), None]); + let b = Int32Array::from(vec![None, None, Some(6), Some(7)]); + let c = add(&a, &b).unwrap(); + assert_eq!(true, c.is_null(0)); + assert_eq!(true, c.is_null(1)); + assert_eq!(false, c.is_null(2)); + assert_eq!(true, c.is_null(3)); + assert_eq!(13, c.value(2)); + } +} diff --git a/rust/arrow/src/array_ops.rs b/rust/arrow/src/compute/array_ops.rs similarity index 91% rename from rust/arrow/src/array_ops.rs rename to rust/arrow/src/compute/array_ops.rs index 6e847c8b378f2..d4f35ef267cb5 100644 --- a/rust/arrow/src/array_ops.rs +++ b/rust/arrow/src/compute/array_ops.rs @@ -21,29 +21,12 @@ use std::ops::{Add, Div, Mul, Sub}; use num::Zero; -use crate::array::{Array, BooleanArray, PrimitiveArray}; +use crate::array::*; use crate::builder::PrimitiveBuilder; use crate::datatypes; use crate::datatypes::ArrowNumericType; use crate::error::{ArrowError, Result}; -/// Perform `left + right` operation on two arrays. If either left or right value is null -/// then the result is also null. -pub fn add( - left: &PrimitiveArray, - right: &PrimitiveArray, -) -> Result> -where - T: datatypes::ArrowNumericType, - T::Native: Add - + Sub - + Mul - + Div - + Zero, -{ - math_op(left, right, |a, b| Ok(a + b)) -} - /// Perform `left - right` operation on two arrays. If either left or right value is null /// then the result is also null. pub fn subtract( @@ -105,7 +88,7 @@ where /// Helper function to perform math lambda function on values from two arrays. If either /// left or right value is null then the output value is also null, so `1 + null` is /// `null`. -fn math_op( +pub fn math_op( left: &PrimitiveArray, right: &PrimitiveArray, op: F, @@ -369,31 +352,6 @@ mod tests { use super::*; use crate::array::{Float64Array, Int32Array}; - #[test] - fn test_primitive_array_add() { - let a = Int32Array::from(vec![5, 6, 7, 8, 9]); - let b = Int32Array::from(vec![6, 7, 8, 9, 8]); - let c = add(&a, &b).unwrap(); - assert_eq!(11, c.value(0)); - assert_eq!(13, c.value(1)); - assert_eq!(15, c.value(2)); - assert_eq!(17, c.value(3)); - assert_eq!(17, c.value(4)); - } - - #[test] - fn test_primitive_array_add_mismatched_length() { - let a = Int32Array::from(vec![5, 6, 7, 8, 9]); - let b = Int32Array::from(vec![6, 7, 8]); - let e = add(&a, &b) - .err() - .expect("should have failed due to different lengths"); - assert_eq!( - "ComputeError(\"Cannot perform math operation on arrays of different length\")", - format!("{:?}", e) - ); - } - #[test] fn test_primitive_array_subtract() { let a = Int32Array::from(vec![1, 2, 3, 4, 5]); @@ -450,18 +408,6 @@ mod tests { assert_eq!(1.0, c.value(2)); } - #[test] - fn test_primitive_array_add_with_nulls() { - let a = Int32Array::from(vec![Some(5), None, Some(7), None]); - let b = Int32Array::from(vec![None, None, Some(6), Some(7)]); - let c = add(&a, &b).unwrap(); - assert_eq!(true, c.is_null(0)); - assert_eq!(true, c.is_null(1)); - assert_eq!(false, c.is_null(2)); - assert_eq!(true, c.is_null(3)); - assert_eq!(13, c.value(2)); - } - #[test] fn test_primitive_array_sum() { let a = Int32Array::from(vec![1, 2, 3, 4, 5]); diff --git a/rust/arrow/src/compute/mod.rs b/rust/arrow/src/compute/mod.rs new file mode 100644 index 0000000000000..c5f1f5a0df2b3 --- /dev/null +++ b/rust/arrow/src/compute/mod.rs @@ -0,0 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +pub mod arithmetic_kernels; +pub mod array_ops; diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index 36f73414f46da..31130edf21573 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -23,9 +23,11 @@ use std::fmt; use std::mem::size_of; +use std::ops::{Add, Div, Mul, Sub}; use std::slice::from_raw_parts; use std::str::FromStr; +use packed_simd::*; use serde_derive::{Deserialize, Serialize}; use serde_json::{json, Value}; @@ -165,6 +167,84 @@ impl ArrowNumericType for UInt64Type {} impl ArrowNumericType for Float32Type {} impl ArrowNumericType for Float64Type {} +/// A subtype of primitive type that represents SIMD capable types. SIMD operations are +/// defined in this trait and are leveraged in the `compute` module if available. +pub trait ArrowSIMDType: ArrowPrimitiveType +where + Self::Simd: Add + + Sub + + Mul + + Div, +{ + /// Defines the SIMD type that should be used for this numeric type + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + type Simd; + + /// The number of SIMD lanes available + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn lanes() -> usize; + + /// Loads a slice into a SIMD register + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn load(slice: &[Self::Native]) -> Self::Simd; + + /// Performs a SIMD add operation + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn bin_op Self::Simd>( + left: Self::Simd, + right: Self::Simd, + op: F, + ) -> Self::Simd; + + /// Writes a SIMD result back to a slice + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn write(simd_result: Self::Simd, slice: &mut [Self::Native]); +} + +macro_rules! make_simd_type { + ($impl_ty:ty, $native_ty:ty, $simd_ty:ident) => { + impl ArrowSIMDType for $impl_ty { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + type Simd = $simd_ty; + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn lanes() -> usize { + $simd_ty::lanes() + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn load(slice: &[$native_ty]) -> $simd_ty { + unsafe { $simd_ty::from_slice_unaligned_unchecked(slice) } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn bin_op $simd_ty>( + left: $simd_ty, + right: $simd_ty, + op: F, + ) -> $simd_ty { + op(left, right) + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn write(simd_result: $simd_ty, slice: &mut [$native_ty]) { + unsafe { simd_result.write_to_slice_unaligned_unchecked(slice) }; + } + } + }; +} + +make_simd_type!(Int8Type, i8, i8x64); +make_simd_type!(Int16Type, i16, i16x32); +make_simd_type!(Int32Type, i32, i32x16); +make_simd_type!(Int64Type, i64, i64x8); +make_simd_type!(UInt8Type, u8, u8x64); +make_simd_type!(UInt16Type, u16, u16x32); +make_simd_type!(UInt32Type, u32, u32x16); +make_simd_type!(UInt64Type, u64, u64x8); +make_simd_type!(Float32Type, f32, f32x16); +make_simd_type!(Float64Type, f64, f64x8); + /// Allows conversion from supported Arrow types to a byte slice. pub trait ToByteSlice { /// Converts this instance into a byte slice diff --git a/rust/arrow/src/lib.rs b/rust/arrow/src/lib.rs index dbac4db115165..ca06fc1e612b3 100644 --- a/rust/arrow/src/lib.rs +++ b/rust/arrow/src/lib.rs @@ -30,10 +30,10 @@ pub mod array; pub mod array_data; -pub mod array_ops; pub mod bitmap; pub mod buffer; pub mod builder; +pub mod compute; pub mod csv; pub mod datatypes; pub mod error; diff --git a/rust/arrow/src/mod.rs b/rust/arrow/src/mod.rs index b9fa43ab8184b..7fd10995afdcf 100644 --- a/rust/arrow/src/mod.rs +++ b/rust/arrow/src/mod.rs @@ -17,6 +17,7 @@ pub mod array; pub mod array_data; +pub mod compute; pub mod bitmap; pub mod buffer; pub mod builder;