From 5be0f453efa1ef16d1d1267e96e6831f52b7cb93 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Wed, 16 Jan 2019 23:01:37 -0500 Subject: [PATCH 01/24] Working with packed-simd --- rust/arrow/Cargo.toml | 7 +++- rust/arrow/benches/array_ops.rs | 61 +++++++++++++++++++++++++++++++++ rust/arrow/src/array.rs | 4 +-- rust/arrow/src/array_ops.rs | 56 +++++++++++++++++++++++++++++- 4 files changed, 124 insertions(+), 4 deletions(-) create mode 100644 rust/arrow/benches/array_ops.rs diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml index 1ebd4e6ba1054..7df6a06a8010d 100644 --- a/rust/arrow/Cargo.toml +++ b/rust/arrow/Cargo.toml @@ -45,6 +45,7 @@ csv = "1.0.0" num = "0.2" regex = "1.1" lazy_static = "1.2" +packed_simd = "0.3.1" [dev-dependencies] criterion = "0.2" @@ -56,4 +57,8 @@ harness = false [[bench]] name = "builder" -harness = false \ No newline at end of file +harness = false + +[[bench]] +name = "array_ops" +harness = false diff --git a/rust/arrow/benches/array_ops.rs b/rust/arrow/benches/array_ops.rs new file mode 100644 index 0000000000000..288bd19bb61b0 --- /dev/null +++ b/rust/arrow/benches/array_ops.rs @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#[macro_use] +extern crate criterion; +use criterion::Criterion; + +extern crate arrow; + +use arrow::array_ops::*; +use arrow::builder::*; +use arrow::array::*; + + +fn create_array(size: usize) -> Float32Array { + let mut builder = Float32Builder::new(size); + for i in 0..size { + builder.append_value(1.0).unwrap(); + } + builder.finish() +} + +fn primitive_array_add(size: usize) { + let arr_a = create_array(size); + let arr_b = create_array(size); + criterion::black_box(add(&arr_a, &arr_b).unwrap()); +} + +fn primitive_array_add_simd(size: usize) { + let arr_a = create_array(size); + let arr_b = create_array(size); + criterion::black_box(simd_add(&arr_a, &arr_b).unwrap()); +} + +fn add_benchmark(c: &mut Criterion) { + c.bench_function("add 128", |b| b.iter(|| primitive_array_add(128))); + c.bench_function("add 128 simd", |b| b.iter(|| primitive_array_add_simd(128))); + c.bench_function("add 256", |b| b.iter(|| primitive_array_add(256))); + c.bench_function("add 256 simd", |b| b.iter(|| primitive_array_add_simd(256))); + c.bench_function("add 512", |b| b.iter(|| primitive_array_add(512))); + c.bench_function("add 512 simd", |b| b.iter(|| primitive_array_add_simd(512))); + c.bench_function("add 1024", |b| b.iter(|| primitive_array_add(1024))); + c.bench_function("add 1024 simd", |b| b.iter(|| primitive_array_add_simd(1024))); +} + +criterion_group!(benches, add_benchmark); +criterion_main!(benches); diff --git a/rust/arrow/src/array.rs b/rust/arrow/src/array.rs index 69207f6926592..08e733837d73a 100644 --- a/rust/arrow/src/array.rs +++ b/rust/arrow/src/array.rs @@ -226,8 +226,8 @@ impl PrimitiveArray { /// /// Note this doesn't do any bound checking, for performance reason. pub fn value_slice(&self, offset: usize, len: usize) -> &[T::Native] { - let raw = unsafe { std::slice::from_raw_parts(self.raw_values(), self.len()) }; - &raw[offset..offset + len] + let raw = unsafe { std::slice::from_raw_parts(self.raw_values().offset(offset as isize), offset + len) }; + &raw[..] } // Returns a new primitive array builder diff --git a/rust/arrow/src/array_ops.rs b/rust/arrow/src/array_ops.rs index b6afdf23de154..bb4b5b9e13b5e 100644 --- a/rust/arrow/src/array_ops.rs +++ b/rust/arrow/src/array_ops.rs @@ -18,15 +18,52 @@ //! Defines primitive computations on arrays, e.g. addition, equality, boolean logic. use std::ops::{Add, Div, Mul, Sub}; +use std::mem; +use std::slice::from_raw_parts_mut; use num::Zero; +use packed_simd::f32x4; -use crate::array::{Array, BooleanArray, PrimitiveArray}; +use crate::buffer::MutableBuffer; +use crate::array::*; use crate::builder::PrimitiveBuilder; use crate::datatypes; use crate::datatypes::ArrowNumericType; use crate::error::{ArrowError, Result}; +pub fn simd_add(left: &Float32Array, right: &Float32Array) -> Result +{ + if left.len() != right.len() { + return Err(ArrowError::ComputeError( + "Cannot perform math operation on arrays of different length".to_string(), + )); + } + let buffer_size = left.len() * mem::size_of::(); + let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); + + for i in (0..left.len()).step_by(4) { + let simd_left = unsafe {f32x4::from_slice_unaligned_unchecked(left.value_slice(i, 4))}; + let simd_right = unsafe {f32x4::from_slice_unaligned_unchecked(right.value_slice(i, 4))}; + let simd_result = simd_left + simd_right; + + let result_slice: &mut [f32] = unsafe { + from_raw_parts_mut( + (result.data_mut().as_mut_ptr() as *mut f32).offset(i as isize), + 4 + ) + }; + unsafe { + simd_result.write_to_slice_unaligned_unchecked(result_slice); + } + } + + Ok(Float32Array::new( + left.len(), + result.freeze(), + 0, + 0)) +} + /// Perform `left + right` operation on two arrays. If either left or right value is null then the result is also null. pub fn add(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> where @@ -335,6 +372,23 @@ mod tests { use super::*; use crate::array::{Float64Array, Int32Array}; + #[test] + fn test_simd() { + let a = Float32Array::from(vec![5.0, 6.0, 7.0, 8.0, 9.0, 10.0]); + let b = Float32Array::from(vec![6.0, 7.0, 8.0, 9.0, 8.0, 20.0]); + let c = simd_add(&a, &b).unwrap(); + + for i in 0..c.len() { + println!("{}: {}", i, c.value(i)); + } + assert_eq!(11.0, c.value(0)); + assert_eq!(13.0, c.value(1)); + assert_eq!(15.0, c.value(2)); + assert_eq!(17.0, c.value(3)); + assert_eq!(17.0, c.value(4)); + assert_eq!(30.0, c.value(5)); + } + #[test] fn test_primitive_array_add() { let a = Int32Array::from(vec![5, 6, 7, 8, 9]); From ddd041580a1c2623ba0ed471f9e932bdbc7a075b Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Thu, 17 Jan 2019 22:34:53 -0500 Subject: [PATCH 02/24] Simd moved --- rust/arrow/src/compute/add.rs | 141 ++++++++++++++++++++++ rust/arrow/src/{ => compute}/array_ops.rs | 56 +-------- rust/arrow/src/compute/mod.rs | 19 +++ rust/arrow/src/lib.rs | 2 +- rust/arrow/src/mod.rs | 1 + 5 files changed, 163 insertions(+), 56 deletions(-) create mode 100644 rust/arrow/src/compute/add.rs rename rust/arrow/src/{ => compute}/array_ops.rs (91%) create mode 100644 rust/arrow/src/compute/mod.rs diff --git a/rust/arrow/src/compute/add.rs b/rust/arrow/src/compute/add.rs new file mode 100644 index 0000000000000..97e357a1f51fc --- /dev/null +++ b/rust/arrow/src/compute/add.rs @@ -0,0 +1,141 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Defines primitive computations on arrays + +use std::ops::{Add, Div, Mul, Sub}; +use std::mem; +use std::slice::from_raw_parts_mut; + +use num::Zero; +use packed_simd::f32x4; + +use crate::compute::array_ops::math_op; +use crate::buffer::MutableBuffer; +use crate::array::*; +use crate::datatypes; +use crate::error::{ArrowError, Result}; + +pub fn simd_add(left: &Float32Array, right: &Float32Array) -> Result +{ + if left.len() != right.len() { + return Err(ArrowError::ComputeError( + "Cannot perform math operation on arrays of different length".to_string(), + )); + } + + let lanes = f32x4::lanes(); + let buffer_size = left.len() * mem::size_of::(); + let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); + + for i in (0..left.len()).step_by(lanes) { + let simd_left = unsafe {f32x4::from_slice_unaligned_unchecked(left.value_slice(i, lanes))}; + let simd_right = unsafe {f32x4::from_slice_unaligned_unchecked(right.value_slice(i, lanes))}; + let simd_result = simd_left + simd_right; + + let result_slice: &mut [f32] = unsafe { + from_raw_parts_mut( + (result.data_mut().as_mut_ptr() as *mut f32).offset(i as isize), + lanes + ) + }; + unsafe { + simd_result.write_to_slice_unaligned_unchecked(result_slice); + } + } + + Ok(Float32Array::new( + left.len(), + result.freeze(), + 0, + 0)) +} + +/// Perform `left + right` operation on two arrays. If either left or right value is null then the result is also null. +pub fn add(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> +where + T: datatypes::ArrowNumericType, + T::Native: Add + + Sub + + Mul + + Div + + Zero, +{ + math_op(left, right, |a, b| Ok(a + b)) +} + + +#[cfg(test)] +mod tests { + use super::*; + use crate::array::Int32Array; + + #[test] + fn test_supported() { + println!("SSE: {}", is_x86_feature_detected!("sse")); + println!("SSE2: {}", is_x86_feature_detected!("sse2")); + println!("SSE3: {}", is_x86_feature_detected!("sse3")); + println!("SSSE3: {}", is_x86_feature_detected!("ssse3")); + println!("SSE4.1: {}", is_x86_feature_detected!("sse4.1")); + println!("SSE4.2: {}", is_x86_feature_detected!("sse4.2")); + println!("AVX: {}", is_x86_feature_detected!("avx")); + println!("AVX2: {}", is_x86_feature_detected!("avx2")); + println!("AVX512f: {}", is_x86_feature_detected!("avx512f")); + } + + #[test] + fn test_simd() { + let a = Float32Array::from(vec![5.0, 6.0, 7.0, 8.0, 9.0, 10.0]); + let b = Float32Array::from(vec![6.0, 7.0, 8.0, 9.0, 8.0, 20.0]); + let c = simd_add(&a, &b).unwrap(); + + for i in 0..c.len() { + println!("{}: {}", i, c.value(i)); + } + assert_eq!(11.0, c.value(0)); + assert_eq!(13.0, c.value(1)); + assert_eq!(15.0, c.value(2)); + assert_eq!(17.0, c.value(3)); + assert_eq!(17.0, c.value(4)); + assert_eq!(30.0, c.value(5)); + } + + #[test] + fn test_primitive_array_add() { + let a = Int32Array::from(vec![5, 6, 7, 8, 9]); + let b = Int32Array::from(vec![6, 7, 8, 9, 8]); + let c = add(&a, &b).unwrap(); + assert_eq!(11, c.value(0)); + assert_eq!(13, c.value(1)); + assert_eq!(15, c.value(2)); + assert_eq!(17, c.value(3)); + assert_eq!(17, c.value(4)); + } + + #[test] + fn test_primitive_array_add_mismatched_length() { + let a = Int32Array::from(vec![5, 6, 7, 8, 9]); + let b = Int32Array::from(vec![6, 7, 8]); + let e = add(&a, &b) + .err() + .expect("should have failed due to different lengths"); + assert_eq!( + "ComputeError(\"Cannot perform math operation on arrays of different length\")", + format!("{:?}", e) + ); + } +} diff --git a/rust/arrow/src/array_ops.rs b/rust/arrow/src/compute/array_ops.rs similarity index 91% rename from rust/arrow/src/array_ops.rs rename to rust/arrow/src/compute/array_ops.rs index bb4b5b9e13b5e..97074764d8746 100644 --- a/rust/arrow/src/array_ops.rs +++ b/rust/arrow/src/compute/array_ops.rs @@ -18,52 +18,15 @@ //! Defines primitive computations on arrays, e.g. addition, equality, boolean logic. use std::ops::{Add, Div, Mul, Sub}; -use std::mem; -use std::slice::from_raw_parts_mut; use num::Zero; -use packed_simd::f32x4; -use crate::buffer::MutableBuffer; use crate::array::*; use crate::builder::PrimitiveBuilder; use crate::datatypes; use crate::datatypes::ArrowNumericType; use crate::error::{ArrowError, Result}; -pub fn simd_add(left: &Float32Array, right: &Float32Array) -> Result -{ - if left.len() != right.len() { - return Err(ArrowError::ComputeError( - "Cannot perform math operation on arrays of different length".to_string(), - )); - } - let buffer_size = left.len() * mem::size_of::(); - let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); - - for i in (0..left.len()).step_by(4) { - let simd_left = unsafe {f32x4::from_slice_unaligned_unchecked(left.value_slice(i, 4))}; - let simd_right = unsafe {f32x4::from_slice_unaligned_unchecked(right.value_slice(i, 4))}; - let simd_result = simd_left + simd_right; - - let result_slice: &mut [f32] = unsafe { - from_raw_parts_mut( - (result.data_mut().as_mut_ptr() as *mut f32).offset(i as isize), - 4 - ) - }; - unsafe { - simd_result.write_to_slice_unaligned_unchecked(result_slice); - } - } - - Ok(Float32Array::new( - left.len(), - result.freeze(), - 0, - 0)) -} - /// Perform `left + right` operation on two arrays. If either left or right value is null then the result is also null. pub fn add(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> where @@ -125,7 +88,7 @@ where /// Helper function to perform math lambda function on values from two arrays. If either left or /// right value is null then the output value is also null, so `1 + null` is `null`. -fn math_op( +pub fn math_op( left: &PrimitiveArray, right: &PrimitiveArray, op: F, @@ -372,23 +335,6 @@ mod tests { use super::*; use crate::array::{Float64Array, Int32Array}; - #[test] - fn test_simd() { - let a = Float32Array::from(vec![5.0, 6.0, 7.0, 8.0, 9.0, 10.0]); - let b = Float32Array::from(vec![6.0, 7.0, 8.0, 9.0, 8.0, 20.0]); - let c = simd_add(&a, &b).unwrap(); - - for i in 0..c.len() { - println!("{}: {}", i, c.value(i)); - } - assert_eq!(11.0, c.value(0)); - assert_eq!(13.0, c.value(1)); - assert_eq!(15.0, c.value(2)); - assert_eq!(17.0, c.value(3)); - assert_eq!(17.0, c.value(4)); - assert_eq!(30.0, c.value(5)); - } - #[test] fn test_primitive_array_add() { let a = Int32Array::from(vec![5, 6, 7, 8, 9]); diff --git a/rust/arrow/src/compute/mod.rs b/rust/arrow/src/compute/mod.rs new file mode 100644 index 0000000000000..f03f66819ac9d --- /dev/null +++ b/rust/arrow/src/compute/mod.rs @@ -0,0 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +pub mod array_ops; +pub mod add; diff --git a/rust/arrow/src/lib.rs b/rust/arrow/src/lib.rs index 0ecd97c9890f7..05d94d225dfd0 100644 --- a/rust/arrow/src/lib.rs +++ b/rust/arrow/src/lib.rs @@ -30,7 +30,7 @@ pub mod array; pub mod array_data; -pub mod array_ops; +pub mod compute; pub mod bitmap; pub mod buffer; pub mod builder; diff --git a/rust/arrow/src/mod.rs b/rust/arrow/src/mod.rs index b9fa43ab8184b..7fd10995afdcf 100644 --- a/rust/arrow/src/mod.rs +++ b/rust/arrow/src/mod.rs @@ -17,6 +17,7 @@ pub mod array; pub mod array_data; +pub mod compute; pub mod bitmap; pub mod buffer; pub mod builder; From 0cb88ea0cbf2cbaedf91cf6885c1b3fd73caf574 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Thu, 17 Jan 2019 22:56:49 -0500 Subject: [PATCH 03/24] sse and avx2 with benchmark --- rust/arrow/benches/array_ops.rs | 25 ++++++--- rust/arrow/src/compute/add.rs | 94 +++++++++++++++++++++------------ 2 files changed, 77 insertions(+), 42 deletions(-) diff --git a/rust/arrow/benches/array_ops.rs b/rust/arrow/benches/array_ops.rs index 288bd19bb61b0..96b8dbb1fdbfd 100644 --- a/rust/arrow/benches/array_ops.rs +++ b/rust/arrow/benches/array_ops.rs @@ -21,7 +21,8 @@ use criterion::Criterion; extern crate arrow; -use arrow::array_ops::*; +use arrow::compute::array_ops::*; +use arrow::compute::add::{add_sse, add_avx2}; use arrow::builder::*; use arrow::array::*; @@ -40,21 +41,31 @@ fn primitive_array_add(size: usize) { criterion::black_box(add(&arr_a, &arr_b).unwrap()); } -fn primitive_array_add_simd(size: usize) { +fn primitive_array_add_sse(size: usize) { let arr_a = create_array(size); let arr_b = create_array(size); - criterion::black_box(simd_add(&arr_a, &arr_b).unwrap()); + criterion::black_box(add_sse(&arr_a, &arr_b).unwrap()); +} + +fn primitive_array_add_avx2(size: usize) { + let arr_a = create_array(size); + let arr_b = create_array(size); + criterion::black_box(add_avx2(&arr_a, &arr_b).unwrap()); } fn add_benchmark(c: &mut Criterion) { c.bench_function("add 128", |b| b.iter(|| primitive_array_add(128))); - c.bench_function("add 128 simd", |b| b.iter(|| primitive_array_add_simd(128))); + c.bench_function("add 128 simd", |b| b.iter(|| primitive_array_add_sse(128))); + c.bench_function("add 128 simd", |b| b.iter(|| primitive_array_add_avx2(128))); c.bench_function("add 256", |b| b.iter(|| primitive_array_add(256))); - c.bench_function("add 256 simd", |b| b.iter(|| primitive_array_add_simd(256))); + c.bench_function("add 256 simd", |b| b.iter(|| primitive_array_add_sse(256))); + c.bench_function("add 256 simd", |b| b.iter(|| primitive_array_add_avx2(256))); c.bench_function("add 512", |b| b.iter(|| primitive_array_add(512))); - c.bench_function("add 512 simd", |b| b.iter(|| primitive_array_add_simd(512))); + c.bench_function("add 512 simd", |b| b.iter(|| primitive_array_add_sse(512))); + c.bench_function("add 512 simd", |b| b.iter(|| primitive_array_add_avx2(512))); c.bench_function("add 1024", |b| b.iter(|| primitive_array_add(1024))); - c.bench_function("add 1024 simd", |b| b.iter(|| primitive_array_add_simd(1024))); + c.bench_function("add 1024 simd", |b| b.iter(|| primitive_array_add_sse(1024))); + c.bench_function("add 1024 simd", |b| b.iter(|| primitive_array_add_avx2(1024))); } criterion_group!(benches, add_benchmark); diff --git a/rust/arrow/src/compute/add.rs b/rust/arrow/src/compute/add.rs index 97e357a1f51fc..30811af5c76aa 100644 --- a/rust/arrow/src/compute/add.rs +++ b/rust/arrow/src/compute/add.rs @@ -22,7 +22,7 @@ use std::mem; use std::slice::from_raw_parts_mut; use num::Zero; -use packed_simd::f32x4; +use packed_simd::*; use crate::compute::array_ops::math_op; use crate::buffer::MutableBuffer; @@ -30,41 +30,48 @@ use crate::array::*; use crate::datatypes; use crate::error::{ArrowError, Result}; -pub fn simd_add(left: &Float32Array, right: &Float32Array) -> Result -{ - if left.len() != right.len() { - return Err(ArrowError::ComputeError( - "Cannot perform math operation on arrays of different length".to_string(), - )); - } - - let lanes = f32x4::lanes(); - let buffer_size = left.len() * mem::size_of::(); - let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); - - for i in (0..left.len()).step_by(lanes) { - let simd_left = unsafe {f32x4::from_slice_unaligned_unchecked(left.value_slice(i, lanes))}; - let simd_right = unsafe {f32x4::from_slice_unaligned_unchecked(right.value_slice(i, lanes))}; - let simd_result = simd_left + simd_right; - - let result_slice: &mut [f32] = unsafe { - from_raw_parts_mut( - (result.data_mut().as_mut_ptr() as *mut f32).offset(i as isize), - lanes - ) - }; - unsafe { - simd_result.write_to_slice_unaligned_unchecked(result_slice); +macro_rules! simd_add { + ($instruction_set:ident, $simd_ty:ident, $native_ty:ty, $array_ty:ident) => { + pub fn $instruction_set(left: &$array_ty, right: &$array_ty) -> Result<$array_ty> + { + if left.len() != right.len() { + return Err(ArrowError::ComputeError( + "Cannot perform math operation on arrays of different length".to_string(), + )); + } + + let lanes = $simd_ty::lanes(); + let buffer_size = left.len() * mem::size_of::<$native_ty>(); + let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); + + for i in (0..left.len()).step_by(lanes) { + let simd_left = unsafe {$simd_ty::from_slice_unaligned_unchecked(left.value_slice(i, lanes))}; + let simd_right = unsafe {$simd_ty::from_slice_unaligned_unchecked(right.value_slice(i, lanes))}; + let simd_result = simd_left + simd_right; + + let result_slice: &mut [$native_ty] = unsafe { + from_raw_parts_mut( + (result.data_mut().as_mut_ptr() as *mut $native_ty).offset(i as isize), + lanes + ) + }; + unsafe { + simd_result.write_to_slice_unaligned_unchecked(result_slice); + } + } + + Ok($array_ty::new( + left.len(), + result.freeze(), + 0, + 0)) } - } - - Ok(Float32Array::new( - left.len(), - result.freeze(), - 0, - 0)) + }; } +simd_add!(add_avx2, f32x8, f32, Float32Array); +simd_add!(add_sse, f32x4, f32, Float32Array); + /// Perform `left + right` operation on two arrays. If either left or right value is null then the result is also null. pub fn add(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> where @@ -98,10 +105,27 @@ mod tests { } #[test] - fn test_simd() { + fn test_simd_avx2() { + let a = Float32Array::from(vec![5.0, 6.0, 7.0, 8.0, 9.0, 10.0]); + let b = Float32Array::from(vec![6.0, 7.0, 8.0, 9.0, 8.0, 20.0]); + let c = add_avx2(&a, &b).unwrap(); + + for i in 0..c.len() { + println!("{}: {}", i, c.value(i)); + } + assert_eq!(11.0, c.value(0)); + assert_eq!(13.0, c.value(1)); + assert_eq!(15.0, c.value(2)); + assert_eq!(17.0, c.value(3)); + assert_eq!(17.0, c.value(4)); + assert_eq!(30.0, c.value(5)); + } + + #[test] + fn test_simd_sse() { let a = Float32Array::from(vec![5.0, 6.0, 7.0, 8.0, 9.0, 10.0]); let b = Float32Array::from(vec![6.0, 7.0, 8.0, 9.0, 8.0, 20.0]); - let c = simd_add(&a, &b).unwrap(); + let c = add_sse(&a, &b).unwrap(); for i in 0..c.len() { println!("{}: {}", i, c.value(i)); From 27b1c0ddda7b7da7b034d68f6179a53100312f4a Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Mon, 21 Jan 2019 22:59:32 -0500 Subject: [PATCH 04/24] Added conditional compilation and renamed --- .../{array_ops.rs => arithmetic_kernels.rs} | 24 +++++++------- .../compute/{add.rs => arithmetic_kernels.rs} | 31 +++++++++---------- rust/arrow/src/compute/mod.rs | 2 +- 3 files changed, 27 insertions(+), 30 deletions(-) rename rust/arrow/benches/{array_ops.rs => arithmetic_kernels.rs} (71%) rename rust/arrow/src/compute/{add.rs => arithmetic_kernels.rs} (83%) diff --git a/rust/arrow/benches/array_ops.rs b/rust/arrow/benches/arithmetic_kernels.rs similarity index 71% rename from rust/arrow/benches/array_ops.rs rename to rust/arrow/benches/arithmetic_kernels.rs index 96b8dbb1fdbfd..bf6950febd6b0 100644 --- a/rust/arrow/benches/array_ops.rs +++ b/rust/arrow/benches/arithmetic_kernels.rs @@ -22,14 +22,14 @@ use criterion::Criterion; extern crate arrow; use arrow::compute::array_ops::*; -use arrow::compute::add::{add_sse, add_avx2}; +use arrow::compute::arithmetic_kernels::{add_sse, add_avx2}; use arrow::builder::*; use arrow::array::*; fn create_array(size: usize) -> Float32Array { let mut builder = Float32Builder::new(size); - for i in 0..size { + for _i in 0..size { builder.append_value(1.0).unwrap(); } builder.finish() @@ -44,28 +44,28 @@ fn primitive_array_add(size: usize) { fn primitive_array_add_sse(size: usize) { let arr_a = create_array(size); let arr_b = create_array(size); - criterion::black_box(add_sse(&arr_a, &arr_b).unwrap()); + criterion::black_box(unsafe{add_sse(&arr_a, &arr_b).unwrap()}); } fn primitive_array_add_avx2(size: usize) { let arr_a = create_array(size); let arr_b = create_array(size); - criterion::black_box(add_avx2(&arr_a, &arr_b).unwrap()); + criterion::black_box(unsafe{add_avx2(&arr_a, &arr_b).unwrap()}); } fn add_benchmark(c: &mut Criterion) { c.bench_function("add 128", |b| b.iter(|| primitive_array_add(128))); - c.bench_function("add 128 simd", |b| b.iter(|| primitive_array_add_sse(128))); - c.bench_function("add 128 simd", |b| b.iter(|| primitive_array_add_avx2(128))); + c.bench_function("add 128 sse", |b| b.iter(|| primitive_array_add_sse(128))); + c.bench_function("add 128 avx2", |b| b.iter(|| primitive_array_add_avx2(128))); c.bench_function("add 256", |b| b.iter(|| primitive_array_add(256))); - c.bench_function("add 256 simd", |b| b.iter(|| primitive_array_add_sse(256))); - c.bench_function("add 256 simd", |b| b.iter(|| primitive_array_add_avx2(256))); + c.bench_function("add 256 sse", |b| b.iter(|| primitive_array_add_sse(256))); + c.bench_function("add 256 avx2", |b| b.iter(|| primitive_array_add_avx2(256))); c.bench_function("add 512", |b| b.iter(|| primitive_array_add(512))); - c.bench_function("add 512 simd", |b| b.iter(|| primitive_array_add_sse(512))); - c.bench_function("add 512 simd", |b| b.iter(|| primitive_array_add_avx2(512))); + c.bench_function("add 512 sse", |b| b.iter(|| primitive_array_add_sse(512))); + c.bench_function("add 512 avx2", |b| b.iter(|| primitive_array_add_avx2(512))); c.bench_function("add 1024", |b| b.iter(|| primitive_array_add(1024))); - c.bench_function("add 1024 simd", |b| b.iter(|| primitive_array_add_sse(1024))); - c.bench_function("add 1024 simd", |b| b.iter(|| primitive_array_add_avx2(1024))); + c.bench_function("add 1024 sse", |b| b.iter(|| primitive_array_add_sse(1024))); + c.bench_function("add 1024 avx2", |b| b.iter(|| primitive_array_add_avx2(1024))); } criterion_group!(benches, add_benchmark); diff --git a/rust/arrow/src/compute/add.rs b/rust/arrow/src/compute/arithmetic_kernels.rs similarity index 83% rename from rust/arrow/src/compute/add.rs rename to rust/arrow/src/compute/arithmetic_kernels.rs index 30811af5c76aa..3dcaa602255ce 100644 --- a/rust/arrow/src/compute/add.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -31,8 +31,9 @@ use crate::datatypes; use crate::error::{ArrowError, Result}; macro_rules! simd_add { - ($instruction_set:ident, $simd_ty:ident, $native_ty:ty, $array_ty:ident) => { - pub fn $instruction_set(left: &$array_ty, right: &$array_ty) -> Result<$array_ty> + ($ins_set:expr, $instruction_set:ident, $simd_ty:ident, $native_ty:ty, $array_ty:ident) => { + #[target_feature(enable = $ins_set)] + pub unsafe fn $instruction_set(left: &$array_ty, right: &$array_ty) -> Result<$array_ty> { if left.len() != right.len() { return Err(ArrowError::ComputeError( @@ -45,19 +46,15 @@ macro_rules! simd_add { let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); for i in (0..left.len()).step_by(lanes) { - let simd_left = unsafe {$simd_ty::from_slice_unaligned_unchecked(left.value_slice(i, lanes))}; - let simd_right = unsafe {$simd_ty::from_slice_unaligned_unchecked(right.value_slice(i, lanes))}; + let simd_left = $simd_ty::from_slice_unaligned_unchecked(left.value_slice(i, lanes)); + let simd_right = $simd_ty::from_slice_unaligned_unchecked(right.value_slice(i, lanes)); let simd_result = simd_left + simd_right; - let result_slice: &mut [$native_ty] = unsafe { - from_raw_parts_mut( - (result.data_mut().as_mut_ptr() as *mut $native_ty).offset(i as isize), - lanes - ) - }; - unsafe { - simd_result.write_to_slice_unaligned_unchecked(result_slice); - } + let result_slice: &mut [$native_ty] = from_raw_parts_mut( + (result.data_mut().as_mut_ptr() as *mut $native_ty).offset(i as isize), + lanes + ); + simd_result.write_to_slice_unaligned_unchecked(result_slice); } Ok($array_ty::new( @@ -69,8 +66,8 @@ macro_rules! simd_add { }; } -simd_add!(add_avx2, f32x8, f32, Float32Array); -simd_add!(add_sse, f32x4, f32, Float32Array); +simd_add!("sse", add_sse, f32x4, f32, Float32Array); +simd_add!("avx2", add_avx2, f32x8, f32, Float32Array); /// Perform `left + right` operation on two arrays. If either left or right value is null then the result is also null. pub fn add(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> @@ -108,7 +105,7 @@ mod tests { fn test_simd_avx2() { let a = Float32Array::from(vec![5.0, 6.0, 7.0, 8.0, 9.0, 10.0]); let b = Float32Array::from(vec![6.0, 7.0, 8.0, 9.0, 8.0, 20.0]); - let c = add_avx2(&a, &b).unwrap(); + let c = unsafe{ add_avx2(&a, &b).unwrap() }; for i in 0..c.len() { println!("{}: {}", i, c.value(i)); @@ -125,7 +122,7 @@ mod tests { fn test_simd_sse() { let a = Float32Array::from(vec![5.0, 6.0, 7.0, 8.0, 9.0, 10.0]); let b = Float32Array::from(vec![6.0, 7.0, 8.0, 9.0, 8.0, 20.0]); - let c = add_sse(&a, &b).unwrap(); + let c = unsafe{ add_sse(&a, &b).unwrap() }; for i in 0..c.len() { println!("{}: {}", i, c.value(i)); diff --git a/rust/arrow/src/compute/mod.rs b/rust/arrow/src/compute/mod.rs index f03f66819ac9d..981ca9d16bad7 100644 --- a/rust/arrow/src/compute/mod.rs +++ b/rust/arrow/src/compute/mod.rs @@ -16,4 +16,4 @@ // under the License. pub mod array_ops; -pub mod add; +pub mod arithmetic_kernels; From 6a26c536858c4003e9477c85720673f9d2783e3f Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Mon, 21 Jan 2019 23:08:48 -0500 Subject: [PATCH 05/24] Fixed lints --- rust/arrow/benches/arithmetic_kernels.rs | 15 ++++++----- rust/arrow/src/array.rs | 4 ++- rust/arrow/src/compute/arithmetic_kernels.rs | 28 +++++++++----------- rust/arrow/src/compute/mod.rs | 2 +- rust/arrow/src/lib.rs | 2 +- 5 files changed, 25 insertions(+), 26 deletions(-) diff --git a/rust/arrow/benches/arithmetic_kernels.rs b/rust/arrow/benches/arithmetic_kernels.rs index bf6950febd6b0..19031e1549bb5 100644 --- a/rust/arrow/benches/arithmetic_kernels.rs +++ b/rust/arrow/benches/arithmetic_kernels.rs @@ -21,11 +21,10 @@ use criterion::Criterion; extern crate arrow; -use arrow::compute::array_ops::*; -use arrow::compute::arithmetic_kernels::{add_sse, add_avx2}; -use arrow::builder::*; use arrow::array::*; - +use arrow::builder::*; +use arrow::compute::arithmetic_kernels::{add_avx2, add_sse}; +use arrow::compute::array_ops::*; fn create_array(size: usize) -> Float32Array { let mut builder = Float32Builder::new(size); @@ -44,13 +43,13 @@ fn primitive_array_add(size: usize) { fn primitive_array_add_sse(size: usize) { let arr_a = create_array(size); let arr_b = create_array(size); - criterion::black_box(unsafe{add_sse(&arr_a, &arr_b).unwrap()}); + criterion::black_box(unsafe { add_sse(&arr_a, &arr_b).unwrap() }); } fn primitive_array_add_avx2(size: usize) { let arr_a = create_array(size); let arr_b = create_array(size); - criterion::black_box(unsafe{add_avx2(&arr_a, &arr_b).unwrap()}); + criterion::black_box(unsafe { add_avx2(&arr_a, &arr_b).unwrap() }); } fn add_benchmark(c: &mut Criterion) { @@ -65,7 +64,9 @@ fn add_benchmark(c: &mut Criterion) { c.bench_function("add 512 avx2", |b| b.iter(|| primitive_array_add_avx2(512))); c.bench_function("add 1024", |b| b.iter(|| primitive_array_add(1024))); c.bench_function("add 1024 sse", |b| b.iter(|| primitive_array_add_sse(1024))); - c.bench_function("add 1024 avx2", |b| b.iter(|| primitive_array_add_avx2(1024))); + c.bench_function("add 1024 avx2", |b| { + b.iter(|| primitive_array_add_avx2(1024)) + }); } criterion_group!(benches, add_benchmark); diff --git a/rust/arrow/src/array.rs b/rust/arrow/src/array.rs index 08e733837d73a..37d5e70864132 100644 --- a/rust/arrow/src/array.rs +++ b/rust/arrow/src/array.rs @@ -226,7 +226,9 @@ impl PrimitiveArray { /// /// Note this doesn't do any bound checking, for performance reason. pub fn value_slice(&self, offset: usize, len: usize) -> &[T::Native] { - let raw = unsafe { std::slice::from_raw_parts(self.raw_values().offset(offset as isize), offset + len) }; + let raw = unsafe { + std::slice::from_raw_parts(self.raw_values().offset(offset as isize), offset + len) + }; &raw[..] } diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs index 3dcaa602255ce..64268a0d7869c 100644 --- a/rust/arrow/src/compute/arithmetic_kernels.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -17,24 +17,23 @@ //! Defines primitive computations on arrays -use std::ops::{Add, Div, Mul, Sub}; use std::mem; +use std::ops::{Add, Div, Mul, Sub}; use std::slice::from_raw_parts_mut; use num::Zero; use packed_simd::*; -use crate::compute::array_ops::math_op; -use crate::buffer::MutableBuffer; use crate::array::*; +use crate::buffer::MutableBuffer; +use crate::compute::array_ops::math_op; use crate::datatypes; use crate::error::{ArrowError, Result}; macro_rules! simd_add { ($ins_set:expr, $instruction_set:ident, $simd_ty:ident, $native_ty:ty, $array_ty:ident) => { #[target_feature(enable = $ins_set)] - pub unsafe fn $instruction_set(left: &$array_ty, right: &$array_ty) -> Result<$array_ty> - { + pub unsafe fn $instruction_set(left: &$array_ty, right: &$array_ty) -> Result<$array_ty> { if left.len() != right.len() { return Err(ArrowError::ComputeError( "Cannot perform math operation on arrays of different length".to_string(), @@ -46,22 +45,20 @@ macro_rules! simd_add { let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); for i in (0..left.len()).step_by(lanes) { - let simd_left = $simd_ty::from_slice_unaligned_unchecked(left.value_slice(i, lanes)); - let simd_right = $simd_ty::from_slice_unaligned_unchecked(right.value_slice(i, lanes)); + let simd_left = + $simd_ty::from_slice_unaligned_unchecked(left.value_slice(i, lanes)); + let simd_right = + $simd_ty::from_slice_unaligned_unchecked(right.value_slice(i, lanes)); let simd_result = simd_left + simd_right; let result_slice: &mut [$native_ty] = from_raw_parts_mut( (result.data_mut().as_mut_ptr() as *mut $native_ty).offset(i as isize), - lanes + lanes, ); simd_result.write_to_slice_unaligned_unchecked(result_slice); } - Ok($array_ty::new( - left.len(), - result.freeze(), - 0, - 0)) + Ok($array_ty::new(left.len(), result.freeze(), 0, 0)) } }; } @@ -82,7 +79,6 @@ where math_op(left, right, |a, b| Ok(a + b)) } - #[cfg(test)] mod tests { use super::*; @@ -105,7 +101,7 @@ mod tests { fn test_simd_avx2() { let a = Float32Array::from(vec![5.0, 6.0, 7.0, 8.0, 9.0, 10.0]); let b = Float32Array::from(vec![6.0, 7.0, 8.0, 9.0, 8.0, 20.0]); - let c = unsafe{ add_avx2(&a, &b).unwrap() }; + let c = unsafe { add_avx2(&a, &b).unwrap() }; for i in 0..c.len() { println!("{}: {}", i, c.value(i)); @@ -122,7 +118,7 @@ mod tests { fn test_simd_sse() { let a = Float32Array::from(vec![5.0, 6.0, 7.0, 8.0, 9.0, 10.0]); let b = Float32Array::from(vec![6.0, 7.0, 8.0, 9.0, 8.0, 20.0]); - let c = unsafe{ add_sse(&a, &b).unwrap() }; + let c = unsafe { add_sse(&a, &b).unwrap() }; for i in 0..c.len() { println!("{}: {}", i, c.value(i)); diff --git a/rust/arrow/src/compute/mod.rs b/rust/arrow/src/compute/mod.rs index 981ca9d16bad7..c5f1f5a0df2b3 100644 --- a/rust/arrow/src/compute/mod.rs +++ b/rust/arrow/src/compute/mod.rs @@ -15,5 +15,5 @@ // specific language governing permissions and limitations // under the License. -pub mod array_ops; pub mod arithmetic_kernels; +pub mod array_ops; diff --git a/rust/arrow/src/lib.rs b/rust/arrow/src/lib.rs index 05d94d225dfd0..f8926fd085e01 100644 --- a/rust/arrow/src/lib.rs +++ b/rust/arrow/src/lib.rs @@ -30,10 +30,10 @@ pub mod array; pub mod array_data; -pub mod compute; pub mod bitmap; pub mod buffer; pub mod builder; +pub mod compute; pub mod csv; pub mod datatypes; pub mod error; From e5b8780d0fe6438669f93154b75efe35b4ff7607 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Tue, 22 Jan 2019 23:19:51 -0500 Subject: [PATCH 06/24] Cleaned up implementation --- rust/arrow/Cargo.toml | 2 +- rust/arrow/benches/arithmetic_kernels.rs | 25 ++---- rust/arrow/src/compute/arithmetic_kernels.rs | 89 ++++++++------------ 3 files changed, 41 insertions(+), 75 deletions(-) diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml index 7df6a06a8010d..41c7805e14758 100644 --- a/rust/arrow/Cargo.toml +++ b/rust/arrow/Cargo.toml @@ -60,5 +60,5 @@ name = "builder" harness = false [[bench]] -name = "array_ops" +name = "arithmetic_kernels" harness = false diff --git a/rust/arrow/benches/arithmetic_kernels.rs b/rust/arrow/benches/arithmetic_kernels.rs index 19031e1549bb5..81674956b2afb 100644 --- a/rust/arrow/benches/arithmetic_kernels.rs +++ b/rust/arrow/benches/arithmetic_kernels.rs @@ -23,7 +23,7 @@ extern crate arrow; use arrow::array::*; use arrow::builder::*; -use arrow::compute::arithmetic_kernels::{add_avx2, add_sse}; +use arrow::compute::arithmetic_kernels::add_simd; use arrow::compute::array_ops::*; fn create_array(size: usize) -> Float32Array { @@ -40,33 +40,22 @@ fn primitive_array_add(size: usize) { criterion::black_box(add(&arr_a, &arr_b).unwrap()); } -fn primitive_array_add_sse(size: usize) { - let arr_a = create_array(size); - let arr_b = create_array(size); - criterion::black_box(unsafe { add_sse(&arr_a, &arr_b).unwrap() }); -} -fn primitive_array_add_avx2(size: usize) { +fn primitive_array_add_simd(size: usize) { let arr_a = create_array(size); let arr_b = create_array(size); - criterion::black_box(unsafe { add_avx2(&arr_a, &arr_b).unwrap() }); + criterion::black_box(add_simd(&arr_a, &arr_b).unwrap()); } fn add_benchmark(c: &mut Criterion) { c.bench_function("add 128", |b| b.iter(|| primitive_array_add(128))); - c.bench_function("add 128 sse", |b| b.iter(|| primitive_array_add_sse(128))); - c.bench_function("add 128 avx2", |b| b.iter(|| primitive_array_add_avx2(128))); + c.bench_function("add 128 simd", |b| b.iter(|| primitive_array_add_simd(128))); c.bench_function("add 256", |b| b.iter(|| primitive_array_add(256))); - c.bench_function("add 256 sse", |b| b.iter(|| primitive_array_add_sse(256))); - c.bench_function("add 256 avx2", |b| b.iter(|| primitive_array_add_avx2(256))); + c.bench_function("add 256 simd", |b| b.iter(|| primitive_array_add_simd(256))); c.bench_function("add 512", |b| b.iter(|| primitive_array_add(512))); - c.bench_function("add 512 sse", |b| b.iter(|| primitive_array_add_sse(512))); - c.bench_function("add 512 avx2", |b| b.iter(|| primitive_array_add_avx2(512))); + c.bench_function("add 512 simd", |b| b.iter(|| primitive_array_add_simd(512))); c.bench_function("add 1024", |b| b.iter(|| primitive_array_add(1024))); - c.bench_function("add 1024 sse", |b| b.iter(|| primitive_array_add_sse(1024))); - c.bench_function("add 1024 avx2", |b| { - b.iter(|| primitive_array_add_avx2(1024)) - }); + c.bench_function("add 1024 simd", |b| b.iter(|| primitive_array_add_simd(1024))); } criterion_group!(benches, add_benchmark); diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs index 64268a0d7869c..651f98e2acadd 100644 --- a/rust/arrow/src/compute/arithmetic_kernels.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -30,41 +30,35 @@ use crate::compute::array_ops::math_op; use crate::datatypes; use crate::error::{ArrowError, Result}; -macro_rules! simd_add { - ($ins_set:expr, $instruction_set:ident, $simd_ty:ident, $native_ty:ty, $array_ty:ident) => { - #[target_feature(enable = $ins_set)] - pub unsafe fn $instruction_set(left: &$array_ty, right: &$array_ty) -> Result<$array_ty> { - if left.len() != right.len() { - return Err(ArrowError::ComputeError( - "Cannot perform math operation on arrays of different length".to_string(), - )); - } - - let lanes = $simd_ty::lanes(); - let buffer_size = left.len() * mem::size_of::<$native_ty>(); - let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); - - for i in (0..left.len()).step_by(lanes) { - let simd_left = - $simd_ty::from_slice_unaligned_unchecked(left.value_slice(i, lanes)); - let simd_right = - $simd_ty::from_slice_unaligned_unchecked(right.value_slice(i, lanes)); - let simd_result = simd_left + simd_right; - - let result_slice: &mut [$native_ty] = from_raw_parts_mut( - (result.data_mut().as_mut_ptr() as *mut $native_ty).offset(i as isize), - lanes, - ); - simd_result.write_to_slice_unaligned_unchecked(result_slice); - } - - Ok($array_ty::new(left.len(), result.freeze(), 0, 0)) - } - }; -} +pub fn add_simd(left: &Float32Array, right: &Float32Array) -> Result { + if left.len() != right.len() { + return Err(ArrowError::ComputeError( + "Cannot perform math operation on arrays of different length".to_string(), + )); + } -simd_add!("sse", add_sse, f32x4, f32, Float32Array); -simd_add!("avx2", add_avx2, f32x8, f32, Float32Array); + let lanes = f32x16::lanes(); + let buffer_size = left.len() * mem::size_of::(); + let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); + + for i in (0..left.len()).step_by(lanes) { + let simd_left = unsafe { + f32x16::from_slice_unaligned_unchecked(left.value_slice(i, lanes)) + }; + let simd_right = unsafe { + f32x16::from_slice_unaligned_unchecked(right.value_slice(i, lanes)) + }; + let simd_result = simd_left + simd_right; + + let result_slice: &mut [f32] = unsafe {from_raw_parts_mut( + (result.data_mut().as_mut_ptr() as *mut f32).offset(i as isize), + lanes, + )}; + unsafe {simd_result.write_to_slice_unaligned_unchecked(result_slice)}; + } + + Ok(Float32Array::new(left.len(), result.freeze(), 0, 0)) +} /// Perform `left + right` operation on two arrays. If either left or right value is null then the result is also null. pub fn add(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> @@ -98,31 +92,14 @@ mod tests { } #[test] - fn test_simd_avx2() { - let a = Float32Array::from(vec![5.0, 6.0, 7.0, 8.0, 9.0, 10.0]); - let b = Float32Array::from(vec![6.0, 7.0, 8.0, 9.0, 8.0, 20.0]); - let c = unsafe { add_avx2(&a, &b).unwrap() }; - - for i in 0..c.len() { - println!("{}: {}", i, c.value(i)); - } - assert_eq!(11.0, c.value(0)); - assert_eq!(13.0, c.value(1)); - assert_eq!(15.0, c.value(2)); - assert_eq!(17.0, c.value(3)); - assert_eq!(17.0, c.value(4)); - assert_eq!(30.0, c.value(5)); - } - - #[test] - fn test_simd_sse() { + fn test_simd() { let a = Float32Array::from(vec![5.0, 6.0, 7.0, 8.0, 9.0, 10.0]); let b = Float32Array::from(vec![6.0, 7.0, 8.0, 9.0, 8.0, 20.0]); - let c = unsafe { add_sse(&a, &b).unwrap() }; + let c = add_simd(&a, &b).unwrap(); - for i in 0..c.len() { - println!("{}: {}", i, c.value(i)); - } +// for i in 0..c.len() { +// println!("{}: {}", i, c.value(i)); +// } assert_eq!(11.0, c.value(0)); assert_eq!(13.0, c.value(1)); assert_eq!(15.0, c.value(2)); From a07d99a598cff44a90a8c41a92f2c5de5174b2d8 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Tue, 22 Jan 2019 23:23:03 -0500 Subject: [PATCH 07/24] Fixed lints --- rust/arrow/benches/arithmetic_kernels.rs | 5 ++-- rust/arrow/src/compute/arithmetic_kernels.rs | 25 +++++++++----------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/rust/arrow/benches/arithmetic_kernels.rs b/rust/arrow/benches/arithmetic_kernels.rs index 81674956b2afb..dd3a0e309b3ec 100644 --- a/rust/arrow/benches/arithmetic_kernels.rs +++ b/rust/arrow/benches/arithmetic_kernels.rs @@ -40,7 +40,6 @@ fn primitive_array_add(size: usize) { criterion::black_box(add(&arr_a, &arr_b).unwrap()); } - fn primitive_array_add_simd(size: usize) { let arr_a = create_array(size); let arr_b = create_array(size); @@ -55,7 +54,9 @@ fn add_benchmark(c: &mut Criterion) { c.bench_function("add 512", |b| b.iter(|| primitive_array_add(512))); c.bench_function("add 512 simd", |b| b.iter(|| primitive_array_add_simd(512))); c.bench_function("add 1024", |b| b.iter(|| primitive_array_add(1024))); - c.bench_function("add 1024 simd", |b| b.iter(|| primitive_array_add_simd(1024))); + c.bench_function("add 1024 simd", |b| { + b.iter(|| primitive_array_add_simd(1024)) + }); } criterion_group!(benches, add_benchmark); diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs index 651f98e2acadd..ef650cb3e3a3e 100644 --- a/rust/arrow/src/compute/arithmetic_kernels.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -42,19 +42,19 @@ pub fn add_simd(left: &Float32Array, right: &Float32Array) -> Result Date: Thu, 24 Jan 2019 21:21:35 -0500 Subject: [PATCH 08/24] Generics working --- rust/arrow/src/compute/arithmetic_kernels.rs | 30 +++++++-------- rust/arrow/src/datatypes.rs | 40 ++++++++++++++++++++ 2 files changed, 55 insertions(+), 15 deletions(-) diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs index ef650cb3e3a3e..cbee0282ea808 100644 --- a/rust/arrow/src/compute/arithmetic_kernels.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -22,7 +22,6 @@ use std::ops::{Add, Div, Mul, Sub}; use std::slice::from_raw_parts_mut; use num::Zero; -use packed_simd::*; use crate::array::*; use crate::buffer::MutableBuffer; @@ -30,34 +29,35 @@ use crate::compute::array_ops::math_op; use crate::datatypes; use crate::error::{ArrowError, Result}; -pub fn add_simd(left: &Float32Array, right: &Float32Array) -> Result { - if left.len() != right.len() { +pub fn add_simd(x: &PrimitiveArray, y: &PrimitiveArray) -> Result> +where + T: datatypes::SimdType, +{ + if x.len() != y.len() { return Err(ArrowError::ComputeError( "Cannot perform math operation on arrays of different length".to_string(), )); } - let lanes = f32x16::lanes(); - let buffer_size = left.len() * mem::size_of::(); + let lanes = T::lanes(); + let buffer_size = x.len() * mem::size_of::(); let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); - for i in (0..left.len()).step_by(lanes) { - let simd_left = - unsafe { f32x16::from_slice_unaligned_unchecked(left.value_slice(i, lanes)) }; - let simd_right = - unsafe { f32x16::from_slice_unaligned_unchecked(right.value_slice(i, lanes)) }; - let simd_result = simd_left + simd_right; + for i in (0..x.len()).step_by(lanes) { + let simd_x = T::load(x.value_slice(i, lanes)); + let simd_y = T::load(y.value_slice(i, lanes)); + let simd_z = T::add(simd_x, simd_y); - let result_slice: &mut [f32] = unsafe { + let result_slice: &mut [T::Native] = unsafe { from_raw_parts_mut( - (result.data_mut().as_mut_ptr() as *mut f32).offset(i as isize), + (result.data_mut().as_mut_ptr() as *mut T::Native).offset(i as isize), lanes, ) }; - unsafe { simd_result.write_to_slice_unaligned_unchecked(result_slice) }; + T::write(simd_z, result_slice); } - Ok(Float32Array::new(left.len(), result.freeze(), 0, 0)) + Ok(PrimitiveArray::::new(x.len(), result.freeze(), 0, 0)) } /// Perform `left + right` operation on two arrays. If either left or right value is null then the result is also null. diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index 5008a97624a40..fbaccd2538c4f 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -26,6 +26,7 @@ use std::mem::size_of; use std::slice::from_raw_parts; use std::str::FromStr; +use packed_simd::*; use serde_derive::{Deserialize, Serialize}; use serde_json::{json, Value}; @@ -151,9 +152,28 @@ make_type!(UInt64Type, u64, DataType::UInt64, 64, 0u64); make_type!(Float32Type, f32, DataType::Float32, 32, 0.0f32); make_type!(Float64Type, f64, DataType::Float64, 64, 0.0f64); + + + + + + /// A subtype of primitive type that represents numeric values. pub trait ArrowNumericType: ArrowPrimitiveType {} +pub trait SimdType: ArrowNumericType{ + type Simd; + + fn lanes() -> usize; + + fn load(slice: &[Self::Native]) -> Self::Simd; + + fn add(left: Self::Simd, right: Self::Simd) -> Self::Simd; + + fn write(simd_result: Self::Simd, slice: &mut [Self::Native]); + +} + impl ArrowNumericType for Int8Type {} impl ArrowNumericType for Int16Type {} impl ArrowNumericType for Int32Type {} @@ -163,6 +183,26 @@ impl ArrowNumericType for UInt16Type {} impl ArrowNumericType for UInt32Type {} impl ArrowNumericType for UInt64Type {} impl ArrowNumericType for Float32Type {} + +impl SimdType for Float32Type { + type Simd = f32x16; + + fn lanes() -> usize { + f32x16::lanes() + } + + fn load(slice: &[f32]) -> f32x16 { + unsafe { f32x16::from_slice_unaligned_unchecked(slice) } + } + + fn add(left: f32x16, right: f32x16) -> f32x16 { + left + right + } + + fn write(simd_result: f32x16, slice: &mut [f32]) { + unsafe { simd_result.write_to_slice_unaligned_unchecked(slice) }; + } +} impl ArrowNumericType for Float64Type {} /// Allows conversion from supported Arrow types to a byte slice. From dd71421c9a3f8eac79555ca4397a05d67ee17ed1 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Thu, 24 Jan 2019 22:10:32 -0500 Subject: [PATCH 09/24] Simd operations added to `ArrowNumericType` --- rust/arrow/src/compute/arithmetic_kernels.rs | 31 ++++---- rust/arrow/src/datatypes.rs | 77 +++++++++++--------- 2 files changed, 61 insertions(+), 47 deletions(-) diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs index cbee0282ea808..0ebf81696ba72 100644 --- a/rust/arrow/src/compute/arithmetic_kernels.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -29,24 +29,25 @@ use crate::compute::array_ops::math_op; use crate::datatypes; use crate::error::{ArrowError, Result}; -pub fn add_simd(x: &PrimitiveArray, y: &PrimitiveArray) -> Result> +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +pub fn add_simd(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> where - T: datatypes::SimdType, + T: datatypes::ArrowNumericType, { - if x.len() != y.len() { + if left.len() != right.len() { return Err(ArrowError::ComputeError( "Cannot perform math operation on arrays of different length".to_string(), )); } let lanes = T::lanes(); - let buffer_size = x.len() * mem::size_of::(); + let buffer_size = left.len() * mem::size_of::(); let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); - for i in (0..x.len()).step_by(lanes) { - let simd_x = T::load(x.value_slice(i, lanes)); - let simd_y = T::load(y.value_slice(i, lanes)); - let simd_z = T::add(simd_x, simd_y); + for i in (0..left.len()).step_by(lanes) { + let simd_left = T::load(left.value_slice(i, lanes)); + let simd_right = T::load(right.value_slice(i, lanes)); + let simd_result = T::add(simd_left, simd_right); let result_slice: &mut [T::Native] = unsafe { from_raw_parts_mut( @@ -54,22 +55,26 @@ where lanes, ) }; - T::write(simd_z, result_slice); + T::write(simd_result, result_slice); } - Ok(PrimitiveArray::::new(x.len(), result.freeze(), 0, 0)) + Ok(PrimitiveArray::::new(left.len(), result.freeze(), 0, 0)) } /// Perform `left + right` operation on two arrays. If either left or right value is null then the result is also null. pub fn add(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> -where - T: datatypes::ArrowNumericType, - T::Native: Add + where + T: datatypes::ArrowNumericType, + T::Native: Add + Sub + Mul + Div + Zero, { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + return add_simd(&left, &right); + + #[allow(unreachable_code)] math_op(left, right, |a, b| Ok(a + b)) } diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index fbaccd2538c4f..6aa92c7d47703 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -153,57 +153,66 @@ make_type!(Float32Type, f32, DataType::Float32, 32, 0.0f32); make_type!(Float64Type, f64, DataType::Float64, 64, 0.0f64); +/// A subtype of primitive type that represents numeric values +pub trait ArrowNumericType: ArrowPrimitiveType { - - - - -/// A subtype of primitive type that represents numeric values. -pub trait ArrowNumericType: ArrowPrimitiveType {} - -pub trait SimdType: ArrowNumericType{ + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] type Simd; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn lanes() -> usize; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn load(slice: &[Self::Native]) -> Self::Simd; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn add(left: Self::Simd, right: Self::Simd) -> Self::Simd; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn write(simd_result: Self::Simd, slice: &mut [Self::Native]); } -impl ArrowNumericType for Int8Type {} -impl ArrowNumericType for Int16Type {} -impl ArrowNumericType for Int32Type {} -impl ArrowNumericType for Int64Type {} -impl ArrowNumericType for UInt8Type {} -impl ArrowNumericType for UInt16Type {} -impl ArrowNumericType for UInt32Type {} -impl ArrowNumericType for UInt64Type {} -impl ArrowNumericType for Float32Type {} - -impl SimdType for Float32Type { - type Simd = f32x16; - - fn lanes() -> usize { - f32x16::lanes() - } +macro_rules! make_numeric_type { + ($impl_ty:ty, $native_ty:ty, $simd_ty:ident) => { + impl ArrowNumericType for $impl_ty { - fn load(slice: &[f32]) -> f32x16 { - unsafe { f32x16::from_slice_unaligned_unchecked(slice) } - } + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + type Simd = $simd_ty; - fn add(left: f32x16, right: f32x16) -> f32x16 { - left + right - } + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn lanes() -> usize { + $simd_ty::lanes() + } - fn write(simd_result: f32x16, slice: &mut [f32]) { - unsafe { simd_result.write_to_slice_unaligned_unchecked(slice) }; - } + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn load(slice: &[$native_ty]) -> $simd_ty { + unsafe { $simd_ty::from_slice_unaligned_unchecked(slice) } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn add(left: $simd_ty, right: $simd_ty) -> $simd_ty { + left + right + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn write(simd_result: $simd_ty, slice: &mut [$native_ty]) { + unsafe { simd_result.write_to_slice_unaligned_unchecked(slice) }; + } + } + }; } -impl ArrowNumericType for Float64Type {} + +make_numeric_type!(Int8Type, i8, i8x64); +make_numeric_type!(Int16Type, i16, i16x32); +make_numeric_type!(Int32Type, i32, i32x16); +make_numeric_type!(Int64Type, i64, i64x8); +make_numeric_type!(UInt8Type, u8, u8x64); +make_numeric_type!(UInt16Type, u16, u16x32); +make_numeric_type!(UInt32Type, u32, u32x16); +make_numeric_type!(UInt64Type, u64, u64x8); +make_numeric_type!(Float32Type, f32, f32x16); +make_numeric_type!(Float64Type, f64, f64x8); /// Allows conversion from supported Arrow types to a byte slice. pub trait ToByteSlice { From 108238f7856e17189c063268d49a771ce23581d4 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Thu, 24 Jan 2019 22:24:55 -0500 Subject: [PATCH 10/24] Cleaned up testing. --- rust/arrow/src/compute/arithmetic_kernels.rs | 51 +++++++------------- 1 file changed, 18 insertions(+), 33 deletions(-) diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs index 0ebf81696ba72..04aeb9994e25c 100644 --- a/rust/arrow/src/compute/arithmetic_kernels.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -83,43 +83,28 @@ mod tests { use super::*; use crate::array::Int32Array; - #[test] - fn test_supported() { - println!("SSE: {}", is_x86_feature_detected!("sse")); - println!("SSE2: {}", is_x86_feature_detected!("sse2")); - println!("SSE3: {}", is_x86_feature_detected!("sse3")); - println!("SSSE3: {}", is_x86_feature_detected!("ssse3")); - println!("SSE4.1: {}", is_x86_feature_detected!("sse4.1")); - println!("SSE4.2: {}", is_x86_feature_detected!("sse4.2")); - println!("AVX: {}", is_x86_feature_detected!("avx")); - println!("AVX2: {}", is_x86_feature_detected!("avx2")); - println!("AVX512f: {}", is_x86_feature_detected!("avx512f")); - } - - #[test] - fn test_simd() { - let a = Float32Array::from(vec![5.0, 6.0, 7.0, 8.0, 9.0, 10.0]); - let b = Float32Array::from(vec![6.0, 7.0, 8.0, 9.0, 8.0, 20.0]); - let c = add_simd(&a, &b).unwrap(); - - assert_eq!(11.0, c.value(0)); - assert_eq!(13.0, c.value(1)); - assert_eq!(15.0, c.value(2)); - assert_eq!(17.0, c.value(3)); - assert_eq!(17.0, c.value(4)); - assert_eq!(30.0, c.value(5)); - } - #[test] fn test_primitive_array_add() { let a = Int32Array::from(vec![5, 6, 7, 8, 9]); let b = Int32Array::from(vec![6, 7, 8, 9, 8]); - let c = add(&a, &b).unwrap(); - assert_eq!(11, c.value(0)); - assert_eq!(13, c.value(1)); - assert_eq!(15, c.value(2)); - assert_eq!(17, c.value(3)); - assert_eq!(17, c.value(4)); + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + let c = add_simd(&a, &b).unwrap(); + + assert_eq!(11, c.value(0)); + assert_eq!(13, c.value(1)); + assert_eq!(15, c.value(2)); + assert_eq!(17, c.value(3)); + assert_eq!(17, c.value(4)); + } + + let d = add(&a, &b).unwrap(); + assert_eq!(11, d.value(0)); + assert_eq!(13, d.value(1)); + assert_eq!(15, d.value(2)); + assert_eq!(17, d.value(3)); + assert_eq!(17, d.value(4)); } #[test] From 3ee5a29bb33d6c576cbcdc1deaab948109a3f9b9 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Thu, 24 Jan 2019 22:25:41 -0500 Subject: [PATCH 11/24] Added documentation --- rust/arrow/src/compute/arithmetic_kernels.rs | 8 +++++++- rust/arrow/src/datatypes.rs | 9 ++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs index 04aeb9994e25c..31ecb650ea873 100644 --- a/rust/arrow/src/compute/arithmetic_kernels.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -15,7 +15,12 @@ // specific language governing permissions and limitations // under the License. -//! Defines primitive computations on arrays +//! Defines basic arithmetic kernels for `PrimitiveArrays`. +//! +//! These kernels can leverage SIMD if available on your system. Currently no runtime detection +//! is provided, you should enable the specific SIMD intrinsics using +//! `RUSTFLAGS="-C target-feature=+avx2"` for example. See the +//! [here] (https://doc.rust-lang.org/stable/std/arch/) for more information. use std::mem; use std::ops::{Add, Div, Mul, Sub}; @@ -29,6 +34,7 @@ use crate::compute::array_ops::math_op; use crate::datatypes; use crate::error::{ArrowError, Result}; +/// Vectorized version of add operation #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn add_simd(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> where diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index 6aa92c7d47703..1bf6b17a47847 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -153,21 +153,28 @@ make_type!(Float32Type, f32, DataType::Float32, 32, 0.0f32); make_type!(Float64Type, f64, DataType::Float64, 64, 0.0f64); -/// A subtype of primitive type that represents numeric values +/// A subtype of primitive type that represents numeric values, if available a SIMD type and +/// SIMD operations are defined in this trait. SIMD is leveraged in the `compute` module if +/// available. pub trait ArrowNumericType: ArrowPrimitiveType { + /// Defines the SIMD type that should be used for this numeric type #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] type Simd; + /// The number of SIMD lanes available #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn lanes() -> usize; + /// Loads a slice into a SIMD register #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn load(slice: &[Self::Native]) -> Self::Simd; + /// Performs a SIMD add operation #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn add(left: Self::Simd, right: Self::Simd) -> Self::Simd; + /// Writes a SIMD result back to a slice #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn write(simd_result: Self::Simd, slice: &mut [Self::Native]); From 6cf1fd1740e305a9b880283cbee1810079c1fa9b Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Thu, 24 Jan 2019 22:27:11 -0500 Subject: [PATCH 12/24] Removed add tests from array_ops. --- rust/arrow/src/compute/array_ops.rs | 38 ----------------------------- 1 file changed, 38 deletions(-) diff --git a/rust/arrow/src/compute/array_ops.rs b/rust/arrow/src/compute/array_ops.rs index 97074764d8746..5c930e6ce2222 100644 --- a/rust/arrow/src/compute/array_ops.rs +++ b/rust/arrow/src/compute/array_ops.rs @@ -27,19 +27,6 @@ use crate::datatypes; use crate::datatypes::ArrowNumericType; use crate::error::{ArrowError, Result}; -/// Perform `left + right` operation on two arrays. If either left or right value is null then the result is also null. -pub fn add(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> -where - T: datatypes::ArrowNumericType, - T::Native: Add - + Sub - + Mul - + Div - + Zero, -{ - math_op(left, right, |a, b| Ok(a + b)) -} - /// Perform `left - right` operation on two arrays. If either left or right value is null then the result is also null. pub fn subtract(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> where @@ -335,31 +322,6 @@ mod tests { use super::*; use crate::array::{Float64Array, Int32Array}; - #[test] - fn test_primitive_array_add() { - let a = Int32Array::from(vec![5, 6, 7, 8, 9]); - let b = Int32Array::from(vec![6, 7, 8, 9, 8]); - let c = add(&a, &b).unwrap(); - assert_eq!(11, c.value(0)); - assert_eq!(13, c.value(1)); - assert_eq!(15, c.value(2)); - assert_eq!(17, c.value(3)); - assert_eq!(17, c.value(4)); - } - - #[test] - fn test_primitive_array_add_mismatched_length() { - let a = Int32Array::from(vec![5, 6, 7, 8, 9]); - let b = Int32Array::from(vec![6, 7, 8]); - let e = add(&a, &b) - .err() - .expect("should have failed due to different lengths"); - assert_eq!( - "ComputeError(\"Cannot perform math operation on arrays of different length\")", - format!("{:?}", e) - ); - } - #[test] fn test_primitive_array_subtract() { let a = Int32Array::from(vec![1, 2, 3, 4, 5]); From 236608bb5b66422d63f3d1ff1447112a4206e623 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Thu, 24 Jan 2019 22:31:23 -0500 Subject: [PATCH 13/24] Moved test with nulls. --- rust/arrow/src/compute/arithmetic_kernels.rs | 12 ++++++++++++ rust/arrow/src/compute/array_ops.rs | 11 ----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs index 31ecb650ea873..9b25a4b09face 100644 --- a/rust/arrow/src/compute/arithmetic_kernels.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -125,4 +125,16 @@ mod tests { format!("{:?}", e) ); } + +// #[test] +// fn test_primitive_array_add_with_nulls() { +// let a = Int32Array::from(vec![Some(5), None, Some(7), None]); +// let b = Int32Array::from(vec![None, None, Some(6), Some(7)]); +// let c = add(&a, &b).unwrap(); +// assert_eq!(true, c.is_null(0)); +// assert_eq!(true, c.is_null(1)); +// assert_eq!(false, c.is_null(2)); +// assert_eq!(true, c.is_null(3)); +// assert_eq!(13, c.value(2)); +// } } diff --git a/rust/arrow/src/compute/array_ops.rs b/rust/arrow/src/compute/array_ops.rs index 5c930e6ce2222..76f4841a9686b 100644 --- a/rust/arrow/src/compute/array_ops.rs +++ b/rust/arrow/src/compute/array_ops.rs @@ -378,17 +378,6 @@ mod tests { assert_eq!(1.0, c.value(2)); } - #[test] - fn test_primitive_array_add_with_nulls() { - let a = Int32Array::from(vec![Some(5), None, Some(7), None]); - let b = Int32Array::from(vec![None, None, Some(6), Some(7)]); - let c = add(&a, &b).unwrap(); - assert_eq!(true, c.is_null(0)); - assert_eq!(true, c.is_null(1)); - assert_eq!(false, c.is_null(2)); - assert_eq!(true, c.is_null(3)); - assert_eq!(13, c.value(2)); - } #[test] fn test_primitive_array_sum() { From 2f8dd560edf505f14fe07f1f546ba36514cde37c Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Thu, 24 Jan 2019 22:52:01 -0500 Subject: [PATCH 14/24] Fixed benchmarks. --- rust/arrow/benches/arithmetic_kernels.rs | 6 +++--- rust/arrow/src/compute/arithmetic_kernels.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rust/arrow/benches/arithmetic_kernels.rs b/rust/arrow/benches/arithmetic_kernels.rs index dd3a0e309b3ec..11a18796f3b4f 100644 --- a/rust/arrow/benches/arithmetic_kernels.rs +++ b/rust/arrow/benches/arithmetic_kernels.rs @@ -23,7 +23,7 @@ extern crate arrow; use arrow::array::*; use arrow::builder::*; -use arrow::compute::arithmetic_kernels::add_simd; +use arrow::compute::arithmetic_kernels::*; use arrow::compute::array_ops::*; fn create_array(size: usize) -> Float32Array { @@ -37,13 +37,13 @@ fn create_array(size: usize) -> Float32Array { fn primitive_array_add(size: usize) { let arr_a = create_array(size); let arr_b = create_array(size); - criterion::black_box(add(&arr_a, &arr_b).unwrap()); + criterion::black_box(math_op(&arr_a, &arr_b, |a, b| Ok(a + b)).unwrap()); } fn primitive_array_add_simd(size: usize) { let arr_a = create_array(size); let arr_b = create_array(size); - criterion::black_box(add_simd(&arr_a, &arr_b).unwrap()); + criterion::black_box(add(&arr_a, &arr_b).unwrap()); } fn add_benchmark(c: &mut Criterion) { diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs index 9b25a4b09face..ca155dbd5b322 100644 --- a/rust/arrow/src/compute/arithmetic_kernels.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -36,7 +36,7 @@ use crate::error::{ArrowError, Result}; /// Vectorized version of add operation #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -pub fn add_simd(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> +fn add_simd(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> where T: datatypes::ArrowNumericType, { From f30743618d69fc3cb3f2d254908eace12b37b4ea Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Thu, 24 Jan 2019 23:09:37 -0500 Subject: [PATCH 15/24] Restored test with nulls. --- rust/arrow/src/compute/arithmetic_kernels.rs | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs index ca155dbd5b322..f31570d032e84 100644 --- a/rust/arrow/src/compute/arithmetic_kernels.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -126,15 +126,15 @@ mod tests { ); } -// #[test] -// fn test_primitive_array_add_with_nulls() { -// let a = Int32Array::from(vec![Some(5), None, Some(7), None]); -// let b = Int32Array::from(vec![None, None, Some(6), Some(7)]); -// let c = add(&a, &b).unwrap(); -// assert_eq!(true, c.is_null(0)); -// assert_eq!(true, c.is_null(1)); -// assert_eq!(false, c.is_null(2)); -// assert_eq!(true, c.is_null(3)); -// assert_eq!(13, c.value(2)); -// } + #[test] + fn test_primitive_array_add_with_nulls() { + let a = Int32Array::from(vec![Some(5), None, Some(7), None]); + let b = Int32Array::from(vec![None, None, Some(6), Some(7)]); + let c = add(&a, &b).unwrap(); + assert_eq!(true, c.is_null(0)); + assert_eq!(true, c.is_null(1)); + assert_eq!(false, c.is_null(2)); + assert_eq!(true, c.is_null(3)); + assert_eq!(13, c.value(2)); + } } From 24fe8bbbaaeed3ec05b47187fa790ea9d1da5b53 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Thu, 24 Jan 2019 23:25:48 -0500 Subject: [PATCH 16/24] Fixed value slice. --- rust/arrow/src/array.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/rust/arrow/src/array.rs b/rust/arrow/src/array.rs index 37d5e70864132..dd4daeacec01f 100644 --- a/rust/arrow/src/array.rs +++ b/rust/arrow/src/array.rs @@ -227,7 +227,7 @@ impl PrimitiveArray { /// Note this doesn't do any bound checking, for performance reason. pub fn value_slice(&self, offset: usize, len: usize) -> &[T::Native] { let raw = unsafe { - std::slice::from_raw_parts(self.raw_values().offset(offset as isize), offset + len) + std::slice::from_raw_parts(self.raw_values().offset(offset as isize), len) }; &raw[..] } @@ -704,6 +704,19 @@ mod tests { use crate::datatypes::{DataType, Field}; use crate::memory; + #[test] + fn test_example() { + + let mut builder = Int16Array::builder(100); + builder.append_value(1).unwrap(); + builder.append_null().unwrap(); + builder.append_slice(&[2, 3, 4]).unwrap(); + let array = builder.finish(); + assert_eq!(5, array.len(), "The array has 5 values, counting the null value"); + assert_eq!(2, array.value(2), "Get the value with index 2"); + assert_eq!(array.value_slice(3, 2), &[3, 4], "Get slice of len 2 starting at idx 3") + } + #[test] fn test_primitive_array_from_vec() { let buf = Buffer::from(&[0, 1, 2, 3, 4].to_byte_slice()); From 627c5aec4cfda0325126899baae8224437b7869a Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Thu, 24 Jan 2019 23:26:23 -0500 Subject: [PATCH 17/24] Skipping test for now. --- rust/arrow/src/compute/arithmetic_kernels.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs index f31570d032e84..cab114a74b30b 100644 --- a/rust/arrow/src/compute/arithmetic_kernels.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -126,6 +126,7 @@ mod tests { ); } + #[ignore] #[test] fn test_primitive_array_add_with_nulls() { let a = Int32Array::from(vec![Some(5), None, Some(7), None]); From 6c4742409ab12b51b1adf07713e8dea5676d6298 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Thu, 24 Jan 2019 23:28:26 -0500 Subject: [PATCH 18/24] Removed temp example --- rust/arrow/src/array.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/rust/arrow/src/array.rs b/rust/arrow/src/array.rs index dd4daeacec01f..a03d4f5d172c4 100644 --- a/rust/arrow/src/array.rs +++ b/rust/arrow/src/array.rs @@ -704,19 +704,6 @@ mod tests { use crate::datatypes::{DataType, Field}; use crate::memory; - #[test] - fn test_example() { - - let mut builder = Int16Array::builder(100); - builder.append_value(1).unwrap(); - builder.append_null().unwrap(); - builder.append_slice(&[2, 3, 4]).unwrap(); - let array = builder.finish(); - assert_eq!(5, array.len(), "The array has 5 values, counting the null value"); - assert_eq!(2, array.value(2), "Get the value with index 2"); - assert_eq!(array.value_slice(3, 2), &[3, 4], "Get slice of len 2 starting at idx 3") - } - #[test] fn test_primitive_array_from_vec() { let buf = Buffer::from(&[0, 1, 2, 3, 4].to_byte_slice()); From 9925e6a56962af787a83033c47ecfcc1fe3d313f Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Thu, 24 Jan 2019 23:35:35 -0500 Subject: [PATCH 19/24] Fixed lints --- rust/arrow/src/array.rs | 5 ++--- rust/arrow/src/compute/arithmetic_kernels.rs | 6 +++--- rust/arrow/src/compute/array_ops.rs | 1 - rust/arrow/src/datatypes.rs | 4 ---- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/rust/arrow/src/array.rs b/rust/arrow/src/array.rs index a03d4f5d172c4..616ad7077d92e 100644 --- a/rust/arrow/src/array.rs +++ b/rust/arrow/src/array.rs @@ -226,9 +226,8 @@ impl PrimitiveArray { /// /// Note this doesn't do any bound checking, for performance reason. pub fn value_slice(&self, offset: usize, len: usize) -> &[T::Native] { - let raw = unsafe { - std::slice::from_raw_parts(self.raw_values().offset(offset as isize), len) - }; + let raw = + unsafe { std::slice::from_raw_parts(self.raw_values().offset(offset as isize), len) }; &raw[..] } diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs index cab114a74b30b..6e8d8c2114ea6 100644 --- a/rust/arrow/src/compute/arithmetic_kernels.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -69,9 +69,9 @@ where /// Perform `left + right` operation on two arrays. If either left or right value is null then the result is also null. pub fn add(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> - where - T: datatypes::ArrowNumericType, - T::Native: Add +where + T: datatypes::ArrowNumericType, + T::Native: Add + Sub + Mul + Div diff --git a/rust/arrow/src/compute/array_ops.rs b/rust/arrow/src/compute/array_ops.rs index 76f4841a9686b..5c39b7ea85d63 100644 --- a/rust/arrow/src/compute/array_ops.rs +++ b/rust/arrow/src/compute/array_ops.rs @@ -378,7 +378,6 @@ mod tests { assert_eq!(1.0, c.value(2)); } - #[test] fn test_primitive_array_sum() { let a = Int32Array::from(vec![1, 2, 3, 4, 5]); diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index 1bf6b17a47847..a2d67c6ab9984 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -152,12 +152,10 @@ make_type!(UInt64Type, u64, DataType::UInt64, 64, 0u64); make_type!(Float32Type, f32, DataType::Float32, 32, 0.0f32); make_type!(Float64Type, f64, DataType::Float64, 64, 0.0f64); - /// A subtype of primitive type that represents numeric values, if available a SIMD type and /// SIMD operations are defined in this trait. SIMD is leveraged in the `compute` module if /// available. pub trait ArrowNumericType: ArrowPrimitiveType { - /// Defines the SIMD type that should be used for this numeric type #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] type Simd; @@ -177,13 +175,11 @@ pub trait ArrowNumericType: ArrowPrimitiveType { /// Writes a SIMD result back to a slice #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn write(simd_result: Self::Simd, slice: &mut [Self::Native]); - } macro_rules! make_numeric_type { ($impl_ty:ty, $native_ty:ty, $simd_ty:ident) => { impl ArrowNumericType for $impl_ty { - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] type Simd = $simd_ty; From 7f115e86fcff442272e8b2c6f8ba071d41780482 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Sun, 27 Jan 2019 22:46:14 -0500 Subject: [PATCH 20/24] Split SIMD into separate trait. --- rust/arrow/src/compute/arithmetic_kernels.rs | 4 +- rust/arrow/src/datatypes.rs | 45 +++++++++++++------- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs index 6e8d8c2114ea6..37c12435a7f5a 100644 --- a/rust/arrow/src/compute/arithmetic_kernels.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -38,7 +38,7 @@ use crate::error::{ArrowError, Result}; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn add_simd(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> where - T: datatypes::ArrowNumericType, + T: datatypes::ArrowNumericType + datatypes::ArrowSIMDType, { if left.len() != right.len() { return Err(ArrowError::ComputeError( @@ -70,7 +70,7 @@ where /// Perform `left + right` operation on two arrays. If either left or right value is null then the result is also null. pub fn add(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> where - T: datatypes::ArrowNumericType, + T: datatypes::ArrowNumericType + datatypes::ArrowSIMDType, T::Native: Add + Sub + Mul diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index a2d67c6ab9984..793d0c346daa1 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -152,10 +152,23 @@ make_type!(UInt64Type, u64, DataType::UInt64, 64, 0u64); make_type!(Float32Type, f32, DataType::Float32, 32, 0.0f32); make_type!(Float64Type, f64, DataType::Float64, 64, 0.0f64); -/// A subtype of primitive type that represents numeric values, if available a SIMD type and -/// SIMD operations are defined in this trait. SIMD is leveraged in the `compute` module if -/// available. -pub trait ArrowNumericType: ArrowPrimitiveType { +/// A subtype of primitive type that represents numeric values. +pub trait ArrowNumericType: ArrowPrimitiveType { } + +impl ArrowNumericType for Int8Type {} +impl ArrowNumericType for Int16Type {} +impl ArrowNumericType for Int32Type {} +impl ArrowNumericType for Int64Type {} +impl ArrowNumericType for UInt8Type {} +impl ArrowNumericType for UInt16Type {} +impl ArrowNumericType for UInt32Type {} +impl ArrowNumericType for UInt64Type {} +impl ArrowNumericType for Float32Type {} +impl ArrowNumericType for Float64Type {} + +/// A subtype of primitive type that represents SIMD capable types. SIMD operations are defined +/// in this trait and are leveraged in the `compute` module if available. +pub trait ArrowSIMDType: ArrowPrimitiveType { /// Defines the SIMD type that should be used for this numeric type #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] type Simd; @@ -177,9 +190,9 @@ pub trait ArrowNumericType: ArrowPrimitiveType { fn write(simd_result: Self::Simd, slice: &mut [Self::Native]); } -macro_rules! make_numeric_type { +macro_rules! make_simd_type { ($impl_ty:ty, $native_ty:ty, $simd_ty:ident) => { - impl ArrowNumericType for $impl_ty { + impl ArrowSIMDType for $impl_ty { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] type Simd = $simd_ty; @@ -206,16 +219,16 @@ macro_rules! make_numeric_type { }; } -make_numeric_type!(Int8Type, i8, i8x64); -make_numeric_type!(Int16Type, i16, i16x32); -make_numeric_type!(Int32Type, i32, i32x16); -make_numeric_type!(Int64Type, i64, i64x8); -make_numeric_type!(UInt8Type, u8, u8x64); -make_numeric_type!(UInt16Type, u16, u16x32); -make_numeric_type!(UInt32Type, u32, u32x16); -make_numeric_type!(UInt64Type, u64, u64x8); -make_numeric_type!(Float32Type, f32, f32x16); -make_numeric_type!(Float64Type, f64, f64x8); +make_simd_type!(Int8Type, i8, i8x64); +make_simd_type!(Int16Type, i16, i16x32); +make_simd_type!(Int32Type, i32, i32x16); +make_simd_type!(Int64Type, i64, i64x8); +make_simd_type!(UInt8Type, u8, u8x64); +make_simd_type!(UInt16Type, u16, u16x32); +make_simd_type!(UInt32Type, u32, u32x16); +make_simd_type!(UInt64Type, u64, u64x8); +make_simd_type!(Float32Type, f32, f32x16); +make_simd_type!(Float64Type, f64, f64x8); /// Allows conversion from supported Arrow types to a byte slice. pub trait ToByteSlice { From e5fad8a3b624065b2bcb23423e5227a5f5106bc2 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Mon, 28 Jan 2019 11:08:50 -0500 Subject: [PATCH 21/24] Updated to be generic over binary ops --- rust/arrow/src/compute/arithmetic_kernels.rs | 13 +++++++++---- rust/arrow/src/compute/array_ops.rs | 2 +- rust/arrow/src/datatypes.rs | 14 ++++++++++---- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs index 37c12435a7f5a..b22ec719b0ed8 100644 --- a/rust/arrow/src/compute/arithmetic_kernels.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -36,9 +36,14 @@ use crate::error::{ArrowError, Result}; /// Vectorized version of add operation #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -fn add_simd(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> +fn simd_bin_op(left: &PrimitiveArray, right: &PrimitiveArray, op: F) -> Result> where T: datatypes::ArrowNumericType + datatypes::ArrowSIMDType, + T::Simd: Add + + Sub + + Mul + + Div, + F: Fn(T::Simd, T::Simd) -> T::Simd, { if left.len() != right.len() { return Err(ArrowError::ComputeError( @@ -53,7 +58,7 @@ where for i in (0..left.len()).step_by(lanes) { let simd_left = T::load(left.value_slice(i, lanes)); let simd_right = T::load(right.value_slice(i, lanes)); - let simd_result = T::add(simd_left, simd_right); + let simd_result = T::bin_op(simd_left, simd_right, &op); let result_slice: &mut [T::Native] = unsafe { from_raw_parts_mut( @@ -78,7 +83,7 @@ where + Zero, { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - return add_simd(&left, &right); + return simd_bin_op(&left, &right, |a, b| a + b); #[allow(unreachable_code)] math_op(left, right, |a, b| Ok(a + b)) @@ -96,7 +101,7 @@ mod tests { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - let c = add_simd(&a, &b).unwrap(); + let c = simd_bin_op(&a, &b, |x, y| x + y).unwrap(); assert_eq!(11, c.value(0)); assert_eq!(13, c.value(1)); diff --git a/rust/arrow/src/compute/array_ops.rs b/rust/arrow/src/compute/array_ops.rs index 60739ed4e75fb..bce36b35749c2 100644 --- a/rust/arrow/src/compute/array_ops.rs +++ b/rust/arrow/src/compute/array_ops.rs @@ -84,7 +84,7 @@ where /// Helper function to perform math lambda function on values from two arrays. If either /// left or right value is null then the output value is also null, so `1 + null` is /// `null`. -fn math_op( +pub fn math_op( left: &PrimitiveArray, right: &PrimitiveArray, op: F, diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index 5dc93fea2b398..b636866956683 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -26,6 +26,7 @@ use std::mem::size_of; use std::slice::from_raw_parts; use std::str::FromStr; +use std::ops::{Add, Div, Mul, Sub}; use packed_simd::*; use serde_derive::{Deserialize, Serialize}; use serde_json::{json, Value}; @@ -168,7 +169,12 @@ impl ArrowNumericType for Float64Type {} /// A subtype of primitive type that represents SIMD capable types. SIMD operations are defined /// in this trait and are leveraged in the `compute` module if available. -pub trait ArrowSIMDType: ArrowPrimitiveType { +pub trait ArrowSIMDType: ArrowPrimitiveType +where Self::Simd : Add ++ Sub ++ Mul ++ Div +{ /// Defines the SIMD type that should be used for this numeric type #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] type Simd; @@ -183,7 +189,7 @@ pub trait ArrowSIMDType: ArrowPrimitiveType { /// Performs a SIMD add operation #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - fn add(left: Self::Simd, right: Self::Simd) -> Self::Simd; + fn bin_op Self::Simd>(left: Self::Simd, right: Self::Simd, op: F) -> Self::Simd; /// Writes a SIMD result back to a slice #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] @@ -207,8 +213,8 @@ macro_rules! make_simd_type { } #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - fn add(left: $simd_ty, right: $simd_ty) -> $simd_ty { - left + right + fn bin_op $simd_ty>(left: $simd_ty, right: $simd_ty, op: F) -> $simd_ty { + op(left, right) } #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] From 36608f84e6b058c3c4b6ba751f690458841ad4cb Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Thu, 31 Jan 2019 23:29:49 -0500 Subject: [PATCH 22/24] Implemented `Bitor` trait for `Buffer` --- rust/arrow/src/buffer.rs | 78 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index 6172445ec821e..ab6f7041f8cb3 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -19,11 +19,16 @@ //! fixed size aligned at a 64-byte boundary. `MutableBuffer` is like `Buffer`, but it can //! be mutated and grown. +use packed_simd::u8x64; + use std::cmp; use std::io::{Error as IoError, ErrorKind, Result as IoResult, Write}; use std::mem; +use std::ops::BitOr; +use std::slice::{from_raw_parts, from_raw_parts_mut}; use std::sync::Arc; +use crate::builder::{BufferBuilderTrait, UInt8BufferBuilder}; use crate::error::Result; use crate::memory; use crate::util::bit_util; @@ -140,7 +145,63 @@ impl> From for Buffer { } } +impl BitOr for Buffer { + type Output = Self; + + fn bitor(self, rhs: Self) -> Self { + assert_eq!( + self.len(), + rhs.len(), + "Buffers must be the same size to apply Bitwise OR." + ); + + // SIMD implementation if available + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + let mut result = + MutableBuffer::new(self.len()).with_bitset(self.len(), false); + let lanes = u8x64::lanes(); + for i in (0..self.len()).step_by(lanes) { + let left_data = + unsafe { from_raw_parts(self.raw_data().offset(i as isize), lanes) }; + let left_simd = + unsafe { u8x64::from_slice_unaligned_unchecked(left_data) }; + let right_data = + unsafe { from_raw_parts(rhs.raw_data().offset(i as isize), lanes) }; + let right_simd = + unsafe { u8x64::from_slice_unaligned_unchecked(right_data) }; + let simd_result = left_simd | right_simd; + let result_slice: &mut [u8] = unsafe { + from_raw_parts_mut( + (result.data_mut().as_mut_ptr() as *mut u8).offset(i as isize), + lanes, + ) + }; + unsafe { simd_result.write_to_slice_unaligned_unchecked(result_slice) }; + } + return result.freeze(); + } + + // Default implementation + #[allow(unreachable_code)] + { + let mut builder = UInt8BufferBuilder::new(self.len()); + for i in 0..self.len() { + unsafe { + builder + .append( + self.data().get_unchecked(i) | rhs.data().get_unchecked(i), + ) + .unwrap(); + } + } + builder.finish() + } + } +} + unsafe impl Sync for Buffer {} + unsafe impl Send for Buffer {} /// Similar to `Buffer`, but is growable and can be mutated. A mutable buffer can be @@ -349,6 +410,23 @@ mod tests { assert_ne!(buf1, buf2); } + #[test] + fn test_buffer_bitor() { + let buf1 = Buffer::from([0b01101010]); + let buf2 = Buffer::from([0b01001110]); + let buf3 = buf1 | buf2; + assert_eq!(Buffer::from([0b01101110]), buf3); + } + + #[test] + #[should_panic(expected = "Buffers must be the same size to apply Bitwise OR.")] + fn test_buffer_bitor_different_sizes() { + let buf1 = Buffer::from([1_u8, 1_u8]); + let buf2 = Buffer::from([0b01001110]); + let buf3 = buf1 | buf2; + assert_eq!(Buffer::from([0b01101110]), buf3); + } + #[test] fn test_from_raw_parts() { let buf = Buffer::from_raw_parts(null_mut(), 0); From cb00309dc6bb37a72b178c7a08072c559cc17c41 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Thu, 31 Jan 2019 23:31:28 -0500 Subject: [PATCH 23/24] Fixed linting --- rust/arrow/src/array.rs | 5 ++-- rust/arrow/src/compute/arithmetic_kernels.rs | 24 ++++++++++------ rust/arrow/src/compute/array_ops.rs | 8 ++++-- rust/arrow/src/datatypes.rs | 29 +++++++++++++------- 4 files changed, 44 insertions(+), 22 deletions(-) diff --git a/rust/arrow/src/array.rs b/rust/arrow/src/array.rs index 4a019dac1431e..08b53827930c2 100644 --- a/rust/arrow/src/array.rs +++ b/rust/arrow/src/array.rs @@ -236,8 +236,9 @@ impl PrimitiveArray { /// /// Note this doesn't do any bound checking, for performance reason. pub fn value_slice(&self, offset: usize, len: usize) -> &[T::Native] { - let raw = - unsafe { std::slice::from_raw_parts(self.raw_values().offset(offset as isize), len) }; + let raw = unsafe { + std::slice::from_raw_parts(self.raw_values().offset(offset as isize), len) + }; &raw[..] } diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs index b22ec719b0ed8..72f00000d61e7 100644 --- a/rust/arrow/src/compute/arithmetic_kernels.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -17,8 +17,8 @@ //! Defines basic arithmetic kernels for `PrimitiveArrays`. //! -//! These kernels can leverage SIMD if available on your system. Currently no runtime detection -//! is provided, you should enable the specific SIMD intrinsics using +//! These kernels can leverage SIMD if available on your system. Currently no runtime +//! detection is provided, you should enable the specific SIMD intrinsics using //! `RUSTFLAGS="-C target-feature=+avx2"` for example. See the //! [here] (https://doc.rust-lang.org/stable/std/arch/) for more information. @@ -36,13 +36,17 @@ use crate::error::{ArrowError, Result}; /// Vectorized version of add operation #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -fn simd_bin_op(left: &PrimitiveArray, right: &PrimitiveArray, op: F) -> Result> +fn simd_bin_op( + left: &PrimitiveArray, + right: &PrimitiveArray, + op: F, +) -> Result> where T: datatypes::ArrowNumericType + datatypes::ArrowSIMDType, T::Simd: Add - + Sub - + Mul - + Div, + + Sub + + Mul + + Div, F: Fn(T::Simd, T::Simd) -> T::Simd, { if left.len() != right.len() { @@ -72,8 +76,12 @@ where Ok(PrimitiveArray::::new(left.len(), result.freeze(), 0, 0)) } -/// Perform `left + right` operation on two arrays. If either left or right value is null then the result is also null. -pub fn add(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> +/// Perform `left + right` operation on two arrays. If either left or right value is null +/// then the result is also null. +pub fn add( + left: &PrimitiveArray, + right: &PrimitiveArray, +) -> Result> where T: datatypes::ArrowNumericType + datatypes::ArrowSIMDType, T::Native: Add diff --git a/rust/arrow/src/compute/array_ops.rs b/rust/arrow/src/compute/array_ops.rs index bce36b35749c2..d4f35ef267cb5 100644 --- a/rust/arrow/src/compute/array_ops.rs +++ b/rust/arrow/src/compute/array_ops.rs @@ -27,8 +27,12 @@ use crate::datatypes; use crate::datatypes::ArrowNumericType; use crate::error::{ArrowError, Result}; -/// Perform `left - right` operation on two arrays. If either left or right value is null then the result is also null. -pub fn subtract(left: &PrimitiveArray, right: &PrimitiveArray) -> Result> +/// Perform `left - right` operation on two arrays. If either left or right value is null +/// then the result is also null. +pub fn subtract( + left: &PrimitiveArray, + right: &PrimitiveArray, +) -> Result> where T: datatypes::ArrowNumericType, T::Native: Add diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index b636866956683..31130edf21573 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -23,10 +23,10 @@ use std::fmt; use std::mem::size_of; +use std::ops::{Add, Div, Mul, Sub}; use std::slice::from_raw_parts; use std::str::FromStr; -use std::ops::{Add, Div, Mul, Sub}; use packed_simd::*; use serde_derive::{Deserialize, Serialize}; use serde_json::{json, Value}; @@ -154,7 +154,7 @@ make_type!(Float32Type, f32, DataType::Float32, 32, 0.0f32); make_type!(Float64Type, f64, DataType::Float64, 64, 0.0f64); /// A subtype of primitive type that represents numeric values. -pub trait ArrowNumericType: ArrowPrimitiveType { } +pub trait ArrowNumericType: ArrowPrimitiveType {} impl ArrowNumericType for Int8Type {} impl ArrowNumericType for Int16Type {} @@ -167,13 +167,14 @@ impl ArrowNumericType for UInt64Type {} impl ArrowNumericType for Float32Type {} impl ArrowNumericType for Float64Type {} -/// A subtype of primitive type that represents SIMD capable types. SIMD operations are defined -/// in this trait and are leveraged in the `compute` module if available. +/// A subtype of primitive type that represents SIMD capable types. SIMD operations are +/// defined in this trait and are leveraged in the `compute` module if available. pub trait ArrowSIMDType: ArrowPrimitiveType -where Self::Simd : Add -+ Sub -+ Mul -+ Div +where + Self::Simd: Add + + Sub + + Mul + + Div, { /// Defines the SIMD type that should be used for this numeric type #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] @@ -189,7 +190,11 @@ where Self::Simd : Add /// Performs a SIMD add operation #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - fn bin_op Self::Simd>(left: Self::Simd, right: Self::Simd, op: F) -> Self::Simd; + fn bin_op Self::Simd>( + left: Self::Simd, + right: Self::Simd, + op: F, + ) -> Self::Simd; /// Writes a SIMD result back to a slice #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] @@ -213,7 +218,11 @@ macro_rules! make_simd_type { } #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - fn bin_op $simd_ty>(left: $simd_ty, right: $simd_ty, op: F) -> $simd_ty { + fn bin_op $simd_ty>( + left: $simd_ty, + right: $simd_ty, + op: F, + ) -> $simd_ty { op(left, right) } From f866d57c8fa6b38f9074f78b3e59fc41b9e9fc76 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Thu, 31 Jan 2019 23:40:38 -0500 Subject: [PATCH 24/24] Changed `BitOr` to `BitAnd` --- rust/arrow/src/buffer.rs | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index ab6f7041f8cb3..322480b4ee90f 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -24,7 +24,7 @@ use packed_simd::u8x64; use std::cmp; use std::io::{Error as IoError, ErrorKind, Result as IoResult, Write}; use std::mem; -use std::ops::BitOr; +use std::ops::BitAnd; use std::slice::{from_raw_parts, from_raw_parts_mut}; use std::sync::Arc; @@ -145,10 +145,10 @@ impl> From for Buffer { } } -impl BitOr for Buffer { +impl BitAnd for Buffer { type Output = Self; - fn bitor(self, rhs: Self) -> Self { + fn bitand(self, rhs: Self) -> Self { assert_eq!( self.len(), rhs.len(), @@ -170,7 +170,7 @@ impl BitOr for Buffer { unsafe { from_raw_parts(rhs.raw_data().offset(i as isize), lanes) }; let right_simd = unsafe { u8x64::from_slice_unaligned_unchecked(right_data) }; - let simd_result = left_simd | right_simd; + let simd_result = left_simd & right_simd; let result_slice: &mut [u8] = unsafe { from_raw_parts_mut( (result.data_mut().as_mut_ptr() as *mut u8).offset(i as isize), @@ -190,7 +190,7 @@ impl BitOr for Buffer { unsafe { builder .append( - self.data().get_unchecked(i) | rhs.data().get_unchecked(i), + self.data().get_unchecked(i) & rhs.data().get_unchecked(i), ) .unwrap(); } @@ -411,20 +411,19 @@ mod tests { } #[test] - fn test_buffer_bitor() { + fn test_buffer_bitand() { let buf1 = Buffer::from([0b01101010]); let buf2 = Buffer::from([0b01001110]); - let buf3 = buf1 | buf2; - assert_eq!(Buffer::from([0b01101110]), buf3); + let buf3 = buf1 & buf2; + assert_eq!(Buffer::from([0b01001010]), buf3); } #[test] #[should_panic(expected = "Buffers must be the same size to apply Bitwise OR.")] - fn test_buffer_bitor_different_sizes() { + fn test_buffer_bitand_different_sizes() { let buf1 = Buffer::from([1_u8, 1_u8]); let buf2 = Buffer::from([0b01001110]); - let buf3 = buf1 | buf2; - assert_eq!(Buffer::from([0b01101110]), buf3); + let _buf3 = buf1 & buf2; } #[test]