Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] [Rust] Add explicit SIMD vectorization for arithmetic ops in "array_ops" #3451

Closed
wants to merge 25 commits into from
Closed
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion rust/arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ csv = "1.0.0"
num = "0.2"
regex = "1.1"
lazy_static = "1.2"
packed_simd = "0.3.1"

[dev-dependencies]
criterion = "0.2"
Expand All @@ -56,4 +57,8 @@ harness = false

[[bench]]
name = "builder"
harness = false
harness = false

[[bench]]
name = "arithmetic_kernels"
harness = false
63 changes: 63 additions & 0 deletions rust/arrow/benches/arithmetic_kernels.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#[macro_use]
extern crate criterion;
use criterion::Criterion;

extern crate arrow;

use arrow::array::*;
use arrow::builder::*;
use arrow::compute::arithmetic_kernels::*;
use arrow::compute::array_ops::*;

fn create_array(size: usize) -> Float32Array {
let mut builder = Float32Builder::new(size);
for _i in 0..size {
builder.append_value(1.0).unwrap();
}
builder.finish()
}

fn primitive_array_add(size: usize) {
let arr_a = create_array(size);
let arr_b = create_array(size);
criterion::black_box(math_op(&arr_a, &arr_b, |a, b| Ok(a + b)).unwrap());
}

fn primitive_array_add_simd(size: usize) {
let arr_a = create_array(size);
let arr_b = create_array(size);
criterion::black_box(add(&arr_a, &arr_b).unwrap());
}

fn add_benchmark(c: &mut Criterion) {
c.bench_function("add 128", |b| b.iter(|| primitive_array_add(128)));
c.bench_function("add 128 simd", |b| b.iter(|| primitive_array_add_simd(128)));
c.bench_function("add 256", |b| b.iter(|| primitive_array_add(256)));
c.bench_function("add 256 simd", |b| b.iter(|| primitive_array_add_simd(256)));
c.bench_function("add 512", |b| b.iter(|| primitive_array_add(512)));
c.bench_function("add 512 simd", |b| b.iter(|| primitive_array_add_simd(512)));
c.bench_function("add 1024", |b| b.iter(|| primitive_array_add(1024)));
c.bench_function("add 1024 simd", |b| {
b.iter(|| primitive_array_add_simd(1024))
});
}

criterion_group!(benches, add_benchmark);
criterion_main!(benches);
5 changes: 3 additions & 2 deletions rust/arrow/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,9 @@ impl<T: ArrowNumericType> PrimitiveArray<T> {
///
/// Note this doesn't do any bound checking, for performance reason.
pub fn value_slice(&self, offset: usize, len: usize) -> &[T::Native] {
let raw = unsafe { std::slice::from_raw_parts(self.raw_values(), self.len()) };
&raw[offset..offset + len]
let raw =
unsafe { std::slice::from_raw_parts(self.raw_values().offset(offset as isize), len) };
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment says that it does not do bounds checking but I found that it did.

&raw[..]
}

// Returns a new primitive array builder
Expand Down
141 changes: 141 additions & 0 deletions rust/arrow/src/compute/arithmetic_kernels.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Defines basic arithmetic kernels for `PrimitiveArrays`.
//!
//! These kernels can leverage SIMD if available on your system. Currently no runtime detection
//! is provided, you should enable the specific SIMD intrinsics using
//! `RUSTFLAGS="-C target-feature=+avx2"` for example. See the
//! [here] (https://doc.rust-lang.org/stable/std/arch/) for more information.

use std::mem;
use std::ops::{Add, Div, Mul, Sub};
use std::slice::from_raw_parts_mut;

use num::Zero;

use crate::array::*;
use crate::buffer::MutableBuffer;
use crate::compute::array_ops::math_op;
use crate::datatypes;
use crate::error::{ArrowError, Result};

/// Vectorized version of add operation
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn add_simd<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<PrimitiveArray<T>>
where
T: datatypes::ArrowNumericType,
{
if left.len() != right.len() {
return Err(ArrowError::ComputeError(
"Cannot perform math operation on arrays of different length".to_string(),
));
}

let lanes = T::lanes();
let buffer_size = left.len() * mem::size_of::<T::Native>();
let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false);

for i in (0..left.len()).step_by(lanes) {
let simd_left = T::load(left.value_slice(i, lanes));
let simd_right = T::load(right.value_slice(i, lanes));
let simd_result = T::add(simd_left, simd_right);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How are we going to handle nulls?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still have to work on nulls


let result_slice: &mut [T::Native] = unsafe {
from_raw_parts_mut(
(result.data_mut().as_mut_ptr() as *mut T::Native).offset(i as isize),
lanes,
)
};
T::write(simd_result, result_slice);
}

Ok(PrimitiveArray::<T>::new(left.len(), result.freeze(), 0, 0))
}

/// Perform `left + right` operation on two arrays. If either left or right value is null then the result is also null.
pub fn add<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<PrimitiveArray<T>>
where
T: datatypes::ArrowNumericType,
T::Native: Add<Output = T::Native>
+ Sub<Output = T::Native>
+ Mul<Output = T::Native>
+ Div<Output = T::Native>
+ Zero,
{
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we make this runtime detection? e.g.:

    if is_x86_feature_detected!("avx2") {
        return add_simd(&left, &right);
    } else {
        math_op(left, right, |a, b| Ok(a + b))
    }

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See below.

return add_simd(&left, &right);

#[allow(unreachable_code)]
math_op(left, right, |a, b| Ok(a + b))
}

#[cfg(test)]
mod tests {
use super::*;
use crate::array::Int32Array;

#[test]
fn test_primitive_array_add() {
let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
let b = Int32Array::from(vec![6, 7, 8, 9, 8]);

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
let c = add_simd(&a, &b).unwrap();

assert_eq!(11, c.value(0));
assert_eq!(13, c.value(1));
assert_eq!(15, c.value(2));
assert_eq!(17, c.value(3));
assert_eq!(17, c.value(4));
}

let d = add(&a, &b).unwrap();
assert_eq!(11, d.value(0));
assert_eq!(13, d.value(1));
assert_eq!(15, d.value(2));
assert_eq!(17, d.value(3));
assert_eq!(17, d.value(4));
}

#[test]
fn test_primitive_array_add_mismatched_length() {
let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
let b = Int32Array::from(vec![6, 7, 8]);
let e = add(&a, &b)
.err()
.expect("should have failed due to different lengths");
assert_eq!(
"ComputeError(\"Cannot perform math operation on arrays of different length\")",
format!("{:?}", e)
);
}

#[ignore]
#[test]
fn test_primitive_array_add_with_nulls() {
let a = Int32Array::from(vec![Some(5), None, Some(7), None]);
let b = Int32Array::from(vec![None, None, Some(6), Some(7)]);
let c = add(&a, &b).unwrap();
assert_eq!(true, c.is_null(0));
assert_eq!(true, c.is_null(1));
assert_eq!(false, c.is_null(2));
assert_eq!(true, c.is_null(3));
assert_eq!(13, c.value(2));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,12 @@ use std::ops::{Add, Div, Mul, Sub};

use num::Zero;

use crate::array::{Array, BooleanArray, PrimitiveArray};
use crate::array::*;
use crate::builder::PrimitiveBuilder;
use crate::datatypes;
use crate::datatypes::ArrowNumericType;
use crate::error::{ArrowError, Result};

/// Perform `left + right` operation on two arrays. If either left or right value is null then the result is also null.
pub fn add<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<PrimitiveArray<T>>
where
T: datatypes::ArrowNumericType,
T::Native: Add<Output = T::Native>
+ Sub<Output = T::Native>
+ Mul<Output = T::Native>
+ Div<Output = T::Native>
+ Zero,
{
math_op(left, right, |a, b| Ok(a + b))
}

/// Perform `left - right` operation on two arrays. If either left or right value is null then the result is also null.
pub fn subtract<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<PrimitiveArray<T>>
where
Expand Down Expand Up @@ -88,7 +75,7 @@ where

/// Helper function to perform math lambda function on values from two arrays. If either left or
/// right value is null then the output value is also null, so `1 + null` is `null`.
fn math_op<T, F>(
pub fn math_op<T, F>(
left: &PrimitiveArray<T>,
right: &PrimitiveArray<T>,
op: F,
Expand Down Expand Up @@ -335,31 +322,6 @@ mod tests {
use super::*;
use crate::array::{Float64Array, Int32Array};

#[test]
fn test_primitive_array_add() {
let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
let b = Int32Array::from(vec![6, 7, 8, 9, 8]);
let c = add(&a, &b).unwrap();
assert_eq!(11, c.value(0));
assert_eq!(13, c.value(1));
assert_eq!(15, c.value(2));
assert_eq!(17, c.value(3));
assert_eq!(17, c.value(4));
}

#[test]
fn test_primitive_array_add_mismatched_length() {
let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
let b = Int32Array::from(vec![6, 7, 8]);
let e = add(&a, &b)
.err()
.expect("should have failed due to different lengths");
assert_eq!(
"ComputeError(\"Cannot perform math operation on arrays of different length\")",
format!("{:?}", e)
);
}

#[test]
fn test_primitive_array_subtract() {
let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
Expand Down Expand Up @@ -416,18 +378,6 @@ mod tests {
assert_eq!(1.0, c.value(2));
}

#[test]
fn test_primitive_array_add_with_nulls() {
let a = Int32Array::from(vec![Some(5), None, Some(7), None]);
let b = Int32Array::from(vec![None, None, Some(6), Some(7)]);
let c = add(&a, &b).unwrap();
assert_eq!(true, c.is_null(0));
assert_eq!(true, c.is_null(1));
assert_eq!(false, c.is_null(2));
assert_eq!(true, c.is_null(3));
assert_eq!(13, c.value(2));
}

#[test]
fn test_primitive_array_sum() {
let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
Expand Down
19 changes: 19 additions & 0 deletions rust/arrow/src/compute/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

pub mod arithmetic_kernels;
pub mod array_ops;
Loading