Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Migrated to portable simd #747

Merged
merged 5 commits into from
Mar 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ jobs:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
toolchain: nightly-2021-12-10
toolchain: nightly-2022-03-03
override: true
- uses: Swatinem/rust-cache@v1
with:
Expand All @@ -99,7 +99,7 @@ jobs:
submodules: true # needed to test IPC, which are located in a submodule
- uses: actions-rs/toolchain@v1
with:
toolchain: nightly-2021-12-10
toolchain: nightly-2022-03-03
override: true
- uses: Swatinem/rust-cache@v1
with:
Expand Down Expand Up @@ -189,7 +189,7 @@ jobs:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
toolchain: nightly-2022-01-17
toolchain: nightly-2022-03-03
override: true
- uses: Swatinem/rust-cache@v1
- name: Run
Expand Down
4 changes: 1 addition & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@ itertools = { version = "^0.10", optional = true }

base64 = { version = "0.13.0", optional = true }

packed_simd = { version = "0.3", optional = true, package = "packed_simd_2" }

# to write to parquet as a stream
futures = { version = "0.3", optional = true }

Expand Down Expand Up @@ -211,8 +209,8 @@ compute = [
"compute_window"
]
benchmarks = ["rand"]
simd = ["packed_simd"]
serde_types = ["serde", "serde_derive"]
simd = []

[package.metadata.cargo-all-features]
allowlist = ["compute", "compute_sort", "compute_hash", "compute_nullif"]
Expand Down
15 changes: 12 additions & 3 deletions benches/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ use arrow2::array::*;
use arrow2::compute::aggregate::*;
use arrow2::util::bench_util::*;

fn bench_sum(arr_a: &PrimitiveArray<f32>) {
fn bench_sum(arr_a: &dyn Array) {
sum(criterion::black_box(arr_a)).unwrap();
}

fn bench_min(arr_a: &PrimitiveArray<f32>) {
min_primitive(criterion::black_box(arr_a)).unwrap();
fn bench_min(arr_a: &dyn Array) {
min(criterion::black_box(arr_a)).unwrap();
}

fn add_benchmark(c: &mut Criterion) {
Expand All @@ -24,6 +24,15 @@ fn add_benchmark(c: &mut Criterion) {
b.iter(|| bench_min(&arr_a))
});

let arr_a = create_primitive_array::<i32>(size, 0.0);

c.bench_function(&format!("sum 2^{} i32", log2_size), |b| {
b.iter(|| bench_sum(&arr_a))
});
c.bench_function(&format!("min 2^{} i32", log2_size), |b| {
b.iter(|| bench_min(&arr_a))
});

let arr_a = create_primitive_array::<f32>(size, 0.1);

c.bench_function(&format!("sum null 2^{} f32", log2_size), |b| {
Expand Down
20 changes: 10 additions & 10 deletions src/compute/aggregate/min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ pub trait SimdOrd<T> {
/// reduce itself to the minimum
fn min_element(self) -> T;
/// lane-wise maximum between two instances
fn max(self, x: Self) -> Self;
fn max_lane(self, x: Self) -> Self;
/// lane-wise minimum between two instances
fn min(self, x: Self) -> Self;
fn min_lane(self, x: Self) -> Self;
/// returns a new instance with all lanes equal to `MIN`
fn new_min() -> Self;
/// returns a new instance with all lanes equal to `MAX`
Expand Down Expand Up @@ -120,11 +120,11 @@ where

let chunk_reduced = chunks.fold(T::Simd::new_min(), |acc, chunk| {
let chunk = T::Simd::from_chunk(chunk);
acc.min(chunk)
acc.min_lane(chunk)
});

let remainder = T::Simd::from_incomplete_chunk(remainder, T::Simd::MAX);
let reduced = chunk_reduced.min(remainder);
let reduced = chunk_reduced.min_lane(remainder);

reduced.min_element()
}
Expand All @@ -143,14 +143,14 @@ where
let chunk = T::Simd::from_chunk(chunk);
let mask = <T::Simd as NativeSimd>::Mask::from_chunk(validity_chunk);
let chunk = chunk.select(mask, T::Simd::new_min());
acc.min(chunk)
acc.min_lane(chunk)
},
);

let remainder = T::Simd::from_incomplete_chunk(chunks.remainder(), T::Simd::MAX);
let mask = <T::Simd as NativeSimd>::Mask::from_chunk(validity_masks.remainder());
let remainder = remainder.select(mask, T::Simd::new_min());
let reduced = chunk_reduced.min(remainder);
let reduced = chunk_reduced.min_lane(remainder);

reduced.min_element()
}
Expand Down Expand Up @@ -199,11 +199,11 @@ where

let chunk_reduced = chunks.fold(T::Simd::new_max(), |acc, chunk| {
let chunk = T::Simd::from_chunk(chunk);
acc.max(chunk)
acc.max_lane(chunk)
});

let remainder = T::Simd::from_incomplete_chunk(remainder, T::Simd::MIN);
let reduced = chunk_reduced.max(remainder);
let reduced = chunk_reduced.max_lane(remainder);

reduced.max_element()
}
Expand All @@ -222,14 +222,14 @@ where
let chunk = T::Simd::from_chunk(chunk);
let mask = <T::Simd as NativeSimd>::Mask::from_chunk(validity_chunk);
let chunk = chunk.select(mask, T::Simd::new_max());
acc.max(chunk)
acc.max_lane(chunk)
},
);

let remainder = T::Simd::from_incomplete_chunk(chunks.remainder(), T::Simd::MIN);
let mask = <T::Simd as NativeSimd>::Mask::from_chunk(validity_masks.remainder());
let remainder = remainder.select(mask, T::Simd::new_max());
let reduced = chunk_reduced.max(remainder);
let reduced = chunk_reduced.max_lane(remainder);

reduced.max_element()
}
Expand Down
8 changes: 4 additions & 4 deletions src/compute/aggregate/simd/native.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ macro_rules! simd_ord_int {
}

#[inline]
fn max(self, x: Self) -> Self {
fn max_lane(self, x: Self) -> Self {
let mut result = <$simd>::default();
result
.0
Expand All @@ -82,7 +82,7 @@ macro_rules! simd_ord_int {
}

#[inline]
fn min(self, x: Self) -> Self {
fn min_lane(self, x: Self) -> Self {
let mut result = <$simd>::default();
result
.0
Expand Down Expand Up @@ -123,7 +123,7 @@ macro_rules! simd_ord_float {
}

#[inline]
fn max(self, x: Self) -> Self {
fn max_lane(self, x: Self) -> Self {
let mut result = <$simd>::default();
result
.0
Expand All @@ -135,7 +135,7 @@ macro_rules! simd_ord_float {
}

#[inline]
fn min(self, x: Self) -> Self {
fn min_lane(self, x: Self) -> Self {
let mut result = <$simd>::default();
result
.0
Expand Down
36 changes: 18 additions & 18 deletions src/compute/aggregate/simd/packed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,16 @@ macro_rules! simd_sum {
};
}

simd_sum!(f32x16, f32, sum);
simd_sum!(f64x8, f64, sum);
simd_sum!(u8x64, u8, wrapping_sum);
simd_sum!(u16x32, u16, wrapping_sum);
simd_sum!(u32x16, u32, wrapping_sum);
simd_sum!(u64x8, u64, wrapping_sum);
simd_sum!(i8x64, i8, wrapping_sum);
simd_sum!(i16x32, i16, wrapping_sum);
simd_sum!(i32x16, i32, wrapping_sum);
simd_sum!(i64x8, i64, wrapping_sum);
simd_sum!(f32x16, f32, horizontal_sum);
simd_sum!(f64x8, f64, horizontal_sum);
simd_sum!(u8x64, u8, horizontal_sum);
simd_sum!(u16x32, u16, horizontal_sum);
simd_sum!(u32x16, u32, horizontal_sum);
simd_sum!(u64x8, u64, horizontal_sum);
simd_sum!(i8x64, i8, horizontal_sum);
simd_sum!(i16x32, i16, horizontal_sum);
simd_sum!(i32x16, i32, horizontal_sum);
simd_sum!(i64x8, i64, horizontal_sum);

macro_rules! simd_ord_int {
($simd:tt, $type:ty) => {
Expand All @@ -33,21 +33,21 @@ macro_rules! simd_ord_int {

#[inline]
fn max_element(self) -> $type {
self.max_element()
self.horizontal_max()
}

#[inline]
fn min_element(self) -> $type {
self.min_element()
self.horizontal_min()
}

#[inline]
fn max(self, x: Self) -> Self {
fn max_lane(self, x: Self) -> Self {
self.max(x)
}

#[inline]
fn min(self, x: Self) -> Self {
fn min_lane(self, x: Self) -> Self {
self.min(x)
}

Expand All @@ -72,21 +72,21 @@ macro_rules! simd_ord_float {

#[inline]
fn max_element(self) -> $type {
self.max_element()
self.horizontal_max()
}

#[inline]
fn min_element(self) -> $type {
self.min_element()
self.horizontal_min()
}

#[inline]
fn max(self, x: Self) -> Self {
fn max_lane(self, x: Self) -> Self {
self.max(x)
}

#[inline]
fn min(self, x: Self) -> Self {
fn min_lane(self, x: Self) -> Self {
self.min(x)
}

Expand Down
20 changes: 10 additions & 10 deletions src/compute/comparison/simd/packed.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::convert::TryInto;
use std::simd::ToBitMask;

use packed_simd::*;

use crate::types::simd::*;
use crate::types::{days_ms, months_days_ns};

use super::*;
Expand All @@ -15,48 +15,48 @@ macro_rules! simd8 {
impl Simd8Lanes<$type> for $md {
#[inline]
fn from_chunk(v: &[$type]) -> Self {
<$md>::from_slice_unaligned(v)
<$md>::from_slice(v)
}

#[inline]
fn from_incomplete_chunk(v: &[$type], remaining: $type) -> Self {
let mut a = [remaining; 8];
a.iter_mut().zip(v.iter()).for_each(|(a, b)| *a = *b);
Self::from_chunk(a.as_ref())
Self::from_array(a)
}
}

impl Simd8PartialEq for $md {
#[inline]
fn eq(self, other: Self) -> u8 {
self.eq(other).bitmask()
self.lanes_eq(other).to_bitmask()
}

#[inline]
fn neq(self, other: Self) -> u8 {
self.ne(other).bitmask()
self.lanes_ne(other).to_bitmask()
}
}

impl Simd8PartialOrd for $md {
#[inline]
fn lt_eq(self, other: Self) -> u8 {
self.le(other).bitmask()
self.lanes_le(other).to_bitmask()
}

#[inline]
fn lt(self, other: Self) -> u8 {
self.lt(other).bitmask()
self.lanes_lt(other).to_bitmask()
}

#[inline]
fn gt_eq(self, other: Self) -> u8 {
self.ge(other).bitmask()
self.lanes_ge(other).to_bitmask()
}

#[inline]
fn gt(self, other: Self) -> u8 {
self.gt(other).bitmask()
self.lanes_gt(other).to_bitmask()
}
}
};
Expand Down
2 changes: 1 addition & 1 deletion src/doc/lib.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,5 +87,5 @@ functionality, such as:
* `compute` to operate on arrays (addition, sum, sort, etc.)

The feature `simd` (not part of `full`) produces more explicit SIMD instructions
via [`packed_simd`](https://github.com/rust-lang/packed_simd), but requires the
via [`std::simd`](https://doc.rust-lang.org/nightly/std/simd/index.html), but requires the
nightly channel.
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
//
#![allow(clippy::len_without_is_empty)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![cfg_attr(feature = "simd", feature(portable_simd))]

#[macro_use]
pub mod array;
Expand Down
4 changes: 2 additions & 2 deletions src/types/simd/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Contains traits and implementations of multi-data used in SIMD.
//! The actual representation is driven by the feature flag `"simd"`, which, if set,
//! uses `packed_simd2` to get the intrinsics.
//! uses [`std::simd`].
use super::{days_ms, months_days_ns};
use super::{BitChunk, BitChunkIter, NativeType};

Expand All @@ -14,7 +14,7 @@ pub trait FromMaskChunk<T> {
/// # Safety
/// The `NativeType` and the `NativeSimd` must have a matching alignment.
/// e.g. slicing `&[NativeType]` by `align_of<NativeSimd>()` must be properly aligned/safe.
pub unsafe trait NativeSimd: Default + Copy {
pub unsafe trait NativeSimd: Sized + Default + Copy {
/// Number of lanes
const LANES: usize;
/// The [`NativeType`] of this struct. E.g. `f32` for a `NativeSimd = f32x16`.
Expand Down
Loading