Skip to content

Commit

Permalink
Improved filter performance via SIMD selection [3x] (j…
Browse files Browse the repository at this point in the history
  • Loading branch information
sundy-li authored and Dexter Duckworth committed Mar 2, 2022
1 parent df75eb2 commit 4fbcfa2
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 39 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ target-tarpaulin
venv
lcov.info
Cargo.lock
example.arrow
fixtures
settings.json
dev/
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ harness = false
name = "comparison_kernels"
harness = false


[[bench]]
name = "read_parquet"
harness = false
Expand Down
9 changes: 9 additions & 0 deletions benches/filter_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,21 @@ fn add_benchmark(c: &mut Criterion) {
});

let data_array = create_primitive_array::<f32>(size, 0.5);
let data_array_nonull = create_primitive_array::<f32>(size, 0.0);
c.bench_function("filter f32", |b| {
b.iter(|| bench_filter(&data_array, &filter_array))
});
c.bench_function("filter f32 high selectivity", |b| {
b.iter(|| bench_filter(&data_array, &dense_filter_array))
});

c.bench_function("filter f32 nonull", |b| {
b.iter(|| bench_filter(&data_array_nonull, &filter_array))
});
c.bench_function("filter f32 nonull high selectivity", |b| {
b.iter(|| bench_filter(&data_array_nonull, &dense_filter_array))
});

c.bench_function("filter context f32", |b| {
b.iter(|| bench_built_filter(&filter, &data_array))
});
Expand Down
57 changes: 37 additions & 20 deletions src/compute/filter.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
//! Contains operators to filter arrays such as [`filter`].
use crate::array::growable::{make_growable, Growable};
use crate::bitmap::utils::{BitChunkIterExact, BitChunksExact};
use crate::bitmap::utils::{BitChunk, BitChunkIterExact, BitChunksExact};
use crate::bitmap::{utils::SlicesIterator, Bitmap, MutableBitmap};
use crate::chunk::Chunk;
use crate::datatypes::DataType;
use crate::error::Result;
use crate::types::simd::{NativeSimd, Simd};
use crate::types::BitChunkIter;
use crate::types::BitChunkOnes;
use crate::{array::*, types::NativeType};
use num_traits::One;
use num_traits::Zero;

/// Function that can filter arbitrary arrays
pub type Filter<'a> = Box<dyn Fn(&dyn Array) -> Box<dyn Array> + 'a + Send + Sync>;
Expand All @@ -21,20 +23,25 @@ where
I: BitChunkIterExact<<<T as Simd>::Simd as NativeSimd>::Chunk>,
{
let mut chunks = values.chunks_exact(T::Simd::LANES);

let mut new = Vec::<T>::with_capacity(filter_count);
let mut dst = new.as_mut_ptr();
chunks
.by_ref()
.zip(mask_chunks.by_ref())
.for_each(|(chunk, validity_chunk)| {
let iter = BitChunkIter::new(validity_chunk, T::Simd::LANES);
for (value, b) in chunk.iter().zip(iter) {
if b {
unsafe {
dst.write(*value);
dst = dst.add(1);
};
let ones_iter = BitChunkOnes::new(validity_chunk);

let (size, _) = ones_iter.size_hint();
if size == T::Simd::LANES {
// Fast path: all lanes are set
unsafe {
std::ptr::copy(chunk.as_ptr(), dst, size);
dst = dst.add(size);
}
} else {
for pos in ones_iter {
dst.write(chunk[pos]);
dst = dst.add(1);
}
}
});
Expand Down Expand Up @@ -74,22 +81,32 @@ where
let mut validity_chunks = validity.chunks::<<T::Simd as NativeSimd>::Chunk>();

let mut new = Vec::<T>::with_capacity(filter_count);
let mut new_validity = MutableBitmap::with_capacity(filter_count);
let mut dst = new.as_mut_ptr();
let mut new_validity = MutableBitmap::with_capacity(filter_count);

chunks
.by_ref()
.zip(validity_chunks.by_ref())
.zip(mask_chunks.by_ref())
.for_each(|((chunk, validity_chunk), mask_chunk)| {
let mask_iter = BitChunkIter::new(mask_chunk, T::Simd::LANES);
let validity_iter = BitChunkIter::new(validity_chunk, T::Simd::LANES);
for ((value, is_valid), is_selected) in chunk.iter().zip(validity_iter).zip(mask_iter) {
if is_selected {
unsafe {
dst.write(*value);
dst = dst.add(1);
new_validity.push_unchecked(is_valid);
};
let ones_iter = BitChunkOnes::new(mask_chunk);
let (size, _) = ones_iter.size_hint();

if size == T::Simd::LANES {
// Fast path: all lanes are set
unsafe {
std::ptr::copy(chunk.as_ptr(), dst, size);
dst = dst.add(size);
new_validity.extend_from_slice(validity_chunk.to_ne_bytes().as_ref(), 0, size);
}
} else {
for pos in ones_iter {
dst.write(chunk[pos]);
dst = dst.add(1);
new_validity.push(
validity_chunk & (<<<T as Simd>::Simd as NativeSimd>::Chunk>::one() << pos)
> <<<T as Simd>::Simd as NativeSimd>::Chunk>::zero(),
);
}
}
});
Expand Down
88 changes: 70 additions & 18 deletions src/types/bit_chunk.rs
Original file line number Diff line number Diff line change
@@ -1,30 +1,26 @@
use std::{
fmt::Binary,
ops::{BitAnd, BitAndAssign, BitOr, Not, Shl, ShlAssign, ShrAssign},
ops::{BitAndAssign, Not, Shl, ShlAssign, ShrAssign},
};

use num_traits::PrimInt;

use super::NativeType;

/// A chunk of bits. This is used to create masks of a given length
/// whose width is `1` bit. In `simd_packed` notation, this corresponds to `m1xY`.
pub trait BitChunk:
super::private::Sealed
+ PrimInt
+ NativeType
+ Binary
+ BitAnd<Output = Self>
+ ShlAssign
+ Not<Output = Self>
+ ShrAssign<usize>
+ ShlAssign<usize>
+ Shl<usize, Output = Self>
+ Eq
+ BitAndAssign
+ BitOr<Output = Self>
{
/// A value with a single bit set at the most right position.
fn one() -> Self;
/// A value with no bits set.
fn zero() -> Self;
/// convert itself into bytes.
fn to_ne_bytes(self) -> Self::Bytes;
/// convert itself from bytes.
Expand All @@ -34,11 +30,6 @@ pub trait BitChunk:
macro_rules! bit_chunk {
($ty:ty) => {
impl BitChunk for $ty {
#[inline(always)]
fn zero() -> Self {
0
}

#[inline(always)]
fn to_ne_bytes(self) -> Self::Bytes {
self.to_ne_bytes()
Expand All @@ -48,11 +39,6 @@ macro_rules! bit_chunk {
fn from_ne_bytes(v: Self::Bytes) -> Self {
Self::from_ne_bytes(v)
}

#[inline(always)]
fn one() -> Self {
1
}
}
};
}
Expand Down Expand Up @@ -113,6 +99,62 @@ impl<T: BitChunk> Iterator for BitChunkIter<T> {
}
}

// # Safety
// a mathematical invariant of this iterator
// NOTE(review): presumably this means `size_hint` is always exact for
// `BitChunkIter` — confirm against its `Iterator` impl.
unsafe impl<T: BitChunk> crate::trusted_len::TrustedLen for BitChunkIter<T> {}

/// An [`Iterator<Item=usize>`] over a [`BitChunk`].
/// This iterator returns the position of each set bit, from the least
/// significant bit to the most significant one.
/// See <https://lemire.me/blog/2018/03/08/iterating-over-set-bits-quickly-simd-edition/>.
/// # Example
/// ```
/// use arrow2::types::BitChunkOnes;
/// let a = 0b00010000u8;
/// let iter = BitChunkOnes::new(a);
/// let r = iter.collect::<Vec<_>>();
/// assert_eq!(r, vec![4]);
/// ```
pub struct BitChunkOnes<T: BitChunk> {
// the chunk being scanned; each bit is cleared from it once it has been yielded
value: T,
// number of set bits that have not been yielded yet
remaining: usize,
}

impl<T: BitChunk> BitChunkOnes<T> {
    /// Creates a new [`BitChunkOnes`] that iterates over the positions of the
    /// bits set in `value`.
    #[inline]
    pub fn new(value: T) -> Self {
        // Counted once up front so that `size_hint` is exact from the start.
        let remaining = value.count_ones() as usize;
        Self { value, remaining }
    }
}

impl<T: BitChunk> Iterator for BitChunkOnes<T> {
    type Item = usize;

    /// Yields the position of the lowest bit still set in the chunk and
    /// clears it; returns `None` once every set bit has been reported.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        match self.remaining {
            0 => None,
            left => {
                let position = self.value.trailing_zeros() as usize;
                // `x & (x - 1)` clears the lowest set bit of `x`; `x` is
                // non-zero here because at least one set bit remains.
                self.value &= self.value - T::one();
                self.remaining = left - 1;
                Some(position)
            }
        }
    }

    /// The number of set bits left to yield, as an exact bound.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let exact = self.remaining;
        (exact, Some(exact))
    }
}

// # Safety
// `size_hint` reports `(remaining, Some(remaining))`, and `next` yields exactly
// `remaining` more items: the counter starts at the number of set bits, is
// decremented once per yielded item, and `next` returns `None` at zero.
unsafe impl<T: BitChunk> crate::trusted_len::TrustedLen for BitChunkOnes<T> {}

#[cfg(test)]
mod tests {
use super::*;
Expand All @@ -125,4 +167,14 @@ mod tests {
let r = iter.collect::<Vec<_>>();
assert_eq!(r, (0..16).map(|x| x == 0 || x == 12).collect::<Vec<_>>(),);
}

/// [`BitChunkOnes`] yields the positions of the set bits, lowest first, and
/// is exhausted afterwards.
#[test]
fn test_ones() {
    // Bits 0 and 12 (0-based) are set. `from_le_bytes` pins the byte order so
    // the expected positions hold on big-endian targets too; with
    // `from_ne_bytes` the same bytes would set bits 4 and 8 there.
    let a = u16::from_le_bytes([0b00000001, 0b00010000]);
    let mut iter = BitChunkOnes::new(a);
    assert_eq!(iter.size_hint(), (2, Some(2)));
    assert_eq!(iter.next(), Some(0));
    assert_eq!(iter.next(), Some(12));
    // Once every set bit has been reported the iterator must be empty.
    assert_eq!(iter.size_hint(), (0, Some(0)));
    assert_eq!(iter.next(), None);
}
}
2 changes: 1 addition & 1 deletion src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
//! for SIMD, at [`mod@simd`].
mod bit_chunk;
pub use bit_chunk::{BitChunk, BitChunkIter};
pub use bit_chunk::{BitChunk, BitChunkIter, BitChunkOnes};
mod index;
pub mod simd;
pub use index::*;
Expand Down

0 comments on commit 4fbcfa2

Please sign in to comment.