From 3b987e39ccd450e7158e5dde6155cc1e398f7fd6 Mon Sep 17 00:00:00 2001
From: "Jorge C. Leitao"
Date: Thu, 2 Sep 2021 06:08:02 +0000
Subject: [PATCH 1/2] Added benches for bitmap unary.

---
 Cargo.toml            |  4 ++++
 benches/bitmap_ops.rs | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 benches/bitmap_ops.rs

diff --git a/Cargo.toml b/Cargo.toml
index d9abdea1c3a..fae01b13191 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -183,3 +183,7 @@ harness = false
 [[bench]]
 name = "concat"
 harness = false
+
+[[bench]]
+name = "bitmap_ops"
+harness = false
diff --git a/benches/bitmap_ops.rs b/benches/bitmap_ops.rs
new file mode 100644
index 00000000000..a2884799923
--- /dev/null
+++ b/benches/bitmap_ops.rs
@@ -0,0 +1,37 @@
+use arrow2::bitmap::Bitmap;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+
+fn bench_arrow2(lhs: &Bitmap, rhs: &Bitmap) {
+    let r = lhs | rhs;
+    assert!(r.null_count() > 0);
+}
+
+fn add_benchmark(c: &mut Criterion) {
+    (10..=20).step_by(2).for_each(|log2_size| {
+        let size = 2usize.pow(log2_size);
+
+        let bitmap: Bitmap = (0..size).into_iter().map(|x| x % 3 == 0).collect();
+        c.bench_function(&format!("bitmap aligned not 2^{}", log2_size), |b| {
+            b.iter(|| {
+                let r = !&bitmap;
+                assert!(r.null_count() > 0);
+            })
+        });
+        let bitmap1 = bitmap.clone().slice(1, size - 1);
+        c.bench_function(&format!("bitmap not 2^{}", log2_size), |b| {
+            b.iter(|| {
+                let r = !&bitmap1;
+                assert!(r.null_count() > 0);
+            })
+        });
+
+        let bitmap1: Bitmap = (0..size).into_iter().map(|x| x % 4 == 0).collect();
+        c.bench_function(&format!("bitmap aligned or 2^{}", log2_size), |b| {
+            b.iter(|| bench_arrow2(&bitmap, &bitmap1))
+        });
+    });
+}
+
+criterion_group!(benches, add_benchmark);
+criterion_main!(benches);

From 0151e4b324d6413b1d8c8ce8ee884e2b805f97c8 Mon Sep 17 00:00:00 2001
From: "Jorge C. Leitao"
Date: Thu, 2 Sep 2021 06:08:30 +0000
Subject: [PATCH 2/2] Improved perf of binary unary op.

---
 src/bitmap/bitmap_ops.rs                      | 35 +++++++++++++------
 .../utils/chunk_iterator/chunks_exact.rs      |  4 +++
 src/bitmap/utils/chunk_iterator/mod.rs        |  2 +-
 3 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/src/bitmap/bitmap_ops.rs b/src/bitmap/bitmap_ops.rs
index d092a24aef2..b402b717bf4 100644
--- a/src/bitmap/bitmap_ops.rs
+++ b/src/bitmap/bitmap_ops.rs
@@ -2,7 +2,10 @@ use std::ops::{BitAnd, BitOr, Not};
 
 use crate::buffer::MutableBuffer;
 
-use super::Bitmap;
+use super::{
+    utils::{BitChunkIterExact, BitChunksExact},
+    Bitmap,
+};
 
 /// Apply a bitwise operation `op` to four inputs and return the result as a [`Bitmap`].
 pub fn quaternary<F>(a1: &Bitmap, a2: &Bitmap, a3: &Bitmap, a4: &Bitmap, op: F) -> Bitmap
@@ -100,23 +103,33 @@ where
     Bitmap::from_u8_buffer(buffer, length)
 }
 
-/// Apply a bitwise operation `op` to one input and return the result as a [`Bitmap`].
-pub fn unary<F>(lhs: &Bitmap, op: F) -> Bitmap
+fn unary_impl<F, I>(iter: I, op: F, length: usize) -> Bitmap
 where
+    I: BitChunkIterExact<u64>,
     F: Fn(u64) -> u64,
 {
-    let mut lhs_chunks = lhs.chunks();
+    let rem = op(iter.remainder());
 
-    let chunks = lhs_chunks.by_ref().map(|left| op(left));
-    let mut buffer = unsafe { MutableBuffer::from_chunk_iter_unchecked(chunks) };
+    let iterator = iter.map(|left| op(left)).chain(std::iter::once(rem));
 
-    let remainder_bytes = lhs_chunks.remainder_len().saturating_add(7) / 8;
-    let rem = op(lhs_chunks.remainder());
+    let buffer = MutableBuffer::from_trusted_len_iter(iterator);
 
-    let rem = &rem.to_ne_bytes()[..remainder_bytes];
-    buffer.extend_from_slice(rem);
+    Bitmap::from_u8_buffer(buffer.into(), length)
+}
 
-    Bitmap::from_u8_buffer(buffer, lhs.len())
+/// Apply a bitwise operation `op` to one input and return the result as a [`Bitmap`].
+pub fn unary<F>(lhs: &Bitmap, op: F) -> Bitmap
+where
+    F: Fn(u64) -> u64,
+{
+    let (slice, offset, length) = lhs.as_slice();
+    if offset == 0 {
+        let iter = BitChunksExact::<u64>::new(slice, length);
+        unary_impl(iter, op, lhs.len())
+    } else {
+        let iter = lhs.chunks::<u64>();
+        unary_impl(iter, op, lhs.len())
+    }
 }
 
 fn and(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
diff --git a/src/bitmap/utils/chunk_iterator/chunks_exact.rs b/src/bitmap/utils/chunk_iterator/chunks_exact.rs
index 5b7389d3e48..25bb865d26c 100644
--- a/src/bitmap/utils/chunk_iterator/chunks_exact.rs
+++ b/src/bitmap/utils/chunk_iterator/chunks_exact.rs
@@ -1,5 +1,7 @@
 use std::{convert::TryInto, slice::ChunksExact};
 
+use crate::trusted_len::TrustedLen;
+
 use super::{BitChunk, BitChunkIterExact};
 
 /// An iterator over a slice of bytes in [`BitChunk`]s.
@@ -84,6 +86,8 @@ impl<T: BitChunk> Iterator for BitChunksExact<'_, T> {
     }
 }
 
+unsafe impl<T: BitChunk> TrustedLen for BitChunksExact<'_, T> {}
+
 impl<T: BitChunk> BitChunkIterExact<T> for BitChunksExact<'_, T> {
     #[inline]
     fn remainder(&self) -> T {
diff --git a/src/bitmap/utils/chunk_iterator/mod.rs b/src/bitmap/utils/chunk_iterator/mod.rs
index 3c1cb47b420..3fef1350f7b 100644
--- a/src/bitmap/utils/chunk_iterator/mod.rs
+++ b/src/bitmap/utils/chunk_iterator/mod.rs
@@ -10,7 +10,7 @@ use crate::{trusted_len::TrustedLen, types::BitChunkIter};
 pub(crate) use merge::merge_reversed;
 
 /// Trait representing an exact iterator over bytes in [`BitChunk`].
-pub trait BitChunkIterExact<B: BitChunk>: Iterator<Item = B> {
+pub trait BitChunkIterExact<B: BitChunk>: TrustedLen<Item = B> {
     /// The remainder of the iterator.
     fn remainder(&self) -> B;
 }