Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added cow APIs (2x-10x vs non-cow) (#1061)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Jun 10, 2022
1 parent 5f657aa commit 4e1dc00
Show file tree
Hide file tree
Showing 17 changed files with 535 additions and 4 deletions.
8 changes: 8 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -321,3 +321,11 @@ harness = false
[[bench]]
name = "slices_iterator"
harness = false

[[bench]]
name = "bitmap_assign_ops"
harness = false

[[bench]]
name = "assign_ops"
harness = false
29 changes: 29 additions & 0 deletions benches/assign_ops.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use criterion::{criterion_group, criterion_main, Criterion};

use arrow2::{compute::arithmetics::basic::mul_scalar, util::bench_util::create_primitive_array};

/// Registers two benchmarks per array size (`2^10, 2^12, ..., 2^20` elements):
/// an in-place multiplication through `apply_values` and the equivalent
/// `mul_scalar` call that returns a new array.
fn add_benchmark(c: &mut Criterion) {
    for log2_size in (10..=20).step_by(2) {
        let size = 2usize.pow(log2_size);

        // In-place variant: the same array is mutated on every iteration.
        let mut in_place = create_primitive_array::<f32>(size, 0.2);
        let in_place_name = format!("apply_mul 2^{}", log2_size);
        c.bench_function(&in_place_name, |b| {
            b.iter(|| {
                criterion::black_box(&mut in_place)
                    .apply_values(|values| values.iter_mut().for_each(|v| *v *= 1.01));
                // Read a value back so the work cannot be optimized away.
                assert!(!in_place.value(10).is_nan());
            })
        });

        // Baseline variant: the input is only read; the result is a fresh array.
        let input = create_primitive_array::<f32>(size, 0.2);
        let baseline_name = format!("mul 2^{}", log2_size);
        c.bench_function(&baseline_name, |b| {
            b.iter(|| {
                let product = mul_scalar(criterion::black_box(&input), &1.01f32);
                assert!(!product.value(10).is_nan());
            })
        });
    }
}

criterion_group!(benches, add_benchmark);
criterion_main!(benches);
47 changes: 47 additions & 0 deletions benches/bitmap_assign_ops.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
use criterion::{criterion_group, criterion_main, Criterion};

use arrow2::bitmap::{binary_assign, unary_assign};
use arrow2::bitmap::{Bitmap, MutableBitmap};

/// Registers benchmarks comparing in-place operations on [`MutableBitmap`]
/// (`unary_assign`, `binary_assign`) with the corresponding operators on
/// [`Bitmap`] (`!`, `&`), for sizes of `2^10, 2^12, ..., 2^20` bits.
fn add_benchmark(c: &mut Criterion) {
    (10..=20).step_by(2).for_each(|log2_size| {
        let size = 2usize.pow(log2_size);

        // In-place negation of a mutable bitmap.
        let mut bitmap: MutableBitmap = (0..size).map(|x| x % 3 == 0).collect();
        c.bench_function(&format!("mutablebitmap not 2^{}", log2_size), |b| {
            b.iter(|| {
                unary_assign(criterion::black_box(&mut bitmap), |x: u64| !x);
                assert!(!bitmap.is_empty());
            })
        });

        // Negation of an immutable bitmap through the `!` operator.
        let bitmap: Bitmap = (0..size).map(|x| x % 3 == 0).collect();
        c.bench_function(&format!("bitmap not 2^{}", log2_size), |b| {
            b.iter(|| {
                let r = !criterion::black_box(&bitmap);
                assert!(!r.is_empty());
            })
        });

        // In-place `&` of a mutable bitmap with an immutable one.
        let mut lhs: MutableBitmap = (0..size).map(|x| x % 3 == 0).collect();
        let rhs: Bitmap = (0..size).map(|x| x % 4 == 0).collect();
        c.bench_function(&format!("mutablebitmap and 2^{}", log2_size), |b| {
            b.iter(|| {
                binary_assign(criterion::black_box(&mut lhs), &rhs, |x: u64, y| x & y);
                // Observe the bitmap that was actually mutated by this benchmark.
                assert!(!lhs.is_empty());
            })
        });

        // `&` of two immutable bitmaps through the operator.
        let lhs: Bitmap = (0..size).map(|x| x % 3 == 0).collect();
        let rhs: Bitmap = (0..size).map(|x| x % 4 == 0).collect();
        c.bench_function(&format!("bitmap and 2^{}", log2_size), |b| {
            b.iter(|| {
                let r = criterion::black_box(&lhs) & &rhs;
                assert!(!r.is_empty());
            })
        });
    });
}

criterion_group!(benches, add_benchmark);
criterion_main!(benches);
40 changes: 39 additions & 1 deletion src/array/boolean/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{
bitmap::Bitmap,
bitmap::{Bitmap, MutableBitmap},
datatypes::{DataType, PhysicalType},
error::Error,
};
Expand Down Expand Up @@ -92,6 +92,44 @@ impl BooleanArray {
pub fn arced(self) -> std::sync::Arc<dyn Array> {
std::sync::Arc::new(self)
}

/// Runs `f` over a mutable view of this array's values, copying the values
/// first iff they are currently shared with another owner.
///
/// This is a clone-on-write entry point.
/// # Implementation
/// The cost is `O(f)` when the values are not shared, and `O(N) + O(f)`
/// when they are (the values are copied once before `f` runs).
/// # Panics
/// Panics if the array has a validity and `f` leaves the [`MutableBitmap`]
/// with a length different from the validity's length.
pub fn apply_values<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
    // Move the values out (leaving a default in place) so they can be
    // converted into their mutable form.
    let mut mutable = std::mem::take(&mut self.values).make_mut();
    f(&mut mutable);
    if let Some(validity) = self.validity.as_ref() {
        assert_eq!(validity.len(), mutable.len());
    }
    self.values = mutable.into();
}

/// Runs `f` over a mutable view of this array's validity, copying it first
/// iff it is currently shared. Does nothing when the array has no validity.
///
/// This is a clone-on-write entry point.
/// # Implementation
/// The cost is `O(f)` when the validity is not shared, and `O(N) + O(f)`
/// when it is (the validity is copied once before `f` runs).
/// # Panics
/// Panics if `f` leaves the [`MutableBitmap`] with a length different from
/// the length of the values.
pub fn apply_validity<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
    if let Some(slot) = &mut self.validity {
        // Move the bitmap out of the slot (leaving a default in place) so it
        // can be converted into its mutable form.
        let mut mutable = std::mem::take(slot).make_mut();
        f(&mut mutable);
        assert_eq!(mutable.len(), self.values.len());
        *slot = mutable.into();
    }
}
}

// must use
Expand Down
35 changes: 34 additions & 1 deletion src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
bitmap::{
utils::{zip_validity, ZipValidity},
Bitmap,
Bitmap, MutableBitmap,
},
buffer::Buffer,
datatypes::*,
Expand Down Expand Up @@ -252,6 +252,39 @@ impl<T: NativeType> PrimitiveArray<T> {
arr
}

/// Runs `f` over a mutable slice of this array's values, copying the values
/// first iff they are currently shared with another owner.
///
/// This is a clone-on-write entry point.
/// # Implementation
/// The cost is `O(f)` when the values are not shared, and `O(N) + O(f)`
/// when they are (the values are copied once before `f` runs).
pub fn apply_values<F: Fn(&mut [T])>(&mut self, f: F) {
    // Move the buffer out (leaving a default in place) so it can be
    // converted into its mutable form.
    let mut owned = std::mem::take(&mut self.values).make_mut();
    f(&mut owned);
    self.values = owned.into();
}

/// Runs `f` over a mutable view of this array's validity, copying it first
/// iff it is currently shared. Does nothing when the array has no validity.
///
/// This is a clone-on-write entry point.
/// # Implementation
/// The cost is `O(f)` when the validity is not shared, and `O(N) + O(f)`
/// when it is (the validity is copied once before `f` runs).
/// # Panics
/// Panics if `f` leaves the [`MutableBitmap`] with a length different from
/// the length of the values.
pub fn apply_validity<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
    if let Some(slot) = &mut self.validity {
        // Move the bitmap out of the slot (leaving a default in place) so it
        // can be converted into its mutable form.
        let mut mutable = std::mem::take(slot).make_mut();
        f(&mut mutable);
        assert_eq!(mutable.len(), self.values.len());
        *slot = mutable.into();
    }
}

/// Try to convert this [`PrimitiveArray`] to a [`MutablePrimitiveArray`] via copy-on-write semantics.
///
/// A [`PrimitiveArray`] is backed by a [`Buffer`] and [`Bitmap`] which are essentially `Arc<Vec<_>>`.
Expand Down
191 changes: 191 additions & 0 deletions src/bitmap/assign_ops.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
use crate::bitmap::{Bitmap, MutableBitmap};

use super::utils::{BitChunk, BitChunkIterExact, BitChunksExact};

/// Applies `op` to the bits of a [`MutableBitmap`] in place, one chunk of
/// type `T` at a time.
///
/// This function can be used for operations like `!` on a [`MutableBitmap`].
pub fn unary_assign<T: BitChunk, F: Fn(T) -> T>(bitmap: &mut MutableBitmap, op: F) {
    let mut chunks = bitmap.bitchunks_exact_mut::<T>();

    // Complete chunks: decode, transform and write back over the same bytes.
    for chunk in chunks.by_ref() {
        let current: T = match (chunk as &[u8]).try_into() {
            Ok(bytes) => T::from_ne_bytes(bytes),
            Err(_) => unreachable!(),
        };
        chunk.copy_from_slice(op(current).to_ne_bytes().as_ref());
    }

    let tail_len = chunks.remainder().len();
    if tail_len == 0 {
        return;
    }

    // Trailing bytes: copy them into a zero-initialized chunk, transform it,
    // and write back only as many bytes as the remainder holds.
    let mut padded = T::zero().to_ne_bytes();
    for (index, byte) in chunks.remainder().iter().enumerate() {
        padded[index] = *byte;
    }
    let updated = op(T::from_ne_bytes(padded)).to_ne_bytes();
    chunks
        .remainder()
        .copy_from_slice(&updated.as_ref()[..tail_len]);
}

/// Bitwise negation that consumes the bitmap and flips it in place.
impl std::ops::Not for MutableBitmap {
    type Output = Self;

    #[inline]
    fn not(self) -> Self {
        let mut flipped = self;
        // Process the bitmap in `u64` chunks.
        unary_assign(&mut flipped, |word: u64| !word);
        flipped
    }
}

/// Combines `lhs` with the chunks yielded by `rhs` through `op`, writing the
/// result back into `lhs`.
///
/// Complete chunks are processed pairwise; the trailing bytes of `lhs` are
/// combined with `rhs.remainder()`.
fn binary_assign_impl<I, T, F>(lhs: &mut MutableBitmap, mut rhs: I, op: F)
where
    I: BitChunkIterExact<T>,
    T: BitChunk,
    F: Fn(T, T) -> T,
{
    let mut lhs_chunks = lhs.bitchunks_exact_mut::<T>();

    // Complete chunks: decode the left side, combine, write back in place.
    for (left, right) in lhs_chunks.by_ref().zip(rhs.by_ref()) {
        let current: T = match (left as &[u8]).try_into() {
            Ok(bytes) => T::from_ne_bytes(bytes),
            Err(_) => unreachable!(),
        };
        left.copy_from_slice(op(current, right).to_ne_bytes().as_ref());
    }

    let tail_len = lhs_chunks.remainder().len();
    let rhs_tail = rhs.remainder();
    if tail_len == 0 {
        return;
    }

    // Trailing bytes: copy them into a zero-initialized chunk, combine it
    // with the right-hand remainder, and write back only `tail_len` bytes.
    let mut padded = T::zero().to_ne_bytes();
    for (index, byte) in lhs_chunks.remainder().iter().enumerate() {
        padded[index] = *byte;
    }
    let updated = op(T::from_ne_bytes(padded), rhs_tail).to_ne_bytes();
    lhs_chunks
        .remainder()
        .copy_from_slice(&updated.as_ref()[..tail_len]);
}

/// Applies a bitwise binary operation between a [`MutableBitmap`] and a
/// [`Bitmap`], storing the result in the [`MutableBitmap`].
///
/// This function can be used for operations like `&=` on a [`MutableBitmap`].
/// # Panics
/// This function panics iff `lhs.len() != rhs.len()`
pub fn binary_assign<T: BitChunk, F>(lhs: &mut MutableBitmap, rhs: &Bitmap, op: F)
where
    F: Fn(T, T) -> T,
{
    assert_eq!(lhs.len(), rhs.len());

    let (slice, offset, length) = rhs.as_slice();
    match offset {
        // No bit offset: iterate over the bytes of `rhs` directly.
        0 => binary_assign_impl(lhs, BitChunksExact::<T>::new(slice, length), op),
        // Otherwise use the chunk iterator provided by `rhs` itself.
        _ => binary_assign_impl(lhs, rhs.chunks::<T>(), op),
    }
}

/// Computes bitwise `|` between `lhs` and `rhs`, assigning the result to `lhs`.
///
/// Shortcuts are taken based on `rhs.null_count()`, which this function
/// treats as the number of unset bits of `rhs`.
/// # Panics
/// Panics iff `lhs.len() != rhs.len()`. The check is performed up front so
/// that it applies to every branch, including the one that leaves `lhs`
/// untouched.
#[inline]
fn or_assign<T: BitChunk>(lhs: &mut MutableBitmap, rhs: &Bitmap) {
    assert_eq!(lhs.len(), rhs.len());

    let unset = rhs.null_count();
    if unset == 0 {
        // `rhs` is all set: the result is all set regardless of `lhs`.
        lhs.clear();
        lhs.extend_constant(rhs.len(), true);
    } else if unset == rhs.len() {
        // `rhs` is all unset: `x | 0 == x`, so `lhs` already holds the result.
    } else {
        binary_assign(lhs, rhs, |x: T, y| x | y)
    }
}

/// `|=` between a mutable reference to a [`MutableBitmap`] and a [`Bitmap`],
/// computed in place in `u64` chunks.
impl<'a, 'b> std::ops::BitOrAssign<&'a Bitmap> for &'b mut MutableBitmap {
    #[inline]
    fn bitor_assign(&mut self, rhs: &'a Bitmap) {
        // `self` is a reference to a reference; reborrow the inner bitmap.
        let target: &mut MutableBitmap = self;
        or_assign::<u64>(target, rhs)
    }
}

/// `|` between an owned [`MutableBitmap`] and a [`Bitmap`]: the left-hand
/// side is consumed, updated in place in `u64` chunks, and returned.
impl<'a> std::ops::BitOr<&'a Bitmap> for MutableBitmap {
    type Output = Self;

    #[inline]
    fn bitor(mut self, rhs: &'a Bitmap) -> Self {
        or_assign::<u64>(&mut self, rhs);
        self
    }
}

/// Computes bitwise `&` between `lhs` and `rhs`, assigning the result to `lhs`.
///
/// Shortcuts are taken based on `rhs.null_count()`, which this function
/// treats as the number of unset bits of `rhs`.
/// # Panics
/// Panics iff `lhs.len() != rhs.len()`.
#[inline]
fn and_assign<T: BitChunk>(lhs: &mut MutableBitmap, rhs: &Bitmap) {
    // Checked up front so that the no-op branch below enforces the same
    // contract as the other two.
    assert_eq!(lhs.len(), rhs.len());

    let unset = rhs.null_count();
    if unset == 0 {
        // `rhs` is all set: `x & 1 == x`, so `lhs` already holds the result.
    } else if unset == rhs.len() {
        // `rhs` is all unset: the result is all unset regardless of `lhs`.
        lhs.clear();
        lhs.extend_constant(rhs.len(), false);
    } else {
        binary_assign(lhs, rhs, |x: T, y| x & y)
    }
}

/// `&=` between a mutable reference to a [`MutableBitmap`] and a [`Bitmap`],
/// computed in place in `u64` chunks.
impl<'a, 'b> std::ops::BitAndAssign<&'a Bitmap> for &'b mut MutableBitmap {
    #[inline]
    fn bitand_assign(&mut self, rhs: &'a Bitmap) {
        // `self` is a reference to a reference; reborrow the inner bitmap.
        let target: &mut MutableBitmap = self;
        and_assign::<u64>(target, rhs)
    }
}

/// `&` between an owned [`MutableBitmap`] and a [`Bitmap`]: the left-hand
/// side is consumed, updated in place in `u64` chunks, and returned.
impl<'a> std::ops::BitAnd<&'a Bitmap> for MutableBitmap {
    type Output = Self;

    #[inline]
    fn bitand(mut self, rhs: &'a Bitmap) -> Self {
        and_assign::<u64>(&mut self, rhs);
        self
    }
}

/// Computes bitwise `^` between `lhs` and `rhs`, assigning the result to `lhs`.
///
/// Delegates to `binary_assign`, which panics if the lengths differ.
#[inline]
fn xor_assign<T: BitChunk>(lhs: &mut MutableBitmap, rhs: &Bitmap) {
    binary_assign(lhs, rhs, |left: T, right: T| left ^ right)
}

/// `^=` between a mutable reference to a [`MutableBitmap`] and a [`Bitmap`],
/// computed in place in `u64` chunks.
impl<'a, 'b> std::ops::BitXorAssign<&'a Bitmap> for &'b mut MutableBitmap {
    #[inline]
    fn bitxor_assign(&mut self, rhs: &'a Bitmap) {
        // `self` is a reference to a reference; reborrow the inner bitmap.
        let target: &mut MutableBitmap = self;
        xor_assign::<u64>(target, rhs)
    }
}

/// `^` between an owned [`MutableBitmap`] and a [`Bitmap`]: the left-hand
/// side is consumed, updated in place in `u64` chunks, and returned.
impl<'a> std::ops::BitXor<&'a Bitmap> for MutableBitmap {
    type Output = Self;

    #[inline]
    fn bitxor(mut self, rhs: &'a Bitmap) -> Self {
        xor_assign::<u64>(&mut self, rhs);
        self
    }
}
Loading

0 comments on commit 4e1dc00

Please sign in to comment.