Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved assign (#1085)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Jun 19, 2022
1 parent ab149ae commit d1ab4ef
Show file tree
Hide file tree
Showing 9 changed files with 72 additions and 186 deletions.
7 changes: 3 additions & 4 deletions benches/assign_ops.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use criterion::{criterion_group, criterion_main, Criterion};

use arrow2::compute::arity_assign::binary;
use arrow2::compute::arity_assign::{binary, unary};
use arrow2::{
compute::arithmetics::basic::{mul, mul_scalar},
util::bench_util::*,
Expand All @@ -13,8 +13,7 @@ fn add_benchmark(c: &mut Criterion) {
let mut arr_a = create_primitive_array::<f32>(size, 0.2);
c.bench_function(&format!("apply_mul 2^{}", log2_size), |b| {
b.iter(|| {
criterion::black_box(&mut arr_a)
.apply_values_mut(|x| x.iter_mut().for_each(|x| *x *= 1.01));
unary(criterion::black_box(&mut arr_a), |x| x * 1.01);
assert!(!arr_a.value(10).is_nan());
})
});
Expand All @@ -30,7 +29,7 @@ fn add_benchmark(c: &mut Criterion) {
let mut arr_a = create_primitive_array::<f32>(size, 0.2);
let mut arr_b = create_primitive_array_with_seed::<f32>(size, 0.2, 10);
// convert to be close to 1.01
arr_b.apply_values_mut(|x| x.iter_mut().for_each(|x| *x = 1.01 + *x / 20.0));
unary(&mut arr_b, |x| 1.01 + x / 20.0);

c.bench_function(&format!("apply_mul null 2^{}", log2_size), |b| {
b.iter(|| {
Expand Down
8 changes: 5 additions & 3 deletions examples/cow.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
// This example demos how to operate on arrays in-place.
use arrow2::array::{Array, PrimitiveArray};
use arrow2::compute::arity_assign;

fn main() {
// say we have have received an array
// say we have have received an `Array`
let mut array: Box<dyn Array> = PrimitiveArray::from_vec(vec![1i32, 2]).boxed();

// we can apply a transformation to its values without allocating a new array as follows:

// 1. downcast it to the correct type (known via `array.data_type().to_physical_type()`)
let array = array
.as_any_mut()
.downcast_mut::<PrimitiveArray<i32>>()
.unwrap();

// 2. call `apply_values` with the function to apply over the values
array.apply_values_mut(|x| x.iter_mut().for_each(|x| *x *= 10));
// 2. call `unary` with the function to apply to each value
arity_assign::unary(array, |x| x * 10);

// confirm that it gives the right result :)
assert_eq!(array, &PrimitiveArray::from_vec(vec![10i32, 20]));
Expand Down
19 changes: 0 additions & 19 deletions src/array/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,25 +236,6 @@ impl BooleanArray {
self.values = values.into();
}

/// Applies a function `f` to the validity of this array, cloning it
/// iff it is being shared.
///
/// This is an API to leverage clone-on-write
/// # Implementation
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
/// # Panics
/// This function panics if the function modifies the length of the [`MutableBitmap`].
pub fn apply_validity_mut<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
if let Some(validity) = self.validity.as_mut() {
let owned_validity = std::mem::take(validity);
let mut mut_bitmap = owned_validity.make_mut();
f(&mut mut_bitmap);
assert_eq!(mut_bitmap.len(), self.values.len());
*validity = mut_bitmap.into();
}
}

/// Try to convert this [`BooleanArray`] to a [`MutableBooleanArray`]
pub fn into_mut(self) -> Either<Self, MutableBooleanArray> {
use Either::*;
Expand Down
55 changes: 10 additions & 45 deletions src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
bitmap::{
utils::{zip_validity, ZipValidity},
Bitmap, MutableBitmap,
Bitmap,
},
buffer::Buffer,
datatypes::*,
Expand Down Expand Up @@ -283,59 +283,24 @@ impl<T: NativeType> PrimitiveArray<T> {
self.values = values;
}

/// Applies a function `f` to the values of this array, cloning the values
/// iff they are being shared with others
///
/// This is an API to use clone-on-write
/// # Implementation
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
/// # Panics
/// This function panics, if `f` modifies the length of `&mut [T]`
pub fn apply_values_mut<F: Fn(&mut [T])>(&mut self, f: F) {
let values = std::mem::take(&mut self.values);
let mut values = values.make_mut();
let len = values.len();
f(&mut values);
assert_eq!(values.len(), len, "values length must remain the same");
self.values = values.into();
}

/// Applies a function `f` to the validity of this array, cloning it
/// iff it is being shared.
///
/// This is an API to leverage clone-on-write
/// # Implementation
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
/// # Panics
/// This function panics if the function modifies the length of the [`MutableBitmap`].
pub fn apply_validity_mut<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
if let Some(validity) = self.validity.as_mut() {
let owned_validity = std::mem::take(validity);
let mut mut_bitmap = owned_validity.make_mut();
f(&mut mut_bitmap);
assert_eq!(mut_bitmap.len(), self.values.len());
*validity = mut_bitmap.into();
}
}

/// Applies a function `f` to the validity of this array, the caller can decide to make
/// it mutable or not.
///
/// This is an API to leverage clone-on-write
/// # Implementation
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
/// # Panics
/// This function panics if the function modifies the length of the [`MutableBitmap`].
pub fn apply_validity<F: Fn(&mut Bitmap)>(&mut self, f: F) {
if let Some(validity) = self.validity.as_mut() {
f(validity);
/// This function panics if the function `f` modifies the length of the [`Bitmap`].
pub fn apply_validity<F: Fn(Bitmap) -> Bitmap>(&mut self, f: F) {
if let Some(validity) = std::mem::take(&mut self.validity) {
assert_eq!(validity.len(), self.values.len());
self.validity = Some(f(validity))
}
}

/// Returns an option of a mutable reference to the values of this [`PrimitiveArray`].
pub fn get_mut_values(&mut self) -> Option<&mut [T]> {
self.values.get_mut().map(|x| x.as_mut())
}

/// Try to convert this [`PrimitiveArray`] to a [`MutablePrimitiveArray`] via copy-on-write semantics.
///
/// A [`PrimitiveArray`] is backed by a [`Buffer`] and [`Bitmap`] which are essentially `Arc<Vec<_>>`.
Expand Down
20 changes: 1 addition & 19 deletions src/array/primitive/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,27 +90,9 @@ impl<T: NativeType> MutablePrimitiveArray<T> {
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
/// # Panics
/// This function panics, if `f` modifies the length of `&mut [T]`
/// This function panics iff `f` panics
pub fn apply_values<F: Fn(&mut [T])>(&mut self, f: F) {
let len = self.values.len();
f(&mut self.values);
assert_eq!(len, self.values.len(), "values length must remain the same")
}

/// Applies a function `f` to the validity of this array, cloning it
/// iff it is being shared.
///
/// This is an API to leverage clone-on-write
/// # Implementation
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
/// # Panics
/// This function panics if the function modifies the length of the [`MutableBitmap`].
pub fn apply_validity<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
if let Some(validity) = &mut self.validity {
f(validity);
assert_eq!(validity.len(), self.values.len());
}
}
}

Expand Down
26 changes: 17 additions & 9 deletions src/buffer/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,20 @@ impl<T: NativeType> Buffer<T> {
self.offset
}

/// Gets a mutable reference to its underlying [`Vec`], if it not being shared.
///
/// This operation returns a [`Vec`] iff this [`Buffer`]:
/// * is not an offsetted slice of another [`Buffer`]
/// * has not been cloned (i.e. [`Arc`]`::get_mut` yields [`Some`])
/// * has not been imported from the c data interface (FFI)
pub fn get_mut(&mut self) -> Option<&mut Vec<T>> {
if self.offset != 0 {
None
} else {
Arc::get_mut(&mut self.data).and_then(|b| b.get_vec())
}
}

/// Converts this [`Buffer`] to either a [`Buffer`] or a [`Vec`], returning itself if the conversion
/// is not possible
///
Expand All @@ -145,16 +159,10 @@ impl<T: NativeType> Buffer<T> {
/// * has not been cloned (i.e. [`Arc`]`::get_mut` yields [`Some`])
/// * has not been imported from the c data interface (FFI)
pub fn into_mut(mut self) -> Either<Self, Vec<T>> {
if self.offset != 0 {
Either::Left(self)
if let Some(vec) = self.get_mut() {
Either::Right(std::mem::take(vec))
} else {
match Arc::get_mut(&mut self.data).and_then(|b| b.get_vec()) {
Some(v) => {
let data = std::mem::take(v);
Either::Right(data)
}
None => Either::Left(self),
}
Either::Left(self)
}
}

Expand Down
77 changes: 36 additions & 41 deletions src/compute/arity_assign.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ use super::utils::check_same_len;
use crate::{array::PrimitiveArray, types::NativeType};
use either::Either;

/// Applies an unary function to a [`PrimitiveArray`] in-place via cow semantics.
/// Applies an unary function to a [`PrimitiveArray`], optionally in-place.
///
/// # Implementation
/// This is the fastest method to apply a binary operation and it is often vectorized (SIMD).
/// This function tries to apply the function directly to the values of the array.
/// If that region is shared, this function creates a new region and writes to it.
///
/// # Panics
/// This function panics iff
/// * the arrays have a different length.
Expand All @@ -18,14 +20,22 @@ where
I: NativeType,
F: Fn(I) -> I,
{
array.apply_values_mut(|values| values.iter_mut().for_each(|v| *v = op(*v)));
if let Some(values) = array.get_mut_values() {
// mutate in place
values.iter_mut().for_each(|l| *l = op(*l));
} else {
// alloc and write to new region
let values = array.values().iter().map(|l| op(*l)).collect::<Vec<_>>();
array.set_values(values.into());
}
}

/// Applies a binary operations to two [`PrimitiveArray`], applying the operation
/// in-place to the `lhs` via cow semantics.
/// Applies a binary function to two [`PrimitiveArray`]s, optionally in-place, returning
/// a new [`PrimitiveArray`].
///
/// # Implementation
/// This is the fastest way to perform a binary operation and it is often vectorized (SIMD).
/// This function tries to apply the function directly to the values of the array.
/// If that region is shared, this function creates a new region and writes to it.
/// # Panics
/// This function panics iff
/// * the arrays have a different length.
Expand All @@ -48,54 +58,39 @@ where
// bitmap has an offset.
if let Some(rhs) = rhs.validity() {
if lhs.validity().is_none() {
*lhs = lhs.with_validity(Some(rhs.clone()))
lhs.set_validity(Some(rhs.clone()));
} else {
lhs.apply_validity(|bitmap| {
// we need to take ownership for the `into_mut` call, but leave the `&mut` lhs intact
// so that we can later assign the result to out `&mut bitmap`
let owned_lhs = std::mem::take(bitmap);

*bitmap = match owned_lhs.into_mut() {
// we take alloc and write to new buffer
match bitmap.into_mut() {
Either::Left(immutable) => {
// we allocate a new bitmap
// alloc new region
&immutable & rhs
}
// we can mutate in place, happy days.
Either::Right(mut mutable) => {
// mutate in place
let mut mutable_ref = &mut mutable;
mutable_ref &= rhs;
mutable.into()
}
}
});
}
}
};

// we need to take ownership for the `into_mut` call, but leave the `&mut` lhs intact
// so that we can later assign the result to out `&mut lhs`
let owned_lhs = std::mem::take(lhs);

*lhs = match owned_lhs.into_mut() {
// we take alloc and write to new buffer
Either::Left(mut immutable) => {
let values = immutable
.values()
.iter()
.zip(rhs.values().iter())
.map(|(l, r)| op(*l, *r))
.collect::<Vec<_>>();
immutable.set_values(values.into());
immutable
}
// we can mutate in place
Either::Right(mut mutable) => {
mutable.apply_values(|x| {
x.iter_mut()
.zip(rhs.values().iter())
.for_each(|(l, r)| *l = op(*l, *r))
});
mutable.into()
}
if let Some(values) = lhs.get_mut_values() {
// mutate values in place
values
.iter_mut()
.zip(rhs.values().iter())
.for_each(|(l, r)| *l = op(*l, *r));
} else {
// alloc new region
let values = lhs
.values()
.iter()
.zip(rhs.values().iter())
.map(|(l, r)| op(*l, *r))
.collect::<Vec<_>>();
lhs.set_values(values.into());
}
}
24 changes: 0 additions & 24 deletions tests/it/array/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,27 +131,3 @@ fn from_iter() {
let a: BooleanArray = iter.collect();
assert_eq!(a.len(), 2);
}

#[test]
fn apply_values() {
let mut a = BooleanArray::from([Some(true), Some(false), None]);
a.apply_values_mut(|x| {
let mut a = std::mem::take(x);
a = !a;
*x = a;
});
let expected = BooleanArray::from([Some(false), Some(true), None]);
assert_eq!(a, expected);
}

#[test]
fn apply_validity() {
let mut a = BooleanArray::from([Some(true), Some(false), None]);
a.apply_validity_mut(|x| {
let mut a = std::mem::take(x);
a = !a;
*x = a;
});
let expected = BooleanArray::from([None, None, Some(false)]);
assert_eq!(a, expected);
}
22 changes: 0 additions & 22 deletions tests/it/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,25 +124,3 @@ fn into_mut_3() {
let array = PrimitiveArray::new(DataType::Int32, values, validity);
assert!(array.into_mut().is_right());
}

#[test]
fn apply_values() {
let mut a = PrimitiveArray::from([Some(1), Some(2), None]);
a.apply_values_mut(|x| {
x[0] = 10;
});
let expected = PrimitiveArray::from([Some(10), Some(2), None]);
assert_eq!(a, expected);
}

#[test]
fn apply_validity() {
let mut a = PrimitiveArray::from([Some(1), Some(2), None]);
a.apply_validity_mut(|x| {
let mut a = std::mem::take(x);
a = !a;
*x = a;
});
let expected = PrimitiveArray::from([None, None, Some(0)]);
assert_eq!(a, expected);
}

0 comments on commit d1ab4ef

Please sign in to comment.