Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ members = [
"vortex-array",
"vortex-btrblocks",
"vortex-buffer",
"vortex-compute",
"vortex-cxx",
"vortex-datafusion",
"vortex-dtype",
Expand Down Expand Up @@ -112,6 +113,7 @@ dirs = "6.0.0"
divan = { package = "codspeed-divan-compat", version = "4.0.4" }
dyn-hash = "0.2.0"
enum-iterator = "2.0.0"
enum-map = "2.7.3"
erased-serde = "0.4"
fastlanes = "0.5"
flatbuffers = "25.2.10"
Expand Down Expand Up @@ -216,6 +218,7 @@ vortex-array = { version = "0.1.0", path = "./vortex-array", default-features =
vortex-btrblocks = { version = "0.1.0", path = "./vortex-btrblocks", default-features = false }
vortex-buffer = { version = "0.1.0", path = "./vortex-buffer", default-features = false }
vortex-bytebool = { version = "0.1.0", path = "./encodings/bytebool", default-features = false }
vortex-compute = { version = "0.1.0", path = "./vortex-compute", default-features = false }
vortex-datafusion = { version = "0.1.0", path = "./vortex-datafusion", default-features = false }
vortex-datetime-parts = { version = "0.1.0", path = "./encodings/datetime-parts", default-features = false }
vortex-decimal-byte-parts = { version = "0.1.0", path = "encodings/decimal-byte-parts", default-features = false }
Expand All @@ -242,6 +245,7 @@ vortex-sequence = { version = "0.1.0", path = "encodings/sequence", default-feat
vortex-sparse = { version = "0.1.0", path = "./encodings/sparse", default-features = false }
vortex-tui = { version = "0.1.0", path = "./vortex-tui", default-features = false }
vortex-utils = { version = "0.1.0", path = "./vortex-utils", default-features = false }
vortex-vector = { version = "0.1.0", path = "./vortex-vector", default-features = false }
vortex-zigzag = { version = "0.1.0", path = "./encodings/zigzag", default-features = false }
vortex-zstd = { version = "0.1.0", path = "./encodings/zstd", default-features = false }
# END crates published by this project
Expand Down
9 changes: 9 additions & 0 deletions vortex-buffer/benches/vortex_bitbuffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,15 @@ fn bitwise_not_vortex_buffer(bencher: Bencher, length: usize) {
});
}

/// Benchmarks `!` applied to an owned `BitBufferMut` of `length` alternating bits, where the bit
/// at every even position is set.
#[divan::bench(args = INPUT_SIZE)]
fn bitwise_not_vortex_buffer_mut(bencher: Bencher, length: usize) {
    // A fresh input is generated for each measured run because `!` consumes the buffer.
    let alternating = || BitBufferMut::from_iter((0..length).map(|i| i % 2 == 0));

    bencher.with_inputs(alternating).bench_values(|buffer| {
        divan::black_box(!buffer);
    });
}

#[divan::bench(args = INPUT_SIZE)]
fn bitwise_not_arrow_buffer(bencher: Bencher, length: usize) {
bencher
Expand Down
14 changes: 13 additions & 1 deletion vortex-buffer/src/bit/buf_mut.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::ops::Not;

use arrow_buffer::bit_chunk_iterator::BitChunks;
use bitvec::view::BitView;

use crate::bit::{get_bit_unchecked, set_bit_unchecked, unset_bit_unchecked};
use crate::bit::{get_bit_unchecked, ops, set_bit_unchecked, unset_bit_unchecked};
use crate::{BitBuffer, BufferMut, ByteBufferMut, buffer_mut};

/// A mutable bitset buffer that allows random access to individual bits for set and get.
Expand Down Expand Up @@ -462,6 +464,16 @@ impl Default for BitBufferMut {
}
}

// Bitwise NOT for an owned mutable bit buffer: the buffer passed in is mutated in place and the
// same buffer is handed back as the result.
impl Not for BitBufferMut {
    type Output = BitBufferMut;

    fn not(self) -> Self::Output {
        let mut flipped = self;
        // Invert the buffer one 64-bit word at a time.
        ops::bitwise_unary_op_mut(&mut flipped, |word| !word);
        flipped
    }
}

impl From<&[bool]> for BitBufferMut {
fn from(value: &[bool]) -> Self {
let mut buf = BitBufferMut::new_unset(value.len());
Expand Down
22 changes: 20 additions & 2 deletions vortex-buffer/src/bit/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,22 @@ macro_rules! bitbuffer {
() => (
$crate::BitBuffer::empty()
);

// We capture single-element 0/1 cases to avoid ambiguity with the
// comma-separated expression case.
(0) => {
$crate::BitBuffer::from_iter([false])
};
(1) => {
$crate::BitBuffer::from_iter([true])
};

($elem:expr; $n:expr) => (
$crate::BitBuffer::full($elem, $n)
);
($($x:expr),+ $(,)?) => (
$crate::BitBuffer::from_iter([$($x),+])
);
// Match space-separated bit literals (0 or 1)
($($bit:tt)+) => {
$crate::BitBuffer::from_iter([$( $crate::bitbuffer!(@bit $bit) ),+])
};
Expand All @@ -45,13 +54,22 @@ macro_rules! bitbuffer_mut {
() => (
$crate::BitBufferMut::empty()
);

// We capture single-element 0/1 cases to avoid ambiguity with the
// comma-separated expression case. These arms must build a `BitBufferMut`
// like every other arm of this macro, not an immutable `BitBuffer`.
(0) => {
    $crate::BitBufferMut::from_iter([false])
};
(1) => {
    $crate::BitBufferMut::from_iter([true])
};

($elem:expr; $n:expr) => (
$crate::BitBufferMut::full($elem, $n)
);
($($x:expr),+ $(,)?) => (
$crate::BitBufferMut::from_iter([$($x),+])
);
// Match space-separated bit literals (0 or 1)
($($bit:tt)+) => {
$crate::BitBufferMut::from_iter([$( $crate::bitbuffer_mut!(@bit $bit) ),+])
};
Expand Down
34 changes: 33 additions & 1 deletion vortex-buffer/src/bit/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use crate::trusted_len::TrustedLenExt;
use crate::{BitBuffer, Buffer};
use crate::{BitBuffer, BitBufferMut, Buffer};

pub(super) fn bitwise_unary_op<F: FnMut(u64) -> u64>(buffer: &BitBuffer, op: F) -> BitBuffer {
let iter = buffer.chunks().iter_padded().map(op);
Expand All @@ -13,6 +13,38 @@ pub(super) fn bitwise_unary_op<F: FnMut(u64) -> u64>(buffer: &BitBuffer, op: F)
BitBuffer::new(result, buffer.len())
}

/// Applies `op` in place to the bytes backing `buffer`, one 64-bit word at a time.
///
/// The bytes returned by `buffer.as_mut_slice()` are processed in groups of eight. Each group is
/// decoded as a little-endian `u64`, passed through `op`, and written back. Any trailing bytes
/// (fewer than eight) are zero-extended into one final word, and only the bytes that actually
/// exist are written back.
///
/// `op` is invoked once per complete word and then exactly once more for the trailing word, even
/// when there are no trailing bytes (it then receives `0` and its result is discarded).
///
/// Words are always interpreted as little-endian, so complete words and the trailing partial word
/// agree on which byte holds which bits regardless of the host byte order. No `unsafe` is needed.
pub(super) fn bitwise_unary_op_mut<F: FnMut(u64) -> u64>(buffer: &mut BitBufferMut, mut op: F) {
    const WORD_BYTES: usize = std::mem::size_of::<u64>();

    let bytes: &mut [u8] = buffer.as_mut_slice();
    let mut words = bytes.chunks_exact_mut(WORD_BYTES);

    // Complete words. The byte slice carries no alignment guarantee for `u64`, so each word is
    // round-tripped through a `[u8; 8]` instead of being reinterpreted through a pointer cast.
    for chunk in words.by_ref() {
        let mut word = [0u8; WORD_BYTES];
        word.copy_from_slice(chunk);
        chunk.copy_from_slice(&op(u64::from_le_bytes(word)).to_le_bytes());
    }

    // Trailing bytes: zero-extend into a full word, apply `op`, and write back only the bytes
    // that belong to the buffer.
    let tail = words.into_remainder();
    let mut padded = [0u8; WORD_BYTES];
    padded[..tail.len()].copy_from_slice(tail);
    let result = op(u64::from_le_bytes(padded)).to_le_bytes();
    tail.copy_from_slice(&result[..tail.len()]);
}

pub(super) fn bitwise_binary_op<F: FnMut(u64, u64) -> u64>(
left: &BitBuffer,
right: &BitBuffer,
Expand Down
32 changes: 32 additions & 0 deletions vortex-compute/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[package]
name = "vortex-compute"
authors = { workspace = true }
categories = { workspace = true }
description = "Compute functions that operate over Vortex vectors, buffers, and masks"
edition = { workspace = true }
homepage = { workspace = true }
include = { workspace = true }
keywords = { workspace = true }
license = { workspace = true }
readme = { workspace = true }
repository = { workspace = true }
rust-version = { workspace = true }
version = { workspace = true }

[package.metadata.docs.rs]
all-features = true

[lints]
workspace = true

[dependencies]
vortex-buffer = { workspace = true }
vortex-error = { workspace = true }
vortex-mask = { workspace = true }
vortex-vector = { workspace = true }

[features]
default = ["filter", "logical"]

filter = []
logical = []
67 changes: 67 additions & 0 deletions vortex-compute/src/filter/bitbuffer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_buffer::{BitBuffer, BitBufferMut, get_bit};
use vortex_mask::{Mask, MaskIter};

use crate::filter::Filter;

/// If the filter density is above 80%, we use slices to filter the array instead of indices.
// TODO(ngates): we need more experimentation to determine the best threshold here.
const FILTER_SLICES_DENSITY_THRESHOLD: f64 = 0.8;

impl Filter for BitBuffer {
    /// Selects the bits of `self` at the positions yielded by `mask`.
    ///
    /// An all-true mask returns a clone of `self`; an all-false mask returns an empty buffer.
    /// Otherwise the mask is walked either as indices or as slices, depending on
    /// `FILTER_SLICES_DENSITY_THRESHOLD`.
    ///
    /// # Panics
    ///
    /// Panics if `mask.len()` differs from `self.len()`.
    fn filter(&self, mask: &Mask) -> Self {
        assert_eq!(mask.len(), self.len());

        // Handle the two trivial masks up front so only the general case remains below.
        let values = match mask {
            Mask::AllTrue(_) => return self.clone(),
            Mask::AllFalse(_) => return Self::empty(),
            Mask::Values(values) => values,
        };

        match values.threshold_iter(FILTER_SLICES_DENSITY_THRESHOLD) {
            MaskIter::Indices(indices) => filter_indices(self, indices),
            MaskIter::Slices(slices) => filter_slices(self, mask.true_count(), slices),
        }
    }
}

/// Builds a new bit buffer whose `i`-th bit is the bit of `bools` at logical position
/// `indices[i]`.
fn filter_indices(bools: &BitBuffer, indices: &[usize]) -> BitBuffer {
    let bytes = bools.inner().as_ref();
    let base = bools.offset();
    BitBuffer::collect_bool(indices.len(), |position| {
        // SAFETY: relies on `collect_bool` only calling this closure with positions below the
        // length it was given (`indices.len()`) — NOTE(review): confirm against its contract.
        let source = *unsafe { indices.get_unchecked(position) };
        // Logical positions are relative to the buffer's bit offset into the backing bytes.
        get_bit(bytes, base + source)
    })
}

/// Concatenates the bit ranges `start..end` of `buffer`, in the order given by `slices`, into a
/// new bit buffer. `output_len` is used as the initial capacity of the builder.
fn filter_slices(buffer: &BitBuffer, output_len: usize, slices: &[(usize, usize)]) -> BitBuffer {
    let mut out = BitBufferMut::with_capacity(output_len);
    for &(start, end) in slices {
        // TODO(ngates): we probably want a borrowed slice for things like this.
        let run = buffer.slice(start..end);
        out.append_buffer(&run);
    }
    out.freeze()
}

#[cfg(test)]
mod test {
    use vortex_buffer::bitbuffer;

    use super::*;

    /// Copying the ranges `0..1` and `2..3` out of `1 1 0` yields `1 0`.
    #[test]
    fn filter_bool_by_slice_test() {
        let source = bitbuffer![1 1 0];
        let kept_ranges: [(usize, usize); 2] = [(0, 1), (2, 3)];

        let filtered = filter_slices(&source, 2, &kept_ranges);

        assert_eq!(2, filtered.len());
        assert_eq!(filtered, bitbuffer![1 0]);
    }

    /// Picking positions 0 and 2 out of `1 1 0` yields `1 0`.
    #[test]
    fn filter_bool_by_index_test() {
        let source = bitbuffer![1 1 0];
        let kept_positions: [usize; 2] = [0, 2];

        let filtered = filter_indices(&source, &kept_positions);

        assert_eq!(2, filtered.len());
        assert_eq!(filtered, bitbuffer![1 0]);
    }
}
13 changes: 13 additions & 0 deletions vortex-compute/src/filter/bool.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_mask::Mask;
use vortex_vector::{BoolVector, VectorOps};

use crate::filter::Filter;

impl Filter for BoolVector {
    /// Filters the vector's bits and its validity with the same `mask`, then reassembles a
    /// `BoolVector` from the two filtered parts.
    fn filter(&self, mask: &Mask) -> Self {
        let bits = self.bits().filter(mask);
        let validity = self.validity().filter(mask);
        Self::new(bits, validity)
    }
}
87 changes: 87 additions & 0 deletions vortex-compute/src/filter/buffer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_buffer::{Buffer, BufferMut};
use vortex_mask::{Mask, MaskIter};

use crate::filter::Filter;

// This is modeled after the constant with the equivalent name in arrow-rs.
// It is the threshold handed to the mask's `threshold_iter` when filtering a `Buffer`; the
// result of that call determines whether the mask is walked as indices or as slices.
const FILTER_SLICES_SELECTIVITY_THRESHOLD: f64 = 0.8;

impl<T: Copy> Filter for Buffer<T> {
    /// Selects the elements of `self` at the positions yielded by `mask`.
    ///
    /// An all-true mask returns a clone of `self`; an all-false mask returns an empty buffer.
    /// Otherwise the mask is walked either as indices or as slices, depending on
    /// `FILTER_SLICES_SELECTIVITY_THRESHOLD`.
    ///
    /// # Panics
    ///
    /// Panics if `mask.len()` differs from `self.len()`.
    fn filter(&self, mask: &Mask) -> Self {
        assert_eq!(mask.len(), self.len());

        // Handle the two trivial masks up front so only the general case remains below.
        let values = match mask {
            Mask::AllTrue(_) => return self.clone(),
            Mask::AllFalse(_) => return Self::empty(),
            Mask::Values(values) => values,
        };

        match values.threshold_iter(FILTER_SLICES_SELECTIVITY_THRESHOLD) {
            MaskIter::Indices(indices) => filter_indices(self.as_slice(), indices),
            MaskIter::Slices(slices) => filter_slices(self.as_slice(), mask.true_count(), slices),
        }
    }
}

/// Gathers `values[i]` for every `i` in `indices`, in order, into a new buffer.
///
/// # Panics
///
/// Panics if any index is out of bounds for `values`.
fn filter_indices<T: Copy>(values: &[T], indices: &[usize]) -> Buffer<T> {
    let selected = indices.iter().map(|&position| values[position]);
    Buffer::<T>::from_trusted_len_iter(selected)
}

/// Concatenates the ranges `values[start..end]`, in the order given by `slices`, into a new
/// buffer. `output_len` is used as the initial capacity of the output.
///
/// # Panics
///
/// Panics if any `start..end` range is out of bounds for `values`.
fn filter_slices<T>(values: &[T], output_len: usize, slices: &[(usize, usize)]) -> Buffer<T> {
    let mut gathered = BufferMut::<T>::with_capacity(output_len);
    for &(start, end) in slices {
        let run = &values[start..end];
        gathered.extend_from_slice(run);
    }
    gathered.freeze()
}

#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_mask::Mask;

    use super::*;

    /// A mask with every other position set keeps the 1st, 3rd and 5th elements.
    #[test]
    fn test_filter_buffer_by_indices() {
        let source = buffer![10u32, 20, 30, 40, 50];
        let every_other = Mask::from_iter([true, false, true, false, true]);

        let filtered = source.filter(&every_other);

        assert_eq!(filtered, buffer![10u32, 30, 50]);
    }

    /// An all-true mask returns the input unchanged.
    #[test]
    fn test_filter_buffer_all_true() {
        let source = buffer![1u64, 2, 3];
        let keep_all = Mask::new_true(3);

        let filtered = source.filter(&keep_all);

        assert_eq!(filtered, buffer![1u64, 2, 3]);
    }

    /// An all-false mask returns an empty buffer.
    #[test]
    fn test_filter_buffer_all_false() {
        let source = buffer![1i32, 2, 3, 4];
        let keep_none = Mask::new_false(4);

        let filtered = source.filter(&keep_none);

        assert!(filtered.is_empty());
    }

    /// Calling the index-based helper directly gathers exactly the listed positions.
    #[test]
    fn test_filter_indices_direct() {
        let source = buffer![100u32, 200, 300, 400];
        let positions: [usize; 3] = [0, 2, 3];

        let filtered = filter_indices(source.as_slice(), &positions);

        assert_eq!(filtered, buffer![100u32, 300, 400]);
    }

    /// Calling the slice-based helper directly concatenates the listed ranges.
    #[test]
    fn test_filter_slices_direct() {
        let source = buffer![1u32, 2, 3, 4, 5];
        let ranges: [(usize, usize); 2] = [(0, 2), (4, 5)];

        let filtered = filter_slices(source.as_slice(), 3, &ranges);

        assert_eq!(filtered, buffer![1u32, 2, 5]);
    }
}
Loading
Loading