diff --git a/arrow-buffer/src/buffer/ops.rs b/arrow-buffer/src/buffer/ops.rs index c69e5c6deb10..641f3b1af90b 100644 --- a/arrow-buffer/src/buffer/ops.rs +++ b/arrow-buffer/src/buffer/ops.rs @@ -71,6 +71,30 @@ pub fn bitwise_bin_op_helper( where F: FnMut(u64, u64) -> u64, { + // If the underlying buffers are aligned to u64 we can apply the operation directly on the u64 slices + // to improve performance. + if left_offset_in_bits == 0 && right_offset_in_bits == 0 { + unsafe { + let (left_prefix, left_u64s, left_suffix) = left.as_slice().align_to::<u64>(); + let (right_prefix, right_u64s, right_suffix) = right.as_slice().align_to::<u64>(); + // if there is no prefix or suffix, both buffers are aligned and we can do the operation directly + // on u64s + // TODO also handle non empty suffixes by processing them separately + if left_prefix.is_empty() + && right_prefix.is_empty() + && left_suffix.is_empty() + && right_suffix.is_empty() + { + let result_u64s = left_u64s + .iter() + .zip(right_u64s.iter()) + .map(|(l, r)| op(*l, *r)) + .collect::<Vec<u64>>(); + return result_u64s.into(); + } + } + } + let left_chunks = left.bit_chunks(left_offset_in_bits, len_in_bits); let right_chunks = right.bit_chunks(right_offset_in_bits, len_in_bits); @@ -102,6 +126,21 @@ pub fn bitwise_unary_op_helper( where F: FnMut(u64) -> u64, { + // If the underlying buffer is aligned to u64, apply the operation directly on the u64 slices + // to improve performance. 
+ if offset_in_bits == 0 && len_in_bits > 0 { + unsafe { + let (prefix, u64s, suffix) = left.as_slice().align_to::<u64>(); + // if there is no prefix or suffix, the buffer is aligned and we can do the operation directly + // on u64s + // TODO also handle non empty suffixes by processing them separately + if prefix.is_empty() && suffix.is_empty() { + let result_u64s = u64s.iter().map(|l| op(*l)).collect::<Vec<u64>>(); + return result_u64s.into(); + } + } + } + // reserve capacity and set length so we can get a typed view of u64 chunks let mut result = MutableBuffer::new(ceil(len_in_bits, 8)).with_bitset(len_in_bits / 64 * 8, false); diff --git a/arrow-buffer/src/util/bit_chunk_iterator.rs b/arrow-buffer/src/util/bit_chunk_iterator.rs index e11383f6f3db..afc428ef24c6 100644 --- a/arrow-buffer/src/util/bit_chunk_iterator.rs +++ b/arrow-buffer/src/util/bit_chunk_iterator.rs @@ -223,7 +223,8 @@ impl<'a> BitChunks<'a> { pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self { assert!( ceil(offset + len, 8) <= buffer.len(), - "offset + len out of bounds" + "offset + len out of bounds. Buffer length in bits: {}, requested offset: {offset}, len: {len}", + buffer.len(), ); let byte_offset = offset / 8;