diff --git a/vortex-buffer/src/bit/buf_mut.rs b/vortex-buffer/src/bit/buf_mut.rs index 299712426d4..c1a348665e0 100644 --- a/vortex-buffer/src/bit/buf_mut.rs +++ b/vortex-buffer/src/bit/buf_mut.rs @@ -31,6 +31,9 @@ use crate::{BitBuffer, BufferMut, ByteBufferMut, buffer_mut}; #[derive(Debug, Clone, Eq)] pub struct BitBufferMut { buffer: ByteBufferMut, + /// Represents the offset of the bit buffer into the first byte. + /// + /// This is always less than 8 (for when the bit buffer is not aligned to a byte). offset: usize, len: usize, } @@ -162,6 +165,13 @@ impl BitBufferMut { self.buffer.reserve(additional_bytes); } + /// Clears the bit buffer (but keeps any allocated memory). + pub fn clear(&mut self) { + // Since there are no items we need to drop, we simply set the length to 0. + self.len = 0; + self.offset = 0; + } + /// Set the bit at `index` to the given boolean value. /// /// This operation is checked so if `index` exceeds the buffer length, this will panic. diff --git a/vortex-mask/src/mask_mut.rs b/vortex-mask/src/mask_mut.rs index f34eda48b3f..ab2147f483c 100644 --- a/vortex-mask/src/mask_mut.rs +++ b/vortex-mask/src/mask_mut.rs @@ -5,6 +5,7 @@ use std::ops::Sub; use std::sync::Arc; use vortex_buffer::BitBufferMut; +use vortex_error::vortex_panic; use crate::Mask; @@ -56,6 +57,29 @@ impl MaskMut { }) } + /// Returns the boolean value at a given index. + /// + /// # Panics + /// + /// Panics if the index is out of bounds. + pub fn value(&self, index: usize) -> bool { + match &self.0 { + Inner::Empty { .. } => { + vortex_panic!("index out of bounds: the length is 0 but the index is {index}") + } + Inner::Constant { value, len, .. } => { + assert!( + index < *len, + "index out of bounds: the length is {} but the index is {index}", + *len + ); + + *value + } + Inner::Builder(bit_buffer) => bit_buffer.value(index), + } + } + /// Reserve capacity for at least `additional` more values to be appended. 
pub fn reserve(&mut self, additional: usize) { match &mut self.0 { @@ -71,6 +95,39 @@ impl MaskMut { } } + /// Clears the mask. + /// + /// Note that this method has no effect on the allocated capacity of the mask. + pub fn clear(&mut self) { + match &mut self.0 { + Inner::Empty { .. } => {} + Inner::Constant { capacity, .. } => { + self.0 = Inner::Empty { + capacity: *capacity, + } + } + Inner::Builder(bit_buffer) => bit_buffer.clear(), + }; + } + + /// Shortens the mask, keeping the first `len` bits. + /// + /// If `len` is greater than or equal to the mask's current length, this has no effect. + /// + /// Note that this method has no effect on the allocated capacity of the mask. + pub fn truncate(&mut self, len: usize) { + let truncated_len = len; + if truncated_len > self.len() { + return; + } + + match &mut self.0 { + Inner::Empty { .. } => {} + Inner::Constant { len, .. } => *len = truncated_len.min(*len), + Inner::Builder(bit_buffer) => bit_buffer.truncate(truncated_len), + }; + } + /// Append n values to the mask. pub fn append_n(&mut self, new_value: bool, n: usize) { match &mut self.0 { diff --git a/vortex-vector/src/primitive/from_iter.rs b/vortex-vector/src/primitive/from_iter.rs deleted file mode 100644 index 9cd06b2e1d1..00000000000 --- a/vortex-vector/src/primitive/from_iter.rs +++ /dev/null @@ -1,81 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -//! [`FromIterator`] and related implementations for [`PVectorMut`]. - -use vortex_buffer::BufferMut; -use vortex_dtype::NativePType; -use vortex_mask::MaskMut; - -use crate::PVectorMut; - -impl FromIterator> for PVectorMut { - /// Creates a new [`PVectorMut`] from an iterator of `Option` values. - /// - /// `None` values will be marked as invalid in the validity mask.
- /// - /// # Examples - /// - /// ``` - /// use vortex_vector::{PVectorMut, VectorMutOps}; - /// - /// let mut vec = PVectorMut::::from_iter([Some(1), None, Some(3)]); - /// assert_eq!(vec.len(), 3); - /// ``` - fn from_iter(iter: I) -> Self - where - I: IntoIterator>, - { - let iter = iter.into_iter(); - // Since we do not know the length of the iterator, we can only guess how much memory we - // need to reserve. Note that these hints may be inaccurate. - let (lower_bound, _) = iter.size_hint(); - - // We choose not to use the optional upper bound size hint to match the standard library. - - let mut elements = BufferMut::with_capacity(lower_bound); - let mut validity = MaskMut::with_capacity(lower_bound); - - for opt_val in iter { - match opt_val { - Some(val) => { - elements.push(val); - validity.append_n(true, 1); - } - None => { - elements.push(T::default()); // Use default for invalid entries. - validity.append_n(false, 1); - } - } - } - - PVectorMut { elements, validity } - } -} - -impl FromIterator for PVectorMut { - /// Creates a new [`PVectorMut`] from an iterator of `T` values. - /// - /// All values will be treated as non-null. - /// - /// # Examples - /// - /// ``` - /// use vortex_vector::{PVectorMut, VectorMutOps}; - /// - /// let mut vec = PVectorMut::::from_iter([1, 2, 3, 4]); - /// assert_eq!(vec.len(), 4); - /// ``` - fn from_iter(iter: I) -> Self - where - I: IntoIterator, - { - let buffer = BufferMut::from_iter(iter); - let validity = MaskMut::new_true(buffer.len()); - - PVectorMut { - elements: buffer, - validity, - } - } -} diff --git a/vortex-vector/src/primitive/generic.rs b/vortex-vector/src/primitive/generic.rs index 9b1b8a24ab2..fff3dad00a7 100644 --- a/vortex-vector/src/primitive/generic.rs +++ b/vortex-vector/src/primitive/generic.rs @@ -68,6 +68,42 @@ impl PVector { Self { elements, validity } } + + /// Gets a nullable element at the given index. + /// + /// If the element at the given index is null, returns `None`. 
Otherwise, returns `Some(x)`, + /// where `x: T`. + /// + /// # Panics + /// + /// Panics if the index is out of bounds. + pub fn get(&self, index: usize) -> Option { + self.validity.value(index).then(|| self.elements[index]) + } + + /// Returns the internal [`Buffer`] of the [`PVector`]. + /// + /// Note that the internal buffer may hold garbage data in place of nulls. That information is + /// tracked by the [`validity()`](Self::validity). + #[inline] + pub fn elements(&self) -> &Buffer { + &self.elements + } +} + +impl AsRef<[T]> for PVector { + /// Returns an immutable slice over the internal buffer with elements of type `T`. + /// + /// Note that this slice may contain garbage data where the [`validity()`] mask states that an + /// element is invalid. + /// + /// The caller should check the [`validity()`] before performing any operations. + /// + /// [`validity()`]: crate::VectorOps::validity + #[inline] + fn as_ref(&self) -> &[T] { + self.elements.as_slice() + } } impl VectorOps for PVector { diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs index e0440deaa11..e8eb6d91dc3 100644 --- a/vortex-vector/src/primitive/generic_mut.rs +++ b/vortex-vector/src/primitive/generic_mut.rs @@ -15,9 +15,9 @@ use crate::{PVector, VectorMutOps, VectorOps}; /// `T` is expected to be bound by [`NativePType`], which templates an internal [`BufferMut`] /// that stores the elements of the vector. /// -/// `PVectorMut` is the primary way to construct primitive vectors. It provides efficient methods -/// for building vectors incrementally before converting them to an immutable [`PVector`] using -/// the [`freeze`](crate::VectorMutOps::freeze) method. +/// [`PVectorMut`] is the primary way to construct primitive vectors. It provides efficient +/// methods for building vectors incrementally before converting them to an immutable [`PVector`] +/// using the [`freeze`](crate::VectorMutOps::freeze) method. 
/// /// # Examples /// /// diff --git a/vortex-vector/src/primitive/mod.rs b/vortex-vector/src/primitive/mod.rs index b6228d4df14..8001449785b 100644 --- a/vortex-vector/src/primitive/mod.rs +++ b/vortex-vector/src/primitive/mod.rs @@ -23,11 +23,10 @@ pub use generic_mut::PVectorMut; mod vector; pub use vector::PrimitiveVector; +mod pvector_impl; mod vector_mut; pub use vector_mut::PrimitiveVectorMut; -mod from_iter; - mod macros; use vortex_dtype::NativePType; diff --git a/vortex-vector/src/primitive/pvector_impl.rs b/vortex-vector/src/primitive/pvector_impl.rs new file mode 100644 index 00000000000..1b280161429 --- /dev/null +++ b/vortex-vector/src/primitive/pvector_impl.rs @@ -0,0 +1,438 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Helper methods for [`PVectorMut`] that mimic the behavior of [`std::vec::Vec`]. + +use vortex_buffer::BufferMut; +use vortex_dtype::NativePType; +use vortex_mask::MaskMut; + +use crate::{PVectorMut, VectorMutOps}; + +/// Point operations for [`PVectorMut`]. +impl PVectorMut { + /// Gets a nullable element at the given index, panicking if the index is out of bounds. + /// + /// If the element at the given index is null, returns `None`. Otherwise, returns `Some(x)`, + /// where `x: T`. + /// + /// Note that this `get` method is different from the standard library [`slice::get`], which + /// returns `None` if the index is out of bounds. This method will panic if the index is out of + /// bounds, and return `None` if the element is null. + /// + /// # Panics + /// + /// Panics if the index is out of bounds. + pub fn get(&self, index: usize) -> Option { + self.validity.value(index).then(|| self.elements[index]) + } + + /// Appends an element to the back of the vector. + /// + /// The element is treated as valid. + pub fn push(&mut self, value: T) { + self.elements.push(value); + self.validity.append_n(true, 1); + } + + /// Pushes a valid value into existing spare capacity; the validity mask is still updated.
+ /// + /// # Safety + /// + /// The caller must ensure that there is sufficient capacity in both elements and validity + /// buffers. + #[inline] + pub unsafe fn push_unchecked(&mut self, value: T) { + // SAFETY: The caller guarantees there is sufficient capacity in the elements buffer, + // so we can write to the spare capacity and increment the length without bounds checks. + unsafe { + self.elements.spare_capacity_mut()[0].write(value); + self.elements.set_len(self.len() + 1); + } + self.validity.append_n(true, 1); + } + + /// Appends an optional element to the back of the vector, where `None` represents a null + /// element. + pub fn push_opt(&mut self, value: Option) { + if let Some(value) = value { + self.push(value); + } else { + self.elements.push(T::default()); + self.validity.append_n(false, 1); + } + } +} + +impl AsRef<[T]> for PVectorMut { + /// Returns an immutable slice over the internal mutable buffer with elements of type `T`. + /// + /// Note that this slice may contain garbage data where the [`validity()`] mask from the frozen + /// [`PVector`](crate::PVector) type states that an element is invalid. + /// + /// The caller should check the frozen [`validity()`] before performing any operations. + /// + /// [`validity()`]: crate::VectorOps::validity + #[inline] + fn as_ref(&self) -> &[T] { + self.elements.as_slice() + } +} + +impl AsMut<[T]> for PVectorMut { + /// Returns a mutable slice over the internal mutable buffer with elements of type `T`. + /// + /// Note that this slice may contain garbage data where the [`validity()`] mask from the frozen + /// [`PVector`](crate::PVector) type states that an element is invalid. + /// + /// The caller should check the frozen [`validity()`] before performing any operations. + /// + /// [`validity()`]: crate::VectorOps::validity + #[inline] + fn as_mut(&mut self) -> &mut [T] { + self.elements.as_mut_slice() + } +} + +/// Batch operations for [`PVectorMut`]. 
+impl PVectorMut { + /// Returns the internal [`BufferMut`] of the [`PVectorMut`]. + /// + /// Note that the internal buffer may hold garbage data in place of nulls. That information is + /// tracked by the [`validity()`](Self::validity). + #[inline] + pub fn elements(&self) -> &BufferMut { + &self.elements + } + + /// Returns the validity of the [`PVectorMut`]. + #[inline] + pub fn validity(&self) -> &MaskMut { + &self.validity + } + + /// Resizes the vector in-place so that `len` is equal to `new_len`. + /// + /// If `new_len` is greater than `len`, the vector is extended by the difference, with each + /// additional slot filled with `value`, where `None` represents a null. + /// + /// If `new_len` is less than `len`, the vector is simply truncated. + pub fn resize(&mut self, new_len: usize, value: Option) { + let current_len = self.len(); + + if new_len < current_len { + self.truncate(new_len); + } else { + let additional = new_len - current_len; + + match value { + Some(value) => { + self.elements.push_n(value, additional); + self.validity.append_n(true, additional); + } + None => { + self.elements.push_n(T::default(), additional); + self.validity.append_n(false, additional); + } + } + } + } + + /// Clears the vector, removing all elements. + pub fn clear(&mut self) { + self.elements.clear(); + self.validity.clear(); + } + + /// Shortens the vector, keeping the first `len` elements. + pub fn truncate(&mut self, len: usize) { + self.elements.truncate(len); + self.validity.truncate(len); + } +} + +impl Extend> for PVectorMut { + /// Extends the vector from an iterator of optional values. + /// + /// `None` values will be marked as null in the validity mask.
+ /// + /// # Examples + /// + /// ``` + /// use vortex_vector::{PVectorMut, VectorMutOps, VectorOps}; + /// + /// let mut vec = PVectorMut::from_iter([Some(1i32), None]); + /// vec.extend([Some(3), None, Some(5)]); + /// assert_eq!(vec.len(), 5); + /// + /// let frozen = vec.freeze(); + /// assert_eq!(frozen.validity().true_count(), 3); // Only 3 non-null values. + /// ``` + fn extend>>(&mut self, iter: I) { + let iter = iter.into_iter(); + // Since we do not know the length of the iterator, we can only guess how much memory we + // need to reserve. Note that these hints may be inaccurate. + let (lower_bound, _) = iter.size_hint(); + + // We choose not to use the optional upper bound size hint to match the standard library. + + self.reserve(lower_bound); + + // We have to update validity per-element since it depends on Option variant. + for opt_val in iter { + match opt_val { + Some(val) => { + self.elements.push(val); + self.validity.append_n(true, 1); + } + None => { + self.elements.push(T::default()); + self.validity.append_n(false, 1); + } + } + } + } +} + +impl FromIterator> for PVectorMut { + /// Creates a new [`PVectorMut`] from an iterator of `Option` values. + /// + /// `None` values will be marked as invalid in the validity mask. + /// + /// Internally, this uses the [`Extend>`] trait implementation. + fn from_iter(iter: I) -> Self + where + I: IntoIterator>, + { + let iter = iter.into_iter(); + + let mut vec = Self::with_capacity(iter.size_hint().0); + vec.extend(iter); + + vec + } +} + +impl Extend for PVectorMut { + /// Extends the vector from an iterator of values. + /// + /// All values from the iterator will be marked as non-null in the validity mask. + /// + /// Internally, this uses the [`Extend`] trait implementation. + fn extend>(&mut self, iter: I) { + let start_len = self.len(); + + // Allow the `BufferMut` implementation to handle extending efficiently. 
+ self.elements.extend(iter); + self.validity.append_n(true, self.len() - start_len); + } +} + +impl FromIterator for PVectorMut { + /// Creates a new [`PVectorMut`] from an iterator of `T` values. + /// + /// All values will be treated as non-null. + /// + /// # Examples + /// + /// ``` + /// use vortex_vector::{PVectorMut, VectorMutOps}; + /// + /// let mut vec = PVectorMut::from_iter([1i32, 2, 3, 4]); + /// assert_eq!(vec.len(), 4); + /// ``` + fn from_iter(iter: I) -> Self + where + I: IntoIterator, + { + let iter = iter.into_iter(); + + let mut vec = Self::with_capacity(iter.size_hint().0); + vec.extend(iter); + + vec + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::VectorOps; + + #[test] + fn test_get_methods() { + let vec = PVectorMut::from_iter([Some(1), None, Some(3), None, Some(5)]); + + // Test get - a valid element and a null element. + assert_eq!(vec.get(0), Some(1)); + assert_eq!(vec.get(1), None); + + // Test get - nulls. + assert_eq!(vec.get(0), Some(1)); + assert_eq!(vec.get(1), None); + assert_eq!(vec.get(2), Some(3)); + + assert_eq!(vec.elements()[0], 1); + assert_eq!(vec.elements()[2], 3); + + // Also test PVector methods. + let frozen = vec.freeze(); + assert_eq!(frozen.get(0), Some(1)); + assert_eq!(frozen.get(1), None); + assert_eq!(frozen.get(2), Some(3)); + } + + #[test] + #[should_panic(expected = "index out of bounds")] + fn test_get_panic() { + let vec = PVectorMut::from_iter([Some(1), Some(2)]); + let _ = vec.get(10); + } + + #[test] + fn test_push_variants() { + let mut vec = PVectorMut::::with_capacity(10); + vec.push(1); + vec.push_opt(None); + vec.push_opt(Some(3)); + + assert_eq!(vec.len(), 3); + assert_eq!(vec.get(0), Some(1)); + assert_eq!(vec.get(1), None); + assert_eq!(vec.get(2), Some(3)); + + // Test push_unchecked with pre-reserved capacity.
+ vec.reserve(1); + unsafe { + vec.push_unchecked(4); + } + assert_eq!(vec.get(3), Some(4)); + } + + #[test] + fn test_resize_operations() { + let mut vec = PVectorMut::from_iter([1i32, 2, 3]); + + // Grow with valid values. + vec.resize(5, Some(99)); + assert_eq!(vec.len(), 5); + assert_eq!(vec.get(3), Some(99)); + assert_eq!(vec.get(4), Some(99)); + + // Grow with nulls. + vec.resize(7, None); + assert_eq!(vec.get(5), None); + assert_eq!(vec.get(6), None); + + // Shrink. + vec.resize(2, Some(0)); + assert_eq!(vec.len(), 2); + assert_eq!(vec.get(0), Some(1)); + assert_eq!(vec.get(1), Some(2)); + } + + #[test] + fn test_clear_truncate() { + let mut vec = PVectorMut::from_iter([Some(1), None, Some(3), None, Some(5)]); + let cap = vec.capacity(); + + vec.truncate(3); + assert_eq!(vec.len(), 3); + assert!(vec.capacity() >= cap); // Capacity preserved. + + vec.truncate(10); // Truncate beyond length - no-op. + assert_eq!(vec.len(), 3); + + vec.clear(); + assert_eq!(vec.len(), 0); + assert!(vec.capacity() >= cap); // Capacity still preserved. + } + + #[test] + fn test_slice_access() { + let mut vec = PVectorMut::from_iter([Some(1i32), None, Some(3)]); + let slice = vec.as_ref(); + assert_eq!(slice[0], 1); + assert_eq!(slice[2], 3); + // slice[1] is undefined for null but safe to access. + + let mut_slice = vec.as_mut(); + mut_slice[0] = 10; + assert_eq!(vec.get(0), Some(10)); + + let frozen = vec.freeze(); + assert_eq!(frozen.as_ref()[0], 10); + } + + #[test] + fn test_from_iter_variants() { + // FromIterator - all non-null. + let vec1 = PVectorMut::from_iter([1i32, 2, 3]); + assert_eq!(vec1.len(), 3); + assert!(vec1.freeze().validity().all_true()); + + // FromIterator> - mixed null/non-null. + let vec2 = PVectorMut::from_iter([Some(1i32), None, Some(3)]); + assert_eq!(vec2.len(), 3); + assert_eq!(vec2.freeze().validity().true_count(), 2); + + // Empty iterators. 
+ let empty1 = PVectorMut::from_iter::<[i32; 0]>([]); + let empty2 = PVectorMut::::from_iter(std::iter::empty::>()); + assert_eq!(empty1.len(), 0); + assert_eq!(empty2.len(), 0); + } + + #[test] + fn test_extend_operations() { + let mut vec = PVectorMut::from_iter([1i32, 2]); + + // Extend - all non-null. + vec.extend([3, 4]); + assert_eq!(vec.len(), 4); + assert_eq!(vec.get(3), Some(4)); + + // Extend> - mixed null/non-null. + vec.extend([Some(5), None, Some(7)]); + assert_eq!(vec.len(), 7); + assert_eq!(vec.get(5), None); + assert_eq!(vec.get(6), Some(7)); + + // Extend with iterator that has size hint. + let iter = 8..10; + vec.extend(iter); + assert_eq!(vec.get(8), Some(9)); + } + + #[test] + fn test_empty_vector_edge_cases() { + let empty = PVectorMut::::with_capacity(0); + assert_eq!(empty.len(), 0); + assert_eq!(empty.as_ref().len(), 0); + + let mut mutable_empty = PVectorMut::::with_capacity(0); + mutable_empty.clear(); // No-op on empty. + mutable_empty.truncate(0); // No-op. + mutable_empty.resize(0, None); // No-op. + assert_eq!(mutable_empty.len(), 0); + } + + #[test] + fn test_complex_workflow() { + // Integration test combining multiple operations. + let mut vec = PVectorMut::::with_capacity(2); + vec.extend([1, 2]); // Extend. + vec.push_opt(None); + vec.resize(5, Some(99)); + vec.truncate(4); + vec.extend([Some(10), None]); // Extend>. + + assert_eq!(vec.len(), 6); + let frozen = vec.freeze(); + assert_eq!(frozen.validity().true_count(), 4); + assert_eq!(frozen.get(0), Some(1)); + assert_eq!(frozen.get(2), None); + assert_eq!(frozen.get(3), Some(99)); + assert_eq!(frozen.get(5), None); + } +}