From 0d6f25e876b58cebfe4eccd86827b95f85b006aa Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Thu, 23 Oct 2025 16:30:12 -0400 Subject: [PATCH 1/5] add `StructVector` implementation Signed-off-by: Connor Tsui --- vortex-vector/src/bool/iter.rs | 1 + vortex-vector/src/bool/vector.rs | 2 +- vortex-vector/src/bool/vector_mut.rs | 4 +- vortex-vector/src/lib.rs | 17 +- vortex-vector/src/macros.rs | 8 + vortex-vector/src/ops.rs | 5 + vortex-vector/src/primitive/generic.rs | 2 +- vortex-vector/src/primitive/generic_mut.rs | 4 +- vortex-vector/src/primitive/iter.rs | 1 + vortex-vector/src/private.rs | 3 + vortex-vector/src/struct_/mod.rs | 24 + vortex-vector/src/struct_/vector.rs | 199 +++++++ vortex-vector/src/struct_/vector_mut.rs | 596 +++++++++++++++++++++ vortex-vector/src/vector.rs | 52 +- vortex-vector/src/vector_mut.rs | 58 +- 15 files changed, 951 insertions(+), 25 deletions(-) create mode 100644 vortex-vector/src/struct_/mod.rs create mode 100644 vortex-vector/src/struct_/vector.rs create mode 100644 vortex-vector/src/struct_/vector_mut.rs diff --git a/vortex-vector/src/bool/iter.rs b/vortex-vector/src/bool/iter.rs index 5f9bde7feb8..7246258660d 100644 --- a/vortex-vector/src/bool/iter.rs +++ b/vortex-vector/src/bool/iter.rs @@ -85,6 +85,7 @@ impl FromIterator for BoolVectorMut { /// /// It consumes the mutable vector and iterates over the elements, yielding `None` for null values /// and `Some(value)` for valid values. +#[derive(Debug)] pub struct BoolVectorMutIterator { /// The vector being iterated over. vector: BoolVectorMut, diff --git a/vortex-vector/src/bool/vector.rs b/vortex-vector/src/bool/vector.rs index 2846e1f9907..f56647638f5 100644 --- a/vortex-vector/src/bool/vector.rs +++ b/vortex-vector/src/bool/vector.rs @@ -63,7 +63,7 @@ impl BoolVector { Self { bits, validity } } - /// Decomposes the boolean vector into its constituent parts. + /// Decomposes the boolean vector into its constituent parts (bit buffer and validity). pub fn into_parts(self) -> (BitBuffer, Mask) { (self.bits, self.validity) } diff --git a/vortex-vector/src/bool/vector_mut.rs b/vortex-vector/src/bool/vector_mut.rs index 05ee7c9078a..a9fc47d2d43 100644 --- a/vortex-vector/src/bool/vector_mut.rs +++ b/vortex-vector/src/bool/vector_mut.rs @@ -121,7 +121,7 @@ impl BoolVectorMut { } } - /// Returns the parts of the mutable vector. + /// Decomposes the boolean vector into its constituent parts (bit buffer and validity). pub fn into_parts(self) -> (BitBufferMut, MaskMut) { (self.bits, self.validity) } @@ -151,7 +151,7 @@ impl VectorMutOps for BoolVectorMut { } fn append_nulls(&mut self, n: usize) { - self.bits.append_n(false, n); + self.bits.append_n(false, n); // Note that the value we push doesn't actually matter. self.validity.append_n(false, n); } diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs index baede4f8944..b65e4e6fb9a 100644 --- a/vortex-vector/src/lib.rs +++ b/vortex-vector/src/lib.rs @@ -12,17 +12,22 @@ #![deny(clippy::missing_safety_doc)] mod bool; -mod macros; mod null; -mod ops; mod primitive; -mod private; -mod vector; -mod vector_mut; +mod struct_; pub use bool::{BoolVector, BoolVectorMut}; pub use null::{NullVector, NullVectorMut}; -pub use ops::{VectorMutOps, VectorOps}; pub use primitive::{PVector, PVectorMut, PrimitiveVector, PrimitiveVectorMut}; +pub use struct_::{StructVector, StructVectorMut}; + +mod ops; +mod vector; +mod vector_mut; + +pub use ops::{VectorMutOps, VectorOps}; pub use vector::Vector; pub use vector_mut::VectorMut; + +mod macros; +mod private; diff --git a/vortex-vector/src/macros.rs b/vortex-vector/src/macros.rs index f2cc136952a..78ec73379eb 100644 --- a/vortex-vector/src/macros.rs +++ b/vortex-vector/src/macros.rs @@ -48,6 +48,10 @@ macro_rules! match_each_vector { let $vec = v; $body } + $crate::Vector::Struct(v) => { + let $vec = v; + $body + } } }}; } @@ -98,6 +102,10 @@ macro_rules! match_each_vector_mut { let $vec = v; $body } + $crate::VectorMut::Struct(v) => { + let $vec = v; + $body + } } }}; } diff --git a/vortex-vector/src/ops.rs b/vortex-vector/src/ops.rs index ab69e028100..98bfd17ed86 100644 --- a/vortex-vector/src/ops.rs +++ b/vortex-vector/src/ops.rs @@ -70,6 +70,11 @@ pub trait VectorMutOps: private::Sealed + Into { fn reserve(&mut self, additional: usize); /// Extends the vector by appending elements from another vector. + /// + /// # Panics + /// + /// Panics if the `other` vector has the wrong type (for example, a + /// [`StructVector`](crate::StructVector) might have incorrect fields). fn extend_from_vector(&mut self, other: &Self::Immutable); /// Appends `n` null elements to the vector. diff --git a/vortex-vector/src/primitive/generic.rs b/vortex-vector/src/primitive/generic.rs index 96d9185973b..473e93cd731 100644 --- a/vortex-vector/src/primitive/generic.rs +++ b/vortex-vector/src/primitive/generic.rs @@ -69,7 +69,7 @@ impl PVector { Self { elements, validity } } - /// Decomposes the primitive vector into its constituent parts. + /// Decomposes the primitive vector into its constituent parts (buffer and validity). pub fn into_parts(self) -> (Buffer, Mask) { (self.elements, self.validity) } diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs index 5ea0036570d..23275689b73 100644 --- a/vortex-vector/src/primitive/generic_mut.rs +++ b/vortex-vector/src/primitive/generic_mut.rs @@ -160,7 +160,7 @@ impl PVectorMut { } } - /// Decomposes the primitive vector into its constituent parts. + /// Decomposes the primitive vector into its constituent parts (buffer and validity). pub fn into_parts(self) -> (BufferMut, MaskMut) { (self.elements, self.validity) } @@ -189,7 +189,7 @@ impl VectorMutOps for PVectorMut { } fn append_nulls(&mut self, n: usize) { - self.elements.push_n(T::zero(), n); + self.elements.push_n(T::zero(), n); // Note that the value we push doesn't actually matter. self.validity.append_n(false, n); } diff --git a/vortex-vector/src/primitive/iter.rs b/vortex-vector/src/primitive/iter.rs index c3f651c5b12..c21fd88f79c 100644 --- a/vortex-vector/src/primitive/iter.rs +++ b/vortex-vector/src/primitive/iter.rs @@ -116,6 +116,7 @@ impl FromIterator for PVectorMut { /// /// It consumes the mutable vector and iterates over the elements, yielding `None` for null values /// and `Some(value)` for valid values. +#[derive(Debug)] pub struct PVectorMutIterator { /// The vector being iterated over. vector: PVectorMut, diff --git a/vortex-vector/src/private.rs b/vortex-vector/src/private.rs index 37828530e3b..b241e4fd5d8 100644 --- a/vortex-vector/src/private.rs +++ b/vortex-vector/src/private.rs @@ -28,3 +28,6 @@ impl Sealed for PrimitiveVector {} impl Sealed for PrimitiveVectorMut {} impl Sealed for PVector {} impl Sealed for PVectorMut {} + +impl Sealed for StructVector {} +impl Sealed for StructVectorMut {} diff --git a/vortex-vector/src/struct_/mod.rs b/vortex-vector/src/struct_/mod.rs new file mode 100644 index 00000000000..ae6ff7b91bd --- /dev/null +++ b/vortex-vector/src/struct_/mod.rs @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition and implementation of [`StructVector`] and [`StructVectorMut`]. + +mod vector; +pub use vector::StructVector; + +mod vector_mut; +pub use vector_mut::StructVectorMut; + +use crate::{Vector, VectorMut}; + +impl From for Vector { + fn from(v: StructVector) -> Self { + Self::Struct(v) + } +} + +impl From for VectorMut { + fn from(v: StructVectorMut) -> Self { + Self::Struct(v) + } +} diff --git a/vortex-vector/src/struct_/vector.rs b/vortex-vector/src/struct_/vector.rs new file mode 100644 index 00000000000..af24055af0a --- /dev/null +++ b/vortex-vector/src/struct_/vector.rs @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition and implementation of [`StructVector`]. + +use vortex_error::{VortexExpect, VortexResult, vortex_ensure}; +use vortex_mask::Mask; + +use crate::{StructVectorMut, Vector, VectorMutOps, VectorOps}; + +/// An immutable vector of boolean values. +/// +/// `StructVector` can be considered a borrowed / frozen version of [`StructVectorMut`], which is +/// created via the [`freeze`](crate::VectorMutOps::freeze) method. +/// +/// See the documentation for [`StructVectorMut`] for more information. +#[derive(Debug, Clone)] +pub struct StructVector { + /// The fields of the `StructVector`, each stored column-wise as a [`Vector`]. + pub(super) fields: Box<[Vector]>, + + /// The validity mask (where `true` represents an element is **not** null). + pub(super) validity: Mask, + + /// The length of the vector (which is the same as all field vectors). + /// + /// This is stored here as a convenience, and also helps in the case that the `StructVector` has + /// no fields. + pub(super) len: usize, +} + +impl StructVector { + /// Creates a new [`StructVector`] from the given fields and validity mask. + /// + /// # Panics + /// + /// Panics if: + /// + /// - Any field vector has a length that does not match the length of other fields. + /// - The validity mask length does not match the field length. + pub fn new(fields: Box<[Vector]>, validity: Mask) -> Self { + Self::try_new(fields, validity).vortex_expect( + "`StructVector` fields must have matching length and validity constraints", + ) + } + + /// Tries to create a new [`StructVector`] from the given fields and validity mask. + /// + /// # Errors + /// + /// Returns an error if: + /// + /// - Any field vector has a length that does not match the length of other fields. + /// - The validity mask length does not match the field length. + pub fn try_new(fields: Box<[Vector]>, validity: Mask) -> VortexResult { + let len = if fields.is_empty() { + validity.len() + } else { + fields[0].len() + }; + + Self::validate(&fields, len, &validity)?; + + Ok(Self { + fields, + validity, + len, + }) + } + + /// Creates a new [`StructVector`] from the given fields and validity mask without validation. + /// + /// # Safety + /// + /// The caller must ensure that: + /// + /// - All field vectors have the same length. + /// - The validity mask has a length equal to the field length. + pub unsafe fn new_unchecked(fields: Box<[Vector]>, validity: Mask) -> Self { + let len = if fields.is_empty() { + validity.len() + } else { + fields[0].len() + }; + + debug_assert!( + Self::validate(&fields, len, &validity).is_ok(), + "`StructVector` fields must have matching length and validity constraints" + ); + + Self { + fields, + validity, + len, + } + } + + /// Validates the fields and validity mask for a [`StructVector`]. + /// + /// # Errors + /// + /// Returns an error if: + /// + /// - Any field vector has a length that does not match the length of other fields. + /// - The validity mask length does not match the field length. + fn validate(fields: &[Vector], len: usize, validity: &Mask) -> VortexResult<()> { + // Validate that the validity mask has the correct length. + vortex_ensure!( + validity.len() == len, + "Validity mask length ({}) does not match expected length ({})", + validity.len(), + len + ); + + // Validate that all fields have the correct length. + for (i, field) in fields.iter().enumerate() { + vortex_ensure!( + field.len() == len, + "Field {} has length {} but expected length {}", + i, + field.len(), + len + ); + } + + Ok(()) + } + + /// Decomposes the struct vector into its constituent parts (fields, validity, and length). + pub fn into_parts(self) -> (Box<[Vector]>, Mask, usize) { + (self.fields, self.validity, self.len) + } + + /// Returns the fields of the `StructVector`, each stored column-wise as a [`Vector`]. + pub fn fields(&self) -> &[Vector] { + self.fields.as_ref() + } +} + +impl VectorOps for StructVector { + type Mutable = StructVectorMut; + + fn len(&self) -> usize { + self.len + } + + fn validity(&self) -> &Mask { + &self.validity + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + let validity = match self.validity.try_into_mut() { + Ok(validity) => validity, + Err(validity) => { + return Err(StructVector { validity, ..self }); + } + }; + + // Convert all of the remaining fields to mutable, if possible. + let mut mutable_fields = Vec::with_capacity(self.fields.len()); + let mut fields_iter = self.fields.into_iter(); + + while let Some(field) = fields_iter.next() { + match field.try_into_mut() { + Ok(mutable_field) => { + // We were able to take ownership of the field vector, so add it and keep going. + mutable_fields.push(mutable_field); + } + Err(immutable_field) => { + // We were unable to take ownership, so we must re-freeze all of the fields + // vectors we took ownership over and reconstruct the original `StructVector`. + + let mut all_fields: Vec = mutable_fields + .into_iter() + .map(|mut_field| mut_field.freeze()) + .collect(); + + all_fields.push(immutable_field); + all_fields.extend(fields_iter); + + return Err(StructVector { + fields: all_fields.into_boxed_slice(), + len: self.len, + validity: validity.freeze(), + }); + } + } + } + + Ok(StructVectorMut { + fields: mutable_fields, + len: self.len, + validity, + }) + } +} diff --git a/vortex-vector/src/struct_/vector_mut.rs b/vortex-vector/src/struct_/vector_mut.rs new file mode 100644 index 00000000000..3a1c40f76b3 --- /dev/null +++ b/vortex-vector/src/struct_/vector_mut.rs @@ -0,0 +1,596 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition and implementation of [`StructVectorMut`]. + +use vortex_error::{VortexExpect, VortexResult, vortex_ensure, vortex_panic}; +use vortex_mask::MaskMut; + +use crate::{StructVector, Vector, VectorMut, VectorMutOps, VectorOps}; + +/// A mutable vector of struct values (values with named fields). +/// +/// Struct values are stored column-wise in the vector, so values in the same field are stored next +/// to each other (rather than values in the same struct stored next to each other). +/// +/// # Examples +/// +/// ## Creating a [`StructVector`] and [`StructVectorMut`] +/// +/// ``` +/// use vortex_vector::{StructVectorMut, VectorMut, BoolVectorMut, PVectorMut, NullVector}; +/// use vortex_mask::MaskMut; +/// +/// // Create a struct with three fields: nulls, booleans, and integers. +/// let fields = vec![ +/// NullVector::new(3).try_into_mut().unwrap().into(), +/// BoolVectorMut::from_iter([true, false, true]).into(), +/// PVectorMut::::from_iter([10, 20, 30]).into(), +/// ]; +/// +/// let mut struct_vec = StructVectorMut::new(fields, MaskMut::new_true(3)); +/// assert_eq!(struct_vec.len(), 3); +/// ``` +/// +/// ## Working with [`split_off()`] and [`unsplit()`] +/// +/// [`split_off()`]: VectorMutOps::split_off +/// [`unsplit()`]: VectorMutOps::unsplit +/// +/// ``` +/// use vortex_vector::{StructVectorMut, VectorMut, BoolVectorMut, PVectorMut, NullVector}; +/// use vortex_mask::MaskMut; +/// +/// let fields = vec![ +/// NullVector::new(6).try_into_mut().unwrap().into(), +/// PVectorMut::::from_iter([1, 2, 3, 4, 5, 6]).into(), +/// ]; +/// +/// let mut struct_vec = StructVectorMut::new(fields, MaskMut::new_true(6)); +/// +/// // Split at position 4. +/// let second_part = struct_vec.split_off(4); +/// +/// assert_eq!(struct_vec.len(), 4); +/// assert_eq!(second_part.len(), 2); +/// +/// // Rejoin the parts. +/// struct_vec.unsplit(second_part); +/// assert_eq!(struct_vec.len(), 6); +/// ``` +/// +/// ## Accessing field values +/// +/// ``` +/// use vortex_vector::{StructVectorMut, VectorMut, BoolVectorMut, PVectorMut, NullVector}; +/// use vortex_mask::MaskMut; +/// use vortex_dtype::PTypeDowncast; +/// +/// let fields = vec![ +/// NullVector::new(3).try_into_mut().unwrap().into(), +/// BoolVectorMut::from_iter([true, false, true]).into(), +/// PVectorMut::::from_iter([10, 20, 30]).into(), +/// ]; +/// +/// let struct_vec = StructVectorMut::new(fields, MaskMut::new_true(3)); +/// +/// // Access the boolean field vector (field index 1). +/// if let VectorMut::Bool(bool_vec) = struct_vec.fields[1].clone() { +/// let values: Vec<_> = bool_vec.into_iter().map(|v| v.unwrap()).collect(); +/// assert_eq!(values, vec![true, false, true]); +/// } +/// +/// // Access the integer field column (field index 2). +/// if let VectorMut::Primitive(prim_vec) = struct_vec.fields[2].clone() { +/// let values: Vec<_> = prim_vec.into_i32().into_iter().map(|v| v.unwrap()).collect(); +/// assert_eq!(values, vec![10, 20, 30]); +/// } +/// ``` +#[derive(Debug, Clone)] +pub struct StructVectorMut { + /// The fields of the `StructVectorMut`, each stored column-wise as a [`VectorMut`]. + /// + /// We store this as a mutable vector instead of a fixed-sized type since vectors do not have an + /// associated [`DType`](vortex_dtype::DType), thus users can add field columns if they need. + pub(super) fields: Vec, + + /// The validity mask (where `true` represents an element is **not** null). + pub(super) validity: MaskMut, + + /// The length of the vector (which is the same as all field vectors). + /// + /// This is stored here as a convenience, and also helps in the case that the `StructVector` has + /// no fields. + pub(super) len: usize, +} + +impl StructVectorMut { + /// Creates a new [`StructVectorMut`] with the given fields and validity mask. + /// + /// # Panics + /// + /// Panics if: + /// + /// - Any field vector has a length that does not match the length of other fields. + /// - The validity mask length does not match the field length. + pub fn new(fields: Vec, validity: MaskMut) -> Self { + Self::try_new(fields, validity).vortex_expect( + "`StructVectorMut` fields must have matching length and validity constraints", + ) + } + + /// Tries to create a new [`StructVectorMut`] with the given fields and validity mask. + /// + /// # Errors + /// + /// Returns an error if: + /// + /// - Any field vector has a length that does not match the length of other fields. + /// - The validity mask length does not match the field length. + pub fn try_new(fields: Vec, validity: MaskMut) -> VortexResult { + let len = if fields.is_empty() { + validity.len() + } else { + fields[0].len() + }; + + Self::validate(&fields, len, &validity)?; + + Ok(Self { + fields, + validity, + len, + }) + } + + /// Creates a new [`StructVectorMut`] with the given fields and validity mask without + /// validation. + /// + /// # Safety + /// + /// The caller must ensure that: + /// + /// - All field vectors have the same length. + /// - The validity mask has a length equal to the field length. + pub unsafe fn new_unchecked(fields: Vec, validity: MaskMut) -> Self { + let len = if fields.is_empty() { + validity.len() + } else { + fields[0].len() + }; + + debug_assert!( + Self::validate(&fields, len, &validity).is_ok(), + "`StructVectorMut` fields must have matching length and validity constraints" + ); + + Self { + fields, + validity, + len, + } + } + + /// Validates the fields and validity mask for a [`StructVectorMut`]. + /// + /// # Errors + /// + /// Returns an error if: + /// + /// - Any field vector has a length that does not match `len`. + /// - The validity mask length does not match `len`. + fn validate(fields: &[VectorMut], len: usize, validity: &MaskMut) -> VortexResult<()> { + // Validate that the validity mask has the correct length. + vortex_ensure!( + validity.len() == len, + "Validity mask length ({}) does not match expected length ({})", + validity.len(), + len + ); + + // Validate that all fields have the correct length. + for (i, field) in fields.iter().enumerate() { + vortex_ensure!( + field.len() == len, + "Field {} has length {} but expected length {}", + i, + field.len(), + len + ); + } + + Ok(()) + } + + /// Decomposes the struct vector into its constituent parts (fields, validity, and length). + pub fn into_parts(self) -> (Vec, MaskMut, usize) { + (self.fields, self.validity, self.len) + } + + /// Returns the fields of the `StructVectorMut`, each stored column-wise as a [`VectorMut`]. + pub fn fields(&mut self) -> &[VectorMut] { + self.fields.as_mut_slice() + } + + /// Finds the minimum capacity of all field vectors. + /// + /// This is equal to the maximum amount of scalars we can add before we need to reallocate at + /// least one of the child field vectors. + /// + /// If there are no fields, this returns the length of the vector. + pub fn minimum_capacity(&self) -> usize { + if self.fields.is_empty() { + return self.len; + } + + let mut minimum_capacity = usize::MAX; + for field in &self.fields { + minimum_capacity = minimum_capacity.min(field.capacity()); + } + + minimum_capacity + } +} + +impl VectorMutOps for StructVectorMut { + type Immutable = StructVector; + + fn len(&self) -> usize { + self.len + } + + /// Note that this returns the length of the [`StructVectorMut`]. + /// + /// If you want the actual capacity of the struct vector, use the [`minimum_capacity()`] method. + /// + /// [`minimum_capacity()`]: Self::minimum_capacity + fn capacity(&self) -> usize { + self.len + } + + fn reserve(&mut self, additional: usize) { + // Reserve the additional capacity in each field vector. + for field in &mut self.fields { + field.reserve(additional); + + debug_assert_eq!( + field.len(), + self.len, + "Field length must match `StructVectorMut` length" + ); + } + + self.validity.reserve(additional); + } + + fn extend_from_vector(&mut self, other: &StructVector) { + assert_eq!( + self.fields.len(), + other.fields().len(), + "Cannot extend StructVectorMut: field count mismatch ({} vs {})", + self.fields.len(), + other.fields().len() + ); + + // Extend each field vector. + let pairs = self.fields.iter_mut().zip(other.fields()); + for (self_mut_vector, other_vec) in pairs { + match (self_mut_vector, other_vec) { + (VectorMut::Null(a), Vector::Null(b)) => a.extend_from_vector(b), + (VectorMut::Bool(a), Vector::Bool(b)) => a.extend_from_vector(b), + (VectorMut::Primitive(a), Vector::Primitive(b)) => a.extend_from_vector(b), + (VectorMut::Struct(a), Vector::Struct(b)) => a.extend_from_vector(b), + _ => { + vortex_panic!("Mismatched field types in `StructVectorMut::extend_from_vector`") + } + } + } + + // Extend the validity mask. + self.validity.append_mask(other.validity()); + + self.len += other.len(); + } + + fn append_nulls(&mut self, n: usize) { + for field in &mut self.fields { + field.append_nulls(n); // Note that the value we push to each doesn't actually matter. + } + self.validity.append_n(false, n); + + self.len += n; + } + + fn freeze(self) -> Self::Immutable { + let frozen_fields: Vec = self + .fields + .into_iter() + .map(|mut_field| mut_field.freeze()) + .collect(); + + StructVector { + fields: frozen_fields.into_boxed_slice(), + len: self.len, + validity: self.validity.freeze(), + } + } + + fn split_off(&mut self, at: usize) -> Self { + assert!( + at <= self.capacity(), + "split_off out of bounds: {} > {}", + at, + self.capacity() + ); + + let split_fields: Vec = self + .fields + .iter_mut() + .map(|field| field.split_off(at)) + .collect(); + + let split_validity = self.validity.split_off(at); + + // Update self's state. + let split_len = self.len.saturating_sub(at); + self.len = at; + + Self { + fields: split_fields, + len: split_len, + validity: split_validity, + } + } + + fn unsplit(&mut self, other: Self) { + assert_eq!( + self.fields.len(), + other.fields.len(), + "Cannot unsplit StructVectorMut: field count mismatch ({} vs {})", + self.fields.len(), + other.fields.len() + ); + + // Unsplit each field vector. + let pairs = self.fields.iter_mut().zip(other.fields); + for (self_mut_vector, other_mut_vec) in pairs { + match (self_mut_vector, other_mut_vec) { + (VectorMut::Null(a), VectorMut::Null(b)) => a.unsplit(b), + (VectorMut::Bool(a), VectorMut::Bool(b)) => a.unsplit(b), + (VectorMut::Primitive(a), VectorMut::Primitive(b)) => a.unsplit(b), + (VectorMut::Struct(a), VectorMut::Struct(b)) => a.unsplit(b), + _ => { + vortex_panic!("Mismatched field types in `StructVectorMut::unsplit`") + } + } + } + + self.validity.unsplit(other.validity); + + self.len += other.len; + } +} + +#[cfg(test)] +mod tests { + use vortex_dtype::PTypeDowncast; + use vortex_mask::{Mask, MaskMut}; + + use super::*; + use crate::{BoolVectorMut, NullVector, PVectorMut, VectorMut}; + + #[test] + fn test_empty_fields() { + let mut struct_vec = StructVectorMut::try_new(vec![], MaskMut::new_true(10)).unwrap(); + let second_half = struct_vec.split_off(6); + assert_eq!(struct_vec.len(), 6); + assert_eq!(second_half.len(), 4); + } + + #[test] + fn test_try_into_mut_and_values() { + let struct_vec = StructVector { + fields: vec![ + NullVector::new(5).into(), + BoolVectorMut::from_iter([true, false, true, false, true]) + .freeze() + .into(), + PVectorMut::::from_iter([10, 20, 30, 40, 50]) + .freeze() + .into(), + ] + .into_boxed_slice(), + len: 5, + validity: Mask::AllTrue(5), + }; + + let mut_struct = struct_vec.try_into_mut().unwrap(); + assert_eq!(mut_struct.len(), 5); + + // Verify values are preserved. + if let VectorMut::Bool(bool_vec) = mut_struct.fields[1].clone() { + let values: Vec<_> = bool_vec.into_iter().map(|v| v.unwrap()).collect(); + assert_eq!(values, vec![true, false, true, false, true]); + } + + if let VectorMut::Primitive(prim_vec) = mut_struct.fields[2].clone() { + let values: Vec<_> = prim_vec + .into_i32() + .into_iter() + .map(|v| v.unwrap()) + .collect(); + assert_eq!(values, vec![10, 20, 30, 40, 50]); + } + } + + #[test] + fn test_try_into_mut_shared_ownership() { + // Test that conversion fails when a field has shared ownership. + let bool_field: Vector = BoolVectorMut::from_iter([true, false, true]) + .freeze() + .into(); + let bool_field_clone = bool_field.clone(); + + let struct_vec = StructVector { + fields: vec![ + NullVector::new(3).into(), + bool_field_clone, + PVectorMut::::from_iter([1, 2, 3]).freeze().into(), + ] + .into_boxed_slice(), + len: 3, + validity: Mask::AllTrue(3), + }; + + assert!(struct_vec.try_into_mut().is_err()); + drop(bool_field); // Keep original alive to maintain shared ownership + } + + #[test] + fn test_split_unsplit_values() { + let mut struct_vec = StructVectorMut::try_new( + vec![ + NullVector::new(8).try_into_mut().unwrap().into(), + BoolVectorMut::from_iter([true, false, true, false, true, false, true, false]) + .into(), + PVectorMut::::from_iter([10, 20, 30, 40, 50, 60, 70, 80]).into(), + ], + MaskMut::new_true(8), + ) + .unwrap(); + + let second_half = struct_vec.split_off(5); + assert_eq!(struct_vec.len(), 5); + assert_eq!(second_half.len(), 3); + + // Verify values after split. + if let VectorMut::Bool(bool_vec) = struct_vec.fields[1].clone() { + let values: Vec<_> = bool_vec.into_iter().take(5).map(|v| v.unwrap()).collect(); + assert_eq!(values, vec![true, false, true, false, true]); + } + + if let VectorMut::Bool(bool_vec) = second_half.fields[1].clone() { + let values: Vec<_> = bool_vec.into_iter().map(|v| v.unwrap()).collect(); + assert_eq!(values, vec![false, true, false]); + } + + // Unsplit and verify. + struct_vec.unsplit(second_half); + assert_eq!(struct_vec.len(), 8); + + if let VectorMut::Bool(bool_vec) = struct_vec.fields[1].clone() { + let values: Vec<_> = bool_vec.into_iter().map(|v| v.unwrap()).collect(); + assert_eq!( + values, + vec![true, false, true, false, true, false, true, false] + ); + } + } + + #[test] + fn test_extend_and_append_nulls() { + let mut struct_vec = StructVectorMut::try_new( + vec![ + NullVector::new(3).try_into_mut().unwrap().into(), + BoolVectorMut::from_iter([true, false, true]).into(), + PVectorMut::::from_iter([10, 20, 30]).into(), + ], + MaskMut::new_true(3), + ) + .unwrap(); + + // Test extend. + let to_extend = StructVector { + fields: vec![ + NullVector::new(2).into(), + BoolVectorMut::from_iter([false, true]).freeze().into(), + PVectorMut::::from_iter([40, 50]).freeze().into(), + ] + .into_boxed_slice(), + len: 2, + validity: Mask::AllTrue(2), + }; + + struct_vec.extend_from_vector(&to_extend); + assert_eq!(struct_vec.len(), 5); + + // Test append_nulls. + struct_vec.append_nulls(2); + assert_eq!(struct_vec.len(), 7); + + // Verify final values include nulls. + if let VectorMut::Bool(bool_vec) = struct_vec.fields[1].clone() { + let values: Vec<_> = bool_vec.into_iter().collect(); + assert_eq!( + values, + vec![ + Some(true), + Some(false), + Some(true), + Some(false), + Some(true), + None, + None + ] + ); + } + } + + #[test] + fn test_roundtrip() { + let original_bool = vec![Some(true), None, Some(false), Some(true)]; + let original_int = vec![Some(100i32), None, Some(200), Some(300)]; + + let struct_vec = StructVectorMut::try_new( + vec![ + NullVector::new(4).try_into_mut().unwrap().into(), + BoolVectorMut::from_iter(original_bool.clone()).into(), + PVectorMut::::from_iter(original_int.clone()).into(), + ], + MaskMut::new_true(4), + ) + .unwrap(); + + // Verify roundtrip preserves nulls. + if let VectorMut::Bool(bool_vec) = struct_vec.fields[1].clone() { + let roundtrip: Vec<_> = bool_vec.into_iter().collect(); + assert_eq!(roundtrip, original_bool); + } + + if let VectorMut::Primitive(prim_vec) = struct_vec.fields[2].clone() { + let roundtrip: Vec<_> = prim_vec.into_i32().into_iter().collect(); + assert_eq!(roundtrip, original_int); + } + } + + #[test] + fn test_nested_struct() { + let inner1 = StructVectorMut::try_new( + vec![ + NullVector::new(4).try_into_mut().unwrap().into(), + BoolVectorMut::from_iter([true, false, true, false]).into(), + ], + MaskMut::new_true(4), + ) + .unwrap() + .into(); + + let inner2 = StructVectorMut::try_new( + vec![PVectorMut::::from_iter([100, 200, 300, 400]).into()], + MaskMut::new_true(4), + ) + .unwrap() + .into(); + + let mut outer = + StructVectorMut::try_new(vec![inner1, inner2], MaskMut::new_true(4)).unwrap(); + + let second = outer.split_off(2); + assert_eq!(outer.len(), 2); + assert_eq!(second.len(), 2); + + outer.unsplit(second); + assert_eq!(outer.len(), 4); + assert!(matches!(outer.fields[0], VectorMut::Struct(_))); + } +} diff --git a/vortex-vector/src/vector.rs b/vortex-vector/src/vector.rs index 0fb67cd29b0..ef3dc8c52ad 100644 --- a/vortex-vector/src/vector.rs +++ b/vortex-vector/src/vector.rs @@ -9,7 +9,7 @@ use vortex_error::vortex_panic; use crate::macros::match_each_vector; -use crate::{BoolVector, NullVector, PrimitiveVector, VectorMut, VectorOps}; +use crate::{BoolVector, NullVector, PrimitiveVector, StructVector, VectorMut, VectorOps}; /// An enum over all kinds of immutable vectors, which represent fully decompressed (canonical) /// array data. @@ -41,8 +41,8 @@ pub enum Vector { // List(ListVector), // FixedList // FixedList(FixedListVector), - // Struct - // Struct(StructVector), + /// Vectors of Struct elements. + Struct(StructVector), // Extension // Extension(ExtensionVector), } @@ -69,7 +69,39 @@ impl VectorOps for Vector { } impl Vector { - /// Consumes `self` and returns the inner `NullVector` if `self` is of that variant. + /// Returns a reference to the inner [`NullVector`] if `self` is of that variant. + pub fn as_null(&self) -> &NullVector { + if let Vector::Null(v) = self { + return v; + } + vortex_panic!("Expected NullVector, got {self:?}"); + } + + /// Returns a reference to the inner [`BoolVector`] if `self` is of that variant. + pub fn as_bool(&self) -> &BoolVector { + if let Vector::Bool(v) = self { + return v; + } + vortex_panic!("Expected BoolVector, got {self:?}"); + } + + /// Returns a reference to the inner [`PrimitiveVector`] if `self` is of that variant. + pub fn as_primitive(&self) -> &PrimitiveVector { + if let Vector::Primitive(v) = self { + return v; + } + vortex_panic!("Expected PrimitiveVector, got {self:?}"); + } + + /// Returns a reference to the inner [`StructVector`] if `self` is of that variant. + pub fn as_struct(&self) -> &StructVector { + if let Vector::Struct(v) = self { + return v; + } + vortex_panic!("Expected StructVector, got {self:?}"); + } + + /// Consumes `self` and returns the inner [`NullVector`] if `self` is of that variant. pub fn into_null(self) -> NullVector { if let Vector::Null(v) = self { return v; @@ -77,7 +109,7 @@ impl Vector { vortex_panic!("Expected NullVector, got {self:?}"); } - /// Consumes `self` and returns the inner `BoolVector` if `self` is of that variant. + /// Consumes `self` and returns the inner [`BoolVector`] if `self` is of that variant. pub fn into_bool(self) -> BoolVector { if let Vector::Bool(v) = self { return v; @@ -85,11 +117,19 @@ impl Vector { vortex_panic!("Expected BoolVector, got {self:?}"); } - /// Consumes `self` and returns the inner `PrimitiveVector` if `self` is of that variant. + /// Consumes `self` and returns the inner [`PrimitiveVector`] if `self` is of that variant. pub fn into_primitive(self) -> PrimitiveVector { if let Vector::Primitive(v) = self { return v; } vortex_panic!("Expected PrimitiveVector, got {self:?}"); } + + /// Consumes `self` and returns the inner [`StructVector`] if `self` is of that variant. + pub fn into_struct(self) -> StructVector { + if let Vector::Struct(v) = self { + return v; + } + vortex_panic!("Expected StructVector, got {self:?}"); + } } diff --git a/vortex-vector/src/vector_mut.rs b/vortex-vector/src/vector_mut.rs index e2ab22fd70d..eac51a7138c 100644 --- a/vortex-vector/src/vector_mut.rs +++ b/vortex-vector/src/vector_mut.rs @@ -10,7 +10,9 @@ use vortex_dtype::DType; use vortex_error::vortex_panic; use super::macros::match_each_vector_mut; -use crate::{BoolVectorMut, NullVectorMut, PrimitiveVectorMut, Vector, VectorMutOps}; +use crate::{ + BoolVectorMut, NullVectorMut, PrimitiveVectorMut, StructVectorMut, Vector, VectorMutOps, +}; /// An enum over all kinds of mutable vectors, which represent fully decompressed (canonical) array /// data. @@ -23,15 +25,17 @@ use crate::{BoolVectorMut, NullVectorMut, PrimitiveVectorMut, Vector, VectorMutO /// [`VectorOps`](crate::VectorOps) trait. #[derive(Debug, Clone)] pub enum VectorMut { - /// Null mutable vectors. + /// Mutable Null vectors. Null(NullVectorMut), - /// Boolean mutable vectors. + /// Mutable Boolean vectors. Bool(BoolVectorMut), - /// Primitive mutable vectors. + /// Mutable Primitive vectors. /// /// Note that [`PrimitiveVectorMut`] is an enum over the different possible (generic) /// [`PVectorMut`](crate::PVectorMut)s. See the documentation for more information. Primitive(PrimitiveVectorMut), + /// Mutable vectors of Struct elements. + Struct(StructVectorMut), } impl VectorMut { @@ -95,7 +99,39 @@ impl VectorMutOps for VectorMut { } impl VectorMut { - /// Convert into NullVectorMut, panicking if the type does not match. + /// Returns a reference to the inner [`NullVectorMut`] if `self` is of that variant. + pub fn as_null(&self) -> &NullVectorMut { + if let VectorMut::Null(v) = self { + return v; + } + vortex_panic!("Expected NullVectorMut, got {self:?}"); + } + + /// Returns a reference to the inner [`BoolVectorMut`] if `self` is of that variant. + pub fn as_bool(&self) -> &BoolVectorMut { + if let VectorMut::Bool(v) = self { + return v; + } + vortex_panic!("Expected BoolVectorMut, got {self:?}"); + } + + /// Returns a reference to the inner [`PrimitiveVectorMut`] if `self` is of that variant. + pub fn as_primitive(&self) -> &PrimitiveVectorMut { + if let VectorMut::Primitive(v) = self { + return v; + } + vortex_panic!("Expected PrimitiveVectorMut, got {self:?}"); + } + + /// Returns a reference to the inner [`StructVectorMut`] if `self` is of that variant. + pub fn as_struct(&self) -> &StructVectorMut { + if let VectorMut::Struct(v) = self { + return v; + } + vortex_panic!("Expected StructVectorMut, got {self:?}"); + } + + /// Consumes `self` and returns the inner [`NullVectorMut`] if `self` is of that variant. pub fn into_null(self) -> NullVectorMut { if let VectorMut::Null(v) = self { return v; @@ -103,7 +139,7 @@ impl VectorMut { vortex_panic!("Expected NullVectorMut, got {self:?}"); } - /// Convert into BoolVectorMut, panicking if the type does not match. + /// Consumes `self` and returns the inner [`BoolVectorMut`] if `self` is of that variant. pub fn into_bool(self) -> BoolVectorMut { if let VectorMut::Bool(v) = self { return v; @@ -111,13 +147,21 @@ impl VectorMut { vortex_panic!("Expected BoolVectorMut, got {self:?}"); } - /// Convert into PrimitiveVectorMut, panicking if the type does not match. + /// Consumes `self` and returns the inner [`PrimitiveVectorMut`] if `self` is of that variant. pub fn into_primitive(self) -> PrimitiveVectorMut { if let VectorMut::Primitive(v) = self { return v; } vortex_panic!("Expected PrimitiveVectorMut, got {self:?}"); } + + /// Consumes `self` and returns the inner [`StructVectorMut`] if `self` is of that variant. + pub fn into_struct(self) -> StructVectorMut { + if let VectorMut::Struct(v) = self { + return v; + } + vortex_panic!("Expected StructVectorMut, got {self:?}"); + } } #[cfg(test)] From bfef2bfa1499411aeeadcdbf806683c19bba1695 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 28 Oct 2025 10:00:06 -0400 Subject: [PATCH 2/5] address comments and fix bugs Signed-off-by: Connor Tsui --- .../src/arrays/bool/vtable/operator.rs | 2 +- .../src/arrays/primitive/vtable/operator.rs | 2 +- vortex-compute/src/filter/bool.rs | 8 +- vortex-vector/src/bool/vector.rs | 17 +- vortex-vector/src/bool/vector_mut.rs | 15 +- vortex-vector/src/primitive/generic.rs | 17 +- vortex-vector/src/primitive/generic_mut.rs | 15 +- vortex-vector/src/struct_/vector.rs | 74 +++--- vortex-vector/src/struct_/vector_mut.rs | 224 ++++++++++++------ vortex-vector/src/vector_mut.rs | 21 ++ 10 files changed, 236 insertions(+), 159 deletions(-) diff --git a/vortex-array/src/arrays/bool/vtable/operator.rs b/vortex-array/src/arrays/bool/vtable/operator.rs index bfefe6605f7..5a9d0a70d14 100644 --- a/vortex-array/src/arrays/bool/vtable/operator.rs +++ b/vortex-array/src/arrays/bool/vtable/operator.rs @@ -27,7 +27,7 @@ impl OperatorVTable for BoolVTable { // Note that validity already has the mask applied so we only need to apply it to bits. let bits = bits.filter(&mask); - Ok(BoolVector::new(bits, validity).into()) + Ok(BoolVector::try_new(bits, validity)?.into()) })) } } diff --git a/vortex-array/src/arrays/primitive/vtable/operator.rs b/vortex-array/src/arrays/primitive/vtable/operator.rs index 898850b9e8d..2b4937ba276 100644 --- a/vortex-array/src/arrays/primitive/vtable/operator.rs +++ b/vortex-array/src/arrays/primitive/vtable/operator.rs @@ -30,7 +30,7 @@ impl OperatorVTable for PrimitiveVTable { // the elements. let elements = elements.filter(&mask); - Ok(PVector::new(elements, validity).into()) + Ok(PVector::try_new(elements, validity)?.into()) })) }) } diff --git a/vortex-compute/src/filter/bool.rs b/vortex-compute/src/filter/bool.rs index 84c626d9953..e0057147184 100644 --- a/vortex-compute/src/filter/bool.rs +++ b/vortex-compute/src/filter/bool.rs @@ -8,6 +8,12 @@ use crate::filter::Filter; impl Filter for BoolVector { fn filter(&self, mask: &Mask) -> Self { - Self::new(self.bits().filter(mask), self.validity().filter(mask)) + let filtered_bits = self.bits().filter(mask); + let filtered_validity = self.validity().filter(mask); + + // SAFETY: We filter the bits and validity with the same mask, and since they came from an + // existing and valid `BoolVector`, we know that the filtered output must have the same + // length. + unsafe { Self::new_unchecked(filtered_bits, filtered_validity) } } } diff --git a/vortex-vector/src/bool/vector.rs b/vortex-vector/src/bool/vector.rs index f56647638f5..9c0a14cfc0f 100644 --- a/vortex-vector/src/bool/vector.rs +++ b/vortex-vector/src/bool/vector.rs @@ -30,8 +30,7 @@ impl BoolVector { /// /// Panics if the length of the validity mask does not match the length of the bits. pub fn new(bits: BitBuffer, validity: Mask) -> Self { - Self::try_new(bits, validity) - .vortex_expect("`BoolVector` validity mask must have the same length as bits") + Self::try_new(bits, validity).vortex_expect("Failed to create `BoolVector`") } /// Tries to create a new [`BoolVector`] from the given bits and validity mask. @@ -53,14 +52,12 @@ impl BoolVector { /// # Safety /// /// The caller must ensure that the validity mask has the same length as the bits. - pub fn new_unchecked(bits: BitBuffer, validity: Mask) -> Self { - debug_assert_eq!( - validity.len(), - bits.len(), - "`BoolVector` validity mask must have the same length as bits" - ); - - Self { bits, validity } + pub unsafe fn new_unchecked(bits: BitBuffer, validity: Mask) -> Self { + if cfg!(debug_assertions) { + Self::new(bits, validity) + } else { + Self { bits, validity } + } } /// Decomposes the boolean vector into its constituent parts (bit buffer and validity). diff --git a/vortex-vector/src/bool/vector_mut.rs b/vortex-vector/src/bool/vector_mut.rs index a9fc47d2d43..fcd5e2ce46e 100644 --- a/vortex-vector/src/bool/vector_mut.rs +++ b/vortex-vector/src/bool/vector_mut.rs @@ -77,8 +77,7 @@ impl BoolVectorMut { /// /// Panics if the length of the validity mask does not match the length of the bits. pub fn new(bits: BitBufferMut, validity: MaskMut) -> Self { - Self::try_new(bits, validity) - .vortex_expect("`BoolVector` validity mask must have the same length as bits") + Self::try_new(bits, validity).vortex_expect("Failed to create `BoolVectorMut`") } /// Tries to create a new [`BoolVectorMut`] from the given bits and validity mask. @@ -104,13 +103,11 @@ impl BoolVectorMut { /// Ideally, they are taken from `into_parts`, mutated in a way that doesn't re-allocate, and /// then passed back to this function. pub unsafe fn new_unchecked(bits: BitBufferMut, validity: MaskMut) -> Self { - debug_assert_eq!( - bits.len(), - validity.len(), - "`BoolVector` validity mask must have the same length as bits" - ); - - Self { bits, validity } + if cfg!(debug_assertions) { + Self::new(bits, validity) + } else { + Self { bits, validity } + } } /// Creates a new mutable boolean vector with the given `capacity`. diff --git a/vortex-vector/src/primitive/generic.rs b/vortex-vector/src/primitive/generic.rs index 473e93cd731..50d14b69a09 100644 --- a/vortex-vector/src/primitive/generic.rs +++ b/vortex-vector/src/primitive/generic.rs @@ -34,8 +34,7 @@ impl PVector { /// /// Panics if the length of the validity mask does not match the length of the elements buffer. pub fn new(elements: Buffer, validity: Mask) -> Self { - Self::try_new(elements, validity) - .vortex_expect("`PVector` validity mask must have the same length as elements") + Self::try_new(elements, validity).vortex_expect("Failed to create `PVector`") } /// Tries to create a new [`PVector`] from the given elements buffer and validity mask. @@ -59,14 +58,12 @@ impl PVector { /// # Safety /// /// The caller must ensure that the validity mask has the same length as the elements buffer. - pub fn new_unchecked(elements: Buffer, validity: Mask) -> Self { - debug_assert_eq!( - validity.len(), - elements.len(), - "`PVector` validity mask must have the same length as elements" - ); - - Self { elements, validity } + pub unsafe fn new_unchecked(elements: Buffer, validity: Mask) -> Self { + if cfg!(debug_assertions) { + Self::new(elements, validity) + } else { + Self { elements, validity } + } } /// Decomposes the primitive vector into its constituent parts (buffer and validity). diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs index 23275689b73..860a88c0db3 100644 --- a/vortex-vector/src/primitive/generic_mut.rs +++ b/vortex-vector/src/primitive/generic_mut.rs @@ -114,8 +114,7 @@ impl PVectorMut { /// /// Panics if the length of the validity mask does not match the length of the elements buffer. pub fn new(elements: BufferMut, validity: MaskMut) -> Self { - Self::try_new(elements, validity) - .vortex_expect("`PVectorMut` validity mask must have the same length as elements") + Self::try_new(elements, validity).vortex_expect("Failed to create `PVectorMut`") } /// Tries to create a new [`PVectorMut`] from the given elements buffer and validity mask. @@ -143,13 +142,11 @@ impl PVectorMut { /// Ideally, they are taken from `into_parts`, mutated in a way that doesn't re-allocate, and /// then passed back to this function. pub unsafe fn new_unchecked(elements: BufferMut, validity: MaskMut) -> Self { - debug_assert_eq!( - elements.len(), - validity.len(), - "`PVectorMut` validity mask must have the same length as elements" - ); - - Self { elements, validity } + if cfg!(debug_assertions) { + Self::new(elements, validity) + } else { + Self { elements, validity } + } } /// Create a new mutable primitive vector with the given capacity. diff --git a/vortex-vector/src/struct_/vector.rs b/vortex-vector/src/struct_/vector.rs index af24055af0a..ac0f91d5ccc 100644 --- a/vortex-vector/src/struct_/vector.rs +++ b/vortex-vector/src/struct_/vector.rs @@ -8,7 +8,7 @@ use vortex_mask::Mask; use crate::{StructVectorMut, Vector, VectorMutOps, VectorOps}; -/// An immutable vector of boolean values. +/// An immutable vector of struct values. /// /// `StructVector` can be considered a borrowed / frozen version of [`StructVectorMut`], which is /// created via the [`freeze`](crate::VectorMutOps::freeze) method. @@ -39,9 +39,7 @@ impl StructVector { /// - Any field vector has a length that does not match the length of other fields. /// - The validity mask length does not match the field length. pub fn new(fields: Box<[Vector]>, validity: Mask) -> Self { - Self::try_new(fields, validity).vortex_expect( - "`StructVector` fields must have matching length and validity constraints", - ) + Self::try_new(fields, validity).vortex_expect("Failed to create `StructVector`") } /// Tries to create a new [`StructVector`] from the given fields and validity mask. @@ -59,7 +57,24 @@ impl StructVector { fields[0].len() }; - Self::validate(&fields, len, &validity)?; + // Validate that the validity mask has the correct length. + vortex_ensure!( + validity.len() == len, + "Validity mask length ({}) does not match expected length ({})", + validity.len(), + len + ); + + // Validate that all fields have the correct length. + for (i, field) in fields.iter().enumerate() { + vortex_ensure!( + field.len() == len, + "Field {} has length {} but expected length {}", + i, + field.len(), + len + ); + } Ok(Self { fields, @@ -83,47 +98,15 @@ impl StructVector { fields[0].len() }; - debug_assert!( - Self::validate(&fields, len, &validity).is_ok(), - "`StructVector` fields must have matching length and validity constraints" - ); - - Self { - fields, - validity, - len, - } - } - - /// Validates the fields and validity mask for a [`StructVector`]. - /// - /// # Errors - /// - /// Returns an error if: - /// - /// - Any field vector has a length that does not match the length of other fields. - /// - The validity mask length does not match the field length. - fn validate(fields: &[Vector], len: usize, validity: &Mask) -> VortexResult<()> { - // Validate that the validity mask has the correct length. - vortex_ensure!( - validity.len() == len, - "Validity mask length ({}) does not match expected length ({})", - validity.len(), - len - ); - - // Validate that all fields have the correct length. - for (i, field) in fields.iter().enumerate() { - vortex_ensure!( - field.len() == len, - "Field {} has length {} but expected length {}", - i, - field.len(), - len - ); + if cfg!(debug_assertions) { + Self::new(fields, validity) + } else { + Self { + fields, + validity, + len, + } } - - Ok(()) } /// Decomposes the struct vector into its constituent parts (fields, validity, and length). @@ -172,7 +155,6 @@ impl VectorOps for StructVector { Err(immutable_field) => { // We were unable to take ownership, so we must re-freeze all of the fields // vectors we took ownership over and reconstruct the original `StructVector`. - let mut all_fields: Vec = mutable_fields .into_iter() .map(|mut_field| mut_field.freeze()) diff --git a/vortex-vector/src/struct_/vector_mut.rs b/vortex-vector/src/struct_/vector_mut.rs index 3a1c40f76b3..d4afdc88d91 100644 --- a/vortex-vector/src/struct_/vector_mut.rs +++ b/vortex-vector/src/struct_/vector_mut.rs @@ -18,12 +18,14 @@ use crate::{StructVector, Vector, VectorMut, VectorMutOps, VectorOps}; /// ## Creating a [`StructVector`] and [`StructVectorMut`] /// /// ``` -/// use vortex_vector::{StructVectorMut, VectorMut, BoolVectorMut, PVectorMut, NullVector}; +/// use vortex_vector::{ +/// BoolVectorMut, NullVectorMut, PVectorMut, StructVectorMut, VectorMut, VectorMutOps, +/// }; /// use vortex_mask::MaskMut; /// /// // Create a struct with three fields: nulls, booleans, and integers. /// let fields = vec![ -/// NullVector::new(3).try_into_mut().unwrap().into(), +/// NullVectorMut::new(3).into(), /// BoolVectorMut::from_iter([true, false, true]).into(), /// PVectorMut::::from_iter([10, 20, 30]).into(), /// ]; @@ -38,11 +40,13 @@ use crate::{StructVector, Vector, VectorMut, VectorMutOps, VectorOps}; /// [`unsplit()`]: VectorMutOps::unsplit /// /// ``` -/// use vortex_vector::{StructVectorMut, VectorMut, BoolVectorMut, PVectorMut, NullVector}; +/// use vortex_vector::{ +/// BoolVectorMut, NullVectorMut, PVectorMut, StructVectorMut, VectorMut, VectorMutOps, +/// }; /// use vortex_mask::MaskMut; /// /// let fields = vec![ -/// NullVector::new(6).try_into_mut().unwrap().into(), +/// NullVectorMut::new(6).into(), /// PVectorMut::::from_iter([1, 2, 3, 4, 5, 6]).into(), /// ]; /// @@ -62,12 +66,14 @@ use crate::{StructVector, Vector, VectorMut, VectorMutOps, VectorOps}; /// ## Accessing field values /// /// ``` -/// use vortex_vector::{StructVectorMut, VectorMut, BoolVectorMut, PVectorMut, NullVector}; +/// use vortex_vector::{ +/// BoolVectorMut, NullVectorMut, PVectorMut, StructVectorMut, VectorMut, VectorMutOps, +/// }; /// use vortex_mask::MaskMut; /// use vortex_dtype::PTypeDowncast; /// /// let fields = vec![ -/// NullVector::new(3).try_into_mut().unwrap().into(), +/// NullVectorMut::new(3).into(), /// BoolVectorMut::from_iter([true, false, true]).into(), /// PVectorMut::::from_iter([10, 20, 30]).into(), /// ]; @@ -75,13 +81,13 @@ use crate::{StructVector, Vector, VectorMut, VectorMutOps, VectorOps}; /// let struct_vec = StructVectorMut::new(fields, MaskMut::new_true(3)); /// /// // Access the boolean field vector (field index 1). -/// if let VectorMut::Bool(bool_vec) = struct_vec.fields[1].clone() { +/// if let VectorMut::Bool(bool_vec) = struct_vec.fields()[1].clone() { /// let values: Vec<_> = bool_vec.into_iter().map(|v| v.unwrap()).collect(); /// assert_eq!(values, vec![true, false, true]); /// } /// /// // Access the integer field column (field index 2). -/// if let VectorMut::Primitive(prim_vec) = struct_vec.fields[2].clone() { +/// if let VectorMut::Primitive(prim_vec) = struct_vec.fields()[2].clone() { /// let values: Vec<_> = prim_vec.into_i32().into_iter().map(|v| v.unwrap()).collect(); /// assert_eq!(values, vec![10, 20, 30]); /// } @@ -114,9 +120,7 @@ impl StructVectorMut { /// - Any field vector has a length that does not match the length of other fields. /// - The validity mask length does not match the field length. pub fn new(fields: Vec, validity: MaskMut) -> Self { - Self::try_new(fields, validity).vortex_expect( - "`StructVectorMut` fields must have matching length and validity constraints", - ) + Self::try_new(fields, validity).vortex_expect("Failed to create `StructVectorMut`") } /// Tries to create a new [`StructVectorMut`] with the given fields and validity mask. @@ -134,7 +138,24 @@ impl StructVectorMut { fields[0].len() }; - Self::validate(&fields, len, &validity)?; + // Validate that the validity mask has the correct length. + vortex_ensure!( + validity.len() == len, + "Validity mask length ({}) does not match expected length ({})", + validity.len(), + len + ); + + // Validate that all fields have the correct length. + for (i, field) in fields.iter().enumerate() { + vortex_ensure!( + field.len() == len, + "Field {} has length {} but expected length {}", + i, + field.len(), + len + ); + } Ok(Self { fields, @@ -159,47 +180,15 @@ impl StructVectorMut { fields[0].len() }; - debug_assert!( - Self::validate(&fields, len, &validity).is_ok(), - "`StructVectorMut` fields must have matching length and validity constraints" - ); - - Self { - fields, - validity, - len, - } - } - - /// Validates the fields and validity mask for a [`StructVectorMut`]. - /// - /// # Errors - /// - /// Returns an error if: - /// - /// - Any field vector has a length that does not match `len`. - /// - The validity mask length does not match `len`. - fn validate(fields: &[VectorMut], len: usize, validity: &MaskMut) -> VortexResult<()> { - // Validate that the validity mask has the correct length. - vortex_ensure!( - validity.len() == len, - "Validity mask length ({}) does not match expected length ({})", - validity.len(), - len - ); - - // Validate that all fields have the correct length. - for (i, field) in fields.iter().enumerate() { - vortex_ensure!( - field.len() == len, - "Field {} has length {} but expected length {}", - i, - field.len(), - len - ); + if cfg!(debug_assertions) { + Self::new(fields, validity) + } else { + Self { + fields, + validity, + len, + } } - - Ok(()) } /// Decomposes the struct vector into its constituent parts (fields, validity, and length). @@ -208,8 +197,8 @@ impl StructVectorMut { } /// Returns the fields of the `StructVectorMut`, each stored column-wise as a [`VectorMut`]. - pub fn fields(&mut self) -> &[VectorMut] { - self.fields.as_mut_slice() + pub fn fields(&self) -> &[VectorMut] { + self.fields.as_slice() } /// Finds the minimum capacity of all field vectors. @@ -218,17 +207,14 @@ impl StructVectorMut { /// least one of the child field vectors. /// /// If there are no fields, this returns the length of the vector. + /// + /// Note that this takes time in `O(f)`, where `f` is the number of fields. pub fn minimum_capacity(&self) -> usize { - if self.fields.is_empty() { - return self.len; - } - - let mut minimum_capacity = usize::MAX; - for field in &self.fields { - minimum_capacity = minimum_capacity.min(field.capacity()); - } - - minimum_capacity + self.fields + .iter() + .map(|field| field.capacity()) + .min() + .unwrap_or(self.len) } } @@ -239,13 +225,8 @@ impl VectorMutOps for StructVectorMut { self.len } - /// Note that this returns the length of the [`StructVectorMut`]. - /// - /// If you want the actual capacity of the struct vector, use the [`minimum_capacity()`] method. - /// - /// [`minimum_capacity()`]: Self::minimum_capacity fn capacity(&self) -> usize { - self.len + self.minimum_capacity() } fn reserve(&mut self, additional: usize) { @@ -267,7 +248,7 @@ impl VectorMutOps for StructVectorMut { assert_eq!( self.fields.len(), other.fields().len(), - "Cannot extend StructVectorMut: field count mismatch ({} vs {})", + "Cannot extend StructVectorMut: field count mismatch (self had {} but other had {})", self.fields.len(), other.fields().len() ); @@ -377,7 +358,7 @@ mod tests { use vortex_mask::{Mask, MaskMut}; use super::*; - use crate::{BoolVectorMut, NullVector, PVectorMut, VectorMut}; + use crate::{BoolVectorMut, NullVector, NullVectorMut, PVectorMut, VectorMut}; #[test] fn test_empty_fields() { @@ -593,4 +574,103 @@ mod tests { assert_eq!(outer.len(), 4); assert!(matches!(outer.fields[0], VectorMut::Struct(_))); } + + #[test] + fn test_reserve() { + // Test that reserve increases capacity for all fields correctly. + let mut struct_vec = StructVectorMut::try_new( + vec![ + NullVectorMut::new(3).into(), + BoolVectorMut::from_iter([true, false, true]).into(), + PVectorMut::::from_iter([10, 20, 30]).into(), + ], + MaskMut::new_true(3), + ) + .unwrap(); + + let initial_capacity = struct_vec.capacity(); + assert_eq!(struct_vec.len(), 3); + + // Reserve additional capacity. + struct_vec.reserve(50); + + // Capacity should now be at least len + 50. + assert!(struct_vec.capacity() >= 3 + 50); + assert!(struct_vec.capacity() >= initial_capacity + 50); + + // Verify minimum_capacity returns the smallest field capacity. + let min_cap = struct_vec.minimum_capacity(); + for field in struct_vec.fields() { + assert!(field.capacity() >= min_cap); + } + + // Test reserve on an empty struct. + let mut empty_struct = StructVectorMut::try_new( + vec![ + NullVectorMut::new(0).into(), + BoolVectorMut::with_capacity(0).into(), + ], + MaskMut::new_true(0), + ) + .unwrap(); + + empty_struct.reserve(100); + assert!(empty_struct.capacity() >= 100); + } + + #[test] + fn test_freeze_and_new_unchecked() { + // Test new_unchecked creates a valid struct, and freeze preserves data correctly. + let fields = vec![ + NullVectorMut::new(4).into(), + BoolVectorMut::from_iter([Some(true), None, Some(false), Some(true)]).into(), + PVectorMut::::from_iter([Some(100), Some(200), None, Some(400)]).into(), + ]; + + let validity = Mask::from_iter([true, false, true, true]) + .try_into_mut() + .unwrap(); + + // Use new_unchecked to create the struct. + // SAFETY: All fields have length 4 and validity has length 4. + let struct_vec = unsafe { StructVectorMut::new_unchecked(fields, validity) }; + + assert_eq!(struct_vec.len(), 4); + assert_eq!(struct_vec.fields().len(), 3); + + // Freeze the struct and verify data preservation. + let frozen = struct_vec.freeze(); + + assert_eq!(frozen.len(), 4); + assert_eq!(frozen.fields().len(), 3); + + // Verify validity is preserved (only indices 0, 2, 3 are valid at the struct level). + assert_eq!(frozen.validity().true_count(), 3); + + // Verify that `try_into_mut` fails when data isn't owned. + { + let cloned_vector = frozen.fields()[1].clone(); + cloned_vector.try_into_mut().unwrap_err(); + } + + // Verify field data is preserved. + let mut fields = frozen.into_parts().0.into_vec(); + + if let Vector::Primitive(prim_vec) = fields.pop().unwrap() { + let prim_vec_mut = prim_vec.try_into_mut().unwrap(); + let values: Vec<_> = prim_vec_mut.into_i32().into_iter().collect(); + assert_eq!(values, vec![Some(100), Some(200), None, Some(400)]); + } else { + panic!("Expected primitive vector"); + } + + if let Vector::Bool(bool_vec) = fields.pop().unwrap() { + let bool_vec_mut = bool_vec.try_into_mut().unwrap(); + let values: Vec<_> = bool_vec_mut.into_iter().collect(); + // Note: struct-level validity doesn't affect field-level data, only the interpretation. + assert_eq!(values, vec![Some(true), None, Some(false), Some(true)]); + } else { + panic!("Expected bool vector"); + } + } } diff --git a/vortex-vector/src/vector_mut.rs b/vortex-vector/src/vector_mut.rs index eac51a7138c..5fb5697c195 100644 --- a/vortex-vector/src/vector_mut.rs +++ b/vortex-vector/src/vector_mut.rs @@ -8,6 +8,7 @@ use vortex_dtype::DType; use vortex_error::vortex_panic; +use vortex_mask::MaskMut; use super::macros::match_each_vector_mut; use crate::{ @@ -47,6 +48,24 @@ impl VectorMut { DType::Primitive(ptype, _) => { PrimitiveVectorMut::with_capacity(*ptype, capacity).into() } + DType::Struct(struct_fields, _) => { + let fields: Vec = struct_fields + .fields() + .map(|dtype| Self::with_capacity(capacity, &dtype)) + .collect(); + let validity = MaskMut::with_capacity(capacity); + + #[cfg(debug_assertions)] + { + for field in &fields { + debug_assert_eq!(field.len(), 0); + } + debug_assert_eq!(validity.len(), 0); + } + + // SAFETY: All fields and validity have length 0, so they all have the same length. + Self::Struct(unsafe { StructVectorMut::new_unchecked(fields, validity) }) + } _ => vortex_panic!("Unsupported dtype for VectorMut"), } } @@ -72,6 +91,7 @@ impl VectorMutOps for VectorMut { (VectorMut::Null(a), Vector::Null(b)) => a.extend_from_vector(b), (VectorMut::Bool(a), Vector::Bool(b)) => a.extend_from_vector(b), (VectorMut::Primitive(a), Vector::Primitive(b)) => a.extend_from_vector(b), + (VectorMut::Struct(a), Vector::Struct(b)) => a.extend_from_vector(b), _ => vortex_panic!("Mismatched vector types"), } } @@ -93,6 +113,7 @@ impl VectorMutOps for VectorMut { (VectorMut::Null(a), VectorMut::Null(b)) => a.unsplit(b), (VectorMut::Bool(a), VectorMut::Bool(b)) => a.unsplit(b), (VectorMut::Primitive(a), VectorMut::Primitive(b)) => a.unsplit(b), + (VectorMut::Struct(a), VectorMut::Struct(b)) => a.unsplit(b), _ => vortex_panic!("Mismatched vector types"), } } From 20c1cdaa367b9cef76b84750be2a5519bd7750ae Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 28 Oct 2025 11:24:02 -0400 Subject: [PATCH 3/5] change immutable struct fields to `Box` Signed-off-by: Connor Tsui --- vortex-vector/src/struct_/vector.rs | 2 +- vortex-vector/src/struct_/vector_mut.rs | 55 ++++++++++++------------- vortex-vector/src/vector_mut.rs | 4 +- 3 files changed, 30 insertions(+), 31 deletions(-) diff --git a/vortex-vector/src/struct_/vector.rs b/vortex-vector/src/struct_/vector.rs index ac0f91d5ccc..7360595d471 100644 --- a/vortex-vector/src/struct_/vector.rs +++ b/vortex-vector/src/struct_/vector.rs @@ -173,7 +173,7 @@ impl VectorOps for StructVector { } Ok(StructVectorMut { - fields: mutable_fields, + fields: mutable_fields.into_boxed_slice(), len: self.len, validity, }) diff --git a/vortex-vector/src/struct_/vector_mut.rs b/vortex-vector/src/struct_/vector_mut.rs index d4afdc88d91..c427c258a6a 100644 --- a/vortex-vector/src/struct_/vector_mut.rs +++ b/vortex-vector/src/struct_/vector_mut.rs @@ -94,11 +94,8 @@ use crate::{StructVector, Vector, VectorMut, VectorMutOps, VectorOps}; /// ``` #[derive(Debug, Clone)] pub struct StructVectorMut { - /// The fields of the `StructVectorMut`, each stored column-wise as a [`VectorMut`]. - /// - /// We store this as a mutable vector instead of a fixed-sized type since vectors do not have an - /// associated [`DType`](vortex_dtype::DType), thus users can add field columns if they need. - pub(super) fields: Vec, + /// The (owned) fields of the `StructVectorMut`, each stored column-wise as a [`VectorMut`]. + pub(super) fields: Box<[VectorMut]>, /// The validity mask (where `true` represents an element is **not** null). pub(super) validity: MaskMut, @@ -119,7 +116,7 @@ impl StructVectorMut { /// /// - Any field vector has a length that does not match the length of other fields. /// - The validity mask length does not match the field length. - pub fn new(fields: Vec, validity: MaskMut) -> Self { + pub fn new(fields: Box<[VectorMut]>, validity: MaskMut) -> Self { Self::try_new(fields, validity).vortex_expect("Failed to create `StructVectorMut`") } @@ -131,7 +128,7 @@ impl StructVectorMut { /// /// - Any field vector has a length that does not match the length of other fields. /// - The validity mask length does not match the field length. - pub fn try_new(fields: Vec, validity: MaskMut) -> VortexResult { + pub fn try_new(fields: Box<[VectorMut]>, validity: MaskMut) -> VortexResult { let len = if fields.is_empty() { validity.len() } else { @@ -173,7 +170,7 @@ impl StructVectorMut { /// /// - All field vectors have the same length. /// - The validity mask has a length equal to the field length. - pub unsafe fn new_unchecked(fields: Vec, validity: MaskMut) -> Self { + pub unsafe fn new_unchecked(fields: Box<[VectorMut]>, validity: MaskMut) -> Self { let len = if fields.is_empty() { validity.len() } else { @@ -192,13 +189,13 @@ impl StructVectorMut { } /// Decomposes the struct vector into its constituent parts (fields, validity, and length). - pub fn into_parts(self) -> (Vec, MaskMut, usize) { + pub fn into_parts(self) -> (Box<[VectorMut]>, MaskMut, usize) { (self.fields, self.validity, self.len) } /// Returns the fields of the `StructVectorMut`, each stored column-wise as a [`VectorMut`]. pub fn fields(&self) -> &[VectorMut] { - self.fields.as_slice() + self.fields.as_ref() } /// Finds the minimum capacity of all field vectors. @@ -317,7 +314,7 @@ impl VectorMutOps for StructVectorMut { self.len = at; Self { - fields: split_fields, + fields: split_fields.into_boxed_slice(), len: split_len, validity: split_validity, } @@ -362,7 +359,7 @@ mod tests { #[test] fn test_empty_fields() { - let mut struct_vec = StructVectorMut::try_new(vec![], MaskMut::new_true(10)).unwrap(); + let mut struct_vec = StructVectorMut::try_new(Box::new([]), MaskMut::new_true(10)).unwrap(); let second_half = struct_vec.split_off(6); assert_eq!(struct_vec.len(), 6); assert_eq!(second_half.len(), 4); @@ -430,12 +427,12 @@ mod tests { #[test] fn test_split_unsplit_values() { let mut struct_vec = StructVectorMut::try_new( - vec![ - NullVector::new(8).try_into_mut().unwrap().into(), + Box::new([ + NullVectorMut::new(8).into(), BoolVectorMut::from_iter([true, false, true, false, true, false, true, false]) .into(), PVectorMut::::from_iter([10, 20, 30, 40, 50, 60, 70, 80]).into(), - ], + ]), MaskMut::new_true(8), ) .unwrap(); @@ -471,11 +468,11 @@ mod tests { #[test] fn test_extend_and_append_nulls() { let mut struct_vec = StructVectorMut::try_new( - vec![ + Box::new([ NullVector::new(3).try_into_mut().unwrap().into(), BoolVectorMut::from_iter([true, false, true]).into(), PVectorMut::::from_iter([10, 20, 30]).into(), - ], + ]), MaskMut::new_true(3), ) .unwrap(); @@ -523,11 +520,11 @@ mod tests { let original_int = vec![Some(100i32), None, Some(200), Some(300)]; let struct_vec = StructVectorMut::try_new( - vec![ + Box::new([ NullVector::new(4).try_into_mut().unwrap().into(), BoolVectorMut::from_iter(original_bool.clone()).into(), PVectorMut::::from_iter(original_int.clone()).into(), - ], + ]), MaskMut::new_true(4), ) .unwrap(); @@ -547,24 +544,24 @@ mod tests { #[test] fn test_nested_struct() { let inner1 = StructVectorMut::try_new( - vec![ + Box::new([ NullVector::new(4).try_into_mut().unwrap().into(), BoolVectorMut::from_iter([true, false, true, false]).into(), - ], + ]), MaskMut::new_true(4), ) .unwrap() .into(); let inner2 = StructVectorMut::try_new( - vec![PVectorMut::::from_iter([100, 200, 300, 400]).into()], + Box::new([PVectorMut::::from_iter([100, 200, 300, 400]).into()]), MaskMut::new_true(4), ) .unwrap() .into(); let mut outer = - StructVectorMut::try_new(vec![inner1, inner2], MaskMut::new_true(4)).unwrap(); + StructVectorMut::try_new(Box::new([inner1, inner2]), MaskMut::new_true(4)).unwrap(); let second = outer.split_off(2); assert_eq!(outer.len(), 2); @@ -579,11 +576,11 @@ mod tests { fn test_reserve() { // Test that reserve increases capacity for all fields correctly. let mut struct_vec = StructVectorMut::try_new( - vec![ + Box::new([ NullVectorMut::new(3).into(), BoolVectorMut::from_iter([true, false, true]).into(), PVectorMut::::from_iter([10, 20, 30]).into(), - ], + ]), MaskMut::new_true(3), ) .unwrap(); @@ -606,10 +603,10 @@ mod tests { // Test reserve on an empty struct. let mut empty_struct = StructVectorMut::try_new( - vec![ + Box::new([ NullVectorMut::new(0).into(), BoolVectorMut::with_capacity(0).into(), - ], + ]), MaskMut::new_true(0), ) .unwrap(); @@ -621,11 +618,11 @@ mod tests { #[test] fn test_freeze_and_new_unchecked() { // Test new_unchecked creates a valid struct, and freeze preserves data correctly. - let fields = vec![ + let fields = Box::new([ NullVectorMut::new(4).into(), BoolVectorMut::from_iter([Some(true), None, Some(false), Some(true)]).into(), PVectorMut::::from_iter([Some(100), Some(200), None, Some(400)]).into(), - ]; + ]); let validity = Mask::from_iter([true, false, true, true]) .try_into_mut() diff --git a/vortex-vector/src/vector_mut.rs b/vortex-vector/src/vector_mut.rs index 5fb5697c195..eab12dd852e 100644 --- a/vortex-vector/src/vector_mut.rs +++ b/vortex-vector/src/vector_mut.rs @@ -64,7 +64,9 @@ impl VectorMut { } // SAFETY: All fields and validity have length 0, so they all have the same length. - Self::Struct(unsafe { StructVectorMut::new_unchecked(fields, validity) }) + Self::Struct(unsafe { + StructVectorMut::new_unchecked(fields.into_boxed_slice(), validity) + }) } _ => vortex_panic!("Unsupported dtype for VectorMut"), } From b4d5826b1bcea2eb3fa9c2a84fdfe615e1576cf2 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 28 Oct 2025 11:40:41 -0400 Subject: [PATCH 4/5] change mutable struct fields to `Arc` Signed-off-by: Connor Tsui --- vortex-vector/src/struct_/vector.rs | 46 ++++++++++++++++++++----- vortex-vector/src/struct_/vector_mut.rs | 33 +++++++++--------- 2 files changed, 53 insertions(+), 26 deletions(-) diff --git a/vortex-vector/src/struct_/vector.rs b/vortex-vector/src/struct_/vector.rs index 7360595d471..81b305a369c 100644 --- a/vortex-vector/src/struct_/vector.rs +++ b/vortex-vector/src/struct_/vector.rs @@ -3,6 +3,8 @@ //! Definition and implementation of [`StructVector`]. +use std::sync::Arc; + use vortex_error::{VortexExpect, VortexResult, vortex_ensure}; use vortex_mask::Mask; @@ -17,7 +19,14 @@ use crate::{StructVectorMut, Vector, VectorMutOps, VectorOps}; #[derive(Debug, Clone)] pub struct StructVector { /// The fields of the `StructVector`, each stored column-wise as a [`Vector`]. - pub(super) fields: Box<[Vector]>, + /// + /// We store these as an [`Arc>`] because we need to call [`try_unwrap()`] in our + /// [`try_into_mut()`] implementation, and since slices are unsized it is not implemented for + /// [`Arc<[Vector]>`]. + /// + /// [`try_unwrap()`]: Arc::try_unwrap + /// [`try_into_mut()`]: Self::try_into_mut + pub(super) fields: Arc>, /// The validity mask (where `true` represents an element is **not** null). pub(super) validity: Mask, @@ -32,25 +41,31 @@ pub struct StructVector { impl StructVector { /// Creates a new [`StructVector`] from the given fields and validity mask. /// + /// Note that we take [`Arc>`] in order to enable easier conversion to + /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut). + /// /// # Panics /// /// Panics if: /// /// - Any field vector has a length that does not match the length of other fields. /// - The validity mask length does not match the field length. - pub fn new(fields: Box<[Vector]>, validity: Mask) -> Self { + pub fn new(fields: Arc>, validity: Mask) -> Self { Self::try_new(fields, validity).vortex_expect("Failed to create `StructVector`") } /// Tries to create a new [`StructVector`] from the given fields and validity mask. /// + /// Note that we take [`Arc>`] in order to enable easier conversion to + /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut). + /// /// # Errors /// /// Returns an error if: /// /// - Any field vector has a length that does not match the length of other fields. /// - The validity mask length does not match the field length. - pub fn try_new(fields: Box<[Vector]>, validity: Mask) -> VortexResult { + pub fn try_new(fields: Arc>, validity: Mask) -> VortexResult { let len = if fields.is_empty() { validity.len() } else { @@ -85,13 +100,16 @@ impl StructVector { /// Creates a new [`StructVector`] from the given fields and validity mask without validation. /// + /// Note that we take [`Arc>`] in order to enable easier conversion to + /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut). + /// /// # Safety /// /// The caller must ensure that: /// /// - All field vectors have the same length. /// - The validity mask has a length equal to the field length. - pub unsafe fn new_unchecked(fields: Box<[Vector]>, validity: Mask) -> Self { + pub unsafe fn new_unchecked(fields: Arc>, validity: Mask) -> Self { let len = if fields.is_empty() { validity.len() } else { @@ -110,7 +128,7 @@ impl StructVector { } /// Decomposes the struct vector into its constituent parts (fields, validity, and length). - pub fn into_parts(self) -> (Box<[Vector]>, Mask, usize) { + pub fn into_parts(self) -> (Arc>, Mask, usize) { (self.fields, self.validity, self.len) } @@ -135,16 +153,26 @@ impl VectorOps for StructVector { where Self: Sized, { + let len = self.len; + let fields = match Arc::try_unwrap(self.fields) { + Ok(fields) => fields, + Err(fields) => return Err(StructVector { fields, ..self }), + }; + let validity = match self.validity.try_into_mut() { Ok(validity) => validity, Err(validity) => { - return Err(StructVector { validity, ..self }); + return Err(StructVector { + fields: Arc::new(fields), + validity, + len, + }); } }; // Convert all of the remaining fields to mutable, if possible. - let mut mutable_fields = Vec::with_capacity(self.fields.len()); - let mut fields_iter = self.fields.into_iter(); + let mut mutable_fields = Vec::with_capacity(fields.len()); + let mut fields_iter = fields.into_iter(); while let Some(field) = fields_iter.next() { match field.try_into_mut() { @@ -164,7 +192,7 @@ impl VectorOps for StructVector { all_fields.extend(fields_iter); return Err(StructVector { - fields: all_fields.into_boxed_slice(), + fields: Arc::new(all_fields.into_boxed_slice()), len: self.len, validity: validity.freeze(), }); diff --git a/vortex-vector/src/struct_/vector_mut.rs b/vortex-vector/src/struct_/vector_mut.rs index c427c258a6a..41ca570cc49 100644 --- a/vortex-vector/src/struct_/vector_mut.rs +++ b/vortex-vector/src/struct_/vector_mut.rs @@ -3,6 +3,8 @@ //! Definition and implementation of [`StructVectorMut`]. +use std::sync::Arc; + use vortex_error::{VortexExpect, VortexResult, vortex_ensure, vortex_panic}; use vortex_mask::MaskMut; @@ -24,11 +26,11 @@ use crate::{StructVector, Vector, VectorMut, VectorMutOps, VectorOps}; /// use vortex_mask::MaskMut; /// /// // Create a struct with three fields: nulls, booleans, and integers. -/// let fields = vec![ +/// let fields = Box::new([ /// NullVectorMut::new(3).into(), /// BoolVectorMut::from_iter([true, false, true]).into(), /// PVectorMut::::from_iter([10, 20, 30]).into(), -/// ]; +/// ]); /// /// let mut struct_vec = StructVectorMut::new(fields, MaskMut::new_true(3)); /// assert_eq!(struct_vec.len(), 3); @@ -45,10 +47,10 @@ use crate::{StructVector, Vector, VectorMut, VectorMutOps, VectorOps}; /// }; /// use vortex_mask::MaskMut; /// -/// let fields = vec![ +/// let fields = Box::new([ /// NullVectorMut::new(6).into(), /// PVectorMut::::from_iter([1, 2, 3, 4, 5, 6]).into(), -/// ]; +/// ]); /// /// let mut struct_vec = StructVectorMut::new(fields, MaskMut::new_true(6)); /// @@ -72,11 +74,11 @@ use crate::{StructVector, Vector, VectorMut, VectorMutOps, VectorOps}; /// use vortex_mask::MaskMut; /// use vortex_dtype::PTypeDowncast; /// -/// let fields = vec![ +/// let fields = Box::new([ /// NullVectorMut::new(3).into(), /// BoolVectorMut::from_iter([true, false, true]).into(), /// PVectorMut::::from_iter([10, 20, 30]).into(), -/// ]; +/// ]); /// /// let struct_vec = StructVectorMut::new(fields, MaskMut::new_true(3)); /// @@ -287,7 +289,7 @@ impl VectorMutOps for StructVectorMut { .collect(); StructVector { - fields: frozen_fields.into_boxed_slice(), + fields: Arc::new(frozen_fields.into_boxed_slice()), len: self.len, validity: self.validity.freeze(), } @@ -368,7 +370,7 @@ mod tests { #[test] fn test_try_into_mut_and_values() { let struct_vec = StructVector { - fields: vec![ + fields: Arc::new(Box::new([ NullVector::new(5).into(), BoolVectorMut::from_iter([true, false, true, false, true]) .freeze() @@ -376,8 +378,7 @@ mod tests { PVectorMut::::from_iter([10, 20, 30, 40, 50]) .freeze() .into(), - ] - .into_boxed_slice(), + ])), len: 5, validity: Mask::AllTrue(5), }; @@ -410,12 +411,11 @@ mod tests { let bool_field_clone = bool_field.clone(); let struct_vec = StructVector { - fields: vec![ + fields: Arc::new(Box::new([ NullVector::new(3).into(), bool_field_clone, PVectorMut::::from_iter([1, 2, 3]).freeze().into(), - ] - .into_boxed_slice(), + ])), len: 3, validity: Mask::AllTrue(3), }; @@ -479,12 +479,11 @@ mod tests { // Test extend. let to_extend = StructVector { - fields: vec![ + fields: Arc::new(Box::new([ NullVector::new(2).into(), BoolVectorMut::from_iter([false, true]).freeze().into(), PVectorMut::::from_iter([40, 50]).freeze().into(), - ] - .into_boxed_slice(), + ])), len: 2, validity: Mask::AllTrue(2), }; @@ -651,7 +650,7 @@ mod tests { } // Verify field data is preserved. - let mut fields = frozen.into_parts().0.into_vec(); + let mut fields = Arc::try_unwrap(frozen.into_parts().0).unwrap().into_vec(); if let Vector::Primitive(prim_vec) = fields.pop().unwrap() { let prim_vec_mut = prim_vec.try_into_mut().unwrap(); From 5cab7d0d078c03b501b0361a925746445939f76b Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 28 Oct 2025 13:18:15 -0400 Subject: [PATCH 5/5] final touches Signed-off-by: Connor Tsui --- vortex-vector/src/struct_/vector.rs | 20 +------ vortex-vector/src/struct_/vector_mut.rs | 76 ++++++++++++++++++------- 2 files changed, 56 insertions(+), 40 deletions(-) diff --git a/vortex-vector/src/struct_/vector.rs b/vortex-vector/src/struct_/vector.rs index 81b305a369c..60ad93f724f 100644 --- a/vortex-vector/src/struct_/vector.rs +++ b/vortex-vector/src/struct_/vector.rs @@ -66,19 +66,7 @@ impl StructVector { /// - Any field vector has a length that does not match the length of other fields. /// - The validity mask length does not match the field length. pub fn try_new(fields: Arc>, validity: Mask) -> VortexResult { - let len = if fields.is_empty() { - validity.len() - } else { - fields[0].len() - }; - - // Validate that the validity mask has the correct length. - vortex_ensure!( - validity.len() == len, - "Validity mask length ({}) does not match expected length ({})", - validity.len(), - len - ); + let len = validity.len(); // Validate that all fields have the correct length. for (i, field) in fields.iter().enumerate() { @@ -110,11 +98,7 @@ impl StructVector { /// - All field vectors have the same length. /// - The validity mask has a length equal to the field length. pub unsafe fn new_unchecked(fields: Arc>, validity: Mask) -> Self { - let len = if fields.is_empty() { - validity.len() - } else { - fields[0].len() - }; + let len = validity.len(); if cfg!(debug_assertions) { Self::new(fields, validity) diff --git a/vortex-vector/src/struct_/vector_mut.rs b/vortex-vector/src/struct_/vector_mut.rs index 41ca570cc49..5ac6dceba43 100644 --- a/vortex-vector/src/struct_/vector_mut.rs +++ b/vortex-vector/src/struct_/vector_mut.rs @@ -131,19 +131,7 @@ impl StructVectorMut { /// - Any field vector has a length that does not match the length of other fields. /// - The validity mask length does not match the field length. pub fn try_new(fields: Box<[VectorMut]>, validity: MaskMut) -> VortexResult { - let len = if fields.is_empty() { - validity.len() - } else { - fields[0].len() - }; - - // Validate that the validity mask has the correct length. - vortex_ensure!( - validity.len() == len, - "Validity mask length ({}) does not match expected length ({})", - validity.len(), - len - ); + let len = validity.len(); // Validate that all fields have the correct length. for (i, field) in fields.iter().enumerate() { @@ -173,11 +161,7 @@ impl StructVectorMut { /// - All field vectors have the same length. /// - The validity mask has a length equal to the field length. pub unsafe fn new_unchecked(fields: Box<[VectorMut]>, validity: MaskMut) -> Self { - let len = if fields.is_empty() { - validity.len() - } else { - fields[0].len() - }; + let len = validity.len(); if cfg!(debug_assertions) { Self::new(fields, validity) @@ -268,17 +252,19 @@ impl VectorMutOps for StructVectorMut { // Extend the validity mask. self.validity.append_mask(other.validity()); - self.len += other.len(); + + debug_assert_eq!(self.len, self.validity.len()); } fn append_nulls(&mut self, n: usize) { for field in &mut self.fields { field.append_nulls(n); // Note that the value we push to each doesn't actually matter. } - self.validity.append_n(false, n); + self.validity.append_n(false, n); self.len += n; + debug_assert_eq!(self.len, self.validity.len()); } fn freeze(self) -> Self::Immutable { @@ -346,14 +332,14 @@ impl VectorMutOps for StructVectorMut { } self.validity.unsplit(other.validity); - self.len += other.len; + debug_assert_eq!(self.len, self.validity.len()); } } #[cfg(test)] mod tests { - use vortex_dtype::PTypeDowncast; + use vortex_dtype::{DType, FieldNames, Nullability, PType, PTypeDowncast, StructFields}; use vortex_mask::{Mask, MaskMut}; use super::*; @@ -669,4 +655,50 @@ mod tests { panic!("Expected bool vector"); } } + + #[test] + fn test_with_capacity_struct() { + // Create a struct dtype with multiple field types. + let struct_dtype = DType::Struct( + StructFields::new( + FieldNames::from(["null_field", "bool_field", "int_field"]), + vec![ + DType::Null, + DType::Bool(Nullability::NonNullable), + DType::Primitive(PType::I32, Nullability::Nullable), + ], + ), + Nullability::Nullable, + ); + + // Create a VectorMut with capacity using the struct dtype. + let vector_mut = VectorMut::with_capacity(100, &struct_dtype); + + // Verify it's a struct vector. + match vector_mut { + VectorMut::Struct(mut struct_vec) => { + // Check initial state. + assert_eq!(struct_vec.len(), 0); + assert_eq!(struct_vec.fields.len(), 3); + + // Verify each field has the correct type. + assert!(matches!(struct_vec.fields[0], VectorMut::Null(_))); + assert!(matches!(struct_vec.fields[1], VectorMut::Bool(_))); + assert!(matches!(struct_vec.fields[2], VectorMut::Primitive(_))); + + // Check that capacity was reserved (minimum should be at least 100). + assert!(struct_vec.capacity() >= 100); + + // Verify we can actually use the reserved capacity by pushing values. + for _ in 0..50 { + struct_vec.append_nulls(1); + } + assert_eq!(struct_vec.len(), 50); + + // Should not need reallocation since we reserved capacity. + assert!(struct_vec.capacity() >= 100); + } + _ => panic!("Expected VectorMut::Struct"), + } + } }