diff --git a/vortex-array/src/arrays/masked/vtable/mod.rs b/vortex-array/src/arrays/masked/vtable/mod.rs
index f2eff52a244..54643bd0054 100644
--- a/vortex-array/src/arrays/masked/vtable/mod.rs
+++ b/vortex-array/src/arrays/masked/vtable/mod.rs
@@ -4,6 +4,7 @@
 mod array;
 mod canonical;
 mod operations;
+mod operator;
 mod serde;
 mod validity;
 
@@ -28,7 +29,7 @@ impl VTable for MaskedVTable {
     type ComputeVTable = NotSupported;
     type EncodeVTable = NotSupported;
     type SerdeVTable = Self;
-    type OperatorVTable = NotSupported;
+    type OperatorVTable = Self;
 
     fn id(_encoding: &Self::Encoding) -> EncodingId {
         EncodingId::new_ref("vortex.masked")
diff --git a/vortex-array/src/arrays/masked/vtable/operator.rs b/vortex-array/src/arrays/masked/vtable/operator.rs
new file mode 100644
index 00000000000..768bc53161c
--- /dev/null
+++ b/vortex-array/src/arrays/masked/vtable/operator.rs
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use vortex_compute::mask::MaskValidity;
+use vortex_error::VortexResult;
+
+use crate::ArrayRef;
+use crate::arrays::{MaskedArray, MaskedVTable};
+use crate::execution::{BatchKernelRef, BindCtx, kernel};
+use crate::vtable::OperatorVTable;
+
+impl OperatorVTable for MaskedVTable {
+    fn bind(
+        array: &MaskedArray,
+        selection: Option<&ArrayRef>,
+        ctx: &mut dyn BindCtx,
+    ) -> VortexResult<BatchKernelRef> {
+        // A masked array performs the intersection of the mask validity with the child validity.
+        let mask = ctx.bind_validity(&array.validity, array.len(), selection)?;
+        let child = ctx.bind(&array.child, selection)?;
+
+        Ok(kernel(move || {
+            let mask = mask.execute()?;
+            let child = child.execute()?;
+            Ok(MaskValidity::mask_validity(child, &mask))
+        }))
+    }
+}
diff --git a/vortex-array/src/compute/arrays/is_not_null.rs b/vortex-array/src/compute/arrays/is_not_null.rs
new file mode 100644
index 00000000000..2fedb1dbaec
--- /dev/null
+++ b/vortex-array/src/compute/arrays/is_not_null.rs
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use std::hash::Hasher;
+
+use vortex_dtype::DType;
+use vortex_dtype::Nullability::NonNullable;
+use vortex_error::VortexResult;
+use vortex_mask::Mask;
+use vortex_vector::{BoolVector, VectorOps};
+
+use crate::execution::{BatchKernelRef, BindCtx, kernel};
+use crate::stats::{ArrayStats, StatsSetRef};
+use crate::vtable::{ArrayVTable, NotSupported, OperatorVTable, VTable, VisitorVTable};
+use crate::{
+    ArrayBufferVisitor, ArrayChildVisitor, ArrayEq, ArrayHash, ArrayRef, EncodingId, EncodingRef,
+    Precision, vtable,
+};
+
+vtable!(IsNotNull);
+
+#[derive(Debug, Clone)]
+pub struct IsNotNullArray {
+    child: ArrayRef,
+    stats: ArrayStats,
+}
+
+impl IsNotNullArray {
+    /// Creates an array whose values are `true` wherever `child` is valid (non-null).
+    pub fn new(child: ArrayRef) -> Self {
+        Self {
+            child,
+            stats: ArrayStats::default(),
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct IsNotNullEncoding;
+
+impl VTable for IsNotNullVTable {
+    type Array = IsNotNullArray;
+    type Encoding = IsNotNullEncoding;
+    type ArrayVTable = Self;
+    type CanonicalVTable = NotSupported;
+    type OperationsVTable = NotSupported;
+    type ValidityVTable = NotSupported;
+    type VisitorVTable = Self;
+    type ComputeVTable = NotSupported;
+    type EncodeVTable = NotSupported;
+    type SerdeVTable = NotSupported;
+    type OperatorVTable = Self;
+
+    fn id(_encoding: &Self::Encoding) -> EncodingId {
+        EncodingId::from("vortex.is_not_null")
+    }
+
+    fn encoding(_array: &Self::Array) -> EncodingRef {
+        EncodingRef::from(IsNotNullEncoding.as_ref())
+    }
+}
+
+impl ArrayVTable for IsNotNullVTable {
+    fn len(array: &IsNotNullArray) -> usize {
+        array.child.len()
+    }
+
+    fn dtype(_array: &IsNotNullArray) -> &DType {
+        &DType::Bool(NonNullable)
+    }
+
+    fn stats(array: &IsNotNullArray) -> StatsSetRef<'_> {
+        array.stats.to_ref(array.as_ref())
+    }
+
+    fn array_hash<H: Hasher>(array: &IsNotNullArray, state: &mut H, precision: Precision) {
+        array.child.array_hash(state, precision);
+    }
+
+    fn array_eq(array: &IsNotNullArray, other: &IsNotNullArray, precision: Precision) -> bool {
+        array.child.array_eq(&other.child, precision)
+    }
+}
+
+impl VisitorVTable for IsNotNullVTable {
+    fn visit_buffers(_array: &IsNotNullArray, _visitor: &mut dyn ArrayBufferVisitor) {
+        // No buffers
+    }
+
+    fn visit_children(array: &IsNotNullArray, visitor: &mut dyn ArrayChildVisitor) {
+        visitor.visit_child("child", array.child.as_ref());
+    }
+}
+
+impl OperatorVTable for IsNotNullVTable {
+    fn bind(
+        array: &IsNotNullArray,
+        selection: Option<&ArrayRef>,
+        ctx: &mut dyn BindCtx,
+    ) -> VortexResult<BatchKernelRef> {
+        let child = ctx.bind(&array.child, selection)?;
+        Ok(kernel(move || {
+            let child = child.execute()?;
+            let is_not_null = child.validity().to_bit_buffer();
+            Ok(BoolVector::new(is_not_null, Mask::AllTrue(child.len())).into())
+        }))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use vortex_buffer::{bitbuffer, buffer};
+    use vortex_error::VortexResult;
+    use vortex_vector::VectorOps;
+
+    use super::IsNotNullArray;
+    use crate::IntoArray;
+    use crate::arrays::PrimitiveArray;
+    use crate::validity::Validity;
+
+    #[test]
+    fn test_is_not_null() -> VortexResult<()> {
+        let validity = bitbuffer![1 0 1];
+        let array = PrimitiveArray::new(
+            buffer![0, 1, 2],
+            Validity::Array(validity.clone().into_array()),
+        )
+        .into_array();
+
+        let result = IsNotNullArray::new(array).execute()?.into_bool();
+        assert!(result.validity().all_true());
+        assert_eq!(result.bits(), &validity);
+
+        Ok(())
+    }
+}
diff --git a/vortex-array/src/compute/arrays/is_null.rs b/vortex-array/src/compute/arrays/is_null.rs
new file mode 100644
index 00000000000..53f04cf43d6
--- /dev/null
+++ b/vortex-array/src/compute/arrays/is_null.rs
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use std::hash::Hasher;
+use std::ops::Not;
+
+use vortex_dtype::DType;
+use vortex_dtype::Nullability::NonNullable;
+use vortex_error::VortexResult;
+use vortex_mask::Mask;
+use vortex_vector::{BoolVector, VectorOps};
+
+use crate::execution::{BatchKernelRef, BindCtx, kernel};
+use crate::stats::{ArrayStats, StatsSetRef};
+use crate::vtable::{ArrayVTable, NotSupported, OperatorVTable, VTable, VisitorVTable};
+use crate::{
+    ArrayBufferVisitor, ArrayChildVisitor, ArrayEq, ArrayHash, ArrayRef, EncodingId, EncodingRef,
+    Precision, vtable,
+};
+
+vtable!(IsNull);
+
+#[derive(Debug, Clone)]
+pub struct IsNullArray {
+    child: ArrayRef,
+    stats: ArrayStats,
+}
+
+impl IsNullArray {
+    /// Creates an array whose values are `true` wherever `child` is null.
+    pub fn new(child: ArrayRef) -> Self {
+        Self {
+            child,
+            stats: ArrayStats::default(),
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct IsNullEncoding;
+
+impl VTable for IsNullVTable {
+    type Array = IsNullArray;
+    type Encoding = IsNullEncoding;
+    type ArrayVTable = Self;
+    type CanonicalVTable = NotSupported;
+    type OperationsVTable = NotSupported;
+    type ValidityVTable = NotSupported;
+    type VisitorVTable = Self;
+    type ComputeVTable = NotSupported;
+    type EncodeVTable = NotSupported;
+    type SerdeVTable = NotSupported;
+    type OperatorVTable = Self;
+
+    fn id(_encoding: &Self::Encoding) -> EncodingId {
+        EncodingId::from("vortex.is_null")
+    }
+
+    fn encoding(_array: &Self::Array) -> EncodingRef {
+        EncodingRef::from(IsNullEncoding.as_ref())
+    }
+}
+
+impl ArrayVTable for IsNullVTable {
+    fn len(array: &IsNullArray) -> usize {
+        array.child.len()
+    }
+
+    fn dtype(_array: &IsNullArray) -> &DType {
+        &DType::Bool(NonNullable)
+    }
+
+    fn stats(array: &IsNullArray) -> StatsSetRef<'_> {
+        array.stats.to_ref(array.as_ref())
+    }
+
+    fn array_hash<H: Hasher>(array: &IsNullArray, state: &mut H, precision: Precision) {
+        array.child.array_hash(state, precision);
+    }
+
+    fn array_eq(array: &IsNullArray, other: &IsNullArray, precision: Precision) -> bool {
+        array.child.array_eq(&other.child, precision)
+    }
+}
+
+impl VisitorVTable for IsNullVTable {
+    fn visit_buffers(_array: &IsNullArray, _visitor: &mut dyn ArrayBufferVisitor) {
+        // No buffers
+    }
+
+    fn visit_children(array: &IsNullArray, visitor: &mut dyn ArrayChildVisitor) {
+        visitor.visit_child("child", array.child.as_ref());
+    }
+}
+
+impl OperatorVTable for IsNullVTable {
+    fn bind(
+        array: &IsNullArray,
+        selection: Option<&ArrayRef>,
+        ctx: &mut dyn BindCtx,
+    ) -> VortexResult<BatchKernelRef> {
+        let child = ctx.bind(&array.child, selection)?;
+        Ok(kernel(move || {
+            let child = child.execute()?;
+            let is_null = child.validity().not().to_bit_buffer();
+            Ok(BoolVector::new(is_null, Mask::AllTrue(child.len())).into())
+        }))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::ops::Not;
+
+    use vortex_buffer::{bitbuffer, buffer};
+    use vortex_error::VortexResult;
+    use vortex_vector::VectorOps;
+
+    use crate::IntoArray;
+    use crate::arrays::PrimitiveArray;
+    use crate::compute::arrays::is_null::IsNullArray;
+    use crate::validity::Validity;
+
+    #[test]
+    fn test_is_null() -> VortexResult<()> {
+        let validity = bitbuffer![1 0 1];
+        let array = PrimitiveArray::new(
+            buffer![0, 1, 2],
+            Validity::Array(validity.clone().into_array()),
+        )
+        .into_array();
+
+        let result = IsNullArray::new(array).execute()?.into_bool();
+        assert!(result.validity().all_true());
+        assert_eq!(result.bits(), &validity.not());
+
+        Ok(())
+    }
+}
diff --git a/vortex-array/src/compute/arrays/mod.rs b/vortex-array/src/compute/arrays/mod.rs
index cb099647ec1..b2082b9d05e 100644
--- a/vortex-array/src/compute/arrays/mod.rs
+++ b/vortex-array/src/compute/arrays/mod.rs
@@ -2,4 +2,6 @@
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
 pub mod arithmetic;
+pub mod is_not_null;
+pub mod is_null;
 pub mod logical;
diff --git a/vortex-compute/Cargo.toml b/vortex-compute/Cargo.toml
index 755c3aac834..b538cd1bdc6 100644
--- a/vortex-compute/Cargo.toml
+++ b/vortex-compute/Cargo.toml
@@ -28,9 +28,10 @@ vortex-vector = { workspace = true }
 num-traits = { workspace = true }
 
 [features]
-default = ["arithmetic", "comparison", "filter", "logical"]
+default = ["arithmetic", "comparison", "filter", "logical", "mask"]
 arithmetic = []
 comparison = []
 filter = []
 logical = []
+mask = []
 
diff --git a/vortex-compute/src/lib.rs b/vortex-compute/src/lib.rs
index dd1d207c701..512024d07c8 100644
--- a/vortex-compute/src/lib.rs
+++ b/vortex-compute/src/lib.rs
@@ -15,3 +15,5 @@ pub mod comparison;
 pub mod filter;
 #[cfg(feature = "logical")]
 pub mod logical;
+#[cfg(feature = "mask")]
+pub mod mask;
diff --git a/vortex-compute/src/logical/and.rs b/vortex-compute/src/logical/and.rs
index 38c7206d4c3..a5f1ec04e34 100644
--- a/vortex-compute/src/logical/and.rs
+++ b/vortex-compute/src/logical/and.rs
@@ -3,7 +3,6 @@
 
 use std::ops::BitAnd;
 
-use vortex_mask::Mask;
 use vortex_vector::{BoolVector, VectorOps};
 
 use crate::logical::LogicalAnd;
@@ -28,22 +27,6 @@ impl LogicalAnd<&BoolVector> for BoolVector {
     }
 }
 
-impl LogicalAnd for &Mask {
-    type Output = Mask;
-
-    fn and(self, other: Self) -> Self::Output {
-        self.bitand(other)
-    }
-}
-
-impl LogicalAnd<&Mask> for Mask {
-    type Output = Mask;
-
-    fn and(self, other: &Mask) -> Self::Output {
-        self.bitand(other)
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use vortex_buffer::bitbuffer;
diff --git a/vortex-compute/src/logical/or.rs b/vortex-compute/src/logical/or.rs
index 57af49f90c7..b88cc840c26 100644
--- a/vortex-compute/src/logical/or.rs
+++ b/vortex-compute/src/logical/or.rs
@@ -3,7 +3,6 @@
 
 use std::ops::{BitAnd, BitOr};
 
-use vortex_mask::Mask;
 use vortex_vector::{BoolVector, VectorOps};
 
 use crate::logical::LogicalOr;
@@ -28,22 +27,6 @@ impl LogicalOr<&BoolVector> for BoolVector {
     }
 }
 
-impl LogicalOr for &Mask {
-    type Output = Mask;
-
-    fn or(self, other: Self) -> Self::Output {
-        self.bitor(other)
-    }
-}
-
-impl LogicalOr<&Mask> for Mask {
-    type Output = Mask;
-
-    fn or(self, other: &Mask) -> Self::Output {
-        self.bitor(other)
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use vortex_buffer::bitbuffer;
diff --git a/vortex-compute/src/mask/mod.rs b/vortex-compute/src/mask/mod.rs
new file mode 100644
index 00000000000..1c206ef2e60
--- /dev/null
+++ b/vortex-compute/src/mask/mod.rs
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Compute function for masking the validity of vectors.
+
+use std::ops::BitAnd;
+
+use vortex_dtype::NativePType;
+use vortex_mask::Mask;
+use vortex_vector::{
+    BoolVector, NullVector, PrimitiveVector, StructVector, Vector, match_each_pvector,
+    match_each_vector,
+};
+
+/// Trait for masking the validity of an array or vector.
+pub trait MaskValidity {
+    /// Masks the validity of the object using the provided mask.
+    ///
+    /// The output has its validity intersected with the given mask, resulting in a new validity
+    /// with equal or fewer valid entries.
+    fn mask_validity(self, mask: &Mask) -> Self;
+}
+
+impl MaskValidity for Vector {
+    fn mask_validity(self, mask: &Mask) -> Self {
+        match_each_vector!(self, |v| { MaskValidity::mask_validity(v, mask).into() })
+    }
+}
+
+impl MaskValidity for NullVector {
+    fn mask_validity(self, _mask: &Mask) -> Self {
+        // Null vectors have no validity to mask; they are always fully null.
+        self
+    }
+}
+
+impl MaskValidity for BoolVector {
+    fn mask_validity(self, mask: &Mask) -> Self {
+        let (bits, validity) = self.into_parts();
+        Self::new(bits, validity.bitand(mask))
+    }
+}
+
+impl MaskValidity for PrimitiveVector {
+    fn mask_validity(self, mask: &Mask) -> Self {
+        match_each_pvector!(self, |v| { MaskValidity::mask_validity(v, mask).into() })
+    }
+}
+
+impl<T: NativePType> MaskValidity for vortex_vector::PVector<T> {
+    fn mask_validity(self, mask: &Mask) -> Self {
+        let (data, validity) = self.into_parts();
+        Self::new(data, validity.bitand(mask))
+    }
+}
+
+impl MaskValidity for StructVector {
+    fn mask_validity(self, mask: &Mask) -> Self {
+        let (fields, validity) = self.into_parts();
+        StructVector::new(fields, validity.bitand(mask))
+    }
+}
diff --git a/vortex-vector/src/macros.rs b/vortex-vector/src/macros.rs
index 78ec73379eb..25f9de2b615 100644
--- a/vortex-vector/src/macros.rs
+++ b/vortex-vector/src/macros.rs
@@ -33,6 +33,7 @@
 ///
 /// [`Vector`]: crate::Vector
 /// [`VectorOps`]: crate::VectorOps
+#[macro_export]
 macro_rules! match_each_vector {
     ($self:expr, | $vec:ident | $body:block) => {{
         match $self {
@@ -87,6 +88,7 @@ pub(crate) use match_each_vector;
 ///
 /// [`VectorMut`]: crate::VectorMut
 /// [`VectorMutOps`]: crate::VectorMutOps
+#[macro_export]
 macro_rules! match_each_vector_mut {
     ($self:expr, | $vec:ident | $body:block) => {{
         match $self {
diff --git a/vortex-vector/src/primitive/macros.rs b/vortex-vector/src/primitive/macros.rs
index f609754f3d0..a89b0cf87f4 100644
--- a/vortex-vector/src/primitive/macros.rs
+++ b/vortex-vector/src/primitive/macros.rs
@@ -40,6 +40,7 @@
 ///
 /// [`PrimitiveVector`]: crate::PrimitiveVector
 /// [`VectorOps`]: crate::VectorOps
+#[macro_export]
 macro_rules! match_each_pvector {
     ($self:expr, | $vec:ident | $body:block) => {{
         match $self {
diff --git a/vortex-vector/src/struct_/vector.rs b/vortex-vector/src/struct_/vector.rs
index 60ad93f724f..d08699ec71f 100644
--- a/vortex-vector/src/struct_/vector.rs
+++ b/vortex-vector/src/struct_/vector.rs
@@ -112,8 +112,8 @@ impl StructVector {
     }
 
-    /// Decomposes the struct vector into its constituent parts (fields, validity, and length).
-    pub fn into_parts(self) -> (Arc<Box<[Vector]>>, Mask, usize) {
-        (self.fields, self.validity, self.len)
+    /// Decomposes the struct vector into its constituent parts (fields and validity).
+    pub fn into_parts(self) -> (Arc<Box<[Vector]>>, Mask) {
+        (self.fields, self.validity)
     }
 
     /// Returns the fields of the `StructVector`, each stored column-wise as a [`Vector`].