From be9f62ee41ecfc9c87aa3753c70947143b191705 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Fri, 7 Nov 2025 11:06:40 -0500 Subject: [PATCH 01/10] pipelined execution Signed-off-by: Nicholas Gates --- encodings/fastlanes/src/for/pipeline.rs | 8 +- vortex-array/src/operator/compare.rs | 8 +- vortex-array/src/pipeline/mod.rs | 96 ++++++++++++---------- vortex-array/src/pipeline/operator/bind.rs | 4 +- vortex-array/src/pipeline/row_selection.rs | 27 ------ vortex-array/src/vtable/operator.rs | 9 +- 6 files changed, 66 insertions(+), 86 deletions(-) delete mode 100644 vortex-array/src/pipeline/row_selection.rs diff --git a/encodings/fastlanes/src/for/pipeline.rs b/encodings/fastlanes/src/for/pipeline.rs index 428c334c88e..2a5b1abbeb5 100644 --- a/encodings/fastlanes/src/for/pipeline.rs +++ b/encodings/fastlanes/src/for/pipeline.rs @@ -7,7 +7,6 @@ use std::marker::PhantomData; use std::sync::Arc; use num_traits::WrappingAdd; -use vortex_array::Array; use vortex_array::operator::{ LengthBounds, Operator, OperatorEq, OperatorHash, OperatorId, OperatorRef, }; @@ -17,8 +16,9 @@ use vortex_array::pipeline::{ BindContext, Element, Kernel, KernelContext, PipelinedOperator, RowSelection, VectorId, }; use vortex_array::vtable::OperatorVTable; -use vortex_dtype::{DType, NativePType, PType, match_each_integer_ptype}; -use vortex_error::{VortexExpect, VortexResult, vortex_bail}; +use vortex_array::Array; +use vortex_dtype::{match_each_integer_ptype, DType, NativePType, PType}; +use vortex_error::{vortex_bail, VortexExpect, VortexResult}; use vortex_scalar::Scalar; use crate::{FoRArray, FoRVTable}; @@ -150,7 +150,7 @@ impl PipelinedOperator for FoROperator { match_each_integer_ptype!(ptype, |T| { match_each_integer_ptype!(self.encoded_ptype, |E| { Ok(Box::new(FoRKernel:: { - child: ctx.children()[0], + child: ctx.pipelined_input()[0], reference: self .reference .as_primitive() diff --git a/vortex-array/src/operator/compare.rs b/vortex-array/src/operator/compare.rs index 
96fc20936ca..485cf1f8284 100644 --- a/vortex-array/src/operator/compare.rs +++ b/vortex-array/src/operator/compare.rs @@ -183,7 +183,7 @@ impl PipelinedOperator for CompareOperator { return match_each_native_ptype!(ptype, |T| { match_each_compare_op!(self.op.swap(), |Op| { Ok(Box::new(ScalarComparePrimitiveKernel:: { - lhs: ctx.children()[1], + lhs: ctx.pipelined_input()[1], rhs: lhs_const .scalar() .as_primitive() @@ -201,7 +201,7 @@ impl PipelinedOperator for CompareOperator { return match_each_native_ptype!(ptype, |T| { match_each_compare_op!(self.op, |Op| { Ok(Box::new(ScalarComparePrimitiveKernel:: { - lhs: ctx.children()[0], + lhs: ctx.pipelined_input()[0], rhs: rhs_const .scalar() .as_primitive() @@ -216,8 +216,8 @@ impl PipelinedOperator for CompareOperator { match_each_native_ptype!(ptype, |T| { match_each_compare_op!(self.op, |Op| { Ok(Box::new(ComparePrimitiveKernel:: { - lhs: ctx.children()[0], - rhs: ctx.children()[1], + lhs: ctx.pipelined_input()[0], + rhs: ctx.pipelined_input()[1], _phantom: PhantomData, }) as Box) }) diff --git a/vortex-array/src/pipeline/mod.rs b/vortex-array/src/pipeline/mod.rs index 866b8e29bb9..ff918d86036 100644 --- a/vortex-array/src/pipeline/mod.rs +++ b/vortex-array/src/pipeline/mod.rs @@ -20,23 +20,20 @@ pub mod bits; pub(crate) mod operator; -pub mod row_selection; mod types; pub mod vec; pub mod view; use std::cell::RefCell; -pub use row_selection::*; -pub use types::*; -use vec::VectorRef; -use vortex_error::VortexResult; - use self::vec::Vector; -use self::view::ViewMut; -use crate::Canonical; use crate::operator::Operator; use crate::pipeline::bits::BitView; +use crate::Canonical; +pub use types::*; +use vec::VectorRef; +use vortex_error::VortexResult; +use vortex_vector::VectorMut; /// The number of elements in each step of a Vortex evaluation operator. 
pub const N: usize = 1024; @@ -44,10 +41,8 @@ pub const N: usize = 1024; // Number of usize words needed to store N bits pub const N_WORDS: usize = N / usize::BITS as usize; -pub trait PipelinedOperator: Operator { - /// Defines the row selection of this pipeline operator. - fn row_selection(&self) -> RowSelection; - +/// Returned by an array to indicate that it can be executed in a pipelined fashion. +pub trait Pipelined { // Whether this operator works by mutating its first child in-place. // // If `true`, the operator is invoked with the first child's input data passed via the @@ -57,54 +52,70 @@ pub trait PipelinedOperator: Operator { // false // } - /// Bind the operator into a [`Kernel`] for pipelined execution. - fn bind(&self, ctx: &dyn BindContext) -> VortexResult>; - - /// Returns the child indices of this operator that are passed to the kernel as input vectors. - fn vector_children(&self) -> Vec; + /// Returns the indices of the children of this array that should be passed to the kernel as + /// pipelined input vectors, 1024 elements at a time. + /// + /// Any child not listed here will be treated as a batch input, and the full vector will be + /// computed before pipelined execution begins. + fn pipelined_children(&self) -> Vec; - /// Returns the child indices of this operator that are passed to the kernel as batch inputs. - fn batch_children(&self) -> Vec; + /// Bind the operator into a [`Kernel`] for pipelined execution. + /// + /// The provided [`BindContext`] can be used to obtain vector IDs for pipelined children and + /// batch IDs for batch children. Each child can only be bound once. + fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult>; } /// The context used when binding an operator for execution. pub trait BindContext { - fn children(&self) -> &[VectorId]; + /// Returns a [`VectorId`] that can be passed to the [`KernelContext`] within the body of + /// the [`Kernel`] to access the given child as a pipelined input vector. 
+ /// + /// # Panics + /// + /// If the child index requested here was not listed in [`Pipelined::pipelined_children`]. + fn pipelined_input(&self, child_idx: usize) -> VectorId; - fn batch_inputs(&self) -> &[BatchId]; + /// Returns the batch input vector for the given child. + /// + /// # Panics + /// + /// If the child index requested here was listed in [`Pipelined::pipelined_children`]. + fn batch_input(&self, child_idx: usize) -> Vector; } /// The ID of the vector to use. pub type VectorId = usize; -/// The ID of the batch input to use. -pub type BatchId = usize; -/// A operator provides a push-based way to emit a stream of canonical data. +/// A kernel implements the physical compute required for pipelined execution. It is driven in a +/// push-based way, typically as part of a larger pipeline of kernels. /// /// By passing multiple vector computations through the same operator, we can amortize /// the setup costs (such as DType validation, stats short-circuiting, etc.), and to make better /// use of CPU caches by performing all operations while the data is hot. +/// +/// The [`Kernel::step`] method will be invoked repeatedly to process chunks of data, [`N`] elements +/// at a time. Each invocation is passed a selection mask indicating which elements of the chunk +/// should be written to the start of the output vector. +/// +/// The mutable output vector is **guaranteed** to have a capacity of at least [`N`] elements, and +/// its length will initially be set to zero. It is therefore safe to invoke unchecked writes up to +/// `N` elements. +/// +/// The pipeline may invoke the `Kernel::skip` method to skip over some number of chunks of data. +/// The kernel should mutate any internal state as necessary to account for the skipped data. pub trait Kernel: Send { - /// Attempts to perform a single step of the operator, writing data to the output vector. 
- /// - /// The kernel step should be stateless and is passed the chunk index as well as the selection - /// mask for this chunk. + /// Skip over the given number of chunks of data. /// - /// Input and output vectors have a `Selection` enum indicating which elements of the vector - /// are valid for processing. This is one of: - /// * Full - all N elements are valid. - /// * Prefix - the first n elements are valid, where n is the true count of the selection mask. - /// * Mask - only the elements indicated by the selection mask are valid. - /// - /// Kernel should inspect the selection enum of the input and iterate the values accordingly. - /// They may choose to write the output vector in any selection mode, but should choose the most - /// efficient mode possible - not forgetting to update the output vector's selection enum. + /// For example, if `n` is 3, then the kernel should skip over `3 * N` elements of input data. + fn skip(&mut self, n: usize); + + /// Attempts to perform a single step of the operator, writing data to the output vector. fn step( - &self, + &mut self, ctx: &KernelContext, - chunk_idx: usize, selection: &BitView, - out: &mut ViewMut, + out: &mut VectorMut, ) -> VortexResult<()>; } @@ -121,9 +132,4 @@ impl KernelContext { pub fn vector(&self, vector_id: VectorId) -> VectorRef<'_> { VectorRef::new(self.vectors[vector_id].borrow()) } - - /// Get a batch input by its ID. 
- pub fn batch_input(&self, batch_id: BatchId) -> &Canonical { - &self.batch_inputs[batch_id] - } } diff --git a/vortex-array/src/pipeline/operator/bind.rs b/vortex-array/src/pipeline/operator/bind.rs index 16dc77b6972..63612a56bc8 100644 --- a/vortex-array/src/pipeline/operator/bind.rs +++ b/vortex-array/src/pipeline/operator/bind.rs @@ -5,8 +5,8 @@ use vortex_error::{VortexExpect, VortexResult}; -use crate::pipeline::operator::PipelineNode; use crate::pipeline::operator::buffers::VectorAllocationPlan; +use crate::pipeline::operator::PipelineNode; use crate::pipeline::{BatchId, BindContext, Kernel, VectorId}; pub(crate) fn bind_kernels( @@ -44,7 +44,7 @@ struct PipelineBindContext<'a> { } impl BindContext for PipelineBindContext<'_> { - fn children(&self) -> &[VectorId] { + fn pipelined_input(&self) -> &[VectorId] { self.children } diff --git a/vortex-array/src/pipeline/row_selection.rs b/vortex-array/src/pipeline/row_selection.rs deleted file mode 100644 index 549c93bd16c..00000000000 --- a/vortex-array/src/pipeline/row_selection.rs +++ /dev/null @@ -1,27 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use crate::operator::{OperatorEq, OperatorRef}; - -/// Each operator has a row selection over the domain of input rows. -#[derive(Debug, Clone)] -pub enum RowSelection { - /// Defines a new domain of N rows. - Domain(usize), - /// Returns all rows from the domain. - All, - /// Selects rows from the range where the boolean operator resolves to a true bit. 
- MaskOperator(OperatorRef), -} - -impl PartialEq for RowSelection { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - (RowSelection::Domain(n1), RowSelection::Domain(n2)) => n1 == n2, - (RowSelection::All, RowSelection::All) => true, - (RowSelection::MaskOperator(o1), RowSelection::MaskOperator(o2)) => o1.operator_eq(o2), - _ => false, - } - } -} -impl Eq for RowSelection {} diff --git a/vortex-array/src/vtable/operator.rs b/vortex-array/src/vtable/operator.rs index 8f9abbd43a1..c0379350440 100644 --- a/vortex-array/src/vtable/operator.rs +++ b/vortex-array/src/vtable/operator.rs @@ -1,15 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_error::{VortexResult, vortex_bail}; +use vortex_error::{vortex_bail, VortexResult}; use vortex_mask::Mask; use vortex_vector::Vector; -use crate::ArrayRef; use crate::array::IntoArray; use crate::execution::{BatchKernelRef, BindCtx, ExecutionCtx}; use crate::operator::OperatorRef; use crate::vtable::{NotSupported, VTable}; +use crate::ArrayRef; /// A vtable for the new operator-based array functionality. Eventually this vtable will be /// merged into the main `VTable`, but for now it is kept separate to allow for incremental @@ -23,8 +23,7 @@ pub trait OperatorVTable { Ok(None) } - /// Takes the array by ownership, returning a canonical [`Vector`] containing the rows - /// indicated by the given selection [`Mask`]. + /// Returns a canonical [`Vector`] containing the rows indicated by the given selection [`Mask`]. /// /// The returned vector must be the appropriate one for the array's logical type (they are /// one-to-one with Vortex `DType`s), and should respect the output nullability of the array. @@ -47,6 +46,8 @@ pub trait OperatorVTable { Self::bind(array, Some(&selection.clone().into_array()), &mut ())?.execute() } + /// Returns the + /// Bind the array for execution in batch mode. 
/// /// This function should return a [`BatchKernelRef`] that can be used to execute the array in From c0399c05853e9dc4eb2ceeec1d4870ea8418fff1 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Fri, 7 Nov 2025 13:06:19 -0500 Subject: [PATCH 02/10] pipelined execution Signed-off-by: Nicholas Gates --- .../fastlanes/src/bitpacking/vtable/mod.rs | 3 +- encodings/fastlanes/src/for/mod.rs | 3 +- encodings/fsst/src/array.rs | 2 +- encodings/fsst/src/lib.rs | 1 - encodings/fsst/src/operator.rs | 194 ------- vortex-array/src/array/mod.rs | 18 +- vortex-array/src/array/operator.rs | 2 +- vortex-array/src/arrays/varbin/mod.rs | 4 +- vortex-array/src/arrays/varbin/operator.rs | 28 - vortex-array/src/lib.rs | 1 - vortex-array/src/operator/canonical.rs | 17 - vortex-array/src/operator/compare.rs | 532 ------------------ vortex-array/src/operator/display.rs | 32 -- vortex-array/src/operator/filter.rs | 148 ----- vortex-array/src/operator/getitem.rs | 73 --- vortex-array/src/operator/hash.rs | 177 ------ vortex-array/src/operator/metrics.rs | 163 ------ vortex-array/src/operator/mod.rs | 242 -------- vortex-array/src/operator/optimize.rs | 33 -- vortex-array/src/operator/slice.rs | 138 ----- vortex-array/src/pipeline/mod.rs | 10 +- vortex-array/src/vtable/operator.rs | 22 +- 22 files changed, 19 insertions(+), 1824 deletions(-) delete mode 100644 encodings/fsst/src/operator.rs delete mode 100644 vortex-array/src/arrays/varbin/operator.rs delete mode 100644 vortex-array/src/operator/canonical.rs delete mode 100644 vortex-array/src/operator/compare.rs delete mode 100644 vortex-array/src/operator/display.rs delete mode 100644 vortex-array/src/operator/filter.rs delete mode 100644 vortex-array/src/operator/getitem.rs delete mode 100644 vortex-array/src/operator/hash.rs delete mode 100644 vortex-array/src/operator/metrics.rs delete mode 100644 vortex-array/src/operator/mod.rs delete mode 100644 vortex-array/src/operator/optimize.rs delete mode 100644 
vortex-array/src/operator/slice.rs diff --git a/encodings/fastlanes/src/bitpacking/vtable/mod.rs b/encodings/fastlanes/src/bitpacking/vtable/mod.rs index 0cc6ffc36cf..94add8b7516 100644 --- a/encodings/fastlanes/src/bitpacking/vtable/mod.rs +++ b/encodings/fastlanes/src/bitpacking/vtable/mod.rs @@ -10,7 +10,6 @@ mod array; mod canonical; mod encode; mod operations; -mod operator; mod serde; mod validity; mod visitor; @@ -29,7 +28,7 @@ impl VTable for BitPackedVTable { type ComputeVTable = NotSupported; type EncodeVTable = Self; type SerdeVTable = Self; - type OperatorVTable = Self; + type OperatorVTable = NotSupported; fn id(_encoding: &Self::Encoding) -> EncodingId { EncodingId::new_ref("fastlanes.bitpacked") diff --git a/encodings/fastlanes/src/for/mod.rs b/encodings/fastlanes/src/for/mod.rs index 13fd9572cfa..f27aed63602 100644 --- a/encodings/fastlanes/src/for/mod.rs +++ b/encodings/fastlanes/src/for/mod.rs @@ -18,7 +18,6 @@ use vortex_scalar::Scalar; mod compress; mod compute; mod ops; -mod pipeline; mod serde; vtable!(FoR); @@ -35,7 +34,7 @@ impl VTable for FoRVTable { type ComputeVTable = NotSupported; type EncodeVTable = Self; type SerdeVTable = Self; - type OperatorVTable = Self; + type OperatorVTable = NotSupported; fn id(_encoding: &Self::Encoding) -> EncodingId { EncodingId::new_ref("fastlanes.for") diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs index 7df661e604f..4a26a6f0f38 100644 --- a/encodings/fsst/src/array.rs +++ b/encodings/fsst/src/array.rs @@ -32,7 +32,7 @@ impl VTable for FSSTVTable { type ComputeVTable = NotSupported; type EncodeVTable = Self; type SerdeVTable = Self; - type OperatorVTable = Self; + type OperatorVTable = NotSupported; fn id(_encoding: &Self::Encoding) -> EncodingId { EncodingId::new_ref("vortex.fsst") diff --git a/encodings/fsst/src/lib.rs b/encodings/fsst/src/lib.rs index f854708e790..b8e449a66a6 100644 --- a/encodings/fsst/src/lib.rs +++ b/encodings/fsst/src/lib.rs @@ -15,7 +15,6 @@ mod array; mod 
canonical; mod compress; mod compute; -mod operator; mod ops; mod serde; #[cfg(test)] diff --git a/encodings/fsst/src/operator.rs b/encodings/fsst/src/operator.rs deleted file mode 100644 index f203fcdcb72..00000000000 --- a/encodings/fsst/src/operator.rs +++ /dev/null @@ -1,194 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::any::Any; -use std::hash::{Hash, Hasher}; -use std::sync::Arc; - -use async_trait::async_trait; -use vortex_array::compute::filter; -use vortex_array::operator::filter::FilterOperator; -use vortex_array::operator::slice::SliceOperator; -use vortex_array::operator::{ - BatchBindCtx, BatchExecution, BatchExecutionRef, BatchOperator, LengthBounds, Operator, - OperatorEq, OperatorHash, OperatorId, OperatorRef, -}; -use vortex_array::vtable::OperatorVTable; -use vortex_array::{Array, Canonical}; -use vortex_dtype::DType; -use vortex_error::VortexResult; -use vortex_mask::Mask; - -use crate::{FSSTArray, FSSTVTable}; - -impl OperatorVTable for FSSTVTable { - fn to_operator(array: &FSSTArray) -> VortexResult> { - Ok(Some(Arc::new(array.clone()))) - } -} - -impl OperatorHash for FSSTArray { - fn operator_hash(&self, state: &mut H) { - self.dtype().hash(state); - self.symbols().operator_hash(state); - self.symbol_lengths().operator_hash(state); - self.codes().operator_hash(state); - self.uncompressed_lengths().operator_hash(state); - } -} - -impl OperatorEq for FSSTArray { - fn operator_eq(&self, other: &Self) -> bool { - self.dtype() == other.dtype() - && self.symbols().operator_eq(other.symbols()) - && self.symbol_lengths().operator_eq(other.symbol_lengths()) - && self.codes().operator_eq(other.codes()) - && self - .uncompressed_lengths() - .operator_eq(other.uncompressed_lengths()) - } -} - -impl Operator for FSSTArray { - fn id(&self) -> OperatorId { - self.encoding_id() - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn dtype(&self) -> &DType { - 
Array::dtype(self.as_ref()) - } - - fn bounds(&self) -> LengthBounds { - Array::len(self.as_ref()).into() - } - - fn children(&self) -> &[OperatorRef] { - // TODO(ngates): we have varbin child - &[] - } - - fn with_children(self: Arc, _children: Vec) -> VortexResult { - Ok(self) - } - - fn reduce_parent( - &self, - parent: OperatorRef, - _child_idx: usize, - ) -> VortexResult> { - if let Some(filter) = parent.as_any().downcast_ref::() { - return Ok(Some(Arc::new(FilteredFSSTOperator { - array: self.clone(), - mask: filter.mask().clone(), - }))); - } - - if let Some(slice) = parent.as_any().downcast_ref::() { - return Ok(Some(Arc::new( - self.slice(slice.range().clone()) - .as_::() - .clone(), - ))); - } - - Ok(None) - } - - fn as_batch(&self) -> Option<&dyn BatchOperator> { - Some(self) - } -} - -impl BatchOperator for FSSTArray { - fn bind(&self, _ctx: &mut dyn BatchBindCtx) -> VortexResult { - Ok(Box::new(FSSTExecution { - array: self.clone(), - })) - } -} - -// TODO(ngates): obviously we should inline the canonical logic here -struct FSSTExecution { - array: FSSTArray, -} - -#[async_trait] -impl BatchExecution for FSSTExecution { - async fn execute(self: Box) -> VortexResult { - Ok(self.array.to_canonical()) - } -} - -#[derive(Debug)] -pub struct FilteredFSSTOperator { - array: FSSTArray, - mask: Mask, -} - -impl OperatorHash for FilteredFSSTOperator { - fn operator_hash(&self, state: &mut H) { - self.array.operator_hash(state); - self.mask.operator_hash(state); - } -} - -impl OperatorEq for FilteredFSSTOperator { - fn operator_eq(&self, other: &Self) -> bool { - self.array.operator_eq(&other.array) && self.mask.operator_eq(&other.mask) - } -} - -impl Operator for FilteredFSSTOperator { - fn id(&self) -> OperatorId { - OperatorId::from("vortex.fsst.filtered") - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn dtype(&self) -> &DType { - self.array.dtype() - } - - fn bounds(&self) -> LengthBounds { - self.mask.len().into() - } - - fn children(&self) -> 
&[OperatorRef] { - &[] - } - - fn with_children(self: Arc, _children: Vec) -> VortexResult { - Ok(self) - } - - fn as_batch(&self) -> Option<&dyn BatchOperator> { - Some(self) - } -} - -impl BatchOperator for FilteredFSSTOperator { - fn bind(&self, _ctx: &mut dyn BatchBindCtx) -> VortexResult { - Ok(Box::new(FilteredFSSTExecution { - array: self.array.clone(), - mask: self.mask.clone(), - })) - } -} - -struct FilteredFSSTExecution { - array: FSSTArray, - mask: Mask, -} - -#[async_trait] -impl BatchExecution for FilteredFSSTExecution { - async fn execute(self: Box) -> VortexResult { - Ok(filter(self.array.as_ref(), &self.mask)?.to_canonical()) - } -} diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index 98c1f3cdc06..d7bc2dc72f4 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -26,12 +26,11 @@ use crate::arrays::{ }; use crate::builders::ArrayBuilder; use crate::compute::{ComputeFn, Cost, InvocationArgs, IsConstantOpts, Output, is_constant_opts}; -use crate::operator::OperatorRef; use crate::serde::ArrayChildren; use crate::stats::{Precision, Stat, StatsProviderExt, StatsSetRef}; use crate::vtable::{ - ArrayVTable, CanonicalVTable, ComputeVTable, OperationsVTable, OperatorVTable, SerdeVTable, - VTable, ValidityVTable, VisitorVTable, + ArrayVTable, CanonicalVTable, ComputeVTable, OperationsVTable, SerdeVTable, VTable, + ValidityVTable, VisitorVTable, }; use crate::{ ArrayEq, ArrayHash, Canonical, DynArrayEq, DynArrayHash, EncodingId, EncodingRef, @@ -168,11 +167,6 @@ pub trait Array: /// call. fn invoke(&self, compute_fn: &ComputeFn, args: &InvocationArgs) -> VortexResult>; - - /// Convert the array to an operator if supported by the encoding. - /// - /// Returns `None` if the encoding does not support operator operations. 
- fn to_operator(&self) -> VortexResult>; } impl Array for Arc { @@ -275,10 +269,6 @@ impl Array for Arc { ) -> VortexResult> { self.as_ref().invoke(compute_fn, args) } - - fn to_operator(&self) -> VortexResult> { - self.as_ref().to_operator() - } } /// A reference counted pointer to a dynamic [`Array`] trait object. @@ -649,10 +639,6 @@ impl Array for ArrayAdapter { ) -> VortexResult> { >::invoke(&self.0, compute_fn, args) } - - fn to_operator(&self) -> VortexResult> { - >::to_operator(&self.0) - } } impl ArrayHash for ArrayAdapter { diff --git a/vortex-array/src/array/operator.rs b/vortex-array/src/array/operator.rs index ef612a53933..0a0e0705046 100644 --- a/vortex-array/src/array/operator.rs +++ b/vortex-array/src/array/operator.rs @@ -21,7 +21,7 @@ pub trait ArrayOperator: 'static + Send + Sync { /// # Panics /// /// If the mask length does not match the array length. - /// If the array's implementation returns an invalid vector (wrong length, wrong type, etc). + /// If the array's implementation returns an invalid vector (wrong length, wrong type, etc.). fn execute_batch(&self, selection: &Mask, ctx: &mut dyn ExecutionCtx) -> VortexResult; /// Optimize the array by running the optimization rules. diff --git a/vortex-array/src/arrays/varbin/mod.rs b/vortex-array/src/arrays/varbin/mod.rs index fc911834ca0..5c28fe4466a 100644 --- a/vortex-array/src/arrays/varbin/mod.rs +++ b/vortex-array/src/arrays/varbin/mod.rs @@ -5,7 +5,8 @@ mod array; pub use array::VarBinArray; mod compute; -pub(crate) use compute::varbin_compute_min_max; // For use in `varbinview`. +pub(crate) use compute::varbin_compute_min_max; +// For use in `varbinview`. 
mod vtable; pub use vtable::{VarBinEncoding, VarBinVTable}; @@ -13,7 +14,6 @@ pub use vtable::{VarBinEncoding, VarBinVTable}; pub mod builder; mod accessor; -mod operator; use vortex_buffer::ByteBuffer; use vortex_dtype::DType; diff --git a/vortex-array/src/arrays/varbin/operator.rs b/vortex-array/src/arrays/varbin/operator.rs deleted file mode 100644 index b20c381cf54..00000000000 --- a/vortex-array/src/arrays/varbin/operator.rs +++ /dev/null @@ -1,28 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::hash::{Hash, Hasher}; - -use crate::arrays::VarBinArray; -use crate::operator::{OperatorEq, OperatorHash}; -use crate::vtable::ValidityHelper; - -impl OperatorHash for VarBinArray { - fn operator_hash(&self, state: &mut H) { - self.dtype.hash(state); - self.bytes().operator_hash(state); - self.offsets().operator_hash(state); - self.validity().operator_hash(state); - } -} - -impl OperatorEq for VarBinArray { - fn operator_eq(&self, other: &Self) -> bool { - self.dtype == other.dtype - && self.bytes().operator_eq(other.bytes()) - && self.offsets().operator_eq(other.offsets()) - && self.validity().operator_eq(other.validity()) - } -} - -// TODO(ngates): impl Operator diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index eb2e1b4492a..31a13731b35 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -44,7 +44,6 @@ mod hash; pub mod iter; mod mask_future; mod metadata; -pub mod operator; pub mod optimizer; mod partial_ord; pub mod patches; diff --git a/vortex-array/src/operator/canonical.rs b/vortex-array/src/operator/canonical.rs deleted file mode 100644 index 2d983e9ad86..00000000000 --- a/vortex-array/src/operator/canonical.rs +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use async_trait::async_trait; -use vortex_error::VortexResult; - -use crate::Canonical; -use 
crate::operator::BatchExecution; - -pub struct CanonicalExecution(pub Canonical); - -#[async_trait] -impl BatchExecution for CanonicalExecution { - async fn execute(self: Box) -> VortexResult { - Ok(self.0) - } -} diff --git a/vortex-array/src/operator/compare.rs b/vortex-array/src/operator/compare.rs deleted file mode 100644 index 485cf1f8284..00000000000 --- a/vortex-array/src/operator/compare.rs +++ /dev/null @@ -1,532 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::any::Any; -use std::hash::{Hash, Hasher}; -use std::marker::PhantomData; -use std::sync::Arc; - -use itertools::Itertools; -use vortex_dtype::{DType, NativePType, match_each_native_ptype}; -use vortex_error::{VortexExpect, VortexResult, vortex_bail}; - -use crate::arrays::ConstantArray; -use crate::compute::Operator as Op; -use crate::operator::{LengthBounds, Operator, OperatorEq, OperatorHash, OperatorId, OperatorRef}; -use crate::pipeline::bits::BitView; -use crate::pipeline::vec::Selection; -use crate::pipeline::view::ViewMut; -use crate::pipeline::{ - BindContext, Element, Kernel, KernelContext, PipelinedOperator, RowSelection, VectorId, -}; - -#[derive(Debug)] -pub struct CompareOperator { - children: [OperatorRef; 2], - op: Op, - dtype: DType, -} - -impl CompareOperator { - pub fn try_new(lhs: OperatorRef, rhs: OperatorRef, op: Op) -> VortexResult { - if lhs.dtype() != rhs.dtype() { - vortex_bail!( - "Cannot compare arrays with different dtypes: {} and {}", - lhs.dtype(), - rhs.dtype() - ); - } - - let lhs_const = lhs.as_any().downcast_ref::(); - let rhs_const = rhs.as_any().downcast_ref::(); - if lhs_const.is_some() && rhs_const.is_some() { - // TODO(ngates): we should return the Constant result! 
- } - - let nullability = lhs.dtype().nullability() | rhs.dtype().nullability(); - let dtype = DType::Bool(nullability); - - Ok(CompareOperator { - children: [lhs, rhs], - op, - dtype, - }) - } - - pub fn op(&self) -> Op { - self.op - } -} - -impl OperatorHash for CompareOperator { - fn operator_hash(&self, state: &mut H) { - self.op.hash(state); - self.dtype.hash(state); - self.children.iter().for_each(|c| c.operator_hash(state)); - } -} - -impl OperatorEq for CompareOperator { - fn operator_eq(&self, other: &Self) -> bool { - self.op == other.op - && self.dtype == other.dtype - && self - .children - .iter() - .zip(other.children.iter()) - .all(|(a, b)| a.operator_eq(b)) - } -} - -impl Operator for CompareOperator { - fn id(&self) -> OperatorId { - OperatorId::from("vortex.compare") - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn dtype(&self) -> &DType { - &self.dtype - } - - fn bounds(&self) -> LengthBounds { - self.children[0].bounds() & self.children[1].bounds() - } - - fn children(&self) -> &[OperatorRef] { - &self.children - } - - fn with_children(self: Arc, children: Vec) -> VortexResult { - let (lhs, rhs) = children - .into_iter() - .tuples() - .next() - .vortex_expect("missing"); - Ok(Arc::new(CompareOperator { - children: [lhs, rhs], - op: self.op, - dtype: self.dtype.clone(), - })) - } - - fn as_pipelined(&self) -> Option<&dyn PipelinedOperator> { - // If both children support pipelining, but have different row selections, then we cannot - // pipeline without an alignment step (which we currently do not support). - if let Some((left, right)) = self.children[0] - .as_pipelined() - .zip(self.children[1].as_pipelined()) - && left.row_selection() != right.row_selection() - { - return None; - } - - Some(self) - } -} - -macro_rules! 
match_each_compare_op { - ($self:expr, | $enc:ident | $body:block) => {{ - match $self { - Op::Eq => { - type $enc = Eq; - $body - } - Op::NotEq => { - type $enc = NotEq; - $body - } - Op::Gt => { - type $enc = Gt; - $body - } - Op::Gte => { - type $enc = Gte; - $body - } - Op::Lt => { - type $enc = Lt; - $body - } - Op::Lte => { - type $enc = Lte; - $body - } - } - }}; -} - -impl PipelinedOperator for CompareOperator { - fn row_selection(&self) -> RowSelection { - self.children[0] - .as_pipelined() - .map(|p| p.row_selection()) - .unwrap_or(RowSelection::All) - } - - #[allow(clippy::cognitive_complexity)] - fn bind(&self, ctx: &dyn BindContext) -> VortexResult> { - debug_assert_eq!(self.children[0].dtype(), self.children[1].dtype()); - - let DType::Primitive(ptype, _) = self.children[0].dtype() else { - vortex_bail!( - "Unsupported type for comparison: {}", - self.children[0].dtype() - ) - }; - - let lhs_const = self.children[0].as_any().downcast_ref::(); - if let Some(lhs_const) = lhs_const { - // LHS is constant, use ScalarComparePrimitiveKernel - return match_each_native_ptype!(ptype, |T| { - match_each_compare_op!(self.op.swap(), |Op| { - Ok(Box::new(ScalarComparePrimitiveKernel:: { - lhs: ctx.pipelined_input()[1], - rhs: lhs_const - .scalar() - .as_primitive() - .typed_value::() - .vortex_expect("scalar value not of type T"), - _phantom: PhantomData, - }) as Box) - }) - }); - } - - let rhs_const = self.children[1].as_any().downcast_ref::(); - if let Some(rhs_const) = rhs_const { - // RHS is constant, use ScalarComparePrimitiveKernel - return match_each_native_ptype!(ptype, |T| { - match_each_compare_op!(self.op, |Op| { - Ok(Box::new(ScalarComparePrimitiveKernel:: { - lhs: ctx.pipelined_input()[0], - rhs: rhs_const - .scalar() - .as_primitive() - .typed_value::() - .vortex_expect("scalar value not of type T"), - _phantom: PhantomData, - }) as Box) - }) - }); - } - - match_each_native_ptype!(ptype, |T| { - match_each_compare_op!(self.op, |Op| { - 
Ok(Box::new(ComparePrimitiveKernel:: { - lhs: ctx.pipelined_input()[0], - rhs: ctx.pipelined_input()[1], - _phantom: PhantomData, - }) as Box) - }) - }) - } - - fn vector_children(&self) -> Vec { - vec![0, 1] - } - - fn batch_children(&self) -> Vec { - vec![] - } -} - -/// A compare operator for primitive types that compares two vectors element-wise using a binary -/// operation. -/// Kernel that performs primitive type comparisons between two input vectors. -pub struct ComparePrimitiveKernel { - lhs: VectorId, - rhs: VectorId, - _phantom: PhantomData<(T, Op)>, -} - -impl + Send> Kernel for ComparePrimitiveKernel { - fn step( - &self, - ctx: &KernelContext, - _chunk_idx: usize, - selection: &BitView, - out: &mut ViewMut, - ) -> VortexResult<()> { - let lhs_vec = ctx.vector(self.lhs); - let lhs = lhs_vec.as_array::(); - let rhs_vec = ctx.vector(self.rhs); - let rhs = rhs_vec.as_array::(); - let bools = out.as_array_mut::(); - - match (lhs_vec.selection(), rhs_vec.selection()) { - (Selection::Prefix, Selection::Prefix) => { - for i in 0..selection.true_count() { - bools[i] = Op::compare(&lhs[i], &rhs[i]); - } - out.set_selection(Selection::Prefix) - } - (Selection::Mask, Selection::Mask) => { - // TODO(ngates): check density to decide if we should iterate indices or do - // a full scan - let mut pos = 0; - selection.iter_ones(|idx| { - bools[pos] = Op::compare(&lhs[idx], &rhs[idx]); - pos += 1; - }); - out.set_selection(Selection::Prefix) - } - (Selection::Mask, Selection::Prefix) => { - let mut pos = 0; - selection.iter_ones(|idx| { - bools[pos] = Op::compare(&lhs[idx], &rhs[pos]); - pos += 1; - }); - out.set_selection(Selection::Prefix) - } - (Selection::Prefix, Selection::Mask) => { - let mut pos = 0; - selection.iter_ones(|idx| { - bools[pos] = Op::compare(&lhs[pos], &rhs[idx]); - pos += 1; - }); - out.set_selection(Selection::Prefix) - } - } - - Ok(()) - } -} - -struct ScalarComparePrimitiveKernel> { - lhs: VectorId, - rhs: T, - _phantom: PhantomData, -} - -impl 
+ Send> Kernel - for ScalarComparePrimitiveKernel -{ - fn step( - &self, - ctx: &KernelContext, - _chunk_idx: usize, - selection: &BitView, - out: &mut ViewMut, - ) -> VortexResult<()> { - let lhs_vec = ctx.vector(self.lhs); - let lhs = lhs_vec.as_array::(); - let bools = out.as_array_mut::(); - - match lhs_vec.selection() { - Selection::Prefix => { - for i in 0..selection.true_count() { - bools[i] = Op::compare(&lhs[i], &self.rhs); - } - out.set_selection(Selection::Prefix) - } - Selection::Mask => { - // TODO(ngates): decide at what true count we should iter indices... - selection.iter_ones(|idx| { - bools[idx] = Op::compare(&lhs[idx], &self.rhs); - }); - out.set_selection(Selection::Mask) - } - } - - Ok(()) - } -} - -pub(crate) trait CompareOp { - fn compare(lhs: &T, rhs: &T) -> bool; -} - -/// Equality comparison operation. -pub struct Eq; -impl CompareOp for Eq { - #[inline(always)] - fn compare(lhs: &T, rhs: &T) -> bool { - lhs == rhs - } -} - -/// Not equal comparison operation. -pub struct NotEq; -impl CompareOp for NotEq { - #[inline(always)] - fn compare(lhs: &T, rhs: &T) -> bool { - lhs != rhs - } -} - -/// Greater than comparison operation. -pub struct Gt; -impl CompareOp for Gt { - #[inline(always)] - fn compare(lhs: &T, rhs: &T) -> bool { - lhs > rhs - } -} - -/// Greater than or equal comparison operation. -pub struct Gte; -impl CompareOp for Gte { - #[inline(always)] - fn compare(lhs: &T, rhs: &T) -> bool { - lhs >= rhs - } -} - -/// Less than comparison operation. -pub struct Lt; -impl CompareOp for Lt { - #[inline(always)] - fn compare(lhs: &T, rhs: &T) -> bool { - lhs < rhs - } -} - -/// Less than or equal comparison operation. -pub struct Lte; -impl CompareOp for Lte { - #[inline(always)] - fn compare(lhs: &T, rhs: &T) -> bool { - lhs <= rhs - } -} - -// TODO(ngates): bring these back! 
-// #[cfg(test)] -// mod tests { -// use std::rc::Rc; -// -// use vortex_buffer::BufferMut; -// use vortex_dtype::Nullability; -// use vortex_scalar::Scalar; -// -// use crate::arrays::PrimitiveArray; -// use crate::operator::bits::BitView; -// -// #[test] -// fn test_scalar_compare_stacked_on_primitive() { -// // Create input data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] -// let size = 16; -// let primitive_array = (0..i32::try_from(size).unwrap()).collect::(); -// let primitive_op = primitive_array.as_ref().to_operator().unwrap().unwrap(); -// -// // Create scalar compare operator: primitive_value > 10 -// let compare_value = Scalar::primitive(10i32, Nullability::NonNullable); -// let scalar_compare_op = Rc::new(ScalarCompareOperator::new( -// primitive_op, -// BinaryOperator::Gt, -// compare_value, -// )); -// -// // Create query plan from the stacked operators -// let plan = QueryPlan::new(scalar_compare_op.as_ref()).unwrap(); -// let mut operator = plan.executable_plan().unwrap(); -// -// // Create all-true mask for simplicity -// let mask_data = [usize::MAX; N_WORDS]; -// let mask_view = BitView::new(&mask_data); -// -// // Create output buffer for boolean results -// let mut output = BufferMut::::with_capacity(N); -// unsafe { output.set_len(N) }; -// let mut output_view = ViewMut::new(&mut output[..], None); -// -// // Execute the operator -// let result = operator._step(mask_view, &mut output_view); -// assert!(result.is_ok()); -// -// // Verify results: values 0-10 should be false, values 11-15 should be true -// for i in 0..size { -// let expected = i > 10; -// assert_eq!( -// output[i], expected, -// "Position {}: expected {}, got {}", -// i, expected, output[i] -// ); -// } -// } -// -// #[test] -// fn test_scalar_compare_different_operators() { -// // Test with different comparison operators -// let size = 8; -// let primitive_array = (0..i32::try_from(size).unwrap()).collect::(); -// -// let primitive_op = 
primitive_array.as_ref().to_operator().unwrap().unwrap(); -// -// // Test Eq: values == 3 -// let compare_value = Scalar::primitive(3i32, Nullability::NonNullable); -// let eq_op = Rc::new(ScalarCompareOperator::new( -// primitive_op, -// BinaryOperator::Eq, -// compare_value, -// )); -// -// let plan = QueryPlan::new(eq_op.as_ref()).unwrap(); -// let mut operator = plan.executable_plan().unwrap(); -// -// let mask_data = [usize::MAX; N_WORDS]; -// let mask_view = BitView::new(&mask_data); -// -// let mut output = BufferMut::::with_capacity(N); -// unsafe { output.set_len(N) }; -// let mut output_view = ViewMut::new(&mut output[..], None); -// -// let result = operator._step(mask_view, &mut output_view); -// assert!(result.is_ok()); -// -// // Only position 3 should be true -// for i in 0..size { -// let expected = i == 3; -// assert_eq!( -// output[i], expected, -// "Eq test - Position {}: expected {}, got {}", -// i, expected, output[i] -// ); -// } -// } -// -// #[test] -// fn test_scalar_compare_with_f32() { -// // Test with floating-point values -// let size = 8; -// let values: Vec = (0..size).map(|i| i as f32 + 0.5).collect(); -// let primitive_array = values.into_iter().collect::(); -// -// let primitive_op = primitive_array.as_ref().to_operator().unwrap().unwrap(); -// -// // Test Lt: values < 3.5 -// let compare_value = Scalar::primitive(3.5f32, Nullability::NonNullable); -// let lt_op = Rc::new(ScalarCompareOperator::new( -// primitive_op, -// BinaryOperator::Lt, -// compare_value, -// )); -// -// let plan = QueryPlan::new(lt_op.as_ref()).unwrap(); -// let mut operator = plan.executable_plan().unwrap(); -// -// let mask_data = [usize::MAX; N_WORDS]; -// let mask_view = BitView::new(&mask_data); -// -// let mut output = BufferMut::::with_capacity(N); -// unsafe { output.set_len(N) }; -// let mut output_view = ViewMut::new(&mut output[..], None); -// -// let result = operator._step(mask_view, &mut output_view); -// assert!(result.is_ok()); -// -// // 
Values 0.5, 1.5, 2.5 should be < 3.5 (true), 3.5+ should be false -// for i in 0..size { -// let value = i as f32 + 0.5; -// let expected = value < 3.5; -// assert_eq!( -// output[i], expected, -// "Lt test - Position {}: value {} should be {}, got {}", -// i, value, expected, output[i] -// ); -// } -// } -// } diff --git a/vortex-array/src/operator/display.rs b/vortex-array/src/operator/display.rs deleted file mode 100644 index 6f0ad498ec1..00000000000 --- a/vortex-array/src/operator/display.rs +++ /dev/null @@ -1,32 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::fmt; -use std::fmt::{Display, Formatter}; - -use crate::operator::Operator; - -impl dyn Operator + '_ { - pub fn display_tree(&self) -> impl Display { - self - } -} - -pub enum DisplayFormat { - Compact, - Tree, -} - -impl Display for dyn Operator + '_ { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.fmt_all()) - } -} - -pub struct TreeNodeDisplay<'a, T: Operator + ?Sized>(pub &'a T); - -impl<'a, T: Operator + ?Sized> Display for TreeNodeDisplay<'a, T> { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - self.0.fmt_as(DisplayFormat::Tree, f) - } -} diff --git a/vortex-array/src/operator/filter.rs b/vortex-array/src/operator/filter.rs deleted file mode 100644 index d3830fb2dc3..00000000000 --- a/vortex-array/src/operator/filter.rs +++ /dev/null @@ -1,148 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::any::Any; -use std::fmt::Debug; -use std::hash::Hasher; -use std::slice; -use std::sync::Arc; - -use async_trait::async_trait; -use vortex_dtype::DType; -use vortex_error::{VortexExpect, VortexResult}; -use vortex_mask::Mask; - -use crate::compute::filter; -use crate::operator::{ - BatchBindCtx, BatchExecution, BatchExecutionRef, BatchOperator, LengthBounds, Operator, - OperatorEq, OperatorHash, OperatorId, OperatorRef, -}; 
-use crate::{Array, Canonical, IntoArray}; - -#[derive(Debug)] -pub struct FilterOperator { - child: OperatorRef, - mask: Mask, -} - -impl OperatorEq for FilterOperator { - fn operator_eq(&self, other: &Self) -> bool { - self.child.operator_eq(&other.child) && self.mask.operator_eq(&other.mask) - } -} - -impl OperatorHash for FilterOperator { - fn operator_hash(&self, state: &mut H) { - self.child.operator_hash(state); - self.mask.operator_hash(state); - } -} - -impl FilterOperator { - pub fn new(child: OperatorRef, mask: Mask) -> FilterOperator { - assert!( - child.bounds().contains(mask.len()), - "Mask length must be within child bounds" - ); - FilterOperator { child, mask } - } - - pub fn mask(&self) -> &Mask { - &self.mask - } -} - -impl Operator for FilterOperator { - fn id(&self) -> OperatorId { - OperatorId::from("vortex.filter") - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn dtype(&self) -> &DType { - self.child.dtype() - } - - fn bounds(&self) -> LengthBounds { - self.mask.true_count().into() - } - - fn children(&self) -> &[OperatorRef] { - slice::from_ref(&self.child) - } - - fn with_children(self: Arc, children: Vec) -> VortexResult { - Ok(Arc::new(FilterOperator { - child: children.into_iter().next().vortex_expect("missing child"), - mask: self.mask.clone(), - })) - } - - fn reduce_children(&self) -> VortexResult> { - // We need selection target information to be defined for all children. - let Some(selection_targets): Option> = self - .child - .children() - .iter() - .enumerate() - .map(|(i, child)| child.is_selection_target(i)) - .collect() - else { - return Ok(None); - }; - - // Selection is defined to be false for all children, so we cannot push down the - // filter. - if selection_targets.iter().all(|s| !s) { - return Ok(None); - } - - // Otherwise, we push down the filter to all children that are selection targets. 
- let children = self - .child - .children() - .iter() - .cloned() - .enumerate() - .map(|(i, child)| { - if selection_targets[i] { - // Push-down the filter to this child. - Arc::new(FilterOperator::new(child, self.mask.clone())) as OperatorRef - } else { - child - } - }) - .collect(); - - Ok(Some(self.child.clone().with_children(children)?)) - } - - fn as_batch(&self) -> Option<&dyn BatchOperator> { - Some(self) - } -} - -impl BatchOperator for FilterOperator { - fn bind(&self, ctx: &mut dyn BatchBindCtx) -> VortexResult { - Ok(Box::new(FilterExecution { - child: ctx.child(0)?, - mask: self.mask.clone(), - }) as BatchExecutionRef) - } -} - -struct FilterExecution { - child: BatchExecutionRef, - mask: Mask, -} - -#[async_trait] -impl BatchExecution for FilterExecution { - async fn execute(self: Box) -> VortexResult { - let child = self.child.execute().await?; - // TODO(ngates): obviously inline all canonical implementations here - Ok(filter(child.into_array().as_ref(), &self.mask)?.to_canonical()) - } -} diff --git a/vortex-array/src/operator/getitem.rs b/vortex-array/src/operator/getitem.rs deleted file mode 100644 index be4f7a2d779..00000000000 --- a/vortex-array/src/operator/getitem.rs +++ /dev/null @@ -1,73 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::any::Any; -use std::hash::{Hash, Hasher}; -use std::slice; -use std::sync::Arc; - -use vortex_dtype::{DType, FieldName}; -use vortex_error::{VortexExpect, VortexResult}; - -use crate::operator::{LengthBounds, Operator, OperatorEq, OperatorHash, OperatorId, OperatorRef}; - -/// An operator that extracts a field from a struct array. -#[derive(Debug)] -pub struct GetItemOperator { - // The struct-like child operator. - child: OperatorRef, - field: FieldName, - // The dtype of the extracted field. 
- dtype: DType, -} - -impl OperatorHash for GetItemOperator { - fn operator_hash(&self, state: &mut H) { - self.child.operator_hash(state); - self.field.hash(state); - self.dtype.hash(state); - } -} -impl OperatorEq for GetItemOperator { - fn operator_eq(&self, other: &Self) -> bool { - self.child.operator_eq(&other.child) - && self.field == other.field - && self.dtype == other.dtype - } -} - -impl GetItemOperator { - pub fn field_name(&self) -> &FieldName { - &self.field - } -} - -impl Operator for GetItemOperator { - fn id(&self) -> OperatorId { - OperatorId::from("vortex.getitem") - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn dtype(&self) -> &DType { - &self.dtype - } - - fn bounds(&self) -> LengthBounds { - self.child.bounds() - } - - fn children(&self) -> &[OperatorRef] { - slice::from_ref(&self.child) - } - - fn with_children(self: Arc, children: Vec) -> VortexResult { - Ok(Arc::new(GetItemOperator { - child: children.into_iter().next().vortex_expect("missing child"), - field: self.field.clone(), - dtype: self.dtype.clone(), - })) - } -} diff --git a/vortex-array/src/operator/hash.rs b/vortex-array/src/operator/hash.rs deleted file mode 100644 index cb7b95a97e4..00000000000 --- a/vortex-array/src/operator/hash.rs +++ /dev/null @@ -1,177 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::any::Any; -use std::hash::{Hash, Hasher}; -use std::sync::Arc; - -use vortex_buffer::Buffer; -use vortex_mask::Mask; - -use crate::ArrayRef; -use crate::operator::{Operator, OperatorRef}; -use crate::validity::Validity; - -/// A hash trait for operators that loosens the semantics to permit pointer-based hashing for -/// data objects such as buffers. -/// -/// Note that since this trait can use pointer hashing, the hash is only valid for the lifetime of -/// the object. 
-pub trait OperatorHash { - fn operator_hash(&self, state: &mut H); -} - -pub trait DynOperatorHash: private::SealedHash { - fn dyn_operator_hash(&self, state: &mut dyn Hasher); -} - -impl DynOperatorHash for T { - fn dyn_operator_hash(&self, mut state: &mut dyn Hasher) { - OperatorHash::operator_hash(self, &mut state); - } -} - -/// An equality trait for operators that loosens the semantics to permit pointer-based equality -/// for data objects such as buffers. -pub trait OperatorEq { - fn operator_eq(&self, other: &Self) -> bool; -} - -pub trait DynOperatorEq: private::SealedEq { - fn dyn_operator_eq(&self, other: &dyn Any) -> bool; -} - -impl DynOperatorEq for T { - fn dyn_operator_eq(&self, other: &dyn Any) -> bool { - other - .downcast_ref::() - .is_some_and(|other| OperatorEq::operator_eq(self, other)) - } -} - -mod private { - use crate::operator::{OperatorEq, OperatorHash}; - - pub trait SealedHash {} - impl SealedHash for T {} - pub trait SealedEq {} - impl SealedEq for T {} -} - -impl OperatorHash for dyn Operator + '_ { - fn operator_hash(&self, state: &mut H) { - self.dyn_operator_hash(state); - } -} - -impl OperatorEq for dyn Operator + '_ { - fn operator_eq(&self, other: &Self) -> bool { - self.dyn_operator_eq(other.as_any()) - } -} - -impl OperatorHash for OperatorRef { - fn operator_hash(&self, state: &mut H) { - self.as_ref().operator_hash(state); - } -} - -impl OperatorEq for OperatorRef { - fn operator_eq(&self, other: &Self) -> bool { - self.as_ref().operator_eq(other.as_ref()) - } -} - -/// A wrapper type to implement [`Hash`], [`PartialEq`], and [`Eq`] using the semantics defined -/// by [`OperatorHash`] and [`OperatorEq`]. 
-pub struct OperatorKey(pub T); -impl Hash for OperatorKey { - fn hash(&self, state: &mut H) { - self.0.operator_hash(state); - } -} -impl PartialEq for OperatorKey { - fn eq(&self, other: &Self) -> bool { - self.0.operator_eq(&other.0) - } -} -impl Eq for OperatorKey {} - -impl OperatorHash for Buffer { - fn operator_hash(&self, state: &mut H) { - self.as_ptr().hash(state); - self.len().hash(state); - } -} -impl OperatorEq for Buffer { - fn operator_eq(&self, other: &Self) -> bool { - self.as_ptr() == other.as_ptr() && self.len() == other.len() - } -} - -impl OperatorHash for Mask { - fn operator_hash(&self, state: &mut H) { - std::mem::discriminant(self).hash(state); - match self { - Mask::AllTrue(len) => { - len.hash(state); - } - Mask::AllFalse(len) => { - len.hash(state); - } - Mask::Values(values) => { - let buffer = values.bit_buffer(); - buffer.offset().hash(state); - buffer.len().hash(state); - buffer.inner().as_ptr().hash(state); - } - } - } -} -impl OperatorEq for Mask { - fn operator_eq(&self, other: &Self) -> bool { - match (self, other) { - (Mask::AllTrue(len1), Mask::AllTrue(len2)) => len1 == len2, - (Mask::AllFalse(len1), Mask::AllFalse(len2)) => len1 == len2, - (Mask::Values(buf1), Mask::Values(buf2)) => { - let b1 = buf1.bit_buffer(); - let b2 = buf2.bit_buffer(); - b1.offset() == b2.offset() - && b1.len() == b2.len() - && b1.inner().as_ptr() == b2.inner().as_ptr() - } - _ => false, - } - } -} - -impl OperatorHash for Validity { - fn operator_hash(&self, state: &mut H) { - std::mem::discriminant(self).hash(state); - if let Validity::Array(array) = self { - Arc::as_ptr(array).hash(state); - } - } -} -impl OperatorEq for Validity { - fn operator_eq(&self, other: &Self) -> bool { - match (self, other) { - (Validity::AllValid, Validity::AllValid) => true, - (Validity::AllInvalid, Validity::AllInvalid) => true, - (Validity::NonNullable, Validity::NonNullable) => true, - (Validity::Array(arr1), Validity::Array(arr2)) => Arc::ptr_eq(arr1, arr2), - _ => 
false, - } - } -} - -impl OperatorHash for ArrayRef { - fn operator_hash(&self, state: &mut H) { - Arc::as_ptr(self).hash(state); - } -} -impl OperatorEq for ArrayRef { - fn operator_eq(&self, other: &Self) -> bool { - Arc::ptr_eq(self, other) - } -} diff --git a/vortex-array/src/operator/metrics.rs b/vortex-array/src/operator/metrics.rs deleted file mode 100644 index c5d08542a29..00000000000 --- a/vortex-array/src/operator/metrics.rs +++ /dev/null @@ -1,163 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::any::Any; -use std::fmt::Debug; -use std::hash::{Hash, Hasher}; -use std::sync::Arc; - -use async_trait::async_trait; -use vortex_dtype::DType; -use vortex_error::{VortexExpect, VortexResult}; -use vortex_metrics::{Timer, VortexMetrics}; - -use crate::Canonical; -use crate::operator::{ - BatchBindCtx, BatchExecution, BatchExecutionRef, BatchOperator, LengthBounds, Operator, - OperatorEq, OperatorHash, OperatorId, OperatorRef, -}; -use crate::pipeline::bits::BitView; -use crate::pipeline::view::ViewMut; -use crate::pipeline::{BindContext, Kernel, KernelContext, PipelinedOperator, RowSelection}; - -/// An operator that wraps another operator and records metrics about its execution. 
-#[derive(Debug)] -pub struct MetricsOperator { - inner: OperatorRef, - metrics: VortexMetrics, -} - -impl OperatorHash for MetricsOperator { - fn operator_hash(&self, state: &mut H) { - self.inner.operator_hash(state); - // Include our ID just to differentiate from the inner operator - self.id().hash(state); - } -} - -impl OperatorEq for MetricsOperator { - fn operator_eq(&self, other: &Self) -> bool { - self.inner.operator_eq(&other.inner) - } -} - -impl MetricsOperator { - pub fn new(inner: OperatorRef, metrics: VortexMetrics) -> Self { - let metrics = metrics.child_with_tags([("operator", inner.id().as_ref().to_string())]); - Self { inner, metrics } - } - - pub fn metrics(&self) -> &VortexMetrics { - &self.metrics - } -} - -impl Operator for MetricsOperator { - fn id(&self) -> OperatorId { - OperatorId::from("vortex.metrics") - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn dtype(&self) -> &DType { - self.inner.dtype() - } - - fn bounds(&self) -> LengthBounds { - self.inner.bounds() - } - - fn children(&self) -> &[OperatorRef] { - self.inner.children() - } - - fn with_children(self: Arc, children: Vec) -> VortexResult { - Ok(Arc::new(MetricsOperator { - inner: self.inner.clone().with_children(children)?, - metrics: self.metrics.clone(), - })) - } - - fn as_batch(&self) -> Option<&dyn BatchOperator> { - self.inner.as_batch().is_some().then_some(self) - } - - fn as_pipelined(&self) -> Option<&dyn PipelinedOperator> { - // Only support pipelined execution if the inner operator does - self.inner.as_pipelined().is_some().then_some(self) - } -} - -impl BatchOperator for MetricsOperator { - fn bind(&self, ctx: &mut dyn BatchBindCtx) -> VortexResult { - let inner = self.inner.as_batch().vortex_expect("checked").bind(ctx)?; - let timer = self.metrics.timer("operator.batch.execute"); - Ok(Box::new(MetricsBatchExecution { inner, timer })) - } -} - -struct MetricsBatchExecution { - inner: BatchExecutionRef, - timer: Arc, -} - -#[async_trait] -impl BatchExecution 
for MetricsBatchExecution { - async fn execute(self: Box) -> VortexResult { - let _timer = self.timer.time(); - self.inner.execute().await - } -} - -impl PipelinedOperator for MetricsOperator { - fn row_selection(&self) -> RowSelection { - self.inner - .as_pipelined() - .vortex_expect("checked") - .row_selection() - } - - fn bind(&self, ctx: &dyn BindContext) -> VortexResult> { - let inner = self - .inner - .as_pipelined() - .vortex_expect("checked") - .bind(ctx)?; - let timer = self.metrics.timer("operator.operator.step"); - Ok(Box::new(MetricsKernel { inner, timer })) - } - - fn vector_children(&self) -> Vec { - self.inner - .as_pipelined() - .vortex_expect("checked") - .vector_children() - } - - fn batch_children(&self) -> Vec { - self.inner - .as_pipelined() - .vortex_expect("checked") - .batch_children() - } -} - -struct MetricsKernel { - inner: Box, - timer: Arc, -} - -impl Kernel for MetricsKernel { - fn step( - &self, - ctx: &KernelContext, - chunk_idx: usize, - selection: &BitView, - out: &mut ViewMut, - ) -> VortexResult<()> { - let _timer = self.timer.time(); - self.inner.step(ctx, chunk_idx, selection, out) - } -} diff --git a/vortex-array/src/operator/mod.rs b/vortex-array/src/operator/mod.rs deleted file mode 100644 index 67ef0734895..00000000000 --- a/vortex-array/src/operator/mod.rs +++ /dev/null @@ -1,242 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -//! This module defines a new way of modelling arrays and expressions in Vortex. To avoid naming -//! conflicts, we refer to the new model as "operators". -//! -//! Operators form a more traditional "logical plan" as might be seen in other query engines. -//! Each operator supports one primary function which is to produce a canonical representation of -//! its data, known as `canonicalization`. Operators have the option to produce this canonical -//! form using different execution models, including batch, pipelined, and GPU. -//! -//! 
Initial designs for this module involved passing masks down through the physical execution -//! tree as futures, allowing operators to skip computation for rows that are not needed. We -//! ultimately decided against this approach and instead introduce a `Filter` operator -//! that can be pushed down in the same way as any other operator. -//! -//! On the one hand, this means common subtree elimination is much easier, since we know the mask -//! or identity of the mask future inside the filter operator up-front. On the other hand, it -//! means that an operator no longer has a known length. In the end state, we will redefine a -//! Vortex array to be a wrapped around an operator that _does_ have a known length, amongst other -//! properties (such as non-blocking evaluation). -//! -//! We also introduce the idea of an execution that can evaluate an operator tree efficiently. It -//! supports common subtree elimination, as well as extracting sub-graphs for pipelined and GPU -//! execution. The execution is also responsible for managing memory and scheduling work across -//! different execution resources. -//! - -#![allow(dead_code)] - -pub mod canonical; -pub mod compare; -mod display; -pub mod filter; -pub mod getitem; -mod hash; -pub mod metrics; -mod optimize; -pub mod slice; - -use std::any::{Any, type_name}; -use std::fmt; -use std::fmt::{Debug, Formatter}; -use std::ops::BitAnd; -use std::sync::Arc; - -use arcref::ArcRef; -use async_trait::async_trait; -pub use display::*; -pub use hash::*; -use termtree::Tree; -use vortex_dtype::DType; -use vortex_error::VortexResult; - -use crate::Canonical; -use crate::pipeline::PipelinedOperator; - -pub type OperatorId = ArcRef; -pub type OperatorRef = Arc; - -/// An operator represents a node in a logical query plan. -pub trait Operator: 'static + Send + Sync + Debug + DynOperatorHash + DynOperatorEq { - /// The unique identifier for this operator instance. - fn id(&self) -> OperatorId; - - /// For downcasting. 
- fn as_any(&self) -> &dyn Any; - - /// Returns the [`DType`] of the array produced by this operator. - fn dtype(&self) -> &DType; - - /// Returns the bounds on the number of rows produced by this operator. - fn bounds(&self) -> LengthBounds; - - /// Returns the exact number of rows produced by this operator, if known. - fn len(&self) -> Option { - self.bounds().maybe_len() - } - - /// Returns if this operator is known to be empty (i.e. max bound is 0). - fn is_empty(&self) -> bool { - self.bounds().max == 0 - } - - /// The children of this operator. - fn children(&self) -> &[OperatorRef]; - - /// The number of children of this operator. - fn nchildren(&self) -> usize { - self.children().len() - } - - /// Override the default formatting of this operator. - fn fmt_as(&self, _df: DisplayFormat, f: &mut Formatter) -> fmt::Result { - write!(f, "{}", type_name::()) - } - - fn fmt_all(&self) -> String { - let node_name = TreeNodeDisplay(self).to_string(); - let child_trees: Vec<_> = self - .children() - .iter() - .map(|child| child.fmt_all()) - .collect(); - Tree::new(node_name) - .with_leaves(child_trees) - .with_multiline(true) - .to_string() - } - - /// Create a new instance of this operator with the given children. - /// - /// ## Panics - /// - /// Panics if the number or dtypes of children are incorrect. - /// - fn with_children(self: Arc, _children: Vec) -> VortexResult; - - /// Attempt to optimize this node by analyzing its children. - /// - /// For example, if all the children are constant, this function should perform constant - /// folding and return a constant operator. - /// - /// This function should typically be implemented only for self-contained optimizations based - /// on child properties - fn reduce_children(&self) -> VortexResult> { - Ok(None) - } - - /// Attempt to push down a parent operator through this node. - /// - /// The `child_idx` parameter indicates which child of the parent this operator occupies. 
- /// For example, if the parent is a binary operator, and this operator is the left child, - /// then `child_idx` will be 0. If this operator is the right child, then `child_idx` will be 1. - /// - /// The returned operator will replace the parent in the tree. - /// - /// This function should typically be implemented for cross-operator optimizations where the - /// child needs to adapt to the parent's requirements - fn reduce_parent( - &self, - _parent: OperatorRef, - _child_idx: usize, - ) -> VortexResult> { - Ok(None) - } - - /// Return `true` if the given child is considered to be a selection target. - /// - /// The definition of this is such that pushing a selection operator down to all selection - /// targets will result in the same output as a selection on this operator. - /// - /// For example, `select(Op, mask) == Op(select(child, mask), ...)` for all children that are - /// selection targets. - /// - /// If any child index returns `None`, then selection pushdown is not possible. - /// If all children return `Some(false)`, then selection pushdown is not possible. - fn is_selection_target(&self, _child_idx: usize) -> Option { - None - } - - /// Returns this operator as a [`BatchOperator`] if it supports batch execution. - fn as_batch(&self) -> Option<&dyn BatchOperator> { - None - } - - /// Returns this operator as a [`PipelinedOperator`] if it supports pipelined execution. - /// - /// Note that operators that implement [`PipelinedOperator`] *do not need* to implement - /// [`BatchOperator`], although they may choose to do so. - fn as_pipelined(&self) -> Option<&dyn PipelinedOperator> { - None - } -} - -/// Represents the known row count bounds of an operator. 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub struct LengthBounds { - pub min: usize, - pub max: usize, -} - -impl LengthBounds { - pub fn maybe_len(&self) -> Option { - (self.min == self.max).then_some(self.min) - } - - pub fn contains(&self, len: usize) -> bool { - self.min <= len && len <= self.max - } - - pub fn intersect_all>(iters: I) -> Self { - let mut min = 0; - let mut max = 0; - for bounds in iters { - min = min.max(bounds.min); - max = max.min(bounds.max); - } - Self { min, max } - } -} - -impl BitAnd for LengthBounds { - type Output = Self; - - fn bitand(self, rhs: Self) -> Self::Output { - Self { - min: self.min.max(rhs.min), - max: self.max.min(rhs.max), - } - } -} - -impl From for LengthBounds { - fn from(value: usize) -> Self { - Self { - min: value, - max: value, - } - } -} - -/// The default execution mode for an operator is batch mode. -pub trait BatchOperator: Operator { - fn bind(&self, ctx: &mut dyn BatchBindCtx) -> VortexResult; -} - -pub trait BatchBindCtx { - /// Returns the execution for the child at the given index, consuming it from the context. - /// Each child may be consumed only once. - fn child(&mut self, idx: usize) -> VortexResult; -} - -/// The primary execution trait for operators. -/// -/// Alternatively, or additionally, operators may choose to implement [`PipelinedOperator`]. 
-#[async_trait] -pub trait BatchExecution: Send { - async fn execute(self: Box) -> VortexResult; -} - -pub type BatchExecutionRef = Box; diff --git a/vortex-array/src/operator/optimize.rs b/vortex-array/src/operator/optimize.rs deleted file mode 100644 index bdb56c67e3b..00000000000 --- a/vortex-array/src/operator/optimize.rs +++ /dev/null @@ -1,33 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::sync::Arc; - -use itertools::Itertools; -use vortex_error::VortexResult; - -use crate::operator::{Operator, OperatorRef}; - -impl dyn Operator + '_ { - /// Optimize the operator tree rooted at this operator by applying local - /// optimizations such as reducing redundant operators. - pub fn optimize(self: Arc) -> VortexResult { - let children = self - .children() - .iter() - .map(|child| child.clone().optimize()) - .try_collect()?; - - let mut operator = self.with_children(children)?; - operator = operator.reduce_children()?.unwrap_or(operator); - - let parent = operator.clone(); - for (idx, child) in operator.children().iter().enumerate() { - if let Some(new_operator) = child.reduce_parent(parent.clone(), idx)? 
{ - return Ok(new_operator); - } - } - - Ok(operator) - } -} diff --git a/vortex-array/src/operator/slice.rs b/vortex-array/src/operator/slice.rs deleted file mode 100644 index 9427565770d..00000000000 --- a/vortex-array/src/operator/slice.rs +++ /dev/null @@ -1,138 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::any::Any; -use std::hash::Hash; -use std::ops::Range; -use std::sync::Arc; - -use async_trait::async_trait; -use itertools::Itertools; -use vortex_dtype::DType; -use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_bail}; - -use crate::operator::{ - BatchBindCtx, BatchExecution, BatchExecutionRef, BatchOperator, LengthBounds, Operator, - OperatorEq, OperatorHash, OperatorId, OperatorRef, -}; -use crate::{Array, Canonical, IntoArray}; - -#[derive(Debug, Clone)] -pub struct SliceOperator { - child: OperatorRef, - range: Range, -} - -impl SliceOperator { - pub fn try_new(child: OperatorRef, range: Range) -> VortexResult { - if range.start > range.end { - vortex_bail!( - "invalid slice range: start > end ({} > {})", - range.start, - range.end - ); - } - if range.end > child.bounds().max { - vortex_bail!( - "slice range end out of bounds: {} > {}", - range.end, - child.bounds().max - ); - } - Ok(SliceOperator { child, range }) - } - - pub fn range(&self) -> &Range { - &self.range - } -} - -impl OperatorHash for SliceOperator { - fn operator_hash(&self, state: &mut H) { - self.child.operator_hash(state); - self.range.hash(state); - } -} - -impl OperatorEq for SliceOperator { - fn operator_eq(&self, other: &Self) -> bool { - self.range == other.range && self.child.operator_eq(&other.child) - } -} - -impl Operator for SliceOperator { - fn id(&self) -> OperatorId { - OperatorId::from("vortex.slice") - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn dtype(&self) -> &DType { - self.child.dtype() - } - - fn bounds(&self) -> LengthBounds { - (self.range.end - 
self.range.start).into() - } - - fn children(&self) -> &[OperatorRef] { - std::slice::from_ref(&self.child) - } - - fn with_children(self: Arc, children: Vec) -> VortexResult { - Ok(Arc::new(SliceOperator::try_new( - children.into_iter().next().vortex_expect("missing child"), - self.range.clone(), - )?)) - } - - fn reduce_children(&self) -> VortexResult> { - // We push down the slice operator to any child that is aligned to the parent. - let children = (0..self.nchildren()) - .map(|i| { - let child = self.child.children()[i].clone(); - - if self.child.is_selection_target(i).unwrap_or_default() { - // Push-down the filter to this child. - Ok::<_, VortexError>(Arc::new(SliceOperator::try_new( - child, - self.range.clone(), - )?) as OperatorRef) - } else { - Ok(child) - } - }) - .try_collect()?; - - Ok(Some(self.child.clone().with_children(children)?)) - } - - fn as_batch(&self) -> Option<&dyn BatchOperator> { - Some(self) - } -} - -impl BatchOperator for SliceOperator { - fn bind(&self, ctx: &mut dyn BatchBindCtx) -> VortexResult { - let child_exec = ctx.child(0)?; - Ok(Box::new(SliceExecution { - child: child_exec, - range: self.range.clone(), - })) - } -} - -struct SliceExecution { - child: BatchExecutionRef, - range: Range, -} - -#[async_trait] -impl BatchExecution for SliceExecution { - async fn execute(self: Box) -> VortexResult { - let child = self.child.execute().await?; - Ok(child.into_array().slice(self.range).to_canonical()) - } -} diff --git a/vortex-array/src/pipeline/mod.rs b/vortex-array/src/pipeline/mod.rs index ff918d86036..76a79dbacf1 100644 --- a/vortex-array/src/pipeline/mod.rs +++ b/vortex-array/src/pipeline/mod.rs @@ -19,22 +19,20 @@ //! It is a work-in-progress and is not yet used in production. 
pub mod bits; -pub(crate) mod operator; mod types; pub mod vec; pub mod view; use std::cell::RefCell; -use self::vec::Vector; -use crate::operator::Operator; -use crate::pipeline::bits::BitView; -use crate::Canonical; pub use types::*; use vec::VectorRef; use vortex_error::VortexResult; use vortex_vector::VectorMut; +use self::vec::Vector; +use crate::pipeline::bits::BitView; + /// The number of elements in each step of a Vortex evaluation operator. pub const N: usize = 1024; @@ -123,8 +121,6 @@ pub trait Kernel: Send { pub struct KernelContext { /// The allocated vectors for intermediate results. pub(crate) vectors: Vec>, - /// The computed batch inputs. - pub(crate) batch_inputs: Vec, } impl KernelContext { diff --git a/vortex-array/src/vtable/operator.rs b/vortex-array/src/vtable/operator.rs index c0379350440..857436bfc34 100644 --- a/vortex-array/src/vtable/operator.rs +++ b/vortex-array/src/vtable/operator.rs @@ -1,15 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{VortexResult, vortex_bail}; use vortex_mask::Mask; use vortex_vector::Vector; +use crate::ArrayRef; use crate::array::IntoArray; use crate::execution::{BatchKernelRef, BindCtx, ExecutionCtx}; -use crate::operator::OperatorRef; +use crate::pipeline::Pipelined; use crate::vtable::{NotSupported, VTable}; -use crate::ArrayRef; /// A vtable for the new operator-based array functionality. Eventually this vtable will be /// merged into the main `VTable`, but for now it is kept separate to allow for incremental @@ -17,12 +17,6 @@ use crate::ArrayRef; /// /// See for the operators RFC. pub trait OperatorVTable { - /// Convert the current array into a [`OperatorRef`]. - /// Returns `None` if the array cannot be converted to an operator. 
- fn to_operator(_array: &V::Array) -> VortexResult> { - Ok(None) - } - /// Returns a canonical [`Vector`] containing the rows indicated by the given selection [`Mask`]. /// /// The returned vector must be the appropriate one for the array's logical type (they are @@ -46,7 +40,11 @@ pub trait OperatorVTable { Self::bind(array, Some(&selection.clone().into_array()), &mut ())?.execute() } - /// Returns the + /// Returns an implementation of the [`Pipelined`] trait for this array, if pipelined execution + /// is supported. + fn execute_pipelined(_array: &V::Array) -> Option<&dyn Pipelined> { + None + } /// Bind the array for execution in batch mode. /// @@ -106,10 +104,6 @@ pub trait OperatorVTable { } impl OperatorVTable for NotSupported { - fn to_operator(_array: &V::Array) -> VortexResult> { - Ok(None) - } - fn bind( array: &V::Array, _selection: Option<&ArrayRef>, From 70d73cc3023f2395d25b4c44703a218aa7af4630 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Fri, 7 Nov 2025 14:42:20 -0500 Subject: [PATCH 03/10] pipelined execution Signed-off-by: Nicholas Gates --- vortex-array/src/array/operator.rs | 14 +- .../src/pipeline/{bits/view.rs => bits.rs} | 0 vortex-array/src/pipeline/bits/mod.rs | 10 - vortex-array/src/pipeline/bits/vector.rs | 256 ------------------ vortex-array/src/pipeline/bits/view_mut.rs | 135 --------- vortex-array/src/pipeline/mod.rs | 66 +++-- vortex-array/src/pipeline/source_driver.rs | 105 +++++++ vortex-array/src/pipeline/types.rs | 97 ------- vortex-array/src/pipeline/vec.rs | 161 ----------- vortex-array/src/pipeline/view.rs | 225 --------------- vortex-array/src/vtable/operator.rs | 11 +- 11 files changed, 161 insertions(+), 919 deletions(-) rename vortex-array/src/pipeline/{bits/view.rs => bits.rs} (100%) delete mode 100644 vortex-array/src/pipeline/bits/mod.rs delete mode 100644 vortex-array/src/pipeline/bits/vector.rs delete mode 100644 vortex-array/src/pipeline/bits/view_mut.rs create mode 100644 
vortex-array/src/pipeline/source_driver.rs delete mode 100644 vortex-array/src/pipeline/types.rs delete mode 100644 vortex-array/src/pipeline/vec.rs delete mode 100644 vortex-array/src/pipeline/view.rs diff --git a/vortex-array/src/array/operator.rs b/vortex-array/src/array/operator.rs index 0a0e0705046..81194065d6d 100644 --- a/vortex-array/src/array/operator.rs +++ b/vortex-array/src/array/operator.rs @@ -3,11 +3,12 @@ use std::sync::Arc; -use vortex_error::{VortexResult, vortex_panic}; +use vortex_error::{vortex_panic, VortexResult}; use vortex_mask::Mask; -use vortex_vector::{Vector, VectorOps, vector_matches_dtype}; +use vortex_vector::{vector_matches_dtype, Vector, VectorOps}; use crate::execution::{BatchKernelRef, BindCtx, DummyExecutionCtx, ExecutionCtx}; +use crate::pipeline::source_driver::PipelineDriver; use crate::vtable::{OperatorVTable, VTable}; use crate::{Array, ArrayAdapter, ArrayRef}; @@ -62,6 +63,15 @@ impl ArrayOperator for Arc { impl ArrayOperator for ArrayAdapter { fn execute_batch(&self, selection: &Mask, ctx: &mut dyn ExecutionCtx) -> VortexResult { + // Check to see if we should execute the array in a pipelined fashion. This is a + // short-circuit for now until we have a full pipeline executor, but it allows each arrow + // to only implement the pipeline API. 
+ if let Some(pipelined) = + >::execute_pipelined(&self.0) + { + PipelineDriver::new(pipelined).execute + } + let vector = >::execute_batch(&self.0, selection, ctx)?; diff --git a/vortex-array/src/pipeline/bits/view.rs b/vortex-array/src/pipeline/bits.rs similarity index 100% rename from vortex-array/src/pipeline/bits/view.rs rename to vortex-array/src/pipeline/bits.rs diff --git a/vortex-array/src/pipeline/bits/mod.rs b/vortex-array/src/pipeline/bits/mod.rs deleted file mode 100644 index 1ea4af9e628..00000000000 --- a/vortex-array/src/pipeline/bits/mod.rs +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -mod vector; -mod view; -mod view_mut; - -pub use vector::*; -pub use view::*; -pub use view_mut::*; diff --git a/vortex-array/src/pipeline/bits/vector.rs b/vortex-array/src/pipeline/bits/vector.rs deleted file mode 100644 index 5e214866da3..00000000000 --- a/vortex-array/src/pipeline/bits/vector.rs +++ /dev/null @@ -1,256 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::fmt::{Debug, Formatter}; -use std::ops::Not; -use std::sync::{Arc, LazyLock}; - -use bitvec::array::BitArray; -use bitvec::order::Lsb0; - -use super::{BitView, BitViewMut}; -use crate::pipeline::{N, N_WORDS}; - -static EMPTY: LazyLock = LazyLock::new(|| BitVector { - bits: Arc::new(BitArray::ZERO), - true_count: 0, -}); - -static FULL: LazyLock = LazyLock::new(|| BitVector { - bits: Arc::new(BitArray::ZERO.not()), - true_count: N, -}); - -/// An owned fixed-size bit vector of length `N` bits, represented as an array of usize words. -/// -/// Internally, it uses a [`BitArray`] to store the bits, but this crate has some -/// performance foot-guns in cases where we can lean on better assumptions, and therefore we wrap -/// it up for use within Vortex. -/// Owned bit vector for storing boolean selection masks. 
-#[derive(Clone)] -pub struct BitVector { - pub(super) bits: Arc>, - pub(super) true_count: usize, -} - -impl Debug for BitVector { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("BitVector") - .field("true_count", &self.true_count) - //.field("bits", &self.bits.as_raw_slice()) - .finish() - } -} - -impl PartialEq for BitVector { - fn eq(&self, other: &Self) -> bool { - Arc::ptr_eq(&self.bits, &other.bits) - || (self.true_count == other.true_count && self.bits == other.bits) - } -} - -impl Eq for BitVector {} - -impl BitVector { - pub fn empty() -> &'static BitVector { - &EMPTY - } - - pub fn full() -> &'static BitVector { - &FULL - } - - pub fn true_until(n: usize) -> Self { - assert!(n <= N, "Cannot create a BitVector with more than N bits"); - - let mut bits = Arc::new(BitArray::<[usize; N_WORDS], Lsb0>::ZERO); - let bits_mut = Arc::make_mut(&mut bits); - - let mut word = 0; - let mut remaining = n; - while remaining >= usize::BITS as usize { - bits_mut.as_raw_mut_slice()[word] = usize::MAX; - remaining -= usize::BITS as usize; - word += 1; - } - - if remaining > 0 { - // For LSB ordering, set the lower bits (0 to remaining-1) - bits_mut.as_raw_mut_slice()[word] = (1usize << remaining) - 1; - } - - BitVector { - bits, - true_count: n, - } - } - - pub fn true_count(&self) -> usize { - self.true_count - } - - pub fn as_raw(&self) -> &[usize; N_WORDS] { - // It's actually remarkably hard to get a reference to the underlying array! - let raw = self.bits.as_raw_slice(); - unsafe { &*(raw.as_ptr() as *const [usize; N_WORDS]) } - } - - pub fn as_raw_mut(&mut self) -> &mut [usize; N_WORDS] { - // SAFETY: We assume that the bits are mutable and that the view is valid. 
- let raw = Arc::make_mut(&mut self.bits).as_raw_mut_slice(); - unsafe { &mut *(raw.as_mut_ptr() as *mut [usize; N_WORDS]) } - } - - pub fn fill_from(&mut self, iter: I) - where - I: IntoIterator, - { - let mut true_count = 0; - for (dst, word) in self.as_raw_mut().iter_mut().zip(iter) { - true_count += word.count_ones() as usize; - *dst = word; - } - self.true_count = true_count; - } - - pub fn as_view(&self) -> BitView<'_> { - unsafe { BitView::new_unchecked(&self.bits, self.true_count) } - } - - pub fn as_view_mut(&mut self) -> BitViewMut<'_> { - unsafe { BitViewMut::new_unchecked(Arc::make_mut(&mut self.bits), self.true_count) } - } -} - -impl From> for BitVector { - fn from(value: BitView<'_>) -> Self { - let true_count = value.true_count(); - let bits = Arc::new(BitArray::<[usize; N_WORDS], Lsb0>::from(*value.as_raw())); - BitVector { bits, true_count } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_fill_from() { - let mut vec = BitVector::empty().clone(); - - // Fill with a pattern - let pattern = [ - 0b1010101010101010usize, - 0b1111000011110000usize, - usize::MAX, - 0, - ]; - - vec.fill_from(pattern.iter().copied()); - - let raw = vec.as_raw(); - assert_eq!(raw[0], 0b1010101010101010usize); - assert_eq!(raw[1], 0b1111000011110000usize); - assert_eq!(raw[2], usize::MAX); - assert_eq!(raw[3], 0); - - // Check true_count is updated correctly - let expected_count = 0b1010101010101010usize.count_ones() as usize - + 0b1111000011110000usize.count_ones() as usize - + usize::MAX.count_ones() as usize; - assert_eq!(vec.true_count(), expected_count); - } - - #[test] - fn test_as_view() { - let vec = BitVector::true_until(100); - let view = vec.as_view(); - - assert_eq!(view.true_count(), 100); - - // Verify the view sees the same bits - let mut ones = Vec::new(); - view.iter_ones(|idx| ones.push(idx)); - assert_eq!(ones, (0..100).collect::>()); - } - - #[test] - fn test_as_view_mut() { - let mut vec = BitVector::true_until(64); - { - let 
view_mut = vec.as_view_mut(); - // BitViewMut would allow modifications - // This test just verifies we can create a mutable view - assert_eq!(view_mut.true_count(), 64); - } - assert_eq!(vec.true_count(), 64); - } - - #[test] - fn test_from_bitview() { - // Create a BitView from raw data - let mut raw = [0usize; N_WORDS]; - raw[0] = 0b11111111; - raw[1] = 0b11110000; - - let view = BitView::new(&raw); - let vec = BitVector::from(view); - - assert_eq!(vec.true_count(), view.true_count()); - assert_eq!(vec.as_raw()[0], 0b11111111); - assert_eq!(vec.as_raw()[1], 0b11110000); - } - - #[test] - fn test_lsb_ordering_verification() { - // Verify LSB ordering by setting specific bits - let vec = BitVector::true_until(5); - let view = vec.as_view(); - - // Collect which bits are set - let mut ones = Vec::new(); - view.iter_ones(|idx| ones.push(idx)); - - // With LSB ordering, bits 0-4 should be set - assert_eq!(ones, vec![0, 1, 2, 3, 4]); - } - - #[test] - fn test_as_raw_mut() { - let mut vec = BitVector::empty().clone(); - - // Modify through as_raw_mut - let raw_mut = vec.as_raw_mut(); - raw_mut[0] = 0b1111; - raw_mut[2] = usize::MAX; - - // Note: true_count is NOT automatically updated when using as_raw_mut - // This is a low-level API, so the user must manage true_count - vec.true_count = 4 + 64; // Update manually - - assert_eq!(vec.as_raw()[0], 0b1111); - assert_eq!(vec.as_raw()[2], usize::MAX); - assert_eq!(vec.true_count(), 68); - } - - #[test] - fn test_boundary_conditions() { - // Test various boundary values - let boundaries = [1, 31, 32, 33, 63, 64, 65, 127, 128, 129, N - 1, N]; - - for &n in &boundaries { - let vec = BitVector::true_until(n); - assert_eq!(vec.true_count(), n); - - // Verify correct bits are set via view - let view = vec.as_view(); - let mut ones = Vec::new(); - view.iter_ones(|idx| ones.push(idx)); - assert_eq!(ones.len(), n); - if n > 0 { - assert_eq!(ones[0], 0); // First bit should be 0 (LSB) - assert_eq!(ones[n - 1], n - 1); // Last bit 
should be n-1 - } - } - } -} diff --git a/vortex-array/src/pipeline/bits/view_mut.rs b/vortex-array/src/pipeline/bits/view_mut.rs deleted file mode 100644 index 80155ee2079..00000000000 --- a/vortex-array/src/pipeline/bits/view_mut.rs +++ /dev/null @@ -1,135 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use bitvec::array::BitArray; -use bitvec::order::Lsb0; - -use crate::pipeline::bits::BitView; -use crate::pipeline::{N, N_WORDS}; - -/// A mutable borrowed fixed-size bit vector of length `N` bits, represented as an array of -/// usize words. -/// Mutable view into a bit array for constructing selection masks. -#[derive(Debug)] -pub struct BitViewMut<'a> { - bits: &'a mut BitArray<[usize; N_WORDS], Lsb0>, - true_count: usize, -} - -impl<'a> BitViewMut<'a> { - pub fn new(bits: &'a mut [usize; N_WORDS]) -> Self { - let true_count = bits.iter().map(|&word| word.count_ones() as usize).sum(); - let bits: &mut BitArray<[usize; N_WORDS], Lsb0> = unsafe { std::mem::transmute(bits) }; - BitViewMut { bits, true_count } - } - - pub(crate) unsafe fn new_unchecked( - bits: &'a mut BitArray<[usize; N_WORDS], Lsb0>, - true_count: usize, - ) -> Self { - BitViewMut { bits, true_count } - } - - pub fn true_count(&self) -> usize { - self.true_count - } - - /// Mask the values in the mask up to the given length. 
- pub fn intersect_prefix(&mut self, mut len: usize) { - assert!(len <= N, "BitViewMut::truncate: length exceeds N"); - - let bit_slice = self.bits.as_raw_mut_slice(); - - let mut word = 0; - let mut true_count = 0; - while len >= usize::BITS as usize { - true_count += bit_slice[word].count_ones() as usize; - len -= usize::BITS as usize; - word += 1; - } - - if len > 0 { - bit_slice[word] &= !(usize::MAX << len); - true_count += bit_slice[word].count_ones() as usize; - word += 1; - } - - while word < N_WORDS { - bit_slice[word] = 0; - word += 1; - } - - self.set_true_count(true_count); - } - - pub fn clear(&mut self) { - self.bits.as_raw_mut_slice().fill(0); - self.set_true_count(0); - } - - pub fn fill_with_words(&mut self, mut iter: impl Iterator) { - let mut true_count = 0; - - let dst_bytes = unsafe { - std::slice::from_raw_parts_mut( - self.bits.as_raw_mut_slice().as_mut_ptr() as *mut u64, - N_WORDS, - ) - }; - - for word in 0..N / 64 { - if let Some(value) = iter.next() { - dst_bytes[word] = value; - true_count += value.count_ones() as usize; - } - } - self.set_true_count(true_count); - } - - pub fn as_view(&self) -> BitView<'_> { - unsafe { BitView::new_unchecked(self.bits, self.true_count) } - } - - pub fn as_raw_mut(&mut self) -> &mut [usize; N_WORDS] { - unsafe { std::mem::transmute(&mut self.bits) } - } - - #[inline(always)] - fn set_true_count(&mut self, true_count: usize) { - self.true_count = true_count; - debug_assert_eq!( - self.true_count, - self.bits - .as_raw_slice() - .iter() - .map(|&word| word.count_ones() as usize) - .sum::() - ); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::pipeline::bits::BitVector; - - #[test] - fn test_intersect_prefix() { - let mut bit_vec = BitVector::full().clone(); - - let mut view_mut = bit_vec.as_view_mut(); - assert_eq!(view_mut.true_count(), N); - - view_mut.intersect_prefix(N - 1); - assert_eq!(view_mut.true_count(), N - 1); - - view_mut.intersect_prefix(64); - 
assert_eq!(view_mut.true_count(), 64); - - view_mut.intersect_prefix(10); - assert_eq!(view_mut.true_count(), 10); - - view_mut.intersect_prefix(0); - assert_eq!(view_mut.true_count(), 0); - } -} diff --git a/vortex-array/src/pipeline/mod.rs b/vortex-array/src/pipeline/mod.rs index 76a79dbacf1..d2da1ff8616 100644 --- a/vortex-array/src/pipeline/mod.rs +++ b/vortex-array/src/pipeline/mod.rs @@ -19,28 +19,24 @@ //! It is a work-in-progress and is not yet used in production. pub mod bits; -mod types; -pub mod vec; -pub mod view; +pub mod source_driver; -use std::cell::RefCell; - -pub use types::*; -use vec::VectorRef; +use crate::Array; +use bits::BitView; use vortex_error::VortexResult; -use vortex_vector::VectorMut; - -use self::vec::Vector; -use crate::pipeline::bits::BitView; +use vortex_vector::{Vector, VectorMut}; /// The number of elements in each step of a Vortex evaluation operator. pub const N: usize = 1024; -// Number of usize words needed to store N bits +/// Number of bytes needed to store N bits +pub const N_BYTES: usize = N / 8; + +/// Number of usize words needed to store N bits pub const N_WORDS: usize = N / usize::BITS as usize; /// Returned by an array to indicate that it can be executed in a pipelined fashion. -pub trait Pipelined { +pub trait PipelinedOperator: Array { // Whether this operator works by mutating its first child in-place. // // If `true`, the operator is invoked with the first child's input data passed via the @@ -50,18 +46,26 @@ pub trait Pipelined { // false // } - /// Returns the indices of the children of this array that should be passed to the kernel as - /// pipelined input vectors, 1024 elements at a time. + /// Returns whether the nth child of this array should be passed to the kernel as a pipelined + /// input vector, 1024 elements at a time. 
/// - /// Any child not listed here will be treated as a batch input, and the full vector will be + /// Any child that reports `false` will be treated as a batch input, and the full vector will be /// computed before pipelined execution begins. - fn pipelined_children(&self) -> Vec; + fn is_pipelined_child(&self, child_idx: usize) -> bool; /// Bind the operator into a [`Kernel`] for pipelined execution. /// /// The provided [`BindContext`] can be used to obtain vector IDs for pipelined children and /// batch IDs for batch children. Each child can only be bound once. - fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult>; + fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult>; +} + +pub trait PipelinedSource: Array { + /// Bind the operator into a [`Kernel`] for pipelined execution. + /// + /// The provided [`BindContext`] can be used to obtain vector IDs for pipelined children and + /// batch IDs for batch children. Each child can only be bound once. + fn bind_source(&self, ctx: &mut dyn BindContext) -> VortexResult>; } /// The context used when binding an operator for execution. @@ -92,7 +96,7 @@ pub type VectorId = usize; /// the setup costs (such as DType validation, stats short-circuiting, etc.), and to make better /// use of CPU caches by performing all operations while the data is hot. /// -/// The [`Kernel::step`] method will be invoked repeatedly to process chunks of data, [`N`] elements +/// The [`SourceKernel::step`] method will be invoked repeatedly to process chunks of data, [`N`] elements /// at a time. Each invocation is passed a selection mask indicating which elements of the chunk /// should be written to the start of the output vector. /// @@ -100,9 +104,9 @@ pub type VectorId = usize; /// its length will initially be set to zero. It is therefore safe to invoke unchecked writes up to /// `N` elements. /// -/// The pipeline may invoke the `Kernel::skip` method to skip over some number of chunks of data. 
+/// The pipeline may invoke the `SourceKernel::skip` method to skip over some number of chunks of data. /// The kernel should mutate any internal state as necessary to account for the skipped data. -pub trait Kernel: Send { +pub trait SourceKernel: Send { /// Skip over the given number of chunks of data. /// /// For example, if `n` is 3, then the kernel should skip over `3 * N` elements of input data. @@ -117,15 +121,29 @@ pub trait Kernel: Send { ) -> VortexResult<()>; } +pub trait OperatorKernel: Send { + /// Attempts to perform a single step of the operator, writing data to the output vector. + /// + /// The output vector has length equal to the number of valid elements in the input vectors. + /// This number of values should be written to the output vector. + fn step(&self, ctx: &KernelContext, out: &mut VectorMut) -> VortexResult<()>; +} + /// Context passed to kernels during execution, providing access to vectors. pub struct KernelContext { /// The allocated vectors for intermediate results. - pub(crate) vectors: Vec>, + pub(crate) vectors: Vec, } impl KernelContext { + pub fn empty() -> Self { + Self { + vectors: Vec::new(), + } + } + /// Get a vector by its ID. 
- pub fn vector(&self, vector_id: VectorId) -> VectorRef<'_> { - VectorRef::new(self.vectors[vector_id].borrow()) + pub fn vector(&self, _vector_id: VectorId) -> &Vector { + todo!() } } diff --git a/vortex-array/src/pipeline/source_driver.rs b/vortex-array/src/pipeline/source_driver.rs new file mode 100644 index 00000000000..ff74ed5cf55 --- /dev/null +++ b/vortex-array/src/pipeline/source_driver.rs @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use crate::pipeline::bits::BitView; +use crate::pipeline::{BindContext, KernelContext, PipelinedSource, VectorId, N}; +use itertools::Itertools; +use vortex_error::{vortex_panic, VortexResult}; +use vortex_mask::Mask; +use vortex_vector::{Vector, VectorMut, VectorMutOps}; + +/// Temporary driver for executing a single array in a pipelined fashion. +pub struct PipelineSourceDriver<'a> { + array: &'a dyn PipelinedSource, +} + +impl<'a> PipelineSourceDriver<'a> { + pub fn new(array: &'a dyn PipelinedSource) -> Self { + Self { array } + } + + pub fn execute(&self, selection: &Mask) -> VortexResult { + // First, we compute all child vectors. + // Since this is a pipeline source, we know that remaining children must be batch inputs, + // and therefore we cannot push down the selection mask. + let batch_inputs: Vec<_> = self + .array + .children() + .iter() + .map(|child| child.execute()) + .try_collect()?; + + // We now construct the source kernel. + let mut bind_ctx = PipelineSourceBindCtx { + batch_inputs: &batch_inputs, + }; + let mut kernel = self.array.bind_source(&mut bind_ctx)?; + let kernel_ctx = KernelContext::empty(); + + // Allocate an output vector, with up to N bytes of padding to ensure every call to + // `kernel.step(out)` has at least N bytes of capacity. 
+ let mut output = VectorMut::with_capacity( + self.array.dtype(), + selection.true_count().next_multiple_of(N), + ); + + // TODO(ngates): change behaviour based on the density of the selection mask. + let selection_buffer = selection.to_bit_buffer(); + // TODO(ngates): rewrite chunks to take an arbitrary "storage type"? Or somehow copy + // the chunks directly into a wider bit slice? + let selection_chunks = selection_buffer.chunks(); + let mut selection_chunks_iter = selection_chunks.iter_padded(); + + let output_len = selection.true_count(); + + let mut selection_chunk = [0u64; N / u64::BITS as usize]; + + let mut output_chunks = vec![]; + while output.len() < output_len { + // Copy the next selection chunk into place. + for word_idx in 0..selection_chunk.len() { + selection_chunk[word_idx] = selection_chunks_iter.next().unwrap_or_else(|| 0u64); + } + + // TODO(ngates): ideally our chunks iter would use a usize... + let selection_chunk_usize = unsafe { std::mem::transmute(&selection_chunk) }; + let selection = BitView::new(selection_chunk_usize); + + // We know we have remaining capacity for N elements, so split off a size-N chunk. + let remaining_output = output.split_off(N); + + kernel.step(&kernel_ctx, &selection, &mut output)?; + assert_eq!( + output.len(), + selection.true_count(), + "Kernel did not write expected number of elements" + ); + + // Now we un-split the output vector back onto its full size. + // output.unsplit(remaining_output); + output_chunks.push(output); + output = remaining_output; + } + + // Combine all output chunks back into the output vector. 
+ for chunk in output_chunks { + output.unsplit(chunk); + } + + Ok(output.freeze()) + } +} + +struct PipelineSourceBindCtx<'a> { + batch_inputs: &'a [Vector], +} + +impl BindContext for PipelineSourceBindCtx<'_> { + fn pipelined_input(&self, _child_idx: usize) -> VectorId { + vortex_panic!("PipelineSource cannot bind pipelined inputs"); + } + + fn batch_input(&self, child_idx: usize) -> Vector { + self.batch_inputs[child_idx].clone() + } +} diff --git a/vortex-array/src/pipeline/types.rs b/vortex-array/src/pipeline/types.rs deleted file mode 100644 index aaf6c9c9a44..00000000000 --- a/vortex-array/src/pipeline/types.rs +++ /dev/null @@ -1,97 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::fmt::{Debug, Display, Formatter}; - -use vortex_dtype::half::f16; -use vortex_dtype::{DType, NativePType, PType}; -use vortex_error::vortex_panic; -use vortex_vector::binaryview::BinaryView; - -/// Defines the "vector type", a physical type describing the data that's held in the vector. -/// -/// See the specific vector view types like primitive views for more details. -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub enum VType { - Bool, - Primitive(PType), - Binary, -} - -impl Display for VType { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - VType::Bool => write!(f, "bool"), - VType::Primitive(ptype) => write!(f, "{}", ptype), - VType::Binary => write!(f, "binary"), - } - } -} - -impl VType { - pub fn of() -> Self { - T::vtype() - } - - pub fn byte_width(&self) -> usize { - match self { - VType::Bool => 1, - VType::Primitive(ptype) => ptype.byte_width(), - VType::Binary => size_of::(), - } - } -} - -/// A trait to identify canonical vector types. 
-pub trait Element: 'static + Copy + Debug + Send { - fn vtype() -> VType; -} - -/// NOTE: for now, we have chosen to store boolean values as byte-sized booleans instead -/// of packed into a bit mask, this is typically more efficient for SIMD compute operations. -/// For masks, we still use bit-packed booleans. -impl Element for bool { - fn vtype() -> VType { - VType::Bool - } -} - -macro_rules! canonical_ptype { - ($T:ty) => { - impl Element for $T { - fn vtype() -> VType { - VType::Primitive(<$T as NativePType>::PTYPE) - } - } - }; -} - -canonical_ptype!(u8); -canonical_ptype!(u16); -canonical_ptype!(u32); -canonical_ptype!(u64); -canonical_ptype!(i8); -canonical_ptype!(i16); -canonical_ptype!(i32); -canonical_ptype!(i64); -canonical_ptype!(f16); -canonical_ptype!(f32); -canonical_ptype!(f64); - -impl Element for BinaryView { - fn vtype() -> VType { - VType::Binary - } -} - -impl From<&DType> for VType { - fn from(value: &DType) -> Self { - match value { - DType::Bool(_) => VType::Bool, - DType::Primitive(ptype, _) => VType::Primitive(*ptype), - DType::Utf8(_) => VType::Binary, - DType::Binary(_) => VType::Binary, - _ => vortex_panic!("Unsupported dtype for VType: {}", value), - } - } -} diff --git a/vortex-array/src/pipeline/vec.rs b/vortex-array/src/pipeline/vec.rs deleted file mode 100644 index 16e137a4772..00000000000 --- a/vortex-array/src/pipeline/vec.rs +++ /dev/null @@ -1,161 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -//! Vectors contain owned fixed-size canonical arrays of elements. -//! - -// TODO(ngates): Currently, the data in a vector is Arc'd. We should consider whether we want the -// performance hit for as_mut(), or whether we want zero-copy cloning. Not clear that we ever -// need the clone behavior. 
- -use std::cell::{Ref, RefMut}; -use std::fmt::Debug; -use std::ops::{Deref, DerefMut}; - -use vortex_buffer::{Alignment, ByteBuffer, ByteBufferMut}; - -use crate::pipeline::N; -use crate::pipeline::bits::BitVector; -use crate::pipeline::types::{Element, VType}; -use crate::pipeline::view::{View, ViewMut}; - -/// A vector contains fixed-size owned data in canonical form. -#[derive(Debug)] -pub struct Vector { - /// The physical type of the vector, which defines how the elements are stored. - vtype: VType, - /// The allocated elements buffer. - /// Alignment is at least the size of the element type. - /// The capacity of the elements buffer is N * `size_of::()` where T is the element type. - elements: ByteBufferMut, - /// The validity mask for the vector, indicating which elements in the buffer are valid. - validity: BitVector, - // The position of the selected values in the vector. - selection: Selection, - - /// Additional buffers of data used by the vector, such as string data. - // TODO(ngates): ideally these buffers are compressed somehow? E.g. using FSST? 
- #[allow(dead_code)] - data: Vec, -} - -impl Vector { - pub fn new() -> Self { - Self::new_with_vtype(T::vtype()) - } - - pub fn new_with_vtype(vtype: VType) -> Self { - let mut elements = ByteBufferMut::with_capacity_aligned( - vtype.byte_width() * N, - Alignment::new(vtype.byte_width()), - ); - unsafe { elements.set_len(vtype.byte_width() * N) }; - - Self { - vtype, - elements, - validity: BitVector::full().clone(), - selection: Selection::Prefix, - data: vec![], - } - } - - pub fn set_selection(&mut self, selection: Selection) { - self.selection = selection; - } - - pub fn as_mut_array(&mut self) -> &mut [T; N] { - assert_eq!(self.vtype, T::vtype()); - unsafe { &mut *(self.elements.as_mut_ptr().cast::().cast::<[T; N]>()) } - } - - pub fn as_view_mut(&mut self) -> ViewMut<'_> { - ViewMut { - vtype: self.vtype, - elements: self.elements.as_mut_ptr().cast(), - validity: Some(self.validity.as_view_mut()), - data: vec![], - selection: self.selection, - _marker: Default::default(), - } - } - - pub fn as_view(&self) -> View<'_> { - View { - vtype: self.vtype, - elements: self.elements.as_ptr().cast(), - validity: Some(self.validity.as_view()), - selection: self.selection, - data: vec![], - _marker: Default::default(), - } - } -} - -/// A [`VectorRef`] provides a small wrapper to allow accessing a [`View`] with the same lifetime -/// as the borrowed vector, rather than the lifetime of the [`Ref`]. -pub struct VectorRef<'a> { - // Use to ensure that view and borrow have the same lifetime. - #[allow(dead_code)] - borrow: Ref<'a, Vector>, - view: View<'a>, -} - -impl<'a> VectorRef<'a> { - pub fn new(borrow: Ref<'a, Vector>) -> Self { - let view = borrow.as_view(); - // SAFETY: we continue to hold onto the [`Ref`], so it is safe to erase the lifetime. 
- let view = unsafe { std::mem::transmute::, View<'a>>(view) }; - Self { borrow, view } - } - - pub fn as_view(&self) -> &View<'a> { - &self.view - } -} - -impl<'a> Deref for VectorRef<'a> { - type Target = View<'a>; - - fn deref(&self) -> &Self::Target { - &self.view - } -} - -/// A [`VectorRefMut`] provides a small wrapper to allow accessing a [`ViewMut`] with the same -/// lifetime as the borrowed vector, rather than the lifetime of the [`RefMut`]. -pub struct VectorRefMut<'a> { - // Use to ensure that view and borrow have the same lifetime. - #[allow(dead_code)] - borrow: RefMut<'a, Vector>, - view: ViewMut<'a>, -} - -impl<'a> VectorRefMut<'a> { - pub fn new(mut borrow: RefMut<'a, Vector>) -> Self { - let view = borrow.as_view_mut(); - // SAFETY: we continue to hold onto the [`Ref`], so it is safe to erase the lifetime. - let view = unsafe { std::mem::transmute::, ViewMut<'a>>(view) }; - Self { borrow, view } - } -} - -impl<'a> Deref for VectorRefMut<'a> { - type Target = ViewMut<'a>; - - fn deref(&self) -> &Self::Target { - &self.view - } -} - -impl<'a> DerefMut for VectorRefMut<'a> { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.view - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Selection { - Prefix, - Mask, -} diff --git a/vortex-array/src/pipeline/view.rs b/vortex-array/src/pipeline/view.rs deleted file mode 100644 index 3aa6c674b31..00000000000 --- a/vortex-array/src/pipeline/view.rs +++ /dev/null @@ -1,225 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_buffer::ByteBuffer; -use vortex_error::VortexExpect; - -use crate::pipeline::N; -use crate::pipeline::bits::{BitView, BitViewMut}; -use crate::pipeline::types::{Element, VType}; -use crate::pipeline::vec::Selection; - -pub struct View<'a> { - /// The physical type of the vector, which defines how the elements are stored. - pub(super) vtype: VType, - /// A pointer to the allocated elements buffer. 
- /// Alignment is at least the size of the element type. - /// The capacity of the elements buffer is N * `size_of::()` where T is the element type. - pub(super) elements: *const u8, - /// The validity mask for the vector, indicating which elements in the buffer are valid. - /// This value can be `None` if the expected DType is `NonNullable`. - // TODO: support validity - #[allow(dead_code)] - pub(super) validity: Option>, - - // Indicates where the selected elements are positioned within the vector. - pub(super) selection: Selection, - - /// Additional buffers of data used by the vector, such as string data. - #[allow(dead_code)] - pub(super) data: Vec, - - /// Marker defining the lifetime of the contents of the vector. - pub(super) _marker: std::marker::PhantomData<&'a ()>, -} - -impl<'a> View<'a> { - #[inline(always)] - pub fn selection(&self) -> Selection { - self.selection - } - - pub fn as_array(&self) -> &'a [T; N] - where - T: Element, - { - debug_assert_eq!(self.vtype, T::vtype(), "Invalid type for canonical view"); - // SAFETY: We assume that the elements are of type T and that the view is valid. - unsafe { &*(self.elements.cast::() as *const [T; N]) } - } - - /// Re-interpret cast the vector into a new type where the element has the same width. - #[inline(always)] - pub fn reinterpret_as(&mut self) { - assert_eq!( - self.vtype.byte_width(), - size_of::(), - "Cannot reinterpret {} as {}", - self.vtype, - E::vtype() - ); - self.vtype = E::vtype(); - } -} - -pub struct ViewMut<'a> { - /// The physical type of the vector, which defines how the elements are stored. - pub(super) vtype: VType, - /// A pointer to the allocated elements buffer. - /// Alignment is at least the size of the element type. - /// The capacity of the elements buffer is N * `size_of::()` where T is the element type. - // TODO(ngates): it would be nice to guarantee _wider_ alignment, ideally 128 bytes, so that - // we can use aligned load/store instructions for wide SIMD lanes. 
- pub(super) elements: *mut u8, - /// The validity mask for the vector, indicating which elements in the buffer are valid. - /// This value can be `None` if the expected DType is `NonNullable`. - pub(super) validity: Option>, - - /// Additional buffers of data used by the vector, such as string data. - // TODO(ngates): ideally these buffers are compressed somehow? E.g. using FSST? - #[allow(dead_code)] - pub(super) data: Vec, - - /// The position of the selected values of this buffer. - /// One of: - /// * All - all N values are selected. - /// * Prefix - the first n values are selected where i is the true count of the kernel mask. - /// * Mask - the values are in the positions indicated by the kernel mask. - pub(super) selection: Selection, - - /// Marker defining the lifetime of the contents of the vector. - pub(super) _marker: std::marker::PhantomData<&'a mut ()>, -} - -impl<'a> ViewMut<'a> { - pub fn new(elements: &'a mut [E], validity: Option>) -> Self { - assert_eq!(elements.len(), N); - Self { - vtype: E::vtype(), - elements: elements.as_mut_ptr().cast(), - validity, - data: vec![], - selection: Selection::Prefix, - _marker: Default::default(), - } - } - - /// Re-interpret cast the vector into a new type where the element has the same width. - #[inline(always)] - pub fn reinterpret_as(&mut self) { - assert_eq!( - self.vtype.byte_width(), - size_of::(), - "Cannot reinterpret {} as {}", - self.vtype, - E::vtype() - ); - self.vtype = E::vtype(); - } - - /// Returns an immutable array of the elements in the vector. - #[inline(always)] - pub fn as_array(&self) -> &'a [E; N] { - debug_assert_eq!(self.vtype, E::vtype(), "Invalid type for canonical view"); - unsafe { &*(self.elements.cast::() as *const [E; N]) } - } - - /// Returns a mutable array of the elements in the vector, allowing for modification. 
- #[inline(always)] - pub fn as_array_mut(&mut self) -> &'a mut [E; N] { - debug_assert_eq!(self.vtype, E::vtype(), "Invalid type for canonical view"); - unsafe { &mut *(self.elements.cast::() as *mut [E; N]) } - } - - /// Access the validity mask of the vector. - /// - /// ## Panics - /// - /// Panics if the vector does not support validity, i.e. if the DType was non-nullable when - /// it was created. - pub fn validity(&mut self) -> &mut BitViewMut<'a> { - self.validity - .as_mut() - .vortex_expect("Vector does not support validity") - } - - pub fn add_buffer(&mut self, buffer: ByteBuffer) { - self.data.push(buffer); - } - - #[inline(always)] - pub fn selection(&self) -> Selection { - self.selection - } - - pub fn set_selection(&mut self, selection: Selection) { - self.selection = selection; - } - - /// Flatten the view by bringing the selected elements of the mask to the beginning of - pub fn flatten(&mut self, selection: &BitView<'_>) { - assert_eq!( - self.vtype, - E::vtype(), - "ViewMut::flatten_mask: type mismatch" - ); - - if matches!(self.selection, Selection::Prefix) { - // Nothing to do, all elements are already selected. - return; - } - - match selection.true_count() { - 0 | N => { - // If the mask has no true bits or all true bits, we are already flattened. - } - n if n > 3 * N / 4 => { - // High density: use iter_zeros to compact by removing gaps - let slice = self.as_array_mut::(); - let mut write_idx = 0; - let mut read_idx = 0; - - selection.iter_zeros(|zero_idx| { - // Copy elements from read_idx to zero_idx (exclusive) to write_idx - let count = zero_idx - read_idx; - unsafe { - // SAFETY: We assume that the elements are of type E and that the view is valid. 
- // Using memmove for potentially overlapping regions - std::ptr::copy( - slice.as_ptr().add(read_idx), - slice.as_mut_ptr().add(write_idx), - count, - ); - write_idx += count; - } - read_idx = zero_idx + 1; - }); - - // Copy any remaining elements after the last zero - unsafe { - std::ptr::copy( - slice.as_ptr().add(read_idx), - slice.as_mut_ptr().add(write_idx), - N - read_idx, - ); - } - } - _ => { - let mut offset = 0; - let slice = self.as_array_mut::(); - selection.iter_ones(|idx| { - unsafe { - // SAFETY: We assume that the elements are of type E and that the view is valid. - let value = *slice.get_unchecked(idx); - // TODO(joe): use ptr increment (not offset). - *slice.get_unchecked_mut(offset) = value; - - offset += 1; - } - }); - } - } - - self.selection = Selection::Prefix - } -} diff --git a/vortex-array/src/vtable/operator.rs b/vortex-array/src/vtable/operator.rs index 857436bfc34..fa731946dbe 100644 --- a/vortex-array/src/vtable/operator.rs +++ b/vortex-array/src/vtable/operator.rs @@ -1,15 +1,14 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_error::{VortexResult, vortex_bail}; +use vortex_error::{vortex_bail, VortexResult}; use vortex_mask::Mask; use vortex_vector::Vector; -use crate::ArrayRef; use crate::array::IntoArray; use crate::execution::{BatchKernelRef, BindCtx, ExecutionCtx}; -use crate::pipeline::Pipelined; use crate::vtable::{NotSupported, VTable}; +use crate::ArrayRef; /// A vtable for the new operator-based array functionality. Eventually this vtable will be /// merged into the main `VTable`, but for now it is kept separate to allow for incremental @@ -40,12 +39,6 @@ pub trait OperatorVTable { Self::bind(array, Some(&selection.clone().into_array()), &mut ())?.execute() } - /// Returns an implementation of the [`Pipelined`] trait for this array, if pipelined execution - /// is supported. 
- fn execute_pipelined(_array: &V::Array) -> Option<&dyn Pipelined> { - None - } - /// Bind the array for execution in batch mode. /// /// This function should return a [`BatchKernelRef`] that can be used to execute the array in From 332e3fa5ae8862bc5f005b93b78bf8779950ef10 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Fri, 7 Nov 2025 15:20:54 -0500 Subject: [PATCH 04/10] pipelined execution Signed-off-by: Nicholas Gates --- vortex-array/src/array/operator.rs | 10 --- .../src/arrays/primitive/vtable/operator.rs | 72 ++++++++++++++-- vortex-array/src/pipeline/bits.rs | 28 +----- vortex-array/src/pipeline/mod.rs | 7 +- vortex-array/src/pipeline/source_driver.rs | 27 ++++++ vortex-array/src/vtable/operator.rs | 6 ++ vortex-buffer/src/buffer_mut.rs | 14 +-- vortex-mask/src/mask_mut.rs | 44 ++++++++-- vortex-vector/src/primitive/generic_mut.rs | 34 +++++++- vortex-vector/src/primitive/vector_mut.rs | 85 ++++++++++++++++++- vortex-vector/src/vector_mut.rs | 20 ++--- 11 files changed, 278 insertions(+), 69 deletions(-) diff --git a/vortex-array/src/array/operator.rs b/vortex-array/src/array/operator.rs index 81194065d6d..340997b0529 100644 --- a/vortex-array/src/array/operator.rs +++ b/vortex-array/src/array/operator.rs @@ -8,7 +8,6 @@ use vortex_mask::Mask; use vortex_vector::{vector_matches_dtype, Vector, VectorOps}; use crate::execution::{BatchKernelRef, BindCtx, DummyExecutionCtx, ExecutionCtx}; -use crate::pipeline::source_driver::PipelineDriver; use crate::vtable::{OperatorVTable, VTable}; use crate::{Array, ArrayAdapter, ArrayRef}; @@ -63,15 +62,6 @@ impl ArrayOperator for Arc { impl ArrayOperator for ArrayAdapter { fn execute_batch(&self, selection: &Mask, ctx: &mut dyn ExecutionCtx) -> VortexResult { - // Check to see if we should execute the array in a pipelined fashion. This is a - // short-circuit for now until we have a full pipeline executor, but it allows each arrow - // to only implement the pipeline API. 
- if let Some(pipelined) = - >::execute_pipelined(&self.0) - { - PipelineDriver::new(pipelined).execute - } - let vector = >::execute_batch(&self.0, selection, ctx)?; diff --git a/vortex-array/src/arrays/primitive/vtable/operator.rs b/vortex-array/src/arrays/primitive/vtable/operator.rs index fa18e516cec..4653d3ef5b8 100644 --- a/vortex-array/src/arrays/primitive/vtable/operator.rs +++ b/vortex-array/src/arrays/primitive/vtable/operator.rs @@ -1,18 +1,25 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable}; +use crate::execution::{kernel, BatchKernelRef, BindCtx}; +use crate::pipeline::bits::BitView; +use crate::pipeline::{BindContext, KernelContext, PipelinedSource, SourceKernel, N}; +use crate::vtable::{OperatorVTable, ValidityHelper}; +use crate::{ArrayRef, IntoArray}; use vortex_buffer::Buffer; use vortex_compute::filter::Filter; -use vortex_dtype::match_each_native_ptype; +use vortex_dtype::{match_each_native_ptype, NativePType, PTypeDowncastExt}; use vortex_error::VortexResult; +use vortex_mask::Mask; use vortex_vector::primitive::PVector; - -use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable}; -use crate::execution::{BatchKernelRef, BindCtx, kernel}; -use crate::vtable::{OperatorVTable, ValidityHelper}; -use crate::{ArrayRef, IntoArray}; +use vortex_vector::VectorMut; impl OperatorVTable for PrimitiveVTable { + fn as_pipelined_source(array: &PrimitiveArray) -> Option<&dyn PipelinedSource> { + Some(array) + } + fn bind( array: &PrimitiveArray, selection: Option<&ArrayRef>, @@ -61,3 +68,56 @@ impl OperatorVTable for PrimitiveVTable { Ok(None) } } + +impl PipelinedSource for PrimitiveArray { + fn bind_source(&self, _ctx: &mut dyn BindContext) -> VortexResult> { + match_each_native_ptype!(self.ptype(), |T| { + let primitive_kernel = PrimitiveKernel { + buffer: self.buffer::().clone(), + validity: self.validity_mask(), + offset: 0, 
+ }; + Ok(Box::new(primitive_kernel)) + }) + } +} + +struct PrimitiveKernel { + buffer: Buffer, + validity: Mask, + offset: usize, +} + +impl SourceKernel for PrimitiveKernel { + fn skip(&mut self, n: usize) { + self.offset += n * N; + } + + fn step( + &mut self, + _ctx: &KernelContext, + selection: &BitView, + out: &mut VectorMut, + ) -> VortexResult<()> { + let out = out.as_primitive_mut().downcast::(); + + // SAFETY: we know the output has sufficient capacity. We just have to append nulls + // separately from copying over the elements. + unsafe { + out.validity_mut().append_n(true, selection.true_count()); + out.elements_mut().set_len(selection.true_count()); + } + + let source = &self.buffer.as_slice()[self.offset..]; + + let mut out_pos = 0; + selection.iter_slices(|(start, end)| { + print!("Slicing {} to {}\n", start, end); + let len = end - start; + out.as_mut()[out_pos..][..len].copy_from_slice(&source[start..end]); + out_pos += len; + }); + + Ok(()) + } +} diff --git a/vortex-array/src/pipeline/bits.rs b/vortex-array/src/pipeline/bits.rs index c638806d92d..5d478952670 100644 --- a/vortex-array/src/pipeline/bits.rs +++ b/vortex-array/src/pipeline/bits.rs @@ -4,7 +4,7 @@ use std::fmt::{Debug, Formatter}; use bitvec::prelude::*; -use vortex_error::{VortexError, VortexResult, vortex_err}; +use vortex_error::{vortex_err, VortexError, VortexResult}; use crate::pipeline::{N, N_WORDS}; @@ -155,6 +155,8 @@ impl<'a> BitView<'a> { /// /// The function `f` receives a tuple `(start, len)` where `start` is the index of the first /// `true` bit and `len` is the number of consecutive `true` bits. + /// + /// FIXME(ngates): this code is broken. 
pub fn iter_slices(&self, mut f: F) where F: FnMut((usize, usize)), @@ -229,7 +231,6 @@ mod tests { use vortex_mask::Mask; use super::*; - use crate::pipeline::bits::BitVector; #[test] fn test_iter_ones_empty() { @@ -521,29 +522,6 @@ mod tests { }); } - #[test] - fn test_mask_and_bitview_all_true() { - let mask = Mask::AllTrue(5); - - let vector = BitVector::true_until(5); - - let view = vector.as_view(); - - // Collect indices from BitView - let mut bitview_ones = Vec::new(); - view.iter_ones(|idx| bitview_ones.push(idx)); - - // Collect indices from BitView - let mask_ones = mask.iter_bools(|iter| { - iter.enumerate() - .filter(|(_, b)| *b) - .map(|(i, _)| i) - .collect::>() - }); - - assert_eq!(bitview_ones, mask_ones); - } - #[test] fn test_bitview_zeros_complement_mask() { // Create a pattern diff --git a/vortex-array/src/pipeline/mod.rs b/vortex-array/src/pipeline/mod.rs index d2da1ff8616..2c412c1d7c2 100644 --- a/vortex-array/src/pipeline/mod.rs +++ b/vortex-array/src/pipeline/mod.rs @@ -23,6 +23,7 @@ pub mod source_driver; use crate::Array; use bits::BitView; +use std::ops::Deref; use vortex_error::VortexResult; use vortex_vector::{Vector, VectorMut}; @@ -60,7 +61,7 @@ pub trait PipelinedOperator: Array { fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult>; } -pub trait PipelinedSource: Array { +pub trait PipelinedSource: Deref { /// Bind the operator into a [`Kernel`] for pipelined execution. /// /// The provided [`BindContext`] can be used to obtain vector IDs for pipelined children and @@ -143,7 +144,7 @@ impl KernelContext { } /// Get a vector by its ID. 
- pub fn vector(&self, _vector_id: VectorId) -> &Vector { - todo!() + pub fn vector(&self, vector_id: VectorId) -> &Vector { + &self.vectors[vector_id] } } diff --git a/vortex-array/src/pipeline/source_driver.rs b/vortex-array/src/pipeline/source_driver.rs index ff74ed5cf55..74ba0a3aa59 100644 --- a/vortex-array/src/pipeline/source_driver.rs +++ b/vortex-array/src/pipeline/source_driver.rs @@ -103,3 +103,30 @@ impl BindContext for PipelineSourceBindCtx<'_> { self.batch_inputs[child_idx].clone() } } + +#[cfg(test)] +mod test { + use crate::arrays::PrimitiveArray; + use crate::pipeline::source_driver::PipelineSourceDriver; + use crate::validity::Validity; + use vortex_buffer::buffer; + use vortex_dtype::PTypeDowncastExt; + use vortex_mask::Mask; + use vortex_vector::VectorOps; + + #[test] + fn test_primitive() { + let array = PrimitiveArray::new::(buffer![0..100000u32], Validity::AllValid); + + // Create a selection mask with some ranges. + let mask = Mask::from_iter((0..100000).map(|i| i % 30 < 20)); + + let out = PipelineSourceDriver::new(&array) + .execute(&mask) + .unwrap() + .into_primitive() + .downcast::(); + + assert_eq!(out.len(), mask.true_count()); + } +} diff --git a/vortex-array/src/vtable/operator.rs b/vortex-array/src/vtable/operator.rs index fa731946dbe..2dbf34a9002 100644 --- a/vortex-array/src/vtable/operator.rs +++ b/vortex-array/src/vtable/operator.rs @@ -7,6 +7,7 @@ use vortex_vector::Vector; use crate::array::IntoArray; use crate::execution::{BatchKernelRef, BindCtx, ExecutionCtx}; +use crate::pipeline::PipelinedSource; use crate::vtable::{NotSupported, VTable}; use crate::ArrayRef; @@ -39,6 +40,11 @@ pub trait OperatorVTable { Self::bind(array, Some(&selection.clone().into_array()), &mut ())?.execute() } + /// Downcast this array into a [`PipelinedSource`] if it supports pipelined execution. + fn as_pipelined_source(_array: &V::Array) -> Option<&dyn PipelinedSource> { + None + } + /// Bind the array for execution in batch mode. 
/// /// This function should return a [`BatchKernelRef`] that can be used to execute the array in diff --git a/vortex-buffer/src/buffer_mut.rs b/vortex-buffer/src/buffer_mut.rs index f0ea523b5d4..436006f7d2f 100644 --- a/vortex-buffer/src/buffer_mut.rs +++ b/vortex-buffer/src/buffer_mut.rs @@ -9,7 +9,7 @@ use std::ops::{Deref, DerefMut}; use bytes::buf::UninitSlice; use bytes::{Buf, BufMut, BytesMut}; -use vortex_error::{VortexExpect, vortex_panic}; +use vortex_error::{vortex_panic, VortexExpect}; use crate::debug::TruncatedDebug; use crate::trusted_len::TrustedLen; @@ -338,8 +338,8 @@ impl BufferMut { /// /// Panics if either half would have a length that is not a multiple of the alignment. pub fn split_off(&mut self, at: usize) -> Self { - if at > self.len() { - vortex_panic!("Cannot split buffer of length {} at {}", self.len(), at); + if at > self.capacity() { + vortex_panic!("Cannot split buffer of capacity {} at {}", self.len(), at); } let bytes_at = at * size_of::(); @@ -352,8 +352,10 @@ impl BufferMut { } let new_bytes = self.bytes.split_off(bytes_at); - let new_length = self.length - at; - self.length = at; + + // Adjust the lengths, given that length may be < at + let new_length = self.length.saturating_sub(at); + self.length = self.length.min(at); BufferMut { bytes: new_bytes, @@ -724,7 +726,7 @@ impl Write for ByteBufferMut { mod test { use bytes::{Buf, BufMut}; - use crate::{Alignment, BufferMut, ByteBufferMut, buffer_mut}; + use crate::{buffer_mut, Alignment, BufferMut, ByteBufferMut}; #[test] fn capacity() { diff --git a/vortex-mask/src/mask_mut.rs b/vortex-mask/src/mask_mut.rs index 5a85850213b..e7e5c736ed6 100644 --- a/vortex-mask/src/mask_mut.rs +++ b/vortex-mask/src/mask_mut.rs @@ -1,7 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Sub; use std::sync::Arc; use vortex_buffer::BitBufferMut; @@ -95,6 +94,35 @@ impl MaskMut { } } + /// Set the length of the mask. 
+ pub unsafe fn set_len(&mut self, new_len: usize) { + debug_assert!(new_len < self.capacity()); + match &mut self.0 { + Inner::Empty { capacity, .. } => { + self.0 = Inner::Constant { + value: false, // Pick any value + len: new_len, + capacity: *capacity, + } + } + Inner::Constant { len, .. } => { + *len = new_len; + } + Inner::Builder(bits) => { + unsafe { bits.set_len(new_len) }; + } + } + } + + /// Returns the capacity of the mask. + pub fn capacity(&self) -> usize { + match &self.0 { + Inner::Empty { capacity } => *capacity, + Inner::Constant { capacity, .. } => *capacity, + Inner::Builder(bits) => bits.capacity(), + } + } + /// Clears the mask. /// /// Note that this method has no effect on the allocated capacity of the mask. @@ -207,10 +235,11 @@ impl MaskMut { /// values from `at` to the end, and leaving `self` with the values from /// the start to `at`. pub fn split_off(&mut self, at: usize) -> Self { - assert!(at <= self.len(), "split_off index out of bounds"); + assert!(at <= self.capacity(), "split_off index out of bounds"); match &mut self.0 { Inner::Empty { capacity } => { - let new_capacity = (*capacity).saturating_sub(at); + let new_capacity = *capacity - at; + *capacity = at; Self(Inner::Empty { capacity: new_capacity, }) @@ -220,9 +249,12 @@ impl MaskMut { len, capacity, } => { - let new_len = len.sub(at); - *len = at; - let new_capacity = (*capacity).saturating_sub(at); + // Adjust the lengths, given that length may be < at + let new_len = len.saturating_sub(at); + let new_capacity = *capacity - at; + *len = (*len).min(at); + *capacity = at; + Self(Inner::Constant { value: *value, len: new_len, diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs index 5dbf64ae951..2525592e1ae 100644 --- a/vortex-vector/src/primitive/generic_mut.rs +++ b/vortex-vector/src/primitive/generic_mut.rs @@ -5,7 +5,7 @@ use vortex_buffer::BufferMut; use vortex_dtype::NativePType; -use vortex_error::{VortexExpect, 
VortexResult, vortex_ensure}; +use vortex_error::{vortex_ensure, VortexExpect, VortexResult}; use vortex_mask::MaskMut; use crate::primitive::PVector; @@ -73,6 +73,38 @@ impl PVectorMut { } } + /// Set the length of the vector. + /// + /// # Safety + /// + /// The caller must ensure that the new length does not exceed the capacity of the vector. + pub unsafe fn set_len(&mut self, new_len: usize) { + debug_assert!(new_len < self.elements.capacity()); + debug_assert!(new_len < self.validity.capacity()); + unsafe { self.elements.set_len(new_len) }; + unsafe { self.validity.set_len(new_len) }; + } + + /// Returns a mutable reference to the elements buffer. + /// + /// # Safety + /// + /// The caller must ensure that any mutations to the elements do not violate the + /// invariants of the vector (e.g., the length must remain consistent with the elements buffer). + pub unsafe fn elements_mut(&mut self) -> &mut BufferMut { + &mut self.elements + } + + /// Returns a mutable reference to the validity mask. + /// + /// # Safety + /// + /// The caller must ensure that any mutations to the validity mask do not violate the + /// invariants of the vector (e.g., the length must remain consistent with the elements buffer). + pub unsafe fn validity_mut(&mut self) -> &mut MaskMut { + &mut self.validity + } + /// Decomposes the primitive vector into its constituent parts (buffer and validity). pub fn into_parts(self) -> (BufferMut, MaskMut) { (self.elements, self.validity) diff --git a/vortex-vector/src/primitive/vector_mut.rs b/vortex-vector/src/primitive/vector_mut.rs index 1d3a9812eeb..eef24f04707 100644 --- a/vortex-vector/src/primitive/vector_mut.rs +++ b/vortex-vector/src/primitive/vector_mut.rs @@ -9,7 +9,7 @@ use vortex_error::vortex_panic; use vortex_mask::MaskMut; use crate::primitive::{PVectorMut, PrimitiveVector}; -use crate::{VectorMutOps, match_each_pvector_mut}; +use crate::{match_each_pvector_mut, VectorMutOps}; /// A mutable vector of primitive values. 
/// @@ -142,7 +142,7 @@ impl VectorMutOps for PrimitiveVectorMut { (Self::F16(a), Self::F16(b)) => a.unsplit(b), (Self::F32(a), Self::F32(b)) => a.unsplit(b), (Self::F64(a), Self::F64(b)) => a.unsplit(b), - _ => ::vortex_error::vortex_panic!("Mismatched primitive vector types"), + _ => vortex_panic!("Mismatched primitive vector types"), } } } @@ -276,6 +276,87 @@ impl PTypeDowncast for PrimitiveVectorMut { } } +impl<'a> PTypeDowncast for &'a mut PrimitiveVectorMut { + type Output = &'a mut PVectorMut; + + fn into_u8(self) -> Self::Output { + match self { + PrimitiveVectorMut::U8(v) => v, + _ => vortex_panic!("Expected PrimitiveVectorMut::U8, got {self:?}"), + } + } + + fn into_u16(self) -> Self::Output { + match self { + PrimitiveVectorMut::U16(v) => v, + _ => vortex_panic!("Expected PrimitiveVectorMut::U16, got {self:?}"), + } + } + + fn into_u32(self) -> Self::Output { + match self { + PrimitiveVectorMut::U32(v) => v, + _ => vortex_panic!("Expected PrimitiveVectorMut::U32, got {self:?}"), + } + } + + fn into_u64(self) -> Self::Output { + match self { + PrimitiveVectorMut::U64(v) => v, + _ => vortex_panic!("Expected PrimitiveVectorMut::U64, got {self:?}"), + } + } + + fn into_i8(self) -> Self::Output { + match self { + PrimitiveVectorMut::I8(v) => v, + _ => vortex_panic!("Expected PrimitiveVectorMut::I8, got {self:?}"), + } + } + + fn into_i16(self) -> Self::Output { + match self { + PrimitiveVectorMut::I16(v) => v, + _ => vortex_panic!("Expected PrimitiveVectorMut::I16, got {self:?}"), + } + } + + fn into_i32(self) -> Self::Output { + match self { + PrimitiveVectorMut::I32(v) => v, + _ => vortex_panic!("Expected PrimitiveVectorMut::I32, got {self:?}"), + } + } + + fn into_i64(self) -> Self::Output { + match self { + PrimitiveVectorMut::I64(v) => v, + _ => vortex_panic!("Expected PrimitiveVectorMut::I64, got {self:?}"), + } + } + + fn into_f16(self) -> Self::Output { + match self { + PrimitiveVectorMut::F16(v) => v, + _ => vortex_panic!("Expected 
PrimitiveVectorMut::F16, got {self:?}"), + } + } + + fn into_f32(self) -> Self::Output { + match self { + PrimitiveVectorMut::F32(v) => v, + _ => vortex_panic!("Expected PrimitiveVectorMut::F32, got {self:?}"), + } + } + + fn into_f64(self) -> Self::Output { + match self { + PrimitiveVectorMut::F64(v) => v, + _ => vortex_panic!("Expected PrimitiveVectorMut::F64, got {self:?}"), + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/vortex-vector/src/vector_mut.rs b/vortex-vector/src/vector_mut.rs index b45a0868f63..9be1f3fdfaa 100644 --- a/vortex-vector/src/vector_mut.rs +++ b/vortex-vector/src/vector_mut.rs @@ -18,7 +18,7 @@ use crate::listview::ListViewVectorMut; use crate::null::NullVectorMut; use crate::primitive::PrimitiveVectorMut; use crate::struct_::StructVectorMut; -use crate::{Vector, VectorMutOps, match_each_vector_mut, match_vector_pair}; +use crate::{match_each_vector_mut, match_vector_pair, Vector, VectorMutOps}; /// An enum over all kinds of mutable vectors, which represent fully decompressed (canonical) array /// data. @@ -131,7 +131,7 @@ impl VectorMutOps for VectorMut { impl VectorMut { /// Returns a reference to the inner [`NullVectorMut`] if `self` is of that variant. - pub fn as_null(&self) -> &NullVectorMut { + pub fn as_null_mut(&mut self) -> &mut NullVectorMut { if let VectorMut::Null(v) = self { return v; } @@ -139,7 +139,7 @@ impl VectorMut { } /// Returns a reference to the inner [`BoolVectorMut`] if `self` is of that variant. - pub fn as_bool(&self) -> &BoolVectorMut { + pub fn as_bool_mut(&mut self) -> &mut BoolVectorMut { if let VectorMut::Bool(v) = self { return v; } @@ -147,7 +147,7 @@ impl VectorMut { } /// Returns a reference to the inner [`PrimitiveVectorMut`] if `self` is of that variant. 
- pub fn as_primitive(&self) -> &PrimitiveVectorMut { + pub fn as_primitive_mut(&mut self) -> &mut PrimitiveVectorMut { if let VectorMut::Primitive(v) = self { return v; } @@ -155,7 +155,7 @@ impl VectorMut { } /// Returns a reference to the inner [`StringVectorMut`] if `self` is of that variant. - pub fn as_string(&self) -> &StringVectorMut { + pub fn as_string_mut(&mut self) -> &mut StringVectorMut { if let VectorMut::String(v) = self { return v; } @@ -163,7 +163,7 @@ impl VectorMut { } /// Returns a reference to the inner [`BinaryVectorMut`] if `self` is of that variant. - pub fn as_binary(&self) -> &BinaryVectorMut { + pub fn as_binary_mut(&mut self) -> &mut BinaryVectorMut { if let VectorMut::Binary(v) = self { return v; } @@ -171,7 +171,7 @@ impl VectorMut { } /// Returns a reference to the inner [`ListViewVectorMut`] if `self` is of that variant. - pub fn as_list(&self) -> &ListViewVectorMut { + pub fn as_list_mut(&mut self) -> &mut ListViewVectorMut { if let VectorMut::List(v) = self { return v; } @@ -179,7 +179,7 @@ impl VectorMut { } /// Returns a reference to the inner [`FixedSizeListVectorMut`] if `self` is of that variant. - pub fn as_fixed_size_list(&self) -> &FixedSizeListVectorMut { + pub fn as_fixed_size_list_mut(&mut self) -> &mut FixedSizeListVectorMut { if let VectorMut::FixedSizeList(v) = self { return v; } @@ -187,7 +187,7 @@ impl VectorMut { } /// Returns a reference to the inner [`StructVectorMut`] if `self` is of that variant. 
- pub fn as_struct(&self) -> &StructVectorMut { + pub fn as_struct_mut(&mut self) -> &mut StructVectorMut { if let VectorMut::Struct(v) = self { return v; } @@ -267,9 +267,9 @@ mod tests { use vortex_dtype::{DecimalDType, Nullability, PType}; use super::*; - use crate::VectorOps; use crate::decimal::DecimalVectorMut; use crate::primitive::PVectorMut; + use crate::VectorOps; #[test] fn test_with_capacity() { From d53c68b34811243d19da2116b60d09b679bac23f Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Fri, 7 Nov 2025 17:45:51 -0500 Subject: [PATCH 05/10] pipelined execution Signed-off-by: Nicholas Gates --- .../src/arrays/primitive/vtable/operator.rs | 2 +- .../src/pipeline/{bits.rs => bit_view.rs} | 256 +++++++----------- vortex-array/src/pipeline/mod.rs | 16 +- vortex-array/src/pipeline/source_driver.rs | 2 +- 4 files changed, 112 insertions(+), 164 deletions(-) rename vortex-array/src/pipeline/{bits.rs => bit_view.rs} (66%) diff --git a/vortex-array/src/arrays/primitive/vtable/operator.rs b/vortex-array/src/arrays/primitive/vtable/operator.rs index 4653d3ef5b8..cf3b6b5338f 100644 --- a/vortex-array/src/arrays/primitive/vtable/operator.rs +++ b/vortex-array/src/arrays/primitive/vtable/operator.rs @@ -3,7 +3,7 @@ use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable}; use crate::execution::{kernel, BatchKernelRef, BindCtx}; -use crate::pipeline::bits::BitView; +use crate::pipeline::bit_view::BitView; use crate::pipeline::{BindContext, KernelContext, PipelinedSource, SourceKernel, N}; use crate::vtable::{OperatorVTable, ValidityHelper}; use crate::{ArrayRef, IntoArray}; diff --git a/vortex-array/src/pipeline/bits.rs b/vortex-array/src/pipeline/bit_view.rs similarity index 66% rename from vortex-array/src/pipeline/bits.rs rename to vortex-array/src/pipeline/bit_view.rs index 5d478952670..b23e3c8108d 100644 --- a/vortex-array/src/pipeline/bits.rs +++ b/vortex-array/src/pipeline/bit_view.rs @@ -3,20 +3,20 @@ use std::fmt::{Debug, Formatter}; -use 
bitvec::prelude::*; -use vortex_error::{vortex_err, VortexError, VortexResult}; +use vortex_error::VortexResult; -use crate::pipeline::{N, N_WORDS}; +use crate::pipeline::{N, N_BYTES, N_WORDS}; /// A borrowed fixed-size bit vector of length `N` bits, represented as an array of usize words. /// -/// Internally, it uses a [`BitArray`] to store the bits, but this crate has some -/// performance foot-guns in cases where we can lean on better assumptions, and therefore we wrap -/// it up for use within Vortex. -/// Read-only view into a bit array for selection masking in operator operations. +/// This struct is designed to provide a view over a Vortex [`vortex_buffer::BitBuffer`], therefore +/// the bit-ordering is LSB0 (least-significant-bit first). +/// +/// Note that [`BitView`] does not support an offset. Therefore, bits are assumed to start at +/// index and end at index `N - 1`. #[derive(Clone, Copy)] pub struct BitView<'a> { - bits: &'a BitArray<[usize; N_WORDS], Lsb0>, + bits: &'a [u8; N_BYTES], // TODO(ngates): we may want to expose this for optimizations. // If set to Selection::Prefix, then all true bits are at the start of the array. 
// selection: Selection, @@ -34,43 +34,26 @@ impl Debug for BitView<'_> { impl BitView<'static> { pub fn all_true() -> Self { - static ALL_TRUE: [usize; N_WORDS] = [usize::MAX; N_WORDS]; - unsafe { - BitView::new_unchecked( - std::mem::transmute::<&[usize; N_WORDS], &BitArray<[usize; N_WORDS], Lsb0>>( - &ALL_TRUE, - ), - N, - ) - } + static ALL_TRUE: [u8; N_BYTES] = [u8::MAX; N_BYTES]; + unsafe { BitView::new_unchecked(&ALL_TRUE, N) } } pub fn all_false() -> Self { - static ALL_FALSE: [usize; N_WORDS] = [0; N_WORDS]; - unsafe { - BitView::new_unchecked( - std::mem::transmute::<&[usize; N_WORDS], &BitArray<[usize; N_WORDS], Lsb0>>( - &ALL_FALSE, - ), - 0, - ) - } + static ALL_FALSE: [u8; N_BYTES] = [0; N_BYTES]; + unsafe { BitView::new_unchecked(&ALL_FALSE, 0) } } } impl<'a> BitView<'a> { - pub fn new(bits: &[usize; N_WORDS]) -> Self { - let true_count = bits.iter().map(|&word| word.count_ones() as usize).sum(); - let bits: &BitArray<[usize; N_WORDS], Lsb0> = unsafe { - std::mem::transmute::<&[usize; N_WORDS], &BitArray<[usize; N_WORDS], Lsb0>>(bits) - }; + pub fn new(bits: &'a [u8; N_BYTES]) -> Self { + let ptr = bits.as_ptr().cast::(); + let true_count = (0..N_WORDS) + .map(|idx| unsafe { ptr.add(idx).read_unaligned().count_ones() as usize }) + .sum(); BitView { bits, true_count } } - pub(crate) unsafe fn new_unchecked( - bits: &'a BitArray<[usize; N_WORDS], Lsb0>, - true_count: usize, - ) -> Self { + pub(crate) unsafe fn new_unchecked(bits: &'a [u8; N_BYTES], true_count: usize) -> Self { BitView { bits, true_count } } @@ -79,6 +62,17 @@ impl<'a> BitView<'a> { self.true_count } + /// Iterate the [`BitView`] in fixed-size words. + /// + /// The words are loaded using unaligned loads to ensure correct bit ordering. + /// For example, bit 0 is located in `word & 1 << 0`, bit 63 is located in `word & 1 << 63`, + /// assuming the word size is 64 bits. 
+ fn iter_words(&self) -> impl Iterator + '_ { + let ptr = self.bits.as_ptr().cast::(); + // We use constant N_WORDS to trigger loop unrolling. + (0..N_WORDS).map(move |idx| unsafe { ptr.add(idx).read_unaligned() }) + } + /// Runs the provided function `f` for each index of a `true` bit in the view. pub fn iter_ones(&self, mut f: F) where @@ -89,7 +83,7 @@ impl<'a> BitView<'a> { N => (0..N).for_each(&mut f), _ => { let mut bit_idx = 0; - for mut raw in self.bits.into_inner() { + for mut raw in self.iter_words() { while raw != 0 { let bit_pos = raw.trailing_zeros(); f(bit_idx + bit_pos as usize); @@ -116,7 +110,7 @@ impl<'a> BitView<'a> { } _ => { let mut bit_idx = 0; - for mut raw in self.bits.into_inner() { + for mut raw in self.iter_words() { while raw != 0 { let bit_pos = raw.trailing_zeros(); f(bit_idx + bit_pos as usize)?; @@ -139,7 +133,7 @@ impl<'a> BitView<'a> { N => {} _ => { let mut bit_idx = 0; - for mut raw in self.bits.into_inner() { + for mut raw in self.iter_words() { while raw != usize::MAX { let bit_pos = raw.trailing_ones(); f(bit_idx + bit_pos as usize); @@ -166,7 +160,8 @@ impl<'a> BitView<'a> { N => f((0, N)), _ => { let mut bit_idx = 0; - for mut raw in self.bits.into_inner() { + for raw in self.bits { + let mut raw = *raw; let mut offset = 0; while raw != 0 { // Skip leading zeros first @@ -192,49 +187,66 @@ impl<'a> BitView<'a> { } } - pub fn as_raw(&self) -> &[usize; N_WORDS] { - // It's actually remarkably hard to get a reference to the underlying array! 
- let raw = self.bits.as_raw_slice(); - unsafe { &*(raw.as_ptr() as *const [usize; N_WORDS]) } - } -} - -impl<'a> From<&'a [usize; N_WORDS]> for BitView<'a> { - fn from(value: &'a [usize; N_WORDS]) -> Self { - Self::new(value) - } -} - -impl<'a> From<&'a BitArray<[usize; N_WORDS], Lsb0>> for BitView<'a> { - fn from(bits: &'a BitArray<[usize; N_WORDS], Lsb0>) -> Self { - BitView::new(unsafe { - std::mem::transmute::<&BitArray<[usize; N_WORDS]>, &[usize; N_WORDS]>(bits) - }) - } -} - -impl<'a> TryFrom<&'a BitSlice> for BitView<'a> { - type Error = VortexError; - - fn try_from(value: &'a BitSlice) -> Result { - let bits: &BitArray<[usize; N_WORDS], Lsb0> = value - .try_into() - .map_err(|e| vortex_err!("Failed to convert BitSlice to BitArray: {}", e))?; - Ok(BitView::new(unsafe { - std::mem::transmute::<&BitArray<[usize; N_WORDS]>, &[usize; N_WORDS]>(bits) - })) + pub fn as_raw(&self) -> &[u8; N_BYTES] { + self.bits } } #[cfg(test)] mod tests { - use vortex_mask::Mask; + use bitvec::slice::BitSlice; + use vortex_buffer::BitBufferMut; use super::*; + #[test] + fn test_bits() { + let mut bits = BitBufferMut::new_unset(128); + bits.set(1); + bits.set(2); + bits.set(3); + bits.set(8); + bits.set(64); + let bits = bits.freeze(); + assert_eq!(bits.set_indices().collect::>(), vec![1, 2, 3, 8, 64]); + + // Can we just transmute and pass it into bitvec crate? + // Absolutely not is that answer. + let slice_u64 = + BitSlice::::from_slice(unsafe { std::mem::transmute(bits.inner().as_ref()) }); + assert_ne!( + slice_u64.iter_ones().collect::>(), + vec![1, 2, 3, 8, 64] + ); + + // But if we have a &[u8], we can use unaligned load to pull it into the right order. 
+ unsafe { + let vec_usize = (0..2) + .map(|idx| { + bits.inner() + .as_ptr() + .cast::() + .add(idx) + .read_unaligned() + }) + .collect::>(); + let slice_usize = BitSlice::::from_slice(&vec_usize); + assert_eq!( + slice_usize.iter_ones().collect::>(), + vec![1, 2, 3, 8, 64] + ); + } + + println!( + "Bits: {:08b} {:08b}", + bits.inner().as_ref()[0], + bits.inner().as_ref()[1] + ); + } + #[test] fn test_iter_ones_empty() { - let bits = [0usize; N_WORDS]; + let bits = [0; N_BYTES]; let view = BitView::new(&bits); let mut ones = Vec::new(); @@ -258,7 +270,7 @@ mod tests { #[test] fn test_iter_zeros_empty() { - let bits = [0usize; N_WORDS]; + let bits = [0; N_BYTES]; let view = BitView::new(&bits); let mut zeros = Vec::new(); @@ -280,7 +292,7 @@ mod tests { #[test] fn test_iter_ones_single_bit() { - let mut bits = [0usize; N_WORDS]; + let mut bits = [0; N_BYTES]; bits[0] = 1; // Set bit 0 (LSB) let view = BitView::new(&bits); @@ -293,8 +305,8 @@ mod tests { #[test] fn test_iter_zeros_single_bit_unset() { - let mut bits = [usize::MAX; N_WORDS]; - bits[0] = usize::MAX ^ 1; // Clear bit 0 (LSB) + let mut bits = [u8::MAX; N_BYTES]; + bits[0] = u8::MAX ^ 1; // Clear bit 0 (LSB) let view = BitView::new(&bits); let mut zeros = Vec::new(); @@ -305,7 +317,7 @@ mod tests { #[test] fn test_iter_ones_multiple_bits_first_word() { - let mut bits = [0usize; N_WORDS]; + let mut bits = [0; N_BYTES]; bits[0] = 0b1010101; // Set bits 0, 2, 4, 6 let view = BitView::new(&bits); @@ -318,7 +330,7 @@ mod tests { #[test] fn test_iter_zeros_multiple_bits_first_word() { - let mut bits = [usize::MAX; N_WORDS]; + let mut bits = [u8::MAX; N_BYTES]; bits[0] = !0b1010101; // Clear bits 0, 2, 4, 6 let view = BitView::new(&bits); @@ -330,7 +342,7 @@ mod tests { #[test] fn test_iter_ones_across_words() { - let mut bits = [0usize; N_WORDS]; + let mut bits = [0; N_BYTES]; bits[0] = 1 << 63; // Set bit 63 of first word bits[1] = 1; // Set bit 0 of second word (bit 64 overall) bits[2] = 1 << 31; // Set bit 
31 of third word (bit 159 overall) @@ -345,7 +357,7 @@ mod tests { #[test] fn test_iter_zeros_across_words() { - let mut bits = [usize::MAX; N_WORDS]; + let mut bits = [u8::MAX; N_BYTES]; bits[0] = !(1 << 63); // Clear bit 63 of first word bits[1] = !1; // Clear bit 0 of second word (bit 64 overall) bits[2] = !(1 << 31); // Clear bit 31 of third word (bit 159 overall) @@ -359,7 +371,7 @@ mod tests { #[test] fn test_lsb_bit_ordering() { - let mut bits = [0usize; N_WORDS]; + let mut bits = [0; N_BYTES]; bits[0] = 0b11111111; // Set bits 0-7 (LSB ordering) let view = BitView::new(&bits); @@ -372,7 +384,7 @@ mod tests { #[test] fn test_iter_ones_and_zeros_complement() { - let mut bits = [0usize; N_WORDS]; + let mut bits = [0; N_BYTES]; bits[0] = 0xAAAAAAAAAAAAAAAA; // Alternating pattern let view = BitView::new(&bits); @@ -448,11 +460,11 @@ mod tests { let indices = vec![0, 10, 20, 63, 64, 100, 500, 1023]; // Create corresponding BitView - let mut bits = [0usize; N_WORDS]; + let mut bits = [0; N_BYTES]; for idx in &indices { - let word_idx = idx / 64; - let bit_idx = idx % 64; - bits[word_idx] |= 1usize << bit_idx; + let word_idx = idx / 8; + let bit_idx = idx % 8; + bits[word_idx] |= 1u8 << bit_idx; } let view = BitView::new(&bits); @@ -470,12 +482,12 @@ mod tests { let slices = vec![(0, 10), (100, 110), (500, 510)]; // Create corresponding BitView - let mut bits = [0usize; N_WORDS]; + let mut bits = [0; N_BYTES]; for (start, end) in &slices { for idx in *start..*end { - let word_idx = idx / 64; - let bit_idx = idx % 64; - bits[word_idx] |= 1usize << bit_idx; + let word_idx = idx / 8; + let bit_idx = idx % 8; + bits[word_idx] |= 1u8 << bit_idx; } } let view = BitView::new(&bits); @@ -493,68 +505,4 @@ mod tests { assert_eq!(bitview_ones, expected_indices); assert_eq!(view.true_count(), expected_indices.len()); } - - #[test] - fn test_mask_and_bitview_iter_match() { - // Create a pattern with alternating bits in first word - let mut bits = [0usize; N_WORDS]; - bits[0] = 
0xAAAAAAAAAAAAAAAA; // Alternating 1s and 0s - bits[1] = 0xFF00FF00FF00FF00; // Alternating bytes - - let view = BitView::new(&bits); - - // Collect indices from BitView - let mut bitview_ones = Vec::new(); - view.iter_ones(|idx| bitview_ones.push(idx)); - - // Create Mask from the same indices - let mask = Mask::from_indices(N, bitview_ones.clone()); - - // Verify the mask returns the same indices - mask.iter_bools(|iter| { - let mask_bools: Vec = iter.collect(); - - // Check each bit matches - for i in 0..N { - let expected = bitview_ones.contains(&i); - assert_eq!(mask_bools[i], expected, "Mismatch at index {}", i); - } - }); - } - - #[test] - fn test_bitview_zeros_complement_mask() { - // Create a pattern - let mut bits = [0usize; N_WORDS]; - bits[0] = 0b11110000111100001111000011110000; - - let view = BitView::new(&bits); - - // Collect ones and zeros from BitView - let mut bitview_ones = Vec::new(); - let mut bitview_zeros = Vec::new(); - view.iter_ones(|idx| bitview_ones.push(idx)); - view.iter_zeros(|idx| bitview_zeros.push(idx)); - - // Create masks for ones and zeros - let ones_mask = Mask::from_indices(N, bitview_ones); - let zeros_mask = Mask::from_indices(N, bitview_zeros); - - // Verify they are complements - ones_mask.iter_bools(|ones_iter| { - zeros_mask.iter_bools(|zeros_iter| { - let ones_bools: Vec = ones_iter.collect(); - let zeros_bools: Vec = zeros_iter.collect(); - - for i in 0..N { - // Each index should be either in ones or zeros, but not both - assert_ne!( - ones_bools[i], zeros_bools[i], - "Index {} should be in exactly one set", - i - ); - } - }); - }); - } } diff --git a/vortex-array/src/pipeline/mod.rs b/vortex-array/src/pipeline/mod.rs index 2c412c1d7c2..d22e596397d 100644 --- a/vortex-array/src/pipeline/mod.rs +++ b/vortex-array/src/pipeline/mod.rs @@ -18,11 +18,11 @@ //! //! It is a work-in-progress and is not yet used in production. 
-pub mod bits; +pub mod bit_view; pub mod source_driver; use crate::Array; -use bits::BitView; +use bit_view::BitView; use std::ops::Deref; use vortex_error::VortexResult; use vortex_vector::{Vector, VectorMut}; @@ -97,13 +97,13 @@ pub type VectorId = usize; /// the setup costs (such as DType validation, stats short-circuiting, etc.), and to make better /// use of CPU caches by performing all operations while the data is hot. /// -/// The [`SourceKernel::step`] method will be invoked repeatedly to process chunks of data, [`N`] elements -/// at a time. Each invocation is passed a selection mask indicating which elements of the chunk -/// should be written to the start of the output vector. +/// The [`SourceKernel::step`] method will be invoked repeatedly to process chunks of data, [`N`] +/// elements at a time. Each invocation is passed a selection mask indicating which elements of the +/// chunk should be written to the start of the output vector. /// -/// The mutable output vector is **guaranteed** to have a capacity of at least [`N`] elements, and -/// its length will initially be set to zero. It is therefore safe to invoke unchecked writes up to -/// `N` elements. +/// The mutable output vector is **guaranteed** to have a capacity of at least [`N`] elements. The +/// caller makes no guarantee about the initial length of the output vector; and the kernel is +/// expected to append `selection.true_count()` elements. /// /// The pipeline may invoke the `SourceKernel::skip` method to skip over some number of chunks of data. /// The kernel should mutate any internal state as necessary to account for the skipped data. 
diff --git a/vortex-array/src/pipeline/source_driver.rs b/vortex-array/src/pipeline/source_driver.rs index 74ba0a3aa59..27fbe87f6e9 100644 --- a/vortex-array/src/pipeline/source_driver.rs +++ b/vortex-array/src/pipeline/source_driver.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use crate::pipeline::bits::BitView; +use crate::pipeline::bit_view::BitView; use crate::pipeline::{BindContext, KernelContext, PipelinedSource, VectorId, N}; use itertools::Itertools; use vortex_error::{vortex_panic, VortexResult}; From f6815c2e4f62224055900b73b3fbe4f2071db9dc Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Sat, 8 Nov 2025 18:15:55 -0500 Subject: [PATCH 06/10] pipelined execution Signed-off-by: Nicholas Gates --- vortex-array/src/array/operator.rs | 4 +- .../src/arrays/primitive/vtable/operator.rs | 17 +- vortex-array/src/pipeline/bit_view.rs | 347 +++++++++++------- vortex-array/src/pipeline/mod.rs | 6 +- vortex-array/src/pipeline/source_driver.rs | 91 +++-- vortex-array/src/vtable/operator.rs | 4 +- vortex-buffer/src/buffer_mut.rs | 4 +- vortex-vector/src/primitive/generic_mut.rs | 2 +- vortex-vector/src/primitive/vector_mut.rs | 2 +- vortex-vector/src/vector_mut.rs | 4 +- 10 files changed, 285 insertions(+), 196 deletions(-) diff --git a/vortex-array/src/array/operator.rs b/vortex-array/src/array/operator.rs index 340997b0529..0a0e0705046 100644 --- a/vortex-array/src/array/operator.rs +++ b/vortex-array/src/array/operator.rs @@ -3,9 +3,9 @@ use std::sync::Arc; -use vortex_error::{vortex_panic, VortexResult}; +use vortex_error::{VortexResult, vortex_panic}; use vortex_mask::Mask; -use vortex_vector::{vector_matches_dtype, Vector, VectorOps}; +use vortex_vector::{Vector, VectorOps, vector_matches_dtype}; use crate::execution::{BatchKernelRef, BindCtx, DummyExecutionCtx, ExecutionCtx}; use crate::vtable::{OperatorVTable, VTable}; diff --git a/vortex-array/src/arrays/primitive/vtable/operator.rs 
b/vortex-array/src/arrays/primitive/vtable/operator.rs index cf3b6b5338f..caa15c04901 100644 --- a/vortex-array/src/arrays/primitive/vtable/operator.rs +++ b/vortex-array/src/arrays/primitive/vtable/operator.rs @@ -1,19 +1,20 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable}; -use crate::execution::{kernel, BatchKernelRef, BindCtx}; -use crate::pipeline::bit_view::BitView; -use crate::pipeline::{BindContext, KernelContext, PipelinedSource, SourceKernel, N}; -use crate::vtable::{OperatorVTable, ValidityHelper}; -use crate::{ArrayRef, IntoArray}; use vortex_buffer::Buffer; use vortex_compute::filter::Filter; -use vortex_dtype::{match_each_native_ptype, NativePType, PTypeDowncastExt}; +use vortex_dtype::{NativePType, PTypeDowncastExt, match_each_native_ptype}; use vortex_error::VortexResult; use vortex_mask::Mask; -use vortex_vector::primitive::PVector; use vortex_vector::VectorMut; +use vortex_vector::primitive::PVector; + +use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable}; +use crate::execution::{BatchKernelRef, BindCtx, kernel}; +use crate::pipeline::bit_view::BitView; +use crate::pipeline::{BindContext, KernelContext, N, PipelinedSource, SourceKernel}; +use crate::vtable::{OperatorVTable, ValidityHelper}; +use crate::{ArrayRef, IntoArray}; impl OperatorVTable for PrimitiveVTable { fn as_pipelined_source(array: &PrimitiveArray) -> Option<&dyn PipelinedSource> { diff --git a/vortex-array/src/pipeline/bit_view.rs b/vortex-array/src/pipeline/bit_view.rs index b23e3c8108d..aa3129b66fc 100644 --- a/vortex-array/src/pipeline/bit_view.rs +++ b/vortex-array/src/pipeline/bit_view.rs @@ -1,8 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::borrow::Cow; use std::fmt::{Debug, Formatter}; +use vortex_buffer::BitBuffer; use vortex_error::VortexResult; use 
crate::pipeline::{N, N_BYTES, N_WORDS}; @@ -14,9 +16,8 @@ use crate::pipeline::{N, N_BYTES, N_WORDS}; /// /// Note that [`BitView`] does not support an offset. Therefore, bits are assumed to start at /// index and end at index `N - 1`. -#[derive(Clone, Copy)] pub struct BitView<'a> { - bits: &'a [u8; N_BYTES], + bits: Cow<'a, [u8; N_BYTES]>, // TODO(ngates): we may want to expose this for optimizations. // If set to Selection::Prefix, then all true bits are at the start of the array. // selection: Selection, @@ -45,16 +46,82 @@ impl BitView<'static> { } impl<'a> BitView<'a> { + /// Creates a [`BitView`] from raw bits, computing the true count. pub fn new(bits: &'a [u8; N_BYTES]) -> Self { let ptr = bits.as_ptr().cast::(); let true_count = (0..N_WORDS) .map(|idx| unsafe { ptr.add(idx).read_unaligned().count_ones() as usize }) .sum(); - BitView { bits, true_count } + BitView { + bits: Cow::Borrowed(bits), + true_count, + } + } + + /// Creates a [`BitView`] from owned raw bits. + pub fn new_owned(bits: [u8; N_BYTES]) -> Self { + let ptr = bits.as_ptr().cast::(); + let true_count = (0..N_WORDS) + .map(|idx| unsafe { ptr.add(idx).read_unaligned().count_ones() as usize }) + .sum(); + BitView { + bits: Cow::Owned(bits), + true_count, + } } + /// Creates a [`BitView`] from raw bits and a known true count. + /// + /// # Safety + /// + /// The caller must ensure that `true_count` is correct for the provided `bits`. pub(crate) unsafe fn new_unchecked(bits: &'a [u8; N_BYTES], true_count: usize) -> Self { - BitView { bits, true_count } + BitView { + bits: Cow::Borrowed(bits), + true_count, + } + } + + /// Creates a [`BitView`] from a byte slice. + /// + /// # Panics + /// + /// If the length of the slice is not equal to `N_BYTES`. 
+ pub fn from_slice(bits: &'a [u8]) -> Self { + assert_eq!(bits.len(), N_BYTES); + let bits_array = unsafe { &*(bits.as_ptr() as *const [u8; N_BYTES]) }; + BitView::new(bits_array) + } + + /// Creates a [`BitView`] from a mutable byte array, populating it with the requested prefix + /// of `true` bits. + pub fn with_prefix(n_true: usize) -> Self { + assert!(n_true <= N); + + // We're going to own our own array of bits + let mut bits = [0u8; N_BYTES]; + + // All-true words first + let n_full_words = n_true / (usize::BITS as usize); + let remaining_bits = n_true % (usize::BITS as usize); + + let ptr = bits.as_mut_ptr().cast::(); + + // Fill the all-true words + for word_idx in 0..n_full_words { + unsafe { ptr.add(word_idx).write_unaligned(usize::MAX) }; + } + + // Fill the remaining bits in the next word + if remaining_bits > 0 { + let mask = (1usize << remaining_bits) - 1; + unsafe { ptr.add(n_full_words).write_unaligned(mask) }; + } + + Self { + bits: Cow::Owned(bits), + true_count: n_true, + } } /// Returns the number of `true` bits in the view. @@ -145,104 +212,160 @@ impl<'a> BitView<'a> { } } - /// Runs the provided function `f` for each range of `true` bits in the view. - /// - /// The function `f` receives a tuple `(start, len)` where `start` is the index of the first - /// `true` bit and `len` is the number of consecutive `true` bits. - /// - /// FIXME(ngates): this code is broken. 
pub fn iter_slices(&self, mut f: F) where F: FnMut((usize, usize)), { - match self.true_count { - 0 => {} - N => f((0, N)), - _ => { - let mut bit_idx = 0; - for raw in self.bits { - let mut raw = *raw; - let mut offset = 0; - while raw != 0 { - // Skip leading zeros first - let zeros = raw.leading_zeros(); - offset += zeros; - raw <<= zeros; + if self.true_count == 0 { + return; + } + + let mut abs_bit_offset: usize = 0; // Absolute bit index of the *current* word being processed + let mut slice_start_bit: usize = 0; // Absolute start index of the run of 1s being tracked + let mut slice_length: usize = 0; // Accumulated length of the run of 1s + + for mut word in self.iter_words() { + match word { + 0 => { + // If a slice was being tracked, the run ends at the start of this word. + if slice_length > 0 { + f((slice_start_bit, slice_length)); + slice_length = 0; + } + } + usize::MAX => { + // If a slice was not already open, it starts at the beginning of this word. + if slice_length == 0 { + slice_start_bit = abs_bit_offset; + } + // Extend the length by a full word (64 bits). + slice_length += usize::BITS as usize; + } + _ => { + while word != 0 { + // Find the first set bit (start of a run of 1s) + let zeros = word.trailing_zeros() as usize; + + // If a run was open, and we hit a zero gap, report the finished slice + if slice_length > 0 && zeros > 0 { + f((slice_start_bit, slice_length)); + slice_length = 0; // Reset state for a new slice + } + + // Advance past the zeros + word >>= zeros; - if offset >= 64 { + if word == 0 { break; } - // Count leading ones - let ones = raw.leading_ones(); - if ones > 0 { - f((bit_idx + offset as usize, ones as usize)); - offset += ones; - raw <<= ones; + // Find the contiguous ones (the length of the current run segment) + let ones = word.trailing_ones() as usize; + + // If slice_length is 0, we found the *absolute* start of a new slice. 
+ if slice_length == 0 { + // Calculate the bit index within the *entire* mask where this run starts + let current_word_idx = abs_bit_offset + zeros; + slice_start_bit = current_word_idx; } + + // Accumulate the length of the slice + slice_length += ones; + + // Advance past the ones + word >>= ones; } - bit_idx += usize::BITS as usize; // Move to next word } } + + abs_bit_offset += usize::BITS as usize; + } + + if slice_length > 0 { + f((slice_start_bit, slice_length)); } } + /// Runs the provided function `f` for each range of `true` bits in the view. + /// + /// The function `f` receives a tuple `(start, len)` where `start` is the index of the first + /// `true` bit and `len` is the number of consecutive `true` bits. + /// + /// FIXME(ngates): this code is broken. + pub fn as_raw(&self) -> &[u8; N_BYTES] { - self.bits + self.bits.as_ref() } } -#[cfg(test)] -mod tests { - use bitvec::slice::BitSlice; - use vortex_buffer::BitBufferMut; - - use super::*; +pub trait BitViewExt { + /// Iterate the [`BitBuffer`] in fixed-size chunks of [`BitView`]. + /// + /// The final chunk will be filled with unset padding bits if the bit buffer's length is not + /// a multiple of `N`. + /// + /// # Panics + /// + /// If the bit buffer's bit-offset is not zero. + fn iter_bit_views(&self) -> impl Iterator> + '_; +} - #[test] - fn test_bits() { - let mut bits = BitBufferMut::new_unset(128); - bits.set(1); - bits.set(2); - bits.set(3); - bits.set(8); - bits.set(64); - let bits = bits.freeze(); - assert_eq!(bits.set_indices().collect::>(), vec![1, 2, 3, 8, 64]); - - // Can we just transmute and pass it into bitvec crate? - // Absolutely not is that answer. 
- let slice_u64 = - BitSlice::::from_slice(unsafe { std::mem::transmute(bits.inner().as_ref()) }); - assert_ne!( - slice_u64.iter_ones().collect::>(), - vec![1, 2, 3, 8, 64] +impl BitViewExt for BitBuffer { + fn iter_bit_views(&self) -> impl Iterator> + '_ { + assert_eq!( + self.offset(), + 0, + "BitView iteration requires zero bit offset" ); + let n_views = (self.len() + N - 1) / N; + BitViewIterator { + bits: self.inner().as_ref(), + view_idx: 0, + n_views, + } + } +} + +struct BitViewIterator<'a> { + bits: &'a [u8], + // The index of the view to be returned next + view_idx: usize, + // The total number of views + n_views: usize, +} - // But if we have a &[u8], we can use unaligned load to pull it into the right order. - unsafe { - let vec_usize = (0..2) - .map(|idx| { - bits.inner() - .as_ptr() - .cast::() - .add(idx) - .read_unaligned() - }) - .collect::>(); - let slice_usize = BitSlice::::from_slice(&vec_usize); - assert_eq!( - slice_usize.iter_ones().collect::>(), - vec![1, 2, 3, 8, 64] - ); +impl<'a> Iterator for BitViewIterator<'a> { + type Item = BitView<'a>; + + fn next(&mut self) -> Option { + if self.view_idx == self.n_views { + return None; } - println!( - "Bits: {:08b} {:08b}", - bits.inner().as_ref()[0], - bits.inner().as_ref()[1] - ); + let start_byte = self.view_idx * N_BYTES; + let end_byte = start_byte + N_BYTES; + + let bits = if end_byte <= self.bits.len() { + // Full view from the original bits + BitView::from_slice(&self.bits[start_byte..end_byte]) + } else { + // Partial view, copy to scratch + let remaining_bytes = self.bits.len() - start_byte; + let mut remaining = [0u8; N_BYTES]; + remaining[..remaining_bytes].copy_from_slice(&self.bits[start_byte..]); + BitView::new_owned(remaining) + }; + + self.view_idx += 1; + Some(bits) } +} + +#[cfg(test)] +mod tests { + use std::usize; + + use super::*; #[test] fn test_iter_ones_empty() { @@ -340,35 +463,6 @@ mod tests { assert_eq!(zeros, vec![0, 2, 4, 6]); } - #[test] - fn 
test_iter_ones_across_words() { - let mut bits = [0; N_BYTES]; - bits[0] = 1 << 63; // Set bit 63 of first word - bits[1] = 1; // Set bit 0 of second word (bit 64 overall) - bits[2] = 1 << 31; // Set bit 31 of third word (bit 159 overall) - let view = BitView::new(&bits); - - let mut ones = Vec::new(); - view.iter_ones(|idx| ones.push(idx)); - - assert_eq!(ones, vec![63, 64, 159]); - assert_eq!(view.true_count(), 3); - } - - #[test] - fn test_iter_zeros_across_words() { - let mut bits = [u8::MAX; N_BYTES]; - bits[0] = !(1 << 63); // Clear bit 63 of first word - bits[1] = !1; // Clear bit 0 of second word (bit 64 overall) - bits[2] = !(1 << 31); // Clear bit 31 of third word (bit 159 overall) - let view = BitView::new(&bits); - - let mut zeros = Vec::new(); - view.iter_zeros(|idx| zeros.push(idx)); - - assert_eq!(zeros, vec![63, 64, 159]); - } - #[test] fn test_lsb_bit_ordering() { let mut bits = [0; N_BYTES]; @@ -382,30 +476,6 @@ mod tests { assert_eq!(view.true_count(), 8); } - #[test] - fn test_iter_ones_and_zeros_complement() { - let mut bits = [0; N_BYTES]; - bits[0] = 0xAAAAAAAAAAAAAAAA; // Alternating pattern - let view = BitView::new(&bits); - - let mut ones = Vec::new(); - let mut zeros = Vec::new(); - view.iter_ones(|idx| ones.push(idx)); - view.iter_zeros(|idx| zeros.push(idx)); - - // Check that ones and zeros together cover all indices - let mut all_indices = ones.clone(); - all_indices.extend(&zeros); - all_indices.sort_unstable(); - - assert_eq!(all_indices, (0..N).collect::>()); - - // Check they don't overlap - for one_idx in &ones { - assert!(!zeros.contains(one_idx)); - } - } - #[test] fn test_all_false_static() { let view = BitView::all_false(); @@ -505,4 +575,21 @@ mod tests { assert_eq!(bitview_ones, expected_indices); assert_eq!(view.true_count(), expected_indices.len()); } + + #[test] + fn test_with_prefix() { + assert_eq!(BitView::with_prefix(0).true_count(), 0); + + // May as well test all the possible prefix lengths! 
+ for i in 1..N { + let view = BitView::with_prefix(i); + + // Collect slices (there should be one slice from 0 to n_true) + let mut slices = vec![]; + view.iter_slices(|slice| slices.push(slice)); + + assert_eq!(slices.len(), 1); + assert_eq!(slices[0], (0, i)); + } + } } diff --git a/vortex-array/src/pipeline/mod.rs b/vortex-array/src/pipeline/mod.rs index d22e596397d..f71605479cd 100644 --- a/vortex-array/src/pipeline/mod.rs +++ b/vortex-array/src/pipeline/mod.rs @@ -21,12 +21,14 @@ pub mod bit_view; pub mod source_driver; -use crate::Array; -use bit_view::BitView; use std::ops::Deref; + +use bit_view::BitView; use vortex_error::VortexResult; use vortex_vector::{Vector, VectorMut}; +use crate::Array; + /// The number of elements in each step of a Vortex evaluation operator. pub const N: usize = 1024; diff --git a/vortex-array/src/pipeline/source_driver.rs b/vortex-array/src/pipeline/source_driver.rs index 27fbe87f6e9..7f11ddcc370 100644 --- a/vortex-array/src/pipeline/source_driver.rs +++ b/vortex-array/src/pipeline/source_driver.rs @@ -1,13 +1,14 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use crate::pipeline::bit_view::BitView; -use crate::pipeline::{BindContext, KernelContext, PipelinedSource, VectorId, N}; use itertools::Itertools; -use vortex_error::{vortex_panic, VortexResult}; +use vortex_error::{VortexResult, vortex_panic}; use vortex_mask::Mask; use vortex_vector::{Vector, VectorMut, VectorMutOps}; +use crate::pipeline::bit_view::{BitView, BitViewExt}; +use crate::pipeline::{BindContext, KernelContext, N, PipelinedSource, VectorId}; + /// Temporary driver for executing a single array in a pipelined fashion. pub struct PipelineSourceDriver<'a> { array: &'a dyn PipelinedSource, @@ -40,50 +41,47 @@ impl<'a> PipelineSourceDriver<'a> { // `kernel.step(out)` has at least N bytes of capacity. 
let mut output = VectorMut::with_capacity( self.array.dtype(), - selection.true_count().next_multiple_of(N), + // We add an extra N to ensure we have enough capacity so the last chunk has 2 * N + // elements of capacity. + selection.true_count().next_multiple_of(N) + N, ); // TODO(ngates): change behaviour based on the density of the selection mask. - let selection_buffer = selection.to_bit_buffer(); - // TODO(ngates): rewrite chunks to take an arbitrary "storage type"? Or somehow copy - // the chunks directly into a wider bit slice? - let selection_chunks = selection_buffer.chunks(); - let mut selection_chunks_iter = selection_chunks.iter_padded(); - - let output_len = selection.true_count(); - - let mut selection_chunk = [0u64; N / u64::BITS as usize]; - - let mut output_chunks = vec![]; - while output.len() < output_len { - // Copy the next selection chunk into place. - for word_idx in 0..selection_chunk.len() { - selection_chunk[word_idx] = selection_chunks_iter.next().unwrap_or_else(|| 0u64); + match selection { + Mask::AllTrue(_) => { + // Select everything, so we can just run the kernel in a tight loop. + + // The number of _full_ chunks we need to process. + let nchunks = selection.len() / N; + for _ in 0..nchunks { + let prev_len = output.len(); + kernel.step(&kernel_ctx, &BitView::all_true(), &mut output)?; + debug_assert_eq!(output.len(), prev_len + N); + } + + // Now process the final partial chunk, if any. + let remaining = selection.len() % N; + if remaining > 0 { + let selection_view = BitView::with_prefix(remaining); + + let prev_len = output.len(); + kernel.step(&kernel_ctx, &selection_view, &mut output)?; + debug_assert_eq!(output.len(), prev_len + remaining); + debug_assert_eq!(output.len(), selection.len()); + } + } + Mask::AllFalse(_) => { + // Select nothing, return empty output! + } + Mask::Values(values) => { + // Mixed selection, so we have to process in chunks. 
+ let selection_bits = values.bit_buffer(); + for selection_view in selection_bits.iter_bit_views() { + let prev_len = output.len(); + kernel.step(&kernel_ctx, &selection_view, &mut output)?; + debug_assert_eq!(output.len(), prev_len + selection_view.true_count()); + } } - - // TODO(ngates): ideally our chunks iter would use a usize... - let selection_chunk_usize = unsafe { std::mem::transmute(&selection_chunk) }; - let selection = BitView::new(selection_chunk_usize); - - // We know we have remaining capacity for N elements, so split off a size-N chunk. - let remaining_output = output.split_off(N); - - kernel.step(&kernel_ctx, &selection, &mut output)?; - assert_eq!( - output.len(), - selection.true_count(), - "Kernel did not write expected number of elements" - ); - - // Now we un-split the output vector back onto its full size. - // output.unsplit(remaining_output); - output_chunks.push(output); - output = remaining_output; - } - - // Combine all output chunks back into the output vector. 
- for chunk in output_chunks { - output.unsplit(chunk); } Ok(output.freeze()) @@ -106,14 +104,15 @@ impl BindContext for PipelineSourceBindCtx<'_> { #[cfg(test)] mod test { - use crate::arrays::PrimitiveArray; - use crate::pipeline::source_driver::PipelineSourceDriver; - use crate::validity::Validity; use vortex_buffer::buffer; use vortex_dtype::PTypeDowncastExt; use vortex_mask::Mask; use vortex_vector::VectorOps; + use crate::arrays::PrimitiveArray; + use crate::pipeline::source_driver::PipelineSourceDriver; + use crate::validity::Validity; + #[test] fn test_primitive() { let array = PrimitiveArray::new::(buffer![0..100000u32], Validity::AllValid); diff --git a/vortex-array/src/vtable/operator.rs b/vortex-array/src/vtable/operator.rs index 2dbf34a9002..9603ca0cf16 100644 --- a/vortex-array/src/vtable/operator.rs +++ b/vortex-array/src/vtable/operator.rs @@ -1,15 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{VortexResult, vortex_bail}; use vortex_mask::Mask; use vortex_vector::Vector; +use crate::ArrayRef; use crate::array::IntoArray; use crate::execution::{BatchKernelRef, BindCtx, ExecutionCtx}; use crate::pipeline::PipelinedSource; use crate::vtable::{NotSupported, VTable}; -use crate::ArrayRef; /// A vtable for the new operator-based array functionality. 
Eventually this vtable will be /// merged into the main `VTable`, but for now it is kept separate to allow for incremental diff --git a/vortex-buffer/src/buffer_mut.rs b/vortex-buffer/src/buffer_mut.rs index 436006f7d2f..add9ae5f7be 100644 --- a/vortex-buffer/src/buffer_mut.rs +++ b/vortex-buffer/src/buffer_mut.rs @@ -9,7 +9,7 @@ use std::ops::{Deref, DerefMut}; use bytes::buf::UninitSlice; use bytes::{Buf, BufMut, BytesMut}; -use vortex_error::{vortex_panic, VortexExpect}; +use vortex_error::{VortexExpect, vortex_panic}; use crate::debug::TruncatedDebug; use crate::trusted_len::TrustedLen; @@ -726,7 +726,7 @@ impl Write for ByteBufferMut { mod test { use bytes::{Buf, BufMut}; - use crate::{buffer_mut, Alignment, BufferMut, ByteBufferMut}; + use crate::{Alignment, BufferMut, ByteBufferMut, buffer_mut}; #[test] fn capacity() { diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs index 2525592e1ae..9a02109803f 100644 --- a/vortex-vector/src/primitive/generic_mut.rs +++ b/vortex-vector/src/primitive/generic_mut.rs @@ -5,7 +5,7 @@ use vortex_buffer::BufferMut; use vortex_dtype::NativePType; -use vortex_error::{vortex_ensure, VortexExpect, VortexResult}; +use vortex_error::{VortexExpect, VortexResult, vortex_ensure}; use vortex_mask::MaskMut; use crate::primitive::PVector; diff --git a/vortex-vector/src/primitive/vector_mut.rs b/vortex-vector/src/primitive/vector_mut.rs index eef24f04707..2666f3f9150 100644 --- a/vortex-vector/src/primitive/vector_mut.rs +++ b/vortex-vector/src/primitive/vector_mut.rs @@ -9,7 +9,7 @@ use vortex_error::vortex_panic; use vortex_mask::MaskMut; use crate::primitive::{PVectorMut, PrimitiveVector}; -use crate::{match_each_pvector_mut, VectorMutOps}; +use crate::{VectorMutOps, match_each_pvector_mut}; /// A mutable vector of primitive values. 
/// diff --git a/vortex-vector/src/vector_mut.rs b/vortex-vector/src/vector_mut.rs index 9be1f3fdfaa..522b491de51 100644 --- a/vortex-vector/src/vector_mut.rs +++ b/vortex-vector/src/vector_mut.rs @@ -18,7 +18,7 @@ use crate::listview::ListViewVectorMut; use crate::null::NullVectorMut; use crate::primitive::PrimitiveVectorMut; use crate::struct_::StructVectorMut; -use crate::{match_each_vector_mut, match_vector_pair, Vector, VectorMutOps}; +use crate::{Vector, VectorMutOps, match_each_vector_mut, match_vector_pair}; /// An enum over all kinds of mutable vectors, which represent fully decompressed (canonical) array /// data. @@ -267,9 +267,9 @@ mod tests { use vortex_dtype::{DecimalDType, Nullability, PType}; use super::*; + use crate::VectorOps; use crate::decimal::DecimalVectorMut; use crate::primitive::PVectorMut; - use crate::VectorOps; #[test] fn test_with_capacity() { From cef3f517a01779e0afb4a57df0902137fdb60fb8 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Sat, 8 Nov 2025 18:26:11 -0500 Subject: [PATCH 07/10] pipelined execution Signed-off-by: Nicholas Gates --- vortex-array/src/array/operator.rs | 12 ++++++++++-- .../src/arrays/primitive/vtable/operator.rs | 18 +++++++++--------- vortex-array/src/pipeline/bit_view.rs | 2 ++ vortex-array/src/pipeline/source_driver.rs | 6 +++--- vortex-buffer/src/buffer_mut.rs | 11 ++++++++--- 5 files changed, 32 insertions(+), 17 deletions(-) diff --git a/vortex-array/src/array/operator.rs b/vortex-array/src/array/operator.rs index 0a0e0705046..74731a71ce5 100644 --- a/vortex-array/src/array/operator.rs +++ b/vortex-array/src/array/operator.rs @@ -3,11 +3,12 @@ use std::sync::Arc; -use vortex_error::{VortexResult, vortex_panic}; +use vortex_error::{vortex_panic, VortexResult}; use vortex_mask::Mask; -use vortex_vector::{Vector, VectorOps, vector_matches_dtype}; +use vortex_vector::{vector_matches_dtype, Vector, VectorOps}; use crate::execution::{BatchKernelRef, BindCtx, DummyExecutionCtx, ExecutionCtx}; +use 
crate::pipeline::source_driver::PipelineSourceDriver; use crate::vtable::{OperatorVTable, VTable}; use crate::{Array, ArrayAdapter, ArrayRef}; @@ -62,6 +63,13 @@ impl ArrayOperator for Arc { impl ArrayOperator for ArrayAdapter { fn execute_batch(&self, selection: &Mask, ctx: &mut dyn ExecutionCtx) -> VortexResult { + // Check if the array is a pipeline source, and if so use the single-node driver for now. + if let Some(pipeline_source) = + >::as_pipelined_source(&self.0) + { + return PipelineSourceDriver::new(pipeline_source).execute(selection); + } + let vector = >::execute_batch(&self.0, selection, ctx)?; diff --git a/vortex-array/src/arrays/primitive/vtable/operator.rs b/vortex-array/src/arrays/primitive/vtable/operator.rs index caa15c04901..d4b74821bbf 100644 --- a/vortex-array/src/arrays/primitive/vtable/operator.rs +++ b/vortex-array/src/arrays/primitive/vtable/operator.rs @@ -3,16 +3,16 @@ use vortex_buffer::Buffer; use vortex_compute::filter::Filter; -use vortex_dtype::{NativePType, PTypeDowncastExt, match_each_native_ptype}; +use vortex_dtype::{match_each_native_ptype, NativePType, PTypeDowncastExt}; use vortex_error::VortexResult; use vortex_mask::Mask; -use vortex_vector::VectorMut; use vortex_vector::primitive::PVector; +use vortex_vector::{VectorMut, VectorMutOps}; use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable}; -use crate::execution::{BatchKernelRef, BindCtx, kernel}; +use crate::execution::{kernel, BatchKernelRef, BindCtx}; use crate::pipeline::bit_view::BitView; -use crate::pipeline::{BindContext, KernelContext, N, PipelinedSource, SourceKernel}; +use crate::pipeline::{BindContext, KernelContext, PipelinedSource, SourceKernel, N}; use crate::vtable::{OperatorVTable, ValidityHelper}; use crate::{ArrayRef, IntoArray}; @@ -106,16 +106,16 @@ impl SourceKernel for PrimitiveKernel { // separately from copying over the elements. 
unsafe { out.validity_mut().append_n(true, selection.true_count()); - out.elements_mut().set_len(selection.true_count()); + let prev_len = out.len(); + out.elements_mut() + .set_len(prev_len + selection.true_count()); } let source = &self.buffer.as_slice()[self.offset..]; let mut out_pos = 0; - selection.iter_slices(|(start, end)| { - print!("Slicing {} to {}\n", start, end); - let len = end - start; - out.as_mut()[out_pos..][..len].copy_from_slice(&source[start..end]); + selection.iter_slices(|(start, len)| { + out.as_mut()[out_pos..][..len].copy_from_slice(&source[start..][..len]); out_pos += len; }); diff --git a/vortex-array/src/pipeline/bit_view.rs b/vortex-array/src/pipeline/bit_view.rs index aa3129b66fc..76189e53f92 100644 --- a/vortex-array/src/pipeline/bit_view.rs +++ b/vortex-array/src/pipeline/bit_view.rs @@ -214,6 +214,8 @@ impl<'a> BitView<'a> { pub fn iter_slices(&self, mut f: F) where + // FIXME(ngates): I have repeatedly assumed this to be a (start, end) slice, not a + // (start, len)... I think we should wrap this in a struct to avoid confusion. F: FnMut((usize, usize)), { if self.true_count == 0 { diff --git a/vortex-array/src/pipeline/source_driver.rs b/vortex-array/src/pipeline/source_driver.rs index 7f11ddcc370..04e7eda0a79 100644 --- a/vortex-array/src/pipeline/source_driver.rs +++ b/vortex-array/src/pipeline/source_driver.rs @@ -2,14 +2,14 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use itertools::Itertools; -use vortex_error::{VortexResult, vortex_panic}; +use vortex_error::{vortex_panic, VortexResult}; use vortex_mask::Mask; use vortex_vector::{Vector, VectorMut, VectorMutOps}; use crate::pipeline::bit_view::{BitView, BitViewExt}; -use crate::pipeline::{BindContext, KernelContext, N, PipelinedSource, VectorId}; +use crate::pipeline::{BindContext, KernelContext, PipelinedSource, VectorId, N}; -/// Temporary driver for executing a single array in a pipelined fashion. 
+/// Temporary driver for executing a single source array in a pipelined fashion. pub struct PipelineSourceDriver<'a> { array: &'a dyn PipelinedSource, } diff --git a/vortex-buffer/src/buffer_mut.rs b/vortex-buffer/src/buffer_mut.rs index add9ae5f7be..c00e573da52 100644 --- a/vortex-buffer/src/buffer_mut.rs +++ b/vortex-buffer/src/buffer_mut.rs @@ -9,7 +9,7 @@ use std::ops::{Deref, DerefMut}; use bytes::buf::UninitSlice; use bytes::{Buf, BufMut, BytesMut}; -use vortex_error::{VortexExpect, vortex_panic}; +use vortex_error::{vortex_panic, VortexExpect}; use crate::debug::TruncatedDebug; use crate::trusted_len::TrustedLen; @@ -241,10 +241,15 @@ impl BufferMut { } } + /// Sets the length of the buffer. + /// /// # Safety - /// The caller must ensure that the buffer was properly initialized up to `len`. + /// + /// The caller must ensure that there is sufficient capacity in the buffer and that the values + /// are valid up to `len`. #[inline] pub unsafe fn set_len(&mut self, len: usize) { + debug_assert!(len <= self.capacity()); unsafe { self.bytes.set_len(len * size_of::()) }; self.length = len; } @@ -726,7 +731,7 @@ impl Write for ByteBufferMut { mod test { use bytes::{Buf, BufMut}; - use crate::{Alignment, BufferMut, ByteBufferMut, buffer_mut}; + use crate::{buffer_mut, Alignment, BufferMut, ByteBufferMut}; #[test] fn capacity() { From fe16812dc1b32b0f4b1b15220b73c654d33abf77 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Sat, 8 Nov 2025 21:22:40 -0500 Subject: [PATCH 08/10] pipelined execution Signed-off-by: Nicholas Gates --- vortex-array/src/array/operator.rs | 4 +- .../src/arrays/primitive/vtable/operator.rs | 101 ++++++++++++++---- vortex-array/src/pipeline/bit_view.rs | 43 +++++--- vortex-array/src/pipeline/mod.rs | 19 +++- vortex-array/src/pipeline/source_driver.rs | 4 +- vortex-buffer/src/buffer_mut.rs | 4 +- vortex-mask/src/mask_mut.rs | 4 + 7 files changed, 136 insertions(+), 43 deletions(-) diff --git a/vortex-array/src/array/operator.rs 
b/vortex-array/src/array/operator.rs index 74731a71ce5..8d7daae5a33 100644 --- a/vortex-array/src/array/operator.rs +++ b/vortex-array/src/array/operator.rs @@ -3,9 +3,9 @@ use std::sync::Arc; -use vortex_error::{vortex_panic, VortexResult}; +use vortex_error::{VortexResult, vortex_panic}; use vortex_mask::Mask; -use vortex_vector::{vector_matches_dtype, Vector, VectorOps}; +use vortex_vector::{Vector, VectorOps, vector_matches_dtype}; use crate::execution::{BatchKernelRef, BindCtx, DummyExecutionCtx, ExecutionCtx}; use crate::pipeline::source_driver::PipelineSourceDriver; diff --git a/vortex-array/src/arrays/primitive/vtable/operator.rs b/vortex-array/src/arrays/primitive/vtable/operator.rs index d4b74821bbf..60063f0d39a 100644 --- a/vortex-array/src/arrays/primitive/vtable/operator.rs +++ b/vortex-array/src/arrays/primitive/vtable/operator.rs @@ -1,18 +1,20 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_buffer::Buffer; +use vortex_buffer::{BitBuffer, Buffer}; use vortex_compute::filter::Filter; -use vortex_dtype::{match_each_native_ptype, NativePType, PTypeDowncastExt}; +use vortex_dtype::{NativePType, PTypeDowncastExt, match_each_native_ptype}; use vortex_error::VortexResult; -use vortex_mask::Mask; use vortex_vector::primitive::PVector; use vortex_vector::{VectorMut, VectorMutOps}; use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable}; -use crate::execution::{kernel, BatchKernelRef, BindCtx}; -use crate::pipeline::bit_view::BitView; -use crate::pipeline::{BindContext, KernelContext, PipelinedSource, SourceKernel, N}; +use crate::execution::{BatchKernelRef, BindCtx, kernel}; +use crate::pipeline::bit_view::{BitSlice, BitView}; +use crate::pipeline::{ + AllNullSourceKernel, BindContext, KernelContext, N, PipelinedSource, SourceKernel, +}; +use crate::validity::Validity; use crate::vtable::{OperatorVTable, ValidityHelper}; use crate::{ArrayRef, IntoArray}; @@ -71,25 +73,81 @@ impl 
OperatorVTable for PrimitiveVTable { } impl PipelinedSource for PrimitiveArray { - fn bind_source(&self, _ctx: &mut dyn BindContext) -> VortexResult> { - match_each_native_ptype!(self.ptype(), |T| { - let primitive_kernel = PrimitiveKernel { - buffer: self.buffer::().clone(), - validity: self.validity_mask(), - offset: 0, - }; - Ok(Box::new(primitive_kernel)) - }) + fn bind_source(&self, ctx: &mut dyn BindContext) -> VortexResult> { + match self.validity() { + Validity::NonNullable | Validity::AllValid => { + match_each_native_ptype!(self.ptype(), |T| { + let primitive_kernel = NonNullablePrimitiveKernel { + buffer: self.buffer::(), + offset: 0, + }; + Ok(Box::new(primitive_kernel)) + }) + } + Validity::AllInvalid => Ok(Box::new(AllNullSourceKernel)), + Validity::Array(_) => { + let validity = ctx.batch_input(0).into_bool(); + // Validity is non-nullable, so we extract the inner bit buffer. + let (validity, _) = validity.into_parts(); + + match_each_native_ptype!(self.ptype(), |T| { + let primitive_kernel = NullablePrimitiveKernel { + buffer: self.buffer::(), + validity, + offset: 0, + }; + Ok(Box::new(primitive_kernel)) + }) + } + } + } +} + +struct NonNullablePrimitiveKernel { + buffer: Buffer, + offset: usize, +} + +impl SourceKernel for NonNullablePrimitiveKernel { + fn skip(&mut self, n: usize) { + self.offset += n * N; + } + + fn step( + &mut self, + _ctx: &KernelContext, + selection: &BitView, + out: &mut VectorMut, + ) -> VortexResult<()> { + let out = out.as_primitive_mut().downcast::(); + + // SAFETY: we know the output has sufficient capacity. 
+ unsafe { + out.validity_mut().append_n(true, selection.true_count()); + let prev_len = out.len(); + out.elements_mut() + .set_len(prev_len + selection.true_count()); + } + + let source = &self.buffer.as_slice()[self.offset..]; + let mut out_pos = 0; + selection.iter_slices(|BitSlice { start, len }| { + out.as_mut()[out_pos..][..len].copy_from_slice(&source[start..][..len]); + out_pos += len; + }); + + Ok(()) } } -struct PrimitiveKernel { +struct NullablePrimitiveKernel { buffer: Buffer, - validity: Mask, + #[allow(dead_code)] // TODO(ngates): implement appending validity bits + validity: BitBuffer, offset: usize, } -impl SourceKernel for PrimitiveKernel { +impl SourceKernel for NullablePrimitiveKernel { fn skip(&mut self, n: usize) { self.offset += n * N; } @@ -114,9 +172,14 @@ impl SourceKernel for PrimitiveKernel { let source = &self.buffer.as_slice()[self.offset..]; let mut out_pos = 0; - selection.iter_slices(|(start, len)| { + selection.iter_slices(|BitSlice { start, len }| { + // Copy over the elements. out.as_mut()[out_pos..][..len].copy_from_slice(&source[start..][..len]); out_pos += len; + + // Append the validity bits. + let _validity = unsafe { out.validity_mut() }; + todo!("Append validity bits correctly and optimally!"); }); Ok(()) diff --git a/vortex-array/src/pipeline/bit_view.rs b/vortex-array/src/pipeline/bit_view.rs index 76189e53f92..9e59dc89819 100644 --- a/vortex-array/src/pipeline/bit_view.rs +++ b/vortex-array/src/pipeline/bit_view.rs @@ -212,11 +212,13 @@ impl<'a> BitView<'a> { } } + /// Runs the provided function `f` for each range of `true` bits in the view. + /// + /// The function `f` receives a [`BitSlice`] containing the inclusive `start` bit as well as + /// the length. pub fn iter_slices(&self, mut f: F) where - // FIXME(ngates): I have repeatedly assumed this to be a (start, end) slice, not a - // (start, len)... I think we should wrap this in a struct to avoid confusion. 
- F: FnMut((usize, usize)), + F: FnMut(BitSlice), { if self.true_count == 0 { return; @@ -231,7 +233,10 @@ impl<'a> BitView<'a> { 0 => { // If a slice was being tracked, the run ends at the start of this word. if slice_length > 0 { - f((slice_start_bit, slice_length)); + f(BitSlice { + start: slice_start_bit, + len: slice_length, + }); slice_length = 0; } } @@ -250,7 +255,10 @@ impl<'a> BitView<'a> { // If a run was open, and we hit a zero gap, report the finished slice if slice_length > 0 && zeros > 0 { - f((slice_start_bit, slice_length)); + f(BitSlice { + start: slice_start_bit, + len: slice_length, + }); slice_length = 0; // Reset state for a new slice } @@ -284,22 +292,26 @@ impl<'a> BitView<'a> { } if slice_length > 0 { - f((slice_start_bit, slice_length)); + f(BitSlice { + start: slice_start_bit, + len: slice_length, + }); } } - /// Runs the provided function `f` for each range of `true` bits in the view. - /// - /// The function `f` receives a tuple `(start, len)` where `start` is the index of the first - /// `true` bit and `len` is the number of consecutive `true` bits. - /// - /// FIXME(ngates): this code is broken. - pub fn as_raw(&self) -> &[u8; N_BYTES] { self.bits.as_ref() } } +/// A slice of bits within a [`BitBuffer`]. +/// +/// We use this struct to avoid a common mistake of assuming the slices represent (start, end) ranges, +pub struct BitSlice { + pub start: usize, + pub len: usize, +} + pub trait BitViewExt { /// Iterate the [`BitBuffer`] in fixed-size chunks of [`BitView`]. 
/// @@ -319,7 +331,7 @@ impl BitViewExt for BitBuffer { 0, "BitView iteration requires zero bit offset" ); - let n_views = (self.len() + N - 1) / N; + let n_views = self.len().div_ceil(N); BitViewIterator { bits: self.inner().as_ref(), view_idx: 0, @@ -365,8 +377,6 @@ impl<'a> Iterator for BitViewIterator<'a> { #[cfg(test)] mod tests { - use std::usize; - use super::*; #[test] @@ -591,7 +601,6 @@ mod tests { view.iter_slices(|slice| slices.push(slice)); assert_eq!(slices.len(), 1); - assert_eq!(slices[0], (0, i)); } } } diff --git a/vortex-array/src/pipeline/mod.rs b/vortex-array/src/pipeline/mod.rs index f71605479cd..ad8ca8d210b 100644 --- a/vortex-array/src/pipeline/mod.rs +++ b/vortex-array/src/pipeline/mod.rs @@ -25,7 +25,7 @@ use std::ops::Deref; use bit_view::BitView; use vortex_error::VortexResult; -use vortex_vector::{Vector, VectorMut}; +use vortex_vector::{Vector, VectorMut, VectorMutOps}; use crate::Array; @@ -150,3 +150,20 @@ impl KernelContext { &self.vectors[vector_id] } } + +/// A general implementation of a source kernel that produces all null values. 
+pub struct AllNullSourceKernel; + +impl SourceKernel for AllNullSourceKernel { + fn skip(&mut self, _n: usize) {} + + fn step( + &mut self, + _ctx: &KernelContext, + selection: &BitView, + out: &mut VectorMut, + ) -> VortexResult<()> { + out.append_nulls(selection.true_count()); + Ok(()) + } +} diff --git a/vortex-array/src/pipeline/source_driver.rs b/vortex-array/src/pipeline/source_driver.rs index 04e7eda0a79..7515e603ec5 100644 --- a/vortex-array/src/pipeline/source_driver.rs +++ b/vortex-array/src/pipeline/source_driver.rs @@ -2,12 +2,12 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use itertools::Itertools; -use vortex_error::{vortex_panic, VortexResult}; +use vortex_error::{VortexResult, vortex_panic}; use vortex_mask::Mask; use vortex_vector::{Vector, VectorMut, VectorMutOps}; use crate::pipeline::bit_view::{BitView, BitViewExt}; -use crate::pipeline::{BindContext, KernelContext, PipelinedSource, VectorId, N}; +use crate::pipeline::{BindContext, KernelContext, N, PipelinedSource, VectorId}; /// Temporary driver for executing a single source array in a pipelined fashion. 
pub struct PipelineSourceDriver<'a> { diff --git a/vortex-buffer/src/buffer_mut.rs b/vortex-buffer/src/buffer_mut.rs index c00e573da52..f4ba91bbfef 100644 --- a/vortex-buffer/src/buffer_mut.rs +++ b/vortex-buffer/src/buffer_mut.rs @@ -9,7 +9,7 @@ use std::ops::{Deref, DerefMut}; use bytes::buf::UninitSlice; use bytes::{Buf, BufMut, BytesMut}; -use vortex_error::{vortex_panic, VortexExpect}; +use vortex_error::{VortexExpect, vortex_panic}; use crate::debug::TruncatedDebug; use crate::trusted_len::TrustedLen; @@ -731,7 +731,7 @@ impl Write for ByteBufferMut { mod test { use bytes::{Buf, BufMut}; - use crate::{buffer_mut, Alignment, BufferMut, ByteBufferMut}; + use crate::{Alignment, BufferMut, ByteBufferMut, buffer_mut}; #[test] fn capacity() { diff --git a/vortex-mask/src/mask_mut.rs b/vortex-mask/src/mask_mut.rs index e7e5c736ed6..adb2d86ae17 100644 --- a/vortex-mask/src/mask_mut.rs +++ b/vortex-mask/src/mask_mut.rs @@ -95,6 +95,10 @@ impl MaskMut { } /// Set the length of the mask. + /// + /// # Safety + /// + /// The caller must ensure that `new_len` is less than the capacity of the mask. 
pub unsafe fn set_len(&mut self, new_len: usize) { debug_assert!(new_len < self.capacity()); match &mut self.0 { From 54ae1e43412e0ef74655b8beb6800d99662baf52 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Mon, 10 Nov 2025 08:44:38 -0500 Subject: [PATCH 09/10] pipelined execution Signed-off-by: Nicholas Gates --- .../src/arrays/primitive/vtable/operator.rs | 128 +----------------- vortex-array/src/pipeline/mod.rs | 61 ++++----- vortex-array/src/pipeline/source_driver.rs | 37 +---- vortex-array/src/vtable/operator.rs | 16 ++- 4 files changed, 47 insertions(+), 195 deletions(-) diff --git a/vortex-array/src/arrays/primitive/vtable/operator.rs b/vortex-array/src/arrays/primitive/vtable/operator.rs index 60063f0d39a..fa18e516cec 100644 --- a/vortex-array/src/arrays/primitive/vtable/operator.rs +++ b/vortex-array/src/arrays/primitive/vtable/operator.rs @@ -1,28 +1,18 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_buffer::{BitBuffer, Buffer}; +use vortex_buffer::Buffer; use vortex_compute::filter::Filter; -use vortex_dtype::{NativePType, PTypeDowncastExt, match_each_native_ptype}; +use vortex_dtype::match_each_native_ptype; use vortex_error::VortexResult; use vortex_vector::primitive::PVector; -use vortex_vector::{VectorMut, VectorMutOps}; use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable}; use crate::execution::{BatchKernelRef, BindCtx, kernel}; -use crate::pipeline::bit_view::{BitSlice, BitView}; -use crate::pipeline::{ - AllNullSourceKernel, BindContext, KernelContext, N, PipelinedSource, SourceKernel, -}; -use crate::validity::Validity; use crate::vtable::{OperatorVTable, ValidityHelper}; use crate::{ArrayRef, IntoArray}; impl OperatorVTable for PrimitiveVTable { - fn as_pipelined_source(array: &PrimitiveArray) -> Option<&dyn PipelinedSource> { - Some(array) - } - fn bind( array: &PrimitiveArray, selection: Option<&ArrayRef>, @@ -71,117 +61,3 @@ impl OperatorVTable for 
PrimitiveVTable { Ok(None) } } - -impl PipelinedSource for PrimitiveArray { - fn bind_source(&self, ctx: &mut dyn BindContext) -> VortexResult> { - match self.validity() { - Validity::NonNullable | Validity::AllValid => { - match_each_native_ptype!(self.ptype(), |T| { - let primitive_kernel = NonNullablePrimitiveKernel { - buffer: self.buffer::(), - offset: 0, - }; - Ok(Box::new(primitive_kernel)) - }) - } - Validity::AllInvalid => Ok(Box::new(AllNullSourceKernel)), - Validity::Array(_) => { - let validity = ctx.batch_input(0).into_bool(); - // Validity is non-nullable, so we extract the inner bit buffer. - let (validity, _) = validity.into_parts(); - - match_each_native_ptype!(self.ptype(), |T| { - let primitive_kernel = NullablePrimitiveKernel { - buffer: self.buffer::(), - validity, - offset: 0, - }; - Ok(Box::new(primitive_kernel)) - }) - } - } - } -} - -struct NonNullablePrimitiveKernel { - buffer: Buffer, - offset: usize, -} - -impl SourceKernel for NonNullablePrimitiveKernel { - fn skip(&mut self, n: usize) { - self.offset += n * N; - } - - fn step( - &mut self, - _ctx: &KernelContext, - selection: &BitView, - out: &mut VectorMut, - ) -> VortexResult<()> { - let out = out.as_primitive_mut().downcast::(); - - // SAFETY: we know the output has sufficient capacity. 
- unsafe { - out.validity_mut().append_n(true, selection.true_count()); - let prev_len = out.len(); - out.elements_mut() - .set_len(prev_len + selection.true_count()); - } - - let source = &self.buffer.as_slice()[self.offset..]; - let mut out_pos = 0; - selection.iter_slices(|BitSlice { start, len }| { - out.as_mut()[out_pos..][..len].copy_from_slice(&source[start..][..len]); - out_pos += len; - }); - - Ok(()) - } -} - -struct NullablePrimitiveKernel { - buffer: Buffer, - #[allow(dead_code)] // TODO(ngates): implement appending validity bits - validity: BitBuffer, - offset: usize, -} - -impl SourceKernel for NullablePrimitiveKernel { - fn skip(&mut self, n: usize) { - self.offset += n * N; - } - - fn step( - &mut self, - _ctx: &KernelContext, - selection: &BitView, - out: &mut VectorMut, - ) -> VortexResult<()> { - let out = out.as_primitive_mut().downcast::(); - - // SAFETY: we know the output has sufficient capacity. We just have to append nulls - // separately from copying over the elements. - unsafe { - out.validity_mut().append_n(true, selection.true_count()); - let prev_len = out.len(); - out.elements_mut() - .set_len(prev_len + selection.true_count()); - } - - let source = &self.buffer.as_slice()[self.offset..]; - - let mut out_pos = 0; - selection.iter_slices(|BitSlice { start, len }| { - // Copy over the elements. - out.as_mut()[out_pos..][..len].copy_from_slice(&source[start..][..len]); - out_pos += len; - - // Append the validity bits. - let _validity = unsafe { out.validity_mut() }; - todo!("Append validity bits correctly and optimally!"); - }); - - Ok(()) - } -} diff --git a/vortex-array/src/pipeline/mod.rs b/vortex-array/src/pipeline/mod.rs index ad8ca8d210b..3f76522bd6f 100644 --- a/vortex-array/src/pipeline/mod.rs +++ b/vortex-array/src/pipeline/mod.rs @@ -1,23 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Vortex crate containing vectorized operator processing. -//! -//! 
This module contains experiments into pipelined data processing within Vortex. -//! -//! Arrays (and eventually Layouts) will be convertible into a [`Kernel`] that can then be -//! exported into a [`ViewMut`] one chunk of [`N`] elements at a time. This allows us to keep -//! compute largely within the L1 cache, as well as to write out canonical data into externally -//! provided buffers. -//! -//! Each chunk is represented in a canonical physical form, as determined by the logical -//! [`vortex_dtype::DType`] of the array. This provides a predicate base on which to perform -//! compute. Unlike DuckDB and other vectorized systems, we force a single canonical representation -//! instead of supporting multiple encodings because compute push-down is applied a priori to the -//! logical representation. -//! -//! It is a work-in-progress and is not yet used in production. - pub mod bit_view; pub mod source_driver; @@ -38,8 +21,11 @@ pub const N_BYTES: usize = N / 8; /// Number of usize words needed to store N bits pub const N_WORDS: usize = N / usize::BITS as usize; -/// Returned by an array to indicate that it can be executed in a pipelined fashion. -pub trait PipelinedOperator: Array { +/// Indicates that an array supports acting as a transformation node in a pipelined execution. +/// +/// That is, it has one or more child arrays for which each input element produces a single output +/// element. See [`PipelineSource`] for nodes that have zero pipelined children. +pub trait PipelineTransform: Deref { // Whether this operator works by mutating its first child in-place. // // If `true`, the operator is invoked with the first child's input data passed via the @@ -56,36 +42,39 @@ pub trait PipelinedOperator: Array { /// computed before pipelined execution begins. fn is_pipelined_child(&self, child_idx: usize) -> bool; - /// Bind the operator into a [`Kernel`] for pipelined execution. + /// Bind the operator into a [`TransformKernel`] for pipelined execution. 
/// /// The provided [`BindContext`] can be used to obtain vector IDs for pipelined children and /// batch IDs for batch children. Each child can only be bound once. - fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult>; + fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult>; } -pub trait PipelinedSource: Deref { - /// Bind the operator into a [`Kernel`] for pipelined execution. +/// Indicates that an array supports acting as a source node in a pipelined execution. +pub trait PipelineSource: Deref { + /// Bind the operator into a [`SourceKernel`] for pipelined execution. /// /// The provided [`BindContext`] can be used to obtain vector IDs for pipelined children and /// batch IDs for batch children. Each child can only be bound once. - fn bind_source(&self, ctx: &mut dyn BindContext) -> VortexResult>; + fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult>; } /// The context used when binding an operator for execution. pub trait BindContext { /// Returns a [`VectorId`] that can be passed to the [`KernelContext`] within the body of - /// the [`Kernel`] to access the given child as a pipelined input vector. + /// the kernel to access the given child as a pipelined input vector. /// /// # Panics /// - /// If the child index requested here was not listed in [`Pipelined::pipelined_children`]. + /// If the child index requested here was not marked as a pipelined child in + /// [`PipelineTransform::is_pipelined_child`]. fn pipelined_input(&self, child_idx: usize) -> VectorId; /// Returns the batch input vector for the given child. /// /// # Panics /// - /// If the child index requested here was listed in [`Pipelined::pipelined_children`]. + /// If the child index requested here was marked as a pipelined child in + /// [`PipelineTransform::is_pipelined_child`]. 
fn batch_input(&self, child_idx: usize) -> Vector; } @@ -115,7 +104,12 @@ pub trait SourceKernel: Send { /// For example, if `n` is 3, then the kernel should skip over `3 * N` elements of input data. fn skip(&mut self, n: usize); - /// Attempts to perform a single step of the operator, writing data to the output vector. + /// Attempts to perform a single step of the operator, appending data to the output vector. + /// + /// The provided selection mask indicates which elements of the current chunk should be + /// appended to the output vector. + /// + /// The provided output vector is guaranteed to have at least `N` elements of capacity. fn step( &mut self, ctx: &KernelContext, @@ -124,12 +118,13 @@ pub trait SourceKernel: Send { ) -> VortexResult<()>; } -pub trait OperatorKernel: Send { - /// Attempts to perform a single step of the operator, writing data to the output vector. +pub trait TransformKernel: Send { + /// Attempts to perform a single step of the operator, appending data to the output vector. + /// + /// The input vectors can be accessed via the provided `KernelContext`. /// - /// The output vector has length equal to the number of valid elements in the input vectors. - /// This number of values should be written to the output vector. - fn step(&self, ctx: &KernelContext, out: &mut VectorMut) -> VortexResult<()>; + /// The provided output vector is guaranteed to have at least `N` elements of capacity. + fn step(&mut self, ctx: &KernelContext, out: &mut VectorMut) -> VortexResult<()>; } /// Context passed to kernels during execution, providing access to vectors. 
diff --git a/vortex-array/src/pipeline/source_driver.rs b/vortex-array/src/pipeline/source_driver.rs index 7515e603ec5..63b97ff849d 100644 --- a/vortex-array/src/pipeline/source_driver.rs +++ b/vortex-array/src/pipeline/source_driver.rs @@ -7,15 +7,15 @@ use vortex_mask::Mask; use vortex_vector::{Vector, VectorMut, VectorMutOps}; use crate::pipeline::bit_view::{BitView, BitViewExt}; -use crate::pipeline::{BindContext, KernelContext, N, PipelinedSource, VectorId}; +use crate::pipeline::{BindContext, KernelContext, N, PipelineSource, VectorId}; /// Temporary driver for executing a single source array in a pipelined fashion. pub struct PipelineSourceDriver<'a> { - array: &'a dyn PipelinedSource, + array: &'a dyn PipelineSource, } impl<'a> PipelineSourceDriver<'a> { - pub fn new(array: &'a dyn PipelinedSource) -> Self { + pub fn new(array: &'a dyn PipelineSource) -> Self { Self { array } } @@ -34,7 +34,7 @@ impl<'a> PipelineSourceDriver<'a> { let mut bind_ctx = PipelineSourceBindCtx { batch_inputs: &batch_inputs, }; - let mut kernel = self.array.bind_source(&mut bind_ctx)?; + let mut kernel = self.array.bind(&mut bind_ctx)?; let kernel_ctx = KernelContext::empty(); // Allocate an output vector, with up to N bytes of padding to ensure every call to @@ -46,7 +46,6 @@ impl<'a> PipelineSourceDriver<'a> { selection.true_count().next_multiple_of(N) + N, ); - // TODO(ngates): change behaviour based on the density of the selection mask. match selection { Mask::AllTrue(_) => { // Select everything, so we can just run the kernel in a tight loop. 
@@ -101,31 +100,3 @@ impl BindContext for PipelineSourceBindCtx<'_> { self.batch_inputs[child_idx].clone() } } - -#[cfg(test)] -mod test { - use vortex_buffer::buffer; - use vortex_dtype::PTypeDowncastExt; - use vortex_mask::Mask; - use vortex_vector::VectorOps; - - use crate::arrays::PrimitiveArray; - use crate::pipeline::source_driver::PipelineSourceDriver; - use crate::validity::Validity; - - #[test] - fn test_primitive() { - let array = PrimitiveArray::new::(buffer![0..100000u32], Validity::AllValid); - - // Create a selection mask with some ranges. - let mask = Mask::from_iter((0..100000).map(|i| i % 30 < 20)); - - let out = PipelineSourceDriver::new(&array) - .execute(&mask) - .unwrap() - .into_primitive() - .downcast::(); - - assert_eq!(out.len(), mask.true_count()); - } -} diff --git a/vortex-array/src/vtable/operator.rs b/vortex-array/src/vtable/operator.rs index 9603ca0cf16..2edb26d0213 100644 --- a/vortex-array/src/vtable/operator.rs +++ b/vortex-array/src/vtable/operator.rs @@ -8,7 +8,7 @@ use vortex_vector::Vector; use crate::ArrayRef; use crate::array::IntoArray; use crate::execution::{BatchKernelRef, BindCtx, ExecutionCtx}; -use crate::pipeline::PipelinedSource; +use crate::pipeline::{PipelineSource, PipelineTransform}; use crate::vtable::{NotSupported, VTable}; /// A vtable for the new operator-based array functionality. Eventually this vtable will be @@ -40,8 +40,10 @@ pub trait OperatorVTable { Self::bind(array, Some(&selection.clone().into_array()), &mut ())?.execute() } - /// Downcast this array into a [`PipelinedSource`] if it supports pipelined execution. - fn as_pipelined_source(_array: &V::Array) -> Option<&dyn PipelinedSource> { + /// Downcast this array into a [`PipelineNode`] if it supports pipelined execution. + /// + /// Each node is either a source node or a transformation node. + fn pipeline_node(_array: &V::Array) -> Option> { None } @@ -102,6 +104,14 @@ pub trait OperatorVTable { } } +/// An enum over the types of pipeline nodes. 
+pub enum PipelineNode<'a> { + /// This node is a source node in a pipeline. + Source(&'a dyn PipelineSource), + /// This node is a transformation node in a pipeline. + Transform(&'a dyn PipelineTransform), +} + impl OperatorVTable for NotSupported { fn bind( array: &V::Array, From 7a65acb3c5bde2ee5337e8033e4305ac5efa9787 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Mon, 10 Nov 2025 08:53:09 -0500 Subject: [PATCH 10/10] pipelined execution Signed-off-by: Nicholas Gates --- vortex-array/src/array/operator.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/vortex-array/src/array/operator.rs b/vortex-array/src/array/operator.rs index 8d7daae5a33..7bb348cb63b 100644 --- a/vortex-array/src/array/operator.rs +++ b/vortex-array/src/array/operator.rs @@ -9,7 +9,7 @@ use vortex_vector::{Vector, VectorOps, vector_matches_dtype}; use crate::execution::{BatchKernelRef, BindCtx, DummyExecutionCtx, ExecutionCtx}; use crate::pipeline::source_driver::PipelineSourceDriver; -use crate::vtable::{OperatorVTable, VTable}; +use crate::vtable::{OperatorVTable, PipelineNode, VTable}; use crate::{Array, ArrayAdapter, ArrayRef}; /// Array functions as provided by the `OperatorVTable`. @@ -63,11 +63,12 @@ impl ArrayOperator for Arc { impl ArrayOperator for ArrayAdapter { fn execute_batch(&self, selection: &Mask, ctx: &mut dyn ExecutionCtx) -> VortexResult { - // Check if the array is a pipeline source, and if so use the single-node driver for now. - if let Some(pipeline_source) = - >::as_pipelined_source(&self.0) + // If the array is a pipeline source node, use the single-node driver for now. + if let Some(pipeline_node) = + >::pipeline_node(&self.0) + && let PipelineNode::Source(source) = pipeline_node { - return PipelineSourceDriver::new(pipeline_source).execute(selection); + return PipelineSourceDriver::new(source).execute(selection); } let vector =