From be9f62ee41ecfc9c87aa3753c70947143b191705 Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Fri, 7 Nov 2025 11:06:40 -0500
Subject: [PATCH 01/10] pipelined execution

Signed-off-by: Nicholas Gates <nick@nickgates.com>
---
 encodings/fastlanes/src/for/pipeline.rs    |  8 +-
 vortex-array/src/operator/compare.rs       |  8 +-
 vortex-array/src/pipeline/mod.rs           | 96 ++++++++++++----------
 vortex-array/src/pipeline/operator/bind.rs |  4 +-
 vortex-array/src/pipeline/row_selection.rs | 27 ------
 vortex-array/src/vtable/operator.rs        |  9 +-
 6 files changed, 66 insertions(+), 86 deletions(-)
 delete mode 100644 vortex-array/src/pipeline/row_selection.rs
diff --git a/encodings/fastlanes/src/for/pipeline.rs b/encodings/fastlanes/src/for/pipeline.rs
index 428c334c88e..2a5b1abbeb5 100644
--- a/encodings/fastlanes/src/for/pipeline.rs
+++ b/encodings/fastlanes/src/for/pipeline.rs
@@ -7,7 +7,6 @@ use std::marker::PhantomData;
 use std::sync::Arc;
 
 use num_traits::WrappingAdd;
-use vortex_array::Array;
 use vortex_array::operator::{
     LengthBounds, Operator, OperatorEq, OperatorHash, OperatorId, OperatorRef,
 };
@@ -17,8 +16,9 @@ use vortex_array::pipeline::{
     BindContext, Element, Kernel, KernelContext, PipelinedOperator, RowSelection, VectorId,
 };
 use vortex_array::vtable::OperatorVTable;
-use vortex_dtype::{DType, NativePType, PType, match_each_integer_ptype};
-use vortex_error::{VortexExpect, VortexResult, vortex_bail};
+use vortex_array::Array;
+use vortex_dtype::{match_each_integer_ptype, DType, NativePType, PType};
+use vortex_error::{vortex_bail, VortexExpect, VortexResult};
 use vortex_scalar::Scalar;
 
 use crate::{FoRArray, FoRVTable};
@@ -150,7 +150,7 @@ impl PipelinedOperator for FoROperator {
         match_each_integer_ptype!(ptype, |T| {
             match_each_integer_ptype!(self.encoded_ptype, |E| {
                 Ok(Box::new(FoRKernel::<T, E> {
-                    child: ctx.children()[0],
+                    child: ctx.pipelined_input()[0],
                     reference: self
                         .reference
                         .as_primitive()
diff --git a/vortex-array/src/operator/compare.rs b/vortex-array/src/operator/compare.rs
index 96fc20936ca..485cf1f8284 100644
--- a/vortex-array/src/operator/compare.rs
+++ b/vortex-array/src/operator/compare.rs
@@ -183,7 +183,7 @@ impl PipelinedOperator for CompareOperator {
             return match_each_native_ptype!(ptype, |T| {
                 match_each_compare_op!(self.op.swap(), |Op| {
                     Ok(Box::new(ScalarComparePrimitiveKernel::<T, Op> {
-                        lhs: ctx.children()[1],
+                        lhs: ctx.pipelined_input()[1],
                         rhs: lhs_const
                             .scalar()
                             .as_primitive()
@@ -201,7 +201,7 @@ impl PipelinedOperator for CompareOperator {
             return match_each_native_ptype!(ptype, |T| {
                 match_each_compare_op!(self.op, |Op| {
                     Ok(Box::new(ScalarComparePrimitiveKernel::<T, Op> {
-                        lhs: ctx.children()[0],
+                        lhs: ctx.pipelined_input()[0],
                         rhs: rhs_const
                             .scalar()
                             .as_primitive()
@@ -216,8 +216,8 @@ impl PipelinedOperator for CompareOperator {
         match_each_native_ptype!(ptype, |T| {
             match_each_compare_op!(self.op, |Op| {
                 Ok(Box::new(ComparePrimitiveKernel::<T, Op> {
-                    lhs: ctx.children()[0],
-                    rhs: ctx.children()[1],
+                    lhs: ctx.pipelined_input()[0],
+                    rhs: ctx.pipelined_input()[1],
                     _phantom: PhantomData,
                 }) as Box<dyn Kernel>)
             })
diff --git a/vortex-array/src/pipeline/mod.rs b/vortex-array/src/pipeline/mod.rs
index 866b8e29bb9..ff918d86036 100644
--- a/vortex-array/src/pipeline/mod.rs
+++ b/vortex-array/src/pipeline/mod.rs
@@ -20,23 +20,20 @@
 
 pub mod bits;
 pub(crate) mod operator;
-pub mod row_selection;
 mod types;
 pub mod vec;
 pub mod view;
 
 use std::cell::RefCell;
 
-pub use row_selection::*;
-pub use types::*;
-use vec::VectorRef;
-use vortex_error::VortexResult;
-
 use self::vec::Vector;
-use self::view::ViewMut;
-use crate::Canonical;
 use crate::operator::Operator;
 use crate::pipeline::bits::BitView;
+use crate::Canonical;
+pub use types::*;
+use vec::VectorRef;
+use vortex_error::VortexResult;
+use vortex_vector::VectorMut;
 
 /// The number of elements in each step of a Vortex evaluation operator.
 pub const N: usize = 1024;
@@ -44,10 +41,8 @@ pub const N: usize = 1024;
 // Number of usize words needed to store N bits
 pub const N_WORDS: usize = N / usize::BITS as usize;
 
-pub trait PipelinedOperator: Operator {
-    /// Defines the row selection of this pipeline operator.
-    fn row_selection(&self) -> RowSelection;
-
+/// Returned by an array to indicate that it can be executed in a pipelined fashion.
+pub trait Pipelined {
     // Whether this operator works by mutating its first child in-place.
     //
     // If `true`, the operator is invoked with the first child's input data passed via the
@@ -57,54 +52,70 @@ pub trait PipelinedOperator: Operator {
     //     false
     // }
 
-    /// Bind the operator into a [`Kernel`] for pipelined execution.
-    fn bind(&self, ctx: &dyn BindContext) -> VortexResult<Box<dyn Kernel>>;
-
-    /// Returns the child indices of this operator that are passed to the kernel as input vectors.
-    fn vector_children(&self) -> Vec<usize>;
+    /// Returns the indices of the children of this array that should be passed to the kernel as
+    /// pipelined input vectors, 1024 elements at a time.
+    ///
+    /// Any child not listed here will be treated as a batch input, and the full vector will be
+    /// computed before pipelined execution begins.
+    fn pipelined_children(&self) -> Vec<usize>;
 
-    /// Returns the child indices of this operator that are passed to the kernel as batch inputs.
-    fn batch_children(&self) -> Vec<usize>;
+    /// Bind the operator into a [`Kernel`] for pipelined execution.
+    ///
+    /// The provided [`BindContext`] can be used to obtain vector IDs for pipelined children and
+    /// batch IDs for batch children. Each child can only be bound once.
+    fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult<Box<dyn Kernel>>;
 }
 
 /// The context used when binding an operator for execution.
 pub trait BindContext {
-    fn children(&self) -> &[VectorId];
+    /// Returns a [`VectorId`] that can be passed to the [`KernelContext`] within the body of
+    /// the [`Kernel`] to access the given child as a pipelined input vector.
+    ///
+    /// # Panics
+    ///
+    /// If the child index requested here was not listed in [`Pipelined::pipelined_children`].
+    fn pipelined_input(&self, child_idx: usize) -> VectorId;
 
-    fn batch_inputs(&self) -> &[BatchId];
+    /// Returns the batch input vector for the given child.
+    ///
+    /// # Panics
+    ///
+    /// If the child index requested here was listed in [`Pipelined::pipelined_children`].
+    fn batch_input(&self, child_idx: usize) -> Vector;
 }
 
 /// The ID of the vector to use.
 pub type VectorId = usize;
-/// The ID of the batch input to use.
-pub type BatchId = usize;
 
-/// A operator provides a push-based way to emit a stream of canonical data.
+/// A kernel implements the physical compute required for pipelined execution. It is driven in a
+/// push-based way, typically as part of a larger pipeline of kernels.
 ///
 /// By passing multiple vector computations through the same operator, we can amortize
 /// the setup costs (such as DType validation, stats short-circuiting, etc.), and to make better
 /// use of CPU caches by performing all operations while the data is hot.
+///
+/// The [`Kernel::step`] method will be invoked repeatedly to process chunks of data, [`N`] elements
+/// at a time. Each invocation is passed a selection mask indicating which elements of the chunk
+/// should be written to the start of the output vector.
+///
+/// The mutable output vector is **guaranteed** to have a capacity of at least [`N`] elements, and
+/// its length will initially be set to zero. It is therefore safe to invoke unchecked writes up to
+/// `N` elements.
+///
+/// The pipeline may invoke the [`Kernel::skip`] method to skip over some number of chunks of data.
+/// The kernel should mutate any internal state as necessary to account for the skipped data.
 pub trait Kernel: Send {
-    /// Attempts to perform a single step of the operator, writing data to the output vector.
-    ///
-    /// The kernel step should be stateless and is passed the chunk index as well as the selection
-    /// mask for this chunk.
+    /// Skip over the given number of chunks of data.
     ///
-    /// Input and output vectors have a `Selection` enum indicating which elements of the vector
-    /// are valid for processing. This is one of:
-    /// * Full - all N elements are valid.
-    /// * Prefix - the first n elements are valid, where n is the true count of the selection mask.
-    /// * Mask - only the elements indicated by the selection mask are valid.
-    ///
-    /// Kernel should inspect the selection enum of the input and iterate the values accordingly.
-    /// They may choose to write the output vector in any selection mode, but should choose the most
-    /// efficient mode possible - not forgetting to update the output vector's selection enum.
+    /// For example, if `n` is 3, then the kernel should skip over `3 * N` elements of input data.
+    fn skip(&mut self, n: usize);
+
+    /// Attempts to perform a single step of the operator, writing data to the output vector.
     fn step(
-        &self,
+        &mut self,
         ctx: &KernelContext,
-        chunk_idx: usize,
         selection: &BitView,
-        out: &mut ViewMut,
+        out: &mut VectorMut,
     ) -> VortexResult<()>;
 }
 
@@ -121,9 +132,4 @@ impl KernelContext {
     pub fn vector(&self, vector_id: VectorId) -> VectorRef<'_> {
         VectorRef::new(self.vectors[vector_id].borrow())
     }
-
-    /// Get a batch input by its ID.
-    pub fn batch_input(&self, batch_id: BatchId) -> &Canonical {
-        &self.batch_inputs[batch_id]
-    }
 }
diff --git a/vortex-array/src/pipeline/operator/bind.rs b/vortex-array/src/pipeline/operator/bind.rs
index 16dc77b6972..63612a56bc8 100644
--- a/vortex-array/src/pipeline/operator/bind.rs
+++ b/vortex-array/src/pipeline/operator/bind.rs
@@ -5,8 +5,8 @@
 
 use vortex_error::{VortexExpect, VortexResult};
 
-use crate::pipeline::operator::PipelineNode;
 use crate::pipeline::operator::buffers::VectorAllocationPlan;
+use crate::pipeline::operator::PipelineNode;
 use crate::pipeline::{BatchId, BindContext, Kernel, VectorId};
 
 pub(crate) fn bind_kernels(
@@ -44,7 +44,7 @@ struct PipelineBindContext<'a> {
 }
 
 impl BindContext for PipelineBindContext<'_> {
-    fn children(&self) -> &[VectorId] {
+    fn pipelined_input(&self) -> &[VectorId] {
         self.children
     }
 
diff --git a/vortex-array/src/pipeline/row_selection.rs b/vortex-array/src/pipeline/row_selection.rs
deleted file mode 100644
index 549c93bd16c..00000000000
--- a/vortex-array/src/pipeline/row_selection.rs
+++ /dev/null
@@ -1,27 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-use crate::operator::{OperatorEq, OperatorRef};
-
-/// Each operator has a row selection over the domain of input rows.
-#[derive(Debug, Clone)]
-pub enum RowSelection {
-    /// Defines a new domain of N rows.
-    Domain(usize),
-    /// Returns all rows from the domain.
-    All,
-    /// Selects rows from the range where the boolean operator resolves to a true bit.
-    MaskOperator(OperatorRef),
-}
-
-impl PartialEq for RowSelection {
-    fn eq(&self, other: &Self) -> bool {
-        match (self, other) {
-            (RowSelection::Domain(n1), RowSelection::Domain(n2)) => n1 == n2,
-            (RowSelection::All, RowSelection::All) => true,
-            (RowSelection::MaskOperator(o1), RowSelection::MaskOperator(o2)) => o1.operator_eq(o2),
-            _ => false,
-        }
-    }
-}
-impl Eq for RowSelection {}
diff --git a/vortex-array/src/vtable/operator.rs b/vortex-array/src/vtable/operator.rs
index 8f9abbd43a1..c0379350440 100644
--- a/vortex-array/src/vtable/operator.rs
+++ b/vortex-array/src/vtable/operator.rs
@@ -1,15 +1,15 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-use vortex_error::{VortexResult, vortex_bail};
+use vortex_error::{vortex_bail, VortexResult};
 use vortex_mask::Mask;
 use vortex_vector::Vector;
 
-use crate::ArrayRef;
 use crate::array::IntoArray;
 use crate::execution::{BatchKernelRef, BindCtx, ExecutionCtx};
 use crate::operator::OperatorRef;
 use crate::vtable::{NotSupported, VTable};
+use crate::ArrayRef;
 
 /// A vtable for the new operator-based array functionality. Eventually this vtable will be
 /// merged into the main `VTable`, but for now it is kept separate to allow for incremental
@@ -23,8 +23,7 @@ pub trait OperatorVTable<V: VTable> {
         Ok(None)
     }
 
-    /// Takes the array by ownership, returning a canonical [`Vector`] containing the rows
-    /// indicated by the given selection [`Mask`].
+    /// Returns a canonical [`Vector`] containing the rows indicated by the given selection [`Mask`].
     ///
     /// The returned vector must be the appropriate one for the array's logical type (they are
     /// one-to-one with Vortex `DType`s), and should respect the output nullability of the array.
@@ -47,6 +46,8 @@ pub trait OperatorVTable<V: VTable> {
         Self::bind(array, Some(&selection.clone().into_array()), &mut ())?.execute()
     }
 
+    // TODO(ngates): unfinished doc comment ("Returns the ..."); complete or remove it.
+
     /// Bind the array for execution in batch mode.
     ///
     /// This function should return a [`BatchKernelRef`] that can be used to execute the array in

From c0399c05853e9dc4eb2ceeec1d4870ea8418fff1 Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Fri, 7 Nov 2025 13:06:19 -0500
Subject: [PATCH 02/10] pipelined execution

Signed-off-by: Nicholas Gates <nick@nickgates.com>
---
 .../fastlanes/src/bitpacking/vtable/mod.rs    |   3 +-
 encodings/fastlanes/src/for/mod.rs            |   3 +-
 encodings/fsst/src/array.rs                   |   2 +-
 encodings/fsst/src/lib.rs                     |   1 -
 encodings/fsst/src/operator.rs                | 194 -------
 vortex-array/src/array/mod.rs                 |  18 +-
 vortex-array/src/array/operator.rs            |   2 +-
 vortex-array/src/arrays/varbin/mod.rs         |   4 +-
 vortex-array/src/arrays/varbin/operator.rs    |  28 -
 vortex-array/src/lib.rs                       |   1 -
 vortex-array/src/operator/canonical.rs        |  17 -
 vortex-array/src/operator/compare.rs          | 532 ------------------
 vortex-array/src/operator/display.rs          |  32 --
 vortex-array/src/operator/filter.rs           | 148 -----
 vortex-array/src/operator/getitem.rs          |  73 ---
 vortex-array/src/operator/hash.rs             | 177 ------
 vortex-array/src/operator/metrics.rs          | 163 ------
 vortex-array/src/operator/mod.rs              | 242 --------
 vortex-array/src/operator/optimize.rs         |  33 --
 vortex-array/src/operator/slice.rs            | 138 -----
 vortex-array/src/pipeline/mod.rs              |  10 +-
 vortex-array/src/vtable/operator.rs           |  22 +-
 22 files changed, 19 insertions(+), 1824 deletions(-)
 delete mode 100644 encodings/fsst/src/operator.rs
 delete mode 100644 vortex-array/src/arrays/varbin/operator.rs
 delete mode 100644 vortex-array/src/operator/canonical.rs
 delete mode 100644 vortex-array/src/operator/compare.rs
 delete mode 100644 vortex-array/src/operator/display.rs
 delete mode 100644 vortex-array/src/operator/filter.rs
 delete mode 100644 vortex-array/src/operator/getitem.rs
 delete mode 100644 vortex-array/src/operator/hash.rs
 delete mode 100644 vortex-array/src/operator/metrics.rs
 delete mode 100644 vortex-array/src/operator/mod.rs
 delete mode 100644 vortex-array/src/operator/optimize.rs
 delete mode 100644 vortex-array/src/operator/slice.rs

diff --git a/encodings/fastlanes/src/bitpacking/vtable/mod.rs b/encodings/fastlanes/src/bitpacking/vtable/mod.rs
index 0cc6ffc36cf..94add8b7516 100644
--- a/encodings/fastlanes/src/bitpacking/vtable/mod.rs
+++ b/encodings/fastlanes/src/bitpacking/vtable/mod.rs
@@ -10,7 +10,6 @@ mod array;
 mod canonical;
 mod encode;
 mod operations;
-mod operator;
 mod serde;
 mod validity;
 mod visitor;
@@ -29,7 +28,7 @@ impl VTable for BitPackedVTable {
     type ComputeVTable = NotSupported;
     type EncodeVTable = Self;
     type SerdeVTable = Self;
-    type OperatorVTable = Self;
+    type OperatorVTable = NotSupported;
 
     fn id(_encoding: &Self::Encoding) -> EncodingId {
         EncodingId::new_ref("fastlanes.bitpacked")
diff --git a/encodings/fastlanes/src/for/mod.rs b/encodings/fastlanes/src/for/mod.rs
index 13fd9572cfa..f27aed63602 100644
--- a/encodings/fastlanes/src/for/mod.rs
+++ b/encodings/fastlanes/src/for/mod.rs
@@ -18,7 +18,6 @@ use vortex_scalar::Scalar;
 mod compress;
 mod compute;
 mod ops;
-mod pipeline;
 mod serde;
 
 vtable!(FoR);
@@ -35,7 +34,7 @@ impl VTable for FoRVTable {
     type ComputeVTable = NotSupported;
     type EncodeVTable = Self;
     type SerdeVTable = Self;
-    type OperatorVTable = Self;
+    type OperatorVTable = NotSupported;
 
     fn id(_encoding: &Self::Encoding) -> EncodingId {
         EncodingId::new_ref("fastlanes.for")
diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs
index 7df661e604f..4a26a6f0f38 100644
--- a/encodings/fsst/src/array.rs
+++ b/encodings/fsst/src/array.rs
@@ -32,7 +32,7 @@ impl VTable for FSSTVTable {
     type ComputeVTable = NotSupported;
     type EncodeVTable = Self;
     type SerdeVTable = Self;
-    type OperatorVTable = Self;
+    type OperatorVTable = NotSupported;
 
     fn id(_encoding: &Self::Encoding) -> EncodingId {
         EncodingId::new_ref("vortex.fsst")
diff --git a/encodings/fsst/src/lib.rs b/encodings/fsst/src/lib.rs
index f854708e790..b8e449a66a6 100644
--- a/encodings/fsst/src/lib.rs
+++ b/encodings/fsst/src/lib.rs
@@ -15,7 +15,6 @@ mod array;
 mod canonical;
 mod compress;
 mod compute;
-mod operator;
 mod ops;
 mod serde;
 #[cfg(test)]
diff --git a/encodings/fsst/src/operator.rs b/encodings/fsst/src/operator.rs
deleted file mode 100644
index f203fcdcb72..00000000000
--- a/encodings/fsst/src/operator.rs
+++ /dev/null
@@ -1,194 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-use std::any::Any;
-use std::hash::{Hash, Hasher};
-use std::sync::Arc;
-
-use async_trait::async_trait;
-use vortex_array::compute::filter;
-use vortex_array::operator::filter::FilterOperator;
-use vortex_array::operator::slice::SliceOperator;
-use vortex_array::operator::{
-    BatchBindCtx, BatchExecution, BatchExecutionRef, BatchOperator, LengthBounds, Operator,
-    OperatorEq, OperatorHash, OperatorId, OperatorRef,
-};
-use vortex_array::vtable::OperatorVTable;
-use vortex_array::{Array, Canonical};
-use vortex_dtype::DType;
-use vortex_error::VortexResult;
-use vortex_mask::Mask;
-
-use crate::{FSSTArray, FSSTVTable};
-
-impl OperatorVTable<FSSTVTable> for FSSTVTable {
-    fn to_operator(array: &FSSTArray) -> VortexResult<Option<OperatorRef>> {
-        Ok(Some(Arc::new(array.clone())))
-    }
-}
-
-impl OperatorHash for FSSTArray {
-    fn operator_hash<H: Hasher>(&self, state: &mut H) {
-        self.dtype().hash(state);
-        self.symbols().operator_hash(state);
-        self.symbol_lengths().operator_hash(state);
-        self.codes().operator_hash(state);
-        self.uncompressed_lengths().operator_hash(state);
-    }
-}
-
-impl OperatorEq for FSSTArray {
-    fn operator_eq(&self, other: &Self) -> bool {
-        self.dtype() == other.dtype()
-            && self.symbols().operator_eq(other.symbols())
-            && self.symbol_lengths().operator_eq(other.symbol_lengths())
-            && self.codes().operator_eq(other.codes())
-            && self
-                .uncompressed_lengths()
-                .operator_eq(other.uncompressed_lengths())
-    }
-}
-
-impl Operator for FSSTArray {
-    fn id(&self) -> OperatorId {
-        self.encoding_id()
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn dtype(&self) -> &DType {
-        Array::dtype(self.as_ref())
-    }
-
-    fn bounds(&self) -> LengthBounds {
-        Array::len(self.as_ref()).into()
-    }
-
-    fn children(&self) -> &[OperatorRef] {
-        // TODO(ngates): we have varbin child
-        &[]
-    }
-
-    fn with_children(self: Arc<Self>, _children: Vec<OperatorRef>) -> VortexResult<OperatorRef> {
-        Ok(self)
-    }
-
-    fn reduce_parent(
-        &self,
-        parent: OperatorRef,
-        _child_idx: usize,
-    ) -> VortexResult<Option<OperatorRef>> {
-        if let Some(filter) = parent.as_any().downcast_ref::<FilterOperator>() {
-            return Ok(Some(Arc::new(FilteredFSSTOperator {
-                array: self.clone(),
-                mask: filter.mask().clone(),
-            })));
-        }
-
-        if let Some(slice) = parent.as_any().downcast_ref::<SliceOperator>() {
-            return Ok(Some(Arc::new(
-                self.slice(slice.range().clone())
-                    .as_::<FSSTVTable>()
-                    .clone(),
-            )));
-        }
-
-        Ok(None)
-    }
-
-    fn as_batch(&self) -> Option<&dyn BatchOperator> {
-        Some(self)
-    }
-}
-
-impl BatchOperator for FSSTArray {
-    fn bind(&self, _ctx: &mut dyn BatchBindCtx) -> VortexResult<BatchExecutionRef> {
-        Ok(Box::new(FSSTExecution {
-            array: self.clone(),
-        }))
-    }
-}
-
-// TODO(ngates): obviously we should inline the canonical logic here
-struct FSSTExecution {
-    array: FSSTArray,
-}
-
-#[async_trait]
-impl BatchExecution for FSSTExecution {
-    async fn execute(self: Box<Self>) -> VortexResult<Canonical> {
-        Ok(self.array.to_canonical())
-    }
-}
-
-#[derive(Debug)]
-pub struct FilteredFSSTOperator {
-    array: FSSTArray,
-    mask: Mask,
-}
-
-impl OperatorHash for FilteredFSSTOperator {
-    fn operator_hash<H: Hasher>(&self, state: &mut H) {
-        self.array.operator_hash(state);
-        self.mask.operator_hash(state);
-    }
-}
-
-impl OperatorEq for FilteredFSSTOperator {
-    fn operator_eq(&self, other: &Self) -> bool {
-        self.array.operator_eq(&other.array) && self.mask.operator_eq(&other.mask)
-    }
-}
-
-impl Operator for FilteredFSSTOperator {
-    fn id(&self) -> OperatorId {
-        OperatorId::from("vortex.fsst.filtered")
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn dtype(&self) -> &DType {
-        self.array.dtype()
-    }
-
-    fn bounds(&self) -> LengthBounds {
-        self.mask.len().into()
-    }
-
-    fn children(&self) -> &[OperatorRef] {
-        &[]
-    }
-
-    fn with_children(self: Arc<Self>, _children: Vec<OperatorRef>) -> VortexResult<OperatorRef> {
-        Ok(self)
-    }
-
-    fn as_batch(&self) -> Option<&dyn BatchOperator> {
-        Some(self)
-    }
-}
-
-impl BatchOperator for FilteredFSSTOperator {
-    fn bind(&self, _ctx: &mut dyn BatchBindCtx) -> VortexResult<BatchExecutionRef> {
-        Ok(Box::new(FilteredFSSTExecution {
-            array: self.array.clone(),
-            mask: self.mask.clone(),
-        }))
-    }
-}
-
-struct FilteredFSSTExecution {
-    array: FSSTArray,
-    mask: Mask,
-}
-
-#[async_trait]
-impl BatchExecution for FilteredFSSTExecution {
-    async fn execute(self: Box<Self>) -> VortexResult<Canonical> {
-        Ok(filter(self.array.as_ref(), &self.mask)?.to_canonical())
-    }
-}
diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs
index 98c1f3cdc06..d7bc2dc72f4 100644
--- a/vortex-array/src/array/mod.rs
+++ b/vortex-array/src/array/mod.rs
@@ -26,12 +26,11 @@ use crate::arrays::{
 };
 use crate::builders::ArrayBuilder;
 use crate::compute::{ComputeFn, Cost, InvocationArgs, IsConstantOpts, Output, is_constant_opts};
-use crate::operator::OperatorRef;
 use crate::serde::ArrayChildren;
 use crate::stats::{Precision, Stat, StatsProviderExt, StatsSetRef};
 use crate::vtable::{
-    ArrayVTable, CanonicalVTable, ComputeVTable, OperationsVTable, OperatorVTable, SerdeVTable,
-    VTable, ValidityVTable, VisitorVTable,
+    ArrayVTable, CanonicalVTable, ComputeVTable, OperationsVTable, SerdeVTable, VTable,
+    ValidityVTable, VisitorVTable,
 };
 use crate::{
     ArrayEq, ArrayHash, Canonical, DynArrayEq, DynArrayHash, EncodingId, EncodingRef,
@@ -168,11 +167,6 @@ pub trait Array:
     /// call.
     fn invoke(&self, compute_fn: &ComputeFn, args: &InvocationArgs)
     -> VortexResult<Option<Output>>;
-
-    /// Convert the array to an operator if supported by the encoding.
-    ///
-    /// Returns `None` if the encoding does not support operator operations.
-    fn to_operator(&self) -> VortexResult<Option<OperatorRef>>;
 }
 
 impl Array for Arc<dyn Array> {
@@ -275,10 +269,6 @@ impl Array for Arc<dyn Array> {
     ) -> VortexResult<Option<Output>> {
         self.as_ref().invoke(compute_fn, args)
     }
-
-    fn to_operator(&self) -> VortexResult<Option<OperatorRef>> {
-        self.as_ref().to_operator()
-    }
 }
 
 /// A reference counted pointer to a dynamic [`Array`] trait object.
@@ -649,10 +639,6 @@ impl<V: VTable> Array for ArrayAdapter<V> {
     ) -> VortexResult<Option<Output>> {
         <V::ComputeVTable as ComputeVTable<V>>::invoke(&self.0, compute_fn, args)
     }
-
-    fn to_operator(&self) -> VortexResult<Option<OperatorRef>> {
-        <V::OperatorVTable as OperatorVTable<V>>::to_operator(&self.0)
-    }
 }
 
 impl<V: VTable> ArrayHash for ArrayAdapter<V> {
diff --git a/vortex-array/src/array/operator.rs b/vortex-array/src/array/operator.rs
index ef612a53933..0a0e0705046 100644
--- a/vortex-array/src/array/operator.rs
+++ b/vortex-array/src/array/operator.rs
@@ -21,7 +21,7 @@ pub trait ArrayOperator: 'static + Send + Sync {
     /// # Panics
     ///
     /// If the mask length does not match the array length.
-    /// If the array's implementation returns an invalid vector (wrong length, wrong type, etc).
+    /// If the array's implementation returns an invalid vector (wrong length, wrong type, etc.).
     fn execute_batch(&self, selection: &Mask, ctx: &mut dyn ExecutionCtx) -> VortexResult<Vector>;
 
     /// Optimize the array by running the optimization rules.
diff --git a/vortex-array/src/arrays/varbin/mod.rs b/vortex-array/src/arrays/varbin/mod.rs
index fc911834ca0..5c28fe4466a 100644
--- a/vortex-array/src/arrays/varbin/mod.rs
+++ b/vortex-array/src/arrays/varbin/mod.rs
@@ -5,7 +5,8 @@ mod array;
 pub use array::VarBinArray;
 
 mod compute;
-pub(crate) use compute::varbin_compute_min_max; // For use in `varbinview`.
+pub(crate) use compute::varbin_compute_min_max;
+// `varbin_compute_min_max` is re-exported above for use in `varbinview`.
 
 mod vtable;
 pub use vtable::{VarBinEncoding, VarBinVTable};
@@ -13,7 +14,6 @@ pub use vtable::{VarBinEncoding, VarBinVTable};
 pub mod builder;
 
 mod accessor;
-mod operator;
 
 use vortex_buffer::ByteBuffer;
 use vortex_dtype::DType;
diff --git a/vortex-array/src/arrays/varbin/operator.rs b/vortex-array/src/arrays/varbin/operator.rs
deleted file mode 100644
index b20c381cf54..00000000000
--- a/vortex-array/src/arrays/varbin/operator.rs
+++ /dev/null
@@ -1,28 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-use std::hash::{Hash, Hasher};
-
-use crate::arrays::VarBinArray;
-use crate::operator::{OperatorEq, OperatorHash};
-use crate::vtable::ValidityHelper;
-
-impl OperatorHash for VarBinArray {
-    fn operator_hash<H: Hasher>(&self, state: &mut H) {
-        self.dtype.hash(state);
-        self.bytes().operator_hash(state);
-        self.offsets().operator_hash(state);
-        self.validity().operator_hash(state);
-    }
-}
-
-impl OperatorEq for VarBinArray {
-    fn operator_eq(&self, other: &Self) -> bool {
-        self.dtype == other.dtype
-            && self.bytes().operator_eq(other.bytes())
-            && self.offsets().operator_eq(other.offsets())
-            && self.validity().operator_eq(other.validity())
-    }
-}
-
-// TODO(ngates): impl Operator
diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs
index eb2e1b4492a..31a13731b35 100644
--- a/vortex-array/src/lib.rs
+++ b/vortex-array/src/lib.rs
@@ -44,7 +44,6 @@ mod hash;
 pub mod iter;
 mod mask_future;
 mod metadata;
-pub mod operator;
 pub mod optimizer;
 mod partial_ord;
 pub mod patches;
diff --git a/vortex-array/src/operator/canonical.rs b/vortex-array/src/operator/canonical.rs
deleted file mode 100644
index 2d983e9ad86..00000000000
--- a/vortex-array/src/operator/canonical.rs
+++ /dev/null
@@ -1,17 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-use async_trait::async_trait;
-use vortex_error::VortexResult;
-
-use crate::Canonical;
-use crate::operator::BatchExecution;
-
-pub struct CanonicalExecution(pub Canonical);
-
-#[async_trait]
-impl BatchExecution for CanonicalExecution {
-    async fn execute(self: Box<Self>) -> VortexResult<Canonical> {
-        Ok(self.0)
-    }
-}
diff --git a/vortex-array/src/operator/compare.rs b/vortex-array/src/operator/compare.rs
deleted file mode 100644
index 485cf1f8284..00000000000
--- a/vortex-array/src/operator/compare.rs
+++ /dev/null
@@ -1,532 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-use std::any::Any;
-use std::hash::{Hash, Hasher};
-use std::marker::PhantomData;
-use std::sync::Arc;
-
-use itertools::Itertools;
-use vortex_dtype::{DType, NativePType, match_each_native_ptype};
-use vortex_error::{VortexExpect, VortexResult, vortex_bail};
-
-use crate::arrays::ConstantArray;
-use crate::compute::Operator as Op;
-use crate::operator::{LengthBounds, Operator, OperatorEq, OperatorHash, OperatorId, OperatorRef};
-use crate::pipeline::bits::BitView;
-use crate::pipeline::vec::Selection;
-use crate::pipeline::view::ViewMut;
-use crate::pipeline::{
-    BindContext, Element, Kernel, KernelContext, PipelinedOperator, RowSelection, VectorId,
-};
-
-#[derive(Debug)]
-pub struct CompareOperator {
-    children: [OperatorRef; 2],
-    op: Op,
-    dtype: DType,
-}
-
-impl CompareOperator {
-    pub fn try_new(lhs: OperatorRef, rhs: OperatorRef, op: Op) -> VortexResult<CompareOperator> {
-        if lhs.dtype() != rhs.dtype() {
-            vortex_bail!(
-                "Cannot compare arrays with different dtypes: {} and {}",
-                lhs.dtype(),
-                rhs.dtype()
-            );
-        }
-
-        let lhs_const = lhs.as_any().downcast_ref::<ConstantArray>();
-        let rhs_const = rhs.as_any().downcast_ref::<ConstantArray>();
-        if lhs_const.is_some() && rhs_const.is_some() {
-            // TODO(ngates): we should return the Constant result!
-        }
-
-        let nullability = lhs.dtype().nullability() | rhs.dtype().nullability();
-        let dtype = DType::Bool(nullability);
-
-        Ok(CompareOperator {
-            children: [lhs, rhs],
-            op,
-            dtype,
-        })
-    }
-
-    pub fn op(&self) -> Op {
-        self.op
-    }
-}
-
-impl OperatorHash for CompareOperator {
-    fn operator_hash<H: Hasher>(&self, state: &mut H) {
-        self.op.hash(state);
-        self.dtype.hash(state);
-        self.children.iter().for_each(|c| c.operator_hash(state));
-    }
-}
-
-impl OperatorEq for CompareOperator {
-    fn operator_eq(&self, other: &Self) -> bool {
-        self.op == other.op
-            && self.dtype == other.dtype
-            && self
-                .children
-                .iter()
-                .zip(other.children.iter())
-                .all(|(a, b)| a.operator_eq(b))
-    }
-}
-
-impl Operator for CompareOperator {
-    fn id(&self) -> OperatorId {
-        OperatorId::from("vortex.compare")
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn dtype(&self) -> &DType {
-        &self.dtype
-    }
-
-    fn bounds(&self) -> LengthBounds {
-        self.children[0].bounds() & self.children[1].bounds()
-    }
-
-    fn children(&self) -> &[OperatorRef] {
-        &self.children
-    }
-
-    fn with_children(self: Arc<Self>, children: Vec<OperatorRef>) -> VortexResult<OperatorRef> {
-        let (lhs, rhs) = children
-            .into_iter()
-            .tuples()
-            .next()
-            .vortex_expect("missing");
-        Ok(Arc::new(CompareOperator {
-            children: [lhs, rhs],
-            op: self.op,
-            dtype: self.dtype.clone(),
-        }))
-    }
-
-    fn as_pipelined(&self) -> Option<&dyn PipelinedOperator> {
-        // If both children support pipelining, but have different row selections, then we cannot
-        // pipeline without an alignment step (which we currently do not support).
-        if let Some((left, right)) = self.children[0]
-            .as_pipelined()
-            .zip(self.children[1].as_pipelined())
-            && left.row_selection() != right.row_selection()
-        {
-            return None;
-        }
-
-        Some(self)
-    }
-}
-
-macro_rules! match_each_compare_op {
-    ($self:expr, | $enc:ident | $body:block) => {{
-        match $self {
-            Op::Eq => {
-                type $enc = Eq;
-                $body
-            }
-            Op::NotEq => {
-                type $enc = NotEq;
-                $body
-            }
-            Op::Gt => {
-                type $enc = Gt;
-                $body
-            }
-            Op::Gte => {
-                type $enc = Gte;
-                $body
-            }
-            Op::Lt => {
-                type $enc = Lt;
-                $body
-            }
-            Op::Lte => {
-                type $enc = Lte;
-                $body
-            }
-        }
-    }};
-}
-
-impl PipelinedOperator for CompareOperator {
-    fn row_selection(&self) -> RowSelection {
-        self.children[0]
-            .as_pipelined()
-            .map(|p| p.row_selection())
-            .unwrap_or(RowSelection::All)
-    }
-
-    #[allow(clippy::cognitive_complexity)]
-    fn bind(&self, ctx: &dyn BindContext) -> VortexResult<Box<dyn Kernel>> {
-        debug_assert_eq!(self.children[0].dtype(), self.children[1].dtype());
-
-        let DType::Primitive(ptype, _) = self.children[0].dtype() else {
-            vortex_bail!(
-                "Unsupported type for comparison: {}",
-                self.children[0].dtype()
-            )
-        };
-
-        let lhs_const = self.children[0].as_any().downcast_ref::<ConstantArray>();
-        if let Some(lhs_const) = lhs_const {
-            // LHS is constant, use ScalarComparePrimitiveKernel
-            return match_each_native_ptype!(ptype, |T| {
-                match_each_compare_op!(self.op.swap(), |Op| {
-                    Ok(Box::new(ScalarComparePrimitiveKernel::<T, Op> {
-                        lhs: ctx.pipelined_input()[1],
-                        rhs: lhs_const
-                            .scalar()
-                            .as_primitive()
-                            .typed_value::<T>()
-                            .vortex_expect("scalar value not of type T"),
-                        _phantom: PhantomData,
-                    }) as Box<dyn Kernel>)
-                })
-            });
-        }
-
-        let rhs_const = self.children[1].as_any().downcast_ref::<ConstantArray>();
-        if let Some(rhs_const) = rhs_const {
-            // RHS is constant, use ScalarComparePrimitiveKernel
-            return match_each_native_ptype!(ptype, |T| {
-                match_each_compare_op!(self.op, |Op| {
-                    Ok(Box::new(ScalarComparePrimitiveKernel::<T, Op> {
-                        lhs: ctx.pipelined_input()[0],
-                        rhs: rhs_const
-                            .scalar()
-                            .as_primitive()
-                            .typed_value::<T>()
-                            .vortex_expect("scalar value not of type T"),
-                        _phantom: PhantomData,
-                    }) as Box<dyn Kernel>)
-                })
-            });
-        }
-
-        match_each_native_ptype!(ptype, |T| {
-            match_each_compare_op!(self.op, |Op| {
-                Ok(Box::new(ComparePrimitiveKernel::<T, Op> {
-                    lhs: ctx.pipelined_input()[0],
-                    rhs: ctx.pipelined_input()[1],
-                    _phantom: PhantomData,
-                }) as Box<dyn Kernel>)
-            })
-        })
-    }
-
-    fn vector_children(&self) -> Vec<usize> {
-        vec![0, 1]
-    }
-
-    fn batch_children(&self) -> Vec<usize> {
-        vec![]
-    }
-}
-
-/// A compare operator for primitive types that compares two vectors element-wise using a binary
-/// operation.
-/// Kernel that performs primitive type comparisons between two input vectors.
-pub struct ComparePrimitiveKernel<T, Op> {
-    lhs: VectorId,
-    rhs: VectorId,
-    _phantom: PhantomData<(T, Op)>,
-}
-
-impl<T: Element + NativePType, Op: CompareOp<T> + Send> Kernel for ComparePrimitiveKernel<T, Op> {
-    fn step(
-        &self,
-        ctx: &KernelContext,
-        _chunk_idx: usize,
-        selection: &BitView,
-        out: &mut ViewMut,
-    ) -> VortexResult<()> {
-        let lhs_vec = ctx.vector(self.lhs);
-        let lhs = lhs_vec.as_array::<T>();
-        let rhs_vec = ctx.vector(self.rhs);
-        let rhs = rhs_vec.as_array::<T>();
-        let bools = out.as_array_mut::<bool>();
-
-        match (lhs_vec.selection(), rhs_vec.selection()) {
-            (Selection::Prefix, Selection::Prefix) => {
-                for i in 0..selection.true_count() {
-                    bools[i] = Op::compare(&lhs[i], &rhs[i]);
-                }
-                out.set_selection(Selection::Prefix)
-            }
-            (Selection::Mask, Selection::Mask) => {
-                // TODO(ngates): check density to decide if we should iterate indices or do
-                //  a full scan
-                let mut pos = 0;
-                selection.iter_ones(|idx| {
-                    bools[pos] = Op::compare(&lhs[idx], &rhs[idx]);
-                    pos += 1;
-                });
-                out.set_selection(Selection::Prefix)
-            }
-            (Selection::Mask, Selection::Prefix) => {
-                let mut pos = 0;
-                selection.iter_ones(|idx| {
-                    bools[pos] = Op::compare(&lhs[idx], &rhs[pos]);
-                    pos += 1;
-                });
-                out.set_selection(Selection::Prefix)
-            }
-            (Selection::Prefix, Selection::Mask) => {
-                let mut pos = 0;
-                selection.iter_ones(|idx| {
-                    bools[pos] = Op::compare(&lhs[pos], &rhs[idx]);
-                    pos += 1;
-                });
-                out.set_selection(Selection::Prefix)
-            }
-        }
-
-        Ok(())
-    }
-}
-
-struct ScalarComparePrimitiveKernel<T: Element + NativePType, Op: CompareOp<T>> {
-    lhs: VectorId,
-    rhs: T,
-    _phantom: PhantomData<Op>,
-}
-
-impl<T: Element + NativePType, Op: CompareOp<T> + Send> Kernel
-    for ScalarComparePrimitiveKernel<T, Op>
-{
-    fn step(
-        &self,
-        ctx: &KernelContext,
-        _chunk_idx: usize,
-        selection: &BitView,
-        out: &mut ViewMut,
-    ) -> VortexResult<()> {
-        let lhs_vec = ctx.vector(self.lhs);
-        let lhs = lhs_vec.as_array::<T>();
-        let bools = out.as_array_mut::<bool>();
-
-        match lhs_vec.selection() {
-            Selection::Prefix => {
-                for i in 0..selection.true_count() {
-                    bools[i] = Op::compare(&lhs[i], &self.rhs);
-                }
-                out.set_selection(Selection::Prefix)
-            }
-            Selection::Mask => {
-                // TODO(ngates): decide at what true count we should iter indices...
-                selection.iter_ones(|idx| {
-                    bools[idx] = Op::compare(&lhs[idx], &self.rhs);
-                });
-                out.set_selection(Selection::Mask)
-            }
-        }
-
-        Ok(())
-    }
-}
-
-pub(crate) trait CompareOp<T> {
-    fn compare(lhs: &T, rhs: &T) -> bool;
-}
-
-/// Equality comparison operation.
-pub struct Eq;
-impl<T: PartialEq> CompareOp<T> for Eq {
-    #[inline(always)]
-    fn compare(lhs: &T, rhs: &T) -> bool {
-        lhs == rhs
-    }
-}
-
-/// Not equal comparison operation.
-pub struct NotEq;
-impl<T: PartialEq> CompareOp<T> for NotEq {
-    #[inline(always)]
-    fn compare(lhs: &T, rhs: &T) -> bool {
-        lhs != rhs
-    }
-}
-
-/// Greater than comparison operation.
-pub struct Gt;
-impl<T: PartialOrd> CompareOp<T> for Gt {
-    #[inline(always)]
-    fn compare(lhs: &T, rhs: &T) -> bool {
-        lhs > rhs
-    }
-}
-
-/// Greater than or equal comparison operation.
-pub struct Gte;
-impl<T: PartialOrd> CompareOp<T> for Gte {
-    #[inline(always)]
-    fn compare(lhs: &T, rhs: &T) -> bool {
-        lhs >= rhs
-    }
-}
-
-/// Less than comparison operation.
-pub struct Lt;
-impl<T: PartialOrd> CompareOp<T> for Lt {
-    #[inline(always)]
-    fn compare(lhs: &T, rhs: &T) -> bool {
-        lhs < rhs
-    }
-}
-
-/// Less than or equal comparison operation.
-pub struct Lte;
-impl<T: PartialOrd> CompareOp<T> for Lte {
-    #[inline(always)]
-    fn compare(lhs: &T, rhs: &T) -> bool {
-        lhs <= rhs
-    }
-}
-
-// TODO(ngates): bring these back!
-// #[cfg(test)]
-// mod tests {
-//     use std::rc::Rc;
-//
-//     use vortex_buffer::BufferMut;
-//     use vortex_dtype::Nullability;
-//     use vortex_scalar::Scalar;
-//
-//     use crate::arrays::PrimitiveArray;
-//     use crate::operator::bits::BitView;
-//
-//     #[test]
-//     fn test_scalar_compare_stacked_on_primitive() {
-//         // Create input data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-//         let size = 16;
-//         let primitive_array = (0..i32::try_from(size).unwrap()).collect::<PrimitiveArray>();
-//         let primitive_op = primitive_array.as_ref().to_operator().unwrap().unwrap();
-//
-//         // Create scalar compare operator: primitive_value > 10
-//         let compare_value = Scalar::primitive(10i32, Nullability::NonNullable);
-//         let scalar_compare_op = Rc::new(ScalarCompareOperator::new(
-//             primitive_op,
-//             BinaryOperator::Gt,
-//             compare_value,
-//         ));
-//
-//         // Create query plan from the stacked operators
-//         let plan = QueryPlan::new(scalar_compare_op.as_ref()).unwrap();
-//         let mut operator = plan.executable_plan().unwrap();
-//
-//         // Create all-true mask for simplicity
-//         let mask_data = [usize::MAX; N_WORDS];
-//         let mask_view = BitView::new(&mask_data);
-//
-//         // Create output buffer for boolean results
-//         let mut output = BufferMut::<bool>::with_capacity(N);
-//         unsafe { output.set_len(N) };
-//         let mut output_view = ViewMut::new(&mut output[..], None);
-//
-//         // Execute the operator
-//         let result = operator._step(mask_view, &mut output_view);
-//         assert!(result.is_ok());
-//
-//         // Verify results: values 0-10 should be false, values 11-15 should be true
-//         for i in 0..size {
-//             let expected = i > 10;
-//             assert_eq!(
-//                 output[i], expected,
-//                 "Position {}: expected {}, got {}",
-//                 i, expected, output[i]
-//             );
-//         }
-//     }
-//
-//     #[test]
-//     fn test_scalar_compare_different_operators() {
-//         // Test with different comparison operators
-//         let size = 8;
-//         let primitive_array = (0..i32::try_from(size).unwrap()).collect::<PrimitiveArray>();
-//
-//         let primitive_op = primitive_array.as_ref().to_operator().unwrap().unwrap();
-//
-//         // Test Eq: values == 3
-//         let compare_value = Scalar::primitive(3i32, Nullability::NonNullable);
-//         let eq_op = Rc::new(ScalarCompareOperator::new(
-//             primitive_op,
-//             BinaryOperator::Eq,
-//             compare_value,
-//         ));
-//
-//         let plan = QueryPlan::new(eq_op.as_ref()).unwrap();
-//         let mut operator = plan.executable_plan().unwrap();
-//
-//         let mask_data = [usize::MAX; N_WORDS];
-//         let mask_view = BitView::new(&mask_data);
-//
-//         let mut output = BufferMut::<bool>::with_capacity(N);
-//         unsafe { output.set_len(N) };
-//         let mut output_view = ViewMut::new(&mut output[..], None);
-//
-//         let result = operator._step(mask_view, &mut output_view);
-//         assert!(result.is_ok());
-//
-//         // Only position 3 should be true
-//         for i in 0..size {
-//             let expected = i == 3;
-//             assert_eq!(
-//                 output[i], expected,
-//                 "Eq test - Position {}: expected {}, got {}",
-//                 i, expected, output[i]
-//             );
-//         }
-//     }
-//
-//     #[test]
-//     fn test_scalar_compare_with_f32() {
-//         // Test with floating-point values
-//         let size = 8;
-//         let values: Vec<f32> = (0..size).map(|i| i as f32 + 0.5).collect();
-//         let primitive_array = values.into_iter().collect::<PrimitiveArray>();
-//
-//         let primitive_op = primitive_array.as_ref().to_operator().unwrap().unwrap();
-//
-//         // Test Lt: values < 3.5
-//         let compare_value = Scalar::primitive(3.5f32, Nullability::NonNullable);
-//         let lt_op = Rc::new(ScalarCompareOperator::new(
-//             primitive_op,
-//             BinaryOperator::Lt,
-//             compare_value,
-//         ));
-//
-//         let plan = QueryPlan::new(lt_op.as_ref()).unwrap();
-//         let mut operator = plan.executable_plan().unwrap();
-//
-//         let mask_data = [usize::MAX; N_WORDS];
-//         let mask_view = BitView::new(&mask_data);
-//
-//         let mut output = BufferMut::<bool>::with_capacity(N);
-//         unsafe { output.set_len(N) };
-//         let mut output_view = ViewMut::new(&mut output[..], None);
-//
-//         let result = operator._step(mask_view, &mut output_view);
-//         assert!(result.is_ok());
-//
-//         // Values 0.5, 1.5, 2.5 should be < 3.5 (true), 3.5+ should be false
-//         for i in 0..size {
-//             let value = i as f32 + 0.5;
-//             let expected = value < 3.5;
-//             assert_eq!(
-//                 output[i], expected,
-//                 "Lt test - Position {}: value {} should be {}, got {}",
-//                 i, value, expected, output[i]
-//             );
-//         }
-//     }
-// }
diff --git a/vortex-array/src/operator/display.rs b/vortex-array/src/operator/display.rs
deleted file mode 100644
index 6f0ad498ec1..00000000000
--- a/vortex-array/src/operator/display.rs
+++ /dev/null
@@ -1,32 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-use std::fmt;
-use std::fmt::{Display, Formatter};
-
-use crate::operator::Operator;
-
-impl dyn Operator + '_ {
-    pub fn display_tree(&self) -> impl Display {
-        self
-    }
-}
-
-pub enum DisplayFormat {
-    Compact,
-    Tree,
-}
-
-impl Display for dyn Operator + '_ {
-    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
-        write!(f, "{}", self.fmt_all())
-    }
-}
-
-pub struct TreeNodeDisplay<'a, T: Operator + ?Sized>(pub &'a T);
-
-impl<'a, T: Operator + ?Sized> Display for TreeNodeDisplay<'a, T> {
-    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
-        self.0.fmt_as(DisplayFormat::Tree, f)
-    }
-}
diff --git a/vortex-array/src/operator/filter.rs b/vortex-array/src/operator/filter.rs
deleted file mode 100644
index d3830fb2dc3..00000000000
--- a/vortex-array/src/operator/filter.rs
+++ /dev/null
@@ -1,148 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-use std::any::Any;
-use std::fmt::Debug;
-use std::hash::Hasher;
-use std::slice;
-use std::sync::Arc;
-
-use async_trait::async_trait;
-use vortex_dtype::DType;
-use vortex_error::{VortexExpect, VortexResult};
-use vortex_mask::Mask;
-
-use crate::compute::filter;
-use crate::operator::{
-    BatchBindCtx, BatchExecution, BatchExecutionRef, BatchOperator, LengthBounds, Operator,
-    OperatorEq, OperatorHash, OperatorId, OperatorRef,
-};
-use crate::{Array, Canonical, IntoArray};
-
-#[derive(Debug)]
-pub struct FilterOperator {
-    child: OperatorRef,
-    mask: Mask,
-}
-
-impl OperatorEq for FilterOperator {
-    fn operator_eq(&self, other: &Self) -> bool {
-        self.child.operator_eq(&other.child) && self.mask.operator_eq(&other.mask)
-    }
-}
-
-impl OperatorHash for FilterOperator {
-    fn operator_hash<H: Hasher>(&self, state: &mut H) {
-        self.child.operator_hash(state);
-        self.mask.operator_hash(state);
-    }
-}
-
-impl FilterOperator {
-    pub fn new(child: OperatorRef, mask: Mask) -> FilterOperator {
-        assert!(
-            child.bounds().contains(mask.len()),
-            "Mask length must be within child bounds"
-        );
-        FilterOperator { child, mask }
-    }
-
-    pub fn mask(&self) -> &Mask {
-        &self.mask
-    }
-}
-
-impl Operator for FilterOperator {
-    fn id(&self) -> OperatorId {
-        OperatorId::from("vortex.filter")
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn dtype(&self) -> &DType {
-        self.child.dtype()
-    }
-
-    fn bounds(&self) -> LengthBounds {
-        self.mask.true_count().into()
-    }
-
-    fn children(&self) -> &[OperatorRef] {
-        slice::from_ref(&self.child)
-    }
-
-    fn with_children(self: Arc<Self>, children: Vec<OperatorRef>) -> VortexResult<OperatorRef> {
-        Ok(Arc::new(FilterOperator {
-            child: children.into_iter().next().vortex_expect("missing child"),
-            mask: self.mask.clone(),
-        }))
-    }
-
-    fn reduce_children(&self) -> VortexResult<Option<OperatorRef>> {
-        // We need selection target information to be defined for all children.
-        let Some(selection_targets): Option<Vec<_>> = self
-            .child
-            .children()
-            .iter()
-            .enumerate()
-            .map(|(i, child)| child.is_selection_target(i))
-            .collect()
-        else {
-            return Ok(None);
-        };
-
-        // Selection is defined to be false for all children, so we cannot push down the
-        // filter.
-        if selection_targets.iter().all(|s| !s) {
-            return Ok(None);
-        }
-
-        // Otherwise, we push down the filter to all children that are selection targets.
-        let children = self
-            .child
-            .children()
-            .iter()
-            .cloned()
-            .enumerate()
-            .map(|(i, child)| {
-                if selection_targets[i] {
-                    // Push-down the filter to this child.
-                    Arc::new(FilterOperator::new(child, self.mask.clone())) as OperatorRef
-                } else {
-                    child
-                }
-            })
-            .collect();
-
-        Ok(Some(self.child.clone().with_children(children)?))
-    }
-
-    fn as_batch(&self) -> Option<&dyn BatchOperator> {
-        Some(self)
-    }
-}
-
-impl BatchOperator for FilterOperator {
-    fn bind(&self, ctx: &mut dyn BatchBindCtx) -> VortexResult<BatchExecutionRef> {
-        Ok(Box::new(FilterExecution {
-            child: ctx.child(0)?,
-            mask: self.mask.clone(),
-        }) as BatchExecutionRef)
-    }
-}
-
-struct FilterExecution {
-    child: BatchExecutionRef,
-    mask: Mask,
-}
-
-#[async_trait]
-impl BatchExecution for FilterExecution {
-    async fn execute(self: Box<Self>) -> VortexResult<Canonical> {
-        let child = self.child.execute().await?;
-        // TODO(ngates): obviously inline all canonical implementations here
-        Ok(filter(child.into_array().as_ref(), &self.mask)?.to_canonical())
-    }
-}
diff --git a/vortex-array/src/operator/getitem.rs b/vortex-array/src/operator/getitem.rs
deleted file mode 100644
index be4f7a2d779..00000000000
--- a/vortex-array/src/operator/getitem.rs
+++ /dev/null
@@ -1,73 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-use std::any::Any;
-use std::hash::{Hash, Hasher};
-use std::slice;
-use std::sync::Arc;
-
-use vortex_dtype::{DType, FieldName};
-use vortex_error::{VortexExpect, VortexResult};
-
-use crate::operator::{LengthBounds, Operator, OperatorEq, OperatorHash, OperatorId, OperatorRef};
-
-/// An operator that extracts a field from a struct array.
-#[derive(Debug)]
-pub struct GetItemOperator {
-    // The struct-like child operator.
-    child: OperatorRef,
-    field: FieldName,
-    // The dtype of the extracted field.
-    dtype: DType,
-}
-
-impl OperatorHash for GetItemOperator {
-    fn operator_hash<H: Hasher>(&self, state: &mut H) {
-        self.child.operator_hash(state);
-        self.field.hash(state);
-        self.dtype.hash(state);
-    }
-}
-impl OperatorEq for GetItemOperator {
-    fn operator_eq(&self, other: &Self) -> bool {
-        self.child.operator_eq(&other.child)
-            && self.field == other.field
-            && self.dtype == other.dtype
-    }
-}
-
-impl GetItemOperator {
-    pub fn field_name(&self) -> &FieldName {
-        &self.field
-    }
-}
-
-impl Operator for GetItemOperator {
-    fn id(&self) -> OperatorId {
-        OperatorId::from("vortex.getitem")
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn dtype(&self) -> &DType {
-        &self.dtype
-    }
-
-    fn bounds(&self) -> LengthBounds {
-        self.child.bounds()
-    }
-
-    fn children(&self) -> &[OperatorRef] {
-        slice::from_ref(&self.child)
-    }
-
-    fn with_children(self: Arc<Self>, children: Vec<OperatorRef>) -> VortexResult<OperatorRef> {
-        Ok(Arc::new(GetItemOperator {
-            child: children.into_iter().next().vortex_expect("missing child"),
-            field: self.field.clone(),
-            dtype: self.dtype.clone(),
-        }))
-    }
-}
diff --git a/vortex-array/src/operator/hash.rs b/vortex-array/src/operator/hash.rs
deleted file mode 100644
index cb7b95a97e4..00000000000
--- a/vortex-array/src/operator/hash.rs
+++ /dev/null
@@ -1,177 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-use std::any::Any;
-use std::hash::{Hash, Hasher};
-use std::sync::Arc;
-
-use vortex_buffer::Buffer;
-use vortex_mask::Mask;
-
-use crate::ArrayRef;
-use crate::operator::{Operator, OperatorRef};
-use crate::validity::Validity;
-
-/// A hash trait for operators that loosens the semantics to permit pointer-based hashing for
-/// data objects such as buffers.
-///
-/// Note that since this trait can use pointer hashing, the hash is only valid for the lifetime of
-/// the object.
-pub trait OperatorHash {
-    fn operator_hash<H: Hasher>(&self, state: &mut H);
-}
-
-pub trait DynOperatorHash: private::SealedHash {
-    fn dyn_operator_hash(&self, state: &mut dyn Hasher);
-}
-
-impl<T: OperatorHash + ?Sized> DynOperatorHash for T {
-    fn dyn_operator_hash(&self, mut state: &mut dyn Hasher) {
-        OperatorHash::operator_hash(self, &mut state);
-    }
-}
-
-/// An equality trait for operators that loosens the semantics to permit pointer-based equality
-/// for data objects such as buffers.
-pub trait OperatorEq {
-    fn operator_eq(&self, other: &Self) -> bool;
-}
-
-pub trait DynOperatorEq: private::SealedEq {
-    fn dyn_operator_eq(&self, other: &dyn Any) -> bool;
-}
-
-impl<T: OperatorEq + 'static> DynOperatorEq for T {
-    fn dyn_operator_eq(&self, other: &dyn Any) -> bool {
-        other
-            .downcast_ref::<Self>()
-            .is_some_and(|other| OperatorEq::operator_eq(self, other))
-    }
-}
-
-mod private {
-    use crate::operator::{OperatorEq, OperatorHash};
-
-    pub trait SealedHash {}
-    impl<T: OperatorHash + ?Sized> SealedHash for T {}
-    pub trait SealedEq {}
-    impl<T: OperatorEq + ?Sized> SealedEq for T {}
-}
-
-impl OperatorHash for dyn Operator + '_ {
-    fn operator_hash<H: Hasher>(&self, state: &mut H) {
-        self.dyn_operator_hash(state);
-    }
-}
-
-impl OperatorEq for dyn Operator + '_ {
-    fn operator_eq(&self, other: &Self) -> bool {
-        self.dyn_operator_eq(other.as_any())
-    }
-}
-
-impl OperatorHash for OperatorRef {
-    fn operator_hash<H: Hasher>(&self, state: &mut H) {
-        self.as_ref().operator_hash(state);
-    }
-}
-
-impl OperatorEq for OperatorRef {
-    fn operator_eq(&self, other: &Self) -> bool {
-        self.as_ref().operator_eq(other.as_ref())
-    }
-}
-
-/// A wrapper type to implement [`Hash`], [`PartialEq`], and [`Eq`] using the semantics defined
-/// by [`OperatorHash`] and [`OperatorEq`].
-pub struct OperatorKey<T>(pub T);
-impl<T: OperatorHash> Hash for OperatorKey<T> {
-    fn hash<H: Hasher>(&self, state: &mut H) {
-        self.0.operator_hash(state);
-    }
-}
-impl<T: OperatorEq + Any> PartialEq for OperatorKey<T> {
-    fn eq(&self, other: &Self) -> bool {
-        self.0.operator_eq(&other.0)
-    }
-}
-impl<T: OperatorEq + Any> Eq for OperatorKey<T> {}
-
-impl<T> OperatorHash for Buffer<T> {
-    fn operator_hash<H: Hasher>(&self, state: &mut H) {
-        self.as_ptr().hash(state);
-        self.len().hash(state);
-    }
-}
-impl<T> OperatorEq for Buffer<T> {
-    fn operator_eq(&self, other: &Self) -> bool {
-        self.as_ptr() == other.as_ptr() && self.len() == other.len()
-    }
-}
-
-impl OperatorHash for Mask {
-    fn operator_hash<H: Hasher>(&self, state: &mut H) {
-        std::mem::discriminant(self).hash(state);
-        match self {
-            Mask::AllTrue(len) => {
-                len.hash(state);
-            }
-            Mask::AllFalse(len) => {
-                len.hash(state);
-            }
-            Mask::Values(values) => {
-                let buffer = values.bit_buffer();
-                buffer.offset().hash(state);
-                buffer.len().hash(state);
-                buffer.inner().as_ptr().hash(state);
-            }
-        }
-    }
-}
-impl OperatorEq for Mask {
-    fn operator_eq(&self, other: &Self) -> bool {
-        match (self, other) {
-            (Mask::AllTrue(len1), Mask::AllTrue(len2)) => len1 == len2,
-            (Mask::AllFalse(len1), Mask::AllFalse(len2)) => len1 == len2,
-            (Mask::Values(buf1), Mask::Values(buf2)) => {
-                let b1 = buf1.bit_buffer();
-                let b2 = buf2.bit_buffer();
-                b1.offset() == b2.offset()
-                    && b1.len() == b2.len()
-                    && b1.inner().as_ptr() == b2.inner().as_ptr()
-            }
-            _ => false,
-        }
-    }
-}
-
-impl OperatorHash for Validity {
-    fn operator_hash<H: Hasher>(&self, state: &mut H) {
-        std::mem::discriminant(self).hash(state);
-        if let Validity::Array(array) = self {
-            Arc::as_ptr(array).hash(state);
-        }
-    }
-}
-impl OperatorEq for Validity {
-    fn operator_eq(&self, other: &Self) -> bool {
-        match (self, other) {
-            (Validity::AllValid, Validity::AllValid) => true,
-            (Validity::AllInvalid, Validity::AllInvalid) => true,
-            (Validity::NonNullable, Validity::NonNullable) => true,
-            (Validity::Array(arr1), Validity::Array(arr2)) => Arc::ptr_eq(arr1, arr2),
-            _ => false,
-        }
-    }
-}
-
-impl OperatorHash for ArrayRef {
-    fn operator_hash<H: Hasher>(&self, state: &mut H) {
-        Arc::as_ptr(self).hash(state);
-    }
-}
-impl OperatorEq for ArrayRef {
-    fn operator_eq(&self, other: &Self) -> bool {
-        Arc::ptr_eq(self, other)
-    }
-}
diff --git a/vortex-array/src/operator/metrics.rs b/vortex-array/src/operator/metrics.rs
deleted file mode 100644
index c5d08542a29..00000000000
--- a/vortex-array/src/operator/metrics.rs
+++ /dev/null
@@ -1,163 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-use std::any::Any;
-use std::fmt::Debug;
-use std::hash::{Hash, Hasher};
-use std::sync::Arc;
-
-use async_trait::async_trait;
-use vortex_dtype::DType;
-use vortex_error::{VortexExpect, VortexResult};
-use vortex_metrics::{Timer, VortexMetrics};
-
-use crate::Canonical;
-use crate::operator::{
-    BatchBindCtx, BatchExecution, BatchExecutionRef, BatchOperator, LengthBounds, Operator,
-    OperatorEq, OperatorHash, OperatorId, OperatorRef,
-};
-use crate::pipeline::bits::BitView;
-use crate::pipeline::view::ViewMut;
-use crate::pipeline::{BindContext, Kernel, KernelContext, PipelinedOperator, RowSelection};
-
-/// An operator that wraps another operator and records metrics about its execution.
-#[derive(Debug)]
-pub struct MetricsOperator {
-    inner: OperatorRef,
-    metrics: VortexMetrics,
-}
-
-impl OperatorHash for MetricsOperator {
-    fn operator_hash<H: Hasher>(&self, state: &mut H) {
-        self.inner.operator_hash(state);
-        // Include our ID just to differentiate from the inner operator
-        self.id().hash(state);
-    }
-}
-
-impl OperatorEq for MetricsOperator {
-    fn operator_eq(&self, other: &Self) -> bool {
-        self.inner.operator_eq(&other.inner)
-    }
-}
-
-impl MetricsOperator {
-    pub fn new(inner: OperatorRef, metrics: VortexMetrics) -> Self {
-        let metrics = metrics.child_with_tags([("operator", inner.id().as_ref().to_string())]);
-        Self { inner, metrics }
-    }
-
-    pub fn metrics(&self) -> &VortexMetrics {
-        &self.metrics
-    }
-}
-
-impl Operator for MetricsOperator {
-    fn id(&self) -> OperatorId {
-        OperatorId::from("vortex.metrics")
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn dtype(&self) -> &DType {
-        self.inner.dtype()
-    }
-
-    fn bounds(&self) -> LengthBounds {
-        self.inner.bounds()
-    }
-
-    fn children(&self) -> &[OperatorRef] {
-        self.inner.children()
-    }
-
-    fn with_children(self: Arc<Self>, children: Vec<OperatorRef>) -> VortexResult<OperatorRef> {
-        Ok(Arc::new(MetricsOperator {
-            inner: self.inner.clone().with_children(children)?,
-            metrics: self.metrics.clone(),
-        }))
-    }
-
-    fn as_batch(&self) -> Option<&dyn BatchOperator> {
-        self.inner.as_batch().is_some().then_some(self)
-    }
-
-    fn as_pipelined(&self) -> Option<&dyn PipelinedOperator> {
-        // Only support pipelined execution if the inner operator does
-        self.inner.as_pipelined().is_some().then_some(self)
-    }
-}
-
-impl BatchOperator for MetricsOperator {
-    fn bind(&self, ctx: &mut dyn BatchBindCtx) -> VortexResult<BatchExecutionRef> {
-        let inner = self.inner.as_batch().vortex_expect("checked").bind(ctx)?;
-        let timer = self.metrics.timer("operator.batch.execute");
-        Ok(Box::new(MetricsBatchExecution { inner, timer }))
-    }
-}
-
-struct MetricsBatchExecution {
-    inner: BatchExecutionRef,
-    timer: Arc<Timer>,
-}
-
-#[async_trait]
-impl BatchExecution for MetricsBatchExecution {
-    async fn execute(self: Box<Self>) -> VortexResult<Canonical> {
-        let _timer = self.timer.time();
-        self.inner.execute().await
-    }
-}
-
-impl PipelinedOperator for MetricsOperator {
-    fn row_selection(&self) -> RowSelection {
-        self.inner
-            .as_pipelined()
-            .vortex_expect("checked")
-            .row_selection()
-    }
-
-    fn bind(&self, ctx: &dyn BindContext) -> VortexResult<Box<dyn Kernel>> {
-        let inner = self
-            .inner
-            .as_pipelined()
-            .vortex_expect("checked")
-            .bind(ctx)?;
-        let timer = self.metrics.timer("operator.operator.step");
-        Ok(Box::new(MetricsKernel { inner, timer }))
-    }
-
-    fn vector_children(&self) -> Vec<usize> {
-        self.inner
-            .as_pipelined()
-            .vortex_expect("checked")
-            .vector_children()
-    }
-
-    fn batch_children(&self) -> Vec<usize> {
-        self.inner
-            .as_pipelined()
-            .vortex_expect("checked")
-            .batch_children()
-    }
-}
-
-struct MetricsKernel {
-    inner: Box<dyn Kernel>,
-    timer: Arc<Timer>,
-}
-
-impl Kernel for MetricsKernel {
-    fn step(
-        &self,
-        ctx: &KernelContext,
-        chunk_idx: usize,
-        selection: &BitView,
-        out: &mut ViewMut,
-    ) -> VortexResult<()> {
-        let _timer = self.timer.time();
-        self.inner.step(ctx, chunk_idx, selection, out)
-    }
-}
diff --git a/vortex-array/src/operator/mod.rs b/vortex-array/src/operator/mod.rs
deleted file mode 100644
index 67ef0734895..00000000000
--- a/vortex-array/src/operator/mod.rs
+++ /dev/null
@@ -1,242 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-//! This module defines a new way of modelling arrays and expressions in Vortex. To avoid naming
-//! conflicts, we refer to the new model as "operators".
-//!
-//! Operators form a more traditional "logical plan" as might be seen in other query engines.
-//! Each operator supports one primary function which is to produce a canonical representation of
-//! its data, known as `canonicalization`. Operators have the option to produce this canonical
-//! form using different execution models, including batch, pipelined, and GPU.
-//!
-//! Initial designs for this module involved passing masks down through the physical execution
-//! tree as futures, allowing operators to skip computation for rows that are not needed. We
-//! ultimately decided against this approach and instead introduce a `Filter` operator
-//! that can be pushed down in the same way as any other operator.
-//!
-//! On the one hand, this means common subtree elimination is much easier, since we know the mask
-//! or identity of the mask future inside the filter operator up-front. On the other hand, it
-//! means that an operator no longer has a known length. In the end state, we will redefine a
-//! Vortex array to be a wrapped around an operator that _does_ have a known length, amongst other
-//! properties (such as non-blocking evaluation).
-//!
-//! We also introduce the idea of an execution that can evaluate an operator tree efficiently. It
-//! supports common subtree elimination, as well as extracting sub-graphs for pipelined and GPU
-//! execution. The execution is also responsible for managing memory and scheduling work across
-//! different execution resources.
-//!
-
-#![allow(dead_code)]
-
-pub mod canonical;
-pub mod compare;
-mod display;
-pub mod filter;
-pub mod getitem;
-mod hash;
-pub mod metrics;
-mod optimize;
-pub mod slice;
-
-use std::any::{Any, type_name};
-use std::fmt;
-use std::fmt::{Debug, Formatter};
-use std::ops::BitAnd;
-use std::sync::Arc;
-
-use arcref::ArcRef;
-use async_trait::async_trait;
-pub use display::*;
-pub use hash::*;
-use termtree::Tree;
-use vortex_dtype::DType;
-use vortex_error::VortexResult;
-
-use crate::Canonical;
-use crate::pipeline::PipelinedOperator;
-
-pub type OperatorId = ArcRef<str>;
-pub type OperatorRef = Arc<dyn Operator>;
-
-/// An operator represents a node in a logical query plan.
-pub trait Operator: 'static + Send + Sync + Debug + DynOperatorHash + DynOperatorEq {
-    /// The unique identifier for this operator instance.
-    fn id(&self) -> OperatorId;
-
-    /// For downcasting.
-    fn as_any(&self) -> &dyn Any;
-
-    /// Returns the [`DType`] of the array produced by this operator.
-    fn dtype(&self) -> &DType;
-
-    /// Returns the bounds on the number of rows produced by this operator.
-    fn bounds(&self) -> LengthBounds;
-
-    /// Returns the exact number of rows produced by this operator, if known.
-    fn len(&self) -> Option<usize> {
-        self.bounds().maybe_len()
-    }
-
-    /// Returns if this operator is known to be empty (i.e. max bound is 0).
-    fn is_empty(&self) -> bool {
-        self.bounds().max == 0
-    }
-
-    /// The children of this operator.
-    fn children(&self) -> &[OperatorRef];
-
-    /// The number of children of this operator.
-    fn nchildren(&self) -> usize {
-        self.children().len()
-    }
-
-    /// Override the default formatting of this operator.
-    fn fmt_as(&self, _df: DisplayFormat, f: &mut Formatter) -> fmt::Result {
-        write!(f, "{}", type_name::<Self>())
-    }
-
-    fn fmt_all(&self) -> String {
-        let node_name = TreeNodeDisplay(self).to_string();
-        let child_trees: Vec<_> = self
-            .children()
-            .iter()
-            .map(|child| child.fmt_all())
-            .collect();
-        Tree::new(node_name)
-            .with_leaves(child_trees)
-            .with_multiline(true)
-            .to_string()
-    }
-
-    /// Create a new instance of this operator with the given children.
-    ///
-    /// ## Panics
-    ///
-    /// Panics if the number or dtypes of children are incorrect.
-    ///
-    fn with_children(self: Arc<Self>, _children: Vec<OperatorRef>) -> VortexResult<OperatorRef>;
-
-    /// Attempt to optimize this node by analyzing its children.
-    ///
-    /// For example, if all the children are constant, this function should perform constant
-    /// folding and return a constant operator.
-    ///
-    /// This function should typically be implemented only for self-contained optimizations based
-    /// on child properties
-    fn reduce_children(&self) -> VortexResult<Option<OperatorRef>> {
-        Ok(None)
-    }
-
-    /// Attempt to push down a parent operator through this node.
-    ///
-    /// The `child_idx` parameter indicates which child of the parent this operator occupies.
-    /// For example, if the parent is a binary operator, and this operator is the left child,
-    /// then `child_idx` will be 0. If this operator is the right child, then `child_idx` will be 1.
-    ///
-    /// The returned operator will replace the parent in the tree.
-    ///
-    /// This function should typically be implemented for cross-operator optimizations where the
-    /// child needs to adapt to the parent's requirements
-    fn reduce_parent(
-        &self,
-        _parent: OperatorRef,
-        _child_idx: usize,
-    ) -> VortexResult<Option<OperatorRef>> {
-        Ok(None)
-    }
-
-    /// Return `true` if the given child is considered to be a selection target.
-    ///
-    /// The definition of this is such that pushing a selection operator down to all selection
-    /// targets will result in the same output as a selection on this operator.
-    ///
-    /// For example, `select(Op, mask) == Op(select(child, mask), ...)` for all children that are
-    /// selection targets.
-    ///
-    /// If any child index returns `None`, then selection pushdown is not possible.
-    /// If all children return `Some(false)`, then selection pushdown is not possible.
-    fn is_selection_target(&self, _child_idx: usize) -> Option<bool> {
-        None
-    }
-
-    /// Returns this operator as a [`BatchOperator`] if it supports batch execution.
-    fn as_batch(&self) -> Option<&dyn BatchOperator> {
-        None
-    }
-
-    /// Returns this operator as a [`PipelinedOperator`] if it supports pipelined execution.
-    ///
-    /// Note that operators that implement [`PipelinedOperator`] *do not need* to implement
-    /// [`BatchOperator`], although they may choose to do so.
-    fn as_pipelined(&self) -> Option<&dyn PipelinedOperator> {
-        None
-    }
-}
-
-/// Represents the known row count bounds of an operator.
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
-pub struct LengthBounds {
-    pub min: usize,
-    pub max: usize,
-}
-
-impl LengthBounds {
-    pub fn maybe_len(&self) -> Option<usize> {
-        (self.min == self.max).then_some(self.min)
-    }
-
-    pub fn contains(&self, len: usize) -> bool {
-        self.min <= len && len <= self.max
-    }
-
-    pub fn intersect_all<I: IntoIterator<Item = LengthBounds>>(iters: I) -> Self {
-        let mut min = 0;
-        let mut max = 0;
-        for bounds in iters {
-            min = min.max(bounds.min);
-            max = max.min(bounds.max);
-        }
-        Self { min, max }
-    }
-}
-
-impl BitAnd for LengthBounds {
-    type Output = Self;
-
-    fn bitand(self, rhs: Self) -> Self::Output {
-        Self {
-            min: self.min.max(rhs.min),
-            max: self.max.min(rhs.max),
-        }
-    }
-}
-
-impl From<usize> for LengthBounds {
-    fn from(value: usize) -> Self {
-        Self {
-            min: value,
-            max: value,
-        }
-    }
-}
-
-/// The default execution mode for an operator is batch mode.
-pub trait BatchOperator: Operator {
-    fn bind(&self, ctx: &mut dyn BatchBindCtx) -> VortexResult<BatchExecutionRef>;
-}
-
-pub trait BatchBindCtx {
-    /// Returns the execution for the child at the given index, consuming it from the context.
-    /// Each child may be consumed only once.
-    fn child(&mut self, idx: usize) -> VortexResult<BatchExecutionRef>;
-}
-
-/// The primary execution trait for operators.
-///
-/// Alternatively, or additionally, operators may choose to implement [`PipelinedOperator`].
-#[async_trait]
-pub trait BatchExecution: Send {
-    async fn execute(self: Box<Self>) -> VortexResult<Canonical>;
-}
-
-pub type BatchExecutionRef = Box<dyn BatchExecution>;
diff --git a/vortex-array/src/operator/optimize.rs b/vortex-array/src/operator/optimize.rs
deleted file mode 100644
index bdb56c67e3b..00000000000
--- a/vortex-array/src/operator/optimize.rs
+++ /dev/null
@@ -1,33 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-use std::sync::Arc;
-
-use itertools::Itertools;
-use vortex_error::VortexResult;
-
-use crate::operator::{Operator, OperatorRef};
-
-impl dyn Operator + '_ {
-    /// Optimize the operator tree rooted at this operator by applying local
-    /// optimizations such as reducing redundant operators.
-    pub fn optimize(self: Arc<Self>) -> VortexResult<OperatorRef> {
-        let children = self
-            .children()
-            .iter()
-            .map(|child| child.clone().optimize())
-            .try_collect()?;
-
-        let mut operator = self.with_children(children)?;
-        operator = operator.reduce_children()?.unwrap_or(operator);
-
-        let parent = operator.clone();
-        for (idx, child) in operator.children().iter().enumerate() {
-            if let Some(new_operator) = child.reduce_parent(parent.clone(), idx)? {
-                return Ok(new_operator);
-            }
-        }
-
-        Ok(operator)
-    }
-}
diff --git a/vortex-array/src/operator/slice.rs b/vortex-array/src/operator/slice.rs
deleted file mode 100644
index 9427565770d..00000000000
--- a/vortex-array/src/operator/slice.rs
+++ /dev/null
@@ -1,138 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-use std::any::Any;
-use std::hash::Hash;
-use std::ops::Range;
-use std::sync::Arc;
-
-use async_trait::async_trait;
-use itertools::Itertools;
-use vortex_dtype::DType;
-use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_bail};
-
-use crate::operator::{
-    BatchBindCtx, BatchExecution, BatchExecutionRef, BatchOperator, LengthBounds, Operator,
-    OperatorEq, OperatorHash, OperatorId, OperatorRef,
-};
-use crate::{Array, Canonical, IntoArray};
-
-#[derive(Debug, Clone)]
-pub struct SliceOperator {
-    child: OperatorRef,
-    range: Range<usize>,
-}
-
-impl SliceOperator {
-    pub fn try_new(child: OperatorRef, range: Range<usize>) -> VortexResult<Self> {
-        if range.start > range.end {
-            vortex_bail!(
-                "invalid slice range: start > end ({} > {})",
-                range.start,
-                range.end
-            );
-        }
-        if range.end > child.bounds().max {
-            vortex_bail!(
-                "slice range end out of bounds: {} > {}",
-                range.end,
-                child.bounds().max
-            );
-        }
-        Ok(SliceOperator { child, range })
-    }
-
-    pub fn range(&self) -> &Range<usize> {
-        &self.range
-    }
-}
-
-impl OperatorHash for SliceOperator {
-    fn operator_hash<H: std::hash::Hasher>(&self, state: &mut H) {
-        self.child.operator_hash(state);
-        self.range.hash(state);
-    }
-}
-
-impl OperatorEq for SliceOperator {
-    fn operator_eq(&self, other: &Self) -> bool {
-        self.range == other.range && self.child.operator_eq(&other.child)
-    }
-}
-
-impl Operator for SliceOperator {
-    fn id(&self) -> OperatorId {
-        OperatorId::from("vortex.slice")
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn dtype(&self) -> &DType {
-        self.child.dtype()
-    }
-
-    fn bounds(&self) -> LengthBounds {
-        (self.range.end - self.range.start).into()
-    }
-
-    fn children(&self) -> &[OperatorRef] {
-        std::slice::from_ref(&self.child)
-    }
-
-    fn with_children(self: Arc<Self>, children: Vec<OperatorRef>) -> VortexResult<OperatorRef> {
-        Ok(Arc::new(SliceOperator::try_new(
-            children.into_iter().next().vortex_expect("missing child"),
-            self.range.clone(),
-        )?))
-    }
-
-    fn reduce_children(&self) -> VortexResult<Option<OperatorRef>> {
-        // We push down the slice operator to any child that is aligned to the parent.
-        let children = (0..self.nchildren())
-            .map(|i| {
-                let child = self.child.children()[i].clone();
-
-                if self.child.is_selection_target(i).unwrap_or_default() {
-                    // Push-down the filter to this child.
-                    Ok::<_, VortexError>(Arc::new(SliceOperator::try_new(
-                        child,
-                        self.range.clone(),
-                    )?) as OperatorRef)
-                } else {
-                    Ok(child)
-                }
-            })
-            .try_collect()?;
-
-        Ok(Some(self.child.clone().with_children(children)?))
-    }
-
-    fn as_batch(&self) -> Option<&dyn BatchOperator> {
-        Some(self)
-    }
-}
-
-impl BatchOperator for SliceOperator {
-    fn bind(&self, ctx: &mut dyn BatchBindCtx) -> VortexResult<BatchExecutionRef> {
-        let child_exec = ctx.child(0)?;
-        Ok(Box::new(SliceExecution {
-            child: child_exec,
-            range: self.range.clone(),
-        }))
-    }
-}
-
-struct SliceExecution {
-    child: BatchExecutionRef,
-    range: Range<usize>,
-}
-
-#[async_trait]
-impl BatchExecution for SliceExecution {
-    async fn execute(self: Box<Self>) -> VortexResult<Canonical> {
-        let child = self.child.execute().await?;
-        Ok(child.into_array().slice(self.range).to_canonical())
-    }
-}
diff --git a/vortex-array/src/pipeline/mod.rs b/vortex-array/src/pipeline/mod.rs
index ff918d86036..76a79dbacf1 100644
--- a/vortex-array/src/pipeline/mod.rs
+++ b/vortex-array/src/pipeline/mod.rs
@@ -19,22 +19,20 @@
 //! It is a work-in-progress and is not yet used in production.
 
 pub mod bits;
-pub(crate) mod operator;
 mod types;
 pub mod vec;
 pub mod view;
 
 use std::cell::RefCell;
 
-use self::vec::Vector;
-use crate::operator::Operator;
-use crate::pipeline::bits::BitView;
-use crate::Canonical;
 pub use types::*;
 use vec::VectorRef;
 use vortex_error::VortexResult;
 use vortex_vector::VectorMut;
 
+use self::vec::Vector;
+use crate::pipeline::bits::BitView;
+
 /// The number of elements in each step of a Vortex evaluation operator.
 pub const N: usize = 1024;
 
@@ -123,8 +121,6 @@ pub trait Kernel: Send {
 pub struct KernelContext {
     /// The allocated vectors for intermediate results.
     pub(crate) vectors: Vec<RefCell<Vector>>,
-    /// The computed batch inputs.
-    pub(crate) batch_inputs: Vec<Canonical>,
 }
 
 impl KernelContext {
diff --git a/vortex-array/src/vtable/operator.rs b/vortex-array/src/vtable/operator.rs
index c0379350440..857436bfc34 100644
--- a/vortex-array/src/vtable/operator.rs
+++ b/vortex-array/src/vtable/operator.rs
@@ -1,15 +1,15 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-use vortex_error::{vortex_bail, VortexResult};
+use vortex_error::{VortexResult, vortex_bail};
 use vortex_mask::Mask;
 use vortex_vector::Vector;
 
+use crate::ArrayRef;
 use crate::array::IntoArray;
 use crate::execution::{BatchKernelRef, BindCtx, ExecutionCtx};
-use crate::operator::OperatorRef;
+use crate::pipeline::Pipelined;
 use crate::vtable::{NotSupported, VTable};
-use crate::ArrayRef;
 
 /// A vtable for the new operator-based array functionality. Eventually this vtable will be
 /// merged into the main `VTable`, but for now it is kept separate to allow for incremental
@@ -17,12 +17,6 @@ use crate::ArrayRef;
 ///
 /// See <https://github.com/vortex-data/vortex/pull/4726> for the operators RFC.
 pub trait OperatorVTable<V: VTable> {
-    /// Convert the current array into a [`OperatorRef`].
-    /// Returns `None` if the array cannot be converted to an operator.
-    fn to_operator(_array: &V::Array) -> VortexResult<Option<OperatorRef>> {
-        Ok(None)
-    }
-
     /// Returns a canonical [`Vector`] containing the rows indicated by the given selection [`Mask`].
     ///
     /// The returned vector must be the appropriate one for the array's logical type (they are
@@ -46,7 +40,11 @@ pub trait OperatorVTable<V: VTable> {
         Self::bind(array, Some(&selection.clone().into_array()), &mut ())?.execute()
     }
 
-    /// Returns the
+    /// Returns an implementation of the [`Pipelined`] trait for this array, if pipelined execution
+    /// is supported.
+    fn execute_pipelined(_array: &V::Array) -> Option<&dyn Pipelined> {
+        None
+    }
 
     /// Bind the array for execution in batch mode.
     ///
@@ -106,10 +104,6 @@ pub trait OperatorVTable<V: VTable> {
 }
 
 impl<V: VTable> OperatorVTable<V> for NotSupported {
-    fn to_operator(_array: &V::Array) -> VortexResult<Option<OperatorRef>> {
-        Ok(None)
-    }
-
     fn bind(
         array: &V::Array,
         _selection: Option<&ArrayRef>,

From 70d73cc3023f2395d25b4c44703a218aa7af4630 Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Fri, 7 Nov 2025 14:42:20 -0500
Subject: [PATCH 03/10] pipelined execution

Signed-off-by: Nicholas Gates <nick@nickgates.com>
---
 vortex-array/src/array/operator.rs            |  14 +-
 .../src/pipeline/{bits/view.rs => bits.rs}    |   0
 vortex-array/src/pipeline/bits/mod.rs         |  10 -
 vortex-array/src/pipeline/bits/vector.rs      | 256 ------------------
 vortex-array/src/pipeline/bits/view_mut.rs    | 135 ---------
 vortex-array/src/pipeline/mod.rs              |  66 +++--
 vortex-array/src/pipeline/source_driver.rs    | 105 +++++++
 vortex-array/src/pipeline/types.rs            |  97 -------
 vortex-array/src/pipeline/vec.rs              | 161 -----------
 vortex-array/src/pipeline/view.rs             | 225 ---------------
 vortex-array/src/vtable/operator.rs           |  11 +-
 11 files changed, 161 insertions(+), 919 deletions(-)
 rename vortex-array/src/pipeline/{bits/view.rs => bits.rs} (100%)
 delete mode 100644 vortex-array/src/pipeline/bits/mod.rs
 delete mode 100644 vortex-array/src/pipeline/bits/vector.rs
 delete mode 100644 vortex-array/src/pipeline/bits/view_mut.rs
 create mode 100644 vortex-array/src/pipeline/source_driver.rs
 delete mode 100644 vortex-array/src/pipeline/types.rs
 delete mode 100644 vortex-array/src/pipeline/vec.rs
 delete mode 100644 vortex-array/src/pipeline/view.rs

diff --git a/vortex-array/src/array/operator.rs b/vortex-array/src/array/operator.rs
index 0a0e0705046..81194065d6d 100644
--- a/vortex-array/src/array/operator.rs
+++ b/vortex-array/src/array/operator.rs
@@ -3,11 +3,12 @@
 
 use std::sync::Arc;
 
-use vortex_error::{VortexResult, vortex_panic};
+use vortex_error::{vortex_panic, VortexResult};
 use vortex_mask::Mask;
-use vortex_vector::{Vector, VectorOps, vector_matches_dtype};
+use vortex_vector::{vector_matches_dtype, Vector, VectorOps};
 
 use crate::execution::{BatchKernelRef, BindCtx, DummyExecutionCtx, ExecutionCtx};
+use crate::pipeline::source_driver::PipelineDriver;
 use crate::vtable::{OperatorVTable, VTable};
 use crate::{Array, ArrayAdapter, ArrayRef};
 
@@ -62,6 +63,15 @@ impl ArrayOperator for Arc<dyn Array> {
 
 impl<V: VTable> ArrayOperator for ArrayAdapter<V> {
     fn execute_batch(&self, selection: &Mask, ctx: &mut dyn ExecutionCtx) -> VortexResult<Vector> {
+        // Check to see if we should execute the array in a pipelined fashion. This is a
+        // short-circuit for now until we have a full pipeline executor, but it allows each arrow
+        // to only implement the pipeline API.
+        if let Some(pipelined) =
+            <V::OperatorVTable as OperatorVTable<V>>::execute_pipelined(&self.0)
+        {
+            PipelineDriver::new(pipelined).execute
+        }
+
         let vector =
             <V::OperatorVTable as OperatorVTable<V>>::execute_batch(&self.0, selection, ctx)?;
 
diff --git a/vortex-array/src/pipeline/bits/view.rs b/vortex-array/src/pipeline/bits.rs
similarity index 100%
rename from vortex-array/src/pipeline/bits/view.rs
rename to vortex-array/src/pipeline/bits.rs
diff --git a/vortex-array/src/pipeline/bits/mod.rs b/vortex-array/src/pipeline/bits/mod.rs
deleted file mode 100644
index 1ea4af9e628..00000000000
--- a/vortex-array/src/pipeline/bits/mod.rs
+++ /dev/null
@@ -1,10 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-mod vector;
-mod view;
-mod view_mut;
-
-pub use vector::*;
-pub use view::*;
-pub use view_mut::*;
diff --git a/vortex-array/src/pipeline/bits/vector.rs b/vortex-array/src/pipeline/bits/vector.rs
deleted file mode 100644
index 5e214866da3..00000000000
--- a/vortex-array/src/pipeline/bits/vector.rs
+++ /dev/null
@@ -1,256 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-use std::fmt::{Debug, Formatter};
-use std::ops::Not;
-use std::sync::{Arc, LazyLock};
-
-use bitvec::array::BitArray;
-use bitvec::order::Lsb0;
-
-use super::{BitView, BitViewMut};
-use crate::pipeline::{N, N_WORDS};
-
-static EMPTY: LazyLock<BitVector> = LazyLock::new(|| BitVector {
-    bits: Arc::new(BitArray::ZERO),
-    true_count: 0,
-});
-
-static FULL: LazyLock<BitVector> = LazyLock::new(|| BitVector {
-    bits: Arc::new(BitArray::ZERO.not()),
-    true_count: N,
-});
-
-/// An owned fixed-size bit vector of length `N` bits, represented as an array of usize words.
-///
-/// Internally, it uses a [`BitArray`] to store the bits, but this crate has some
-/// performance foot-guns in cases where we can lean on better assumptions, and therefore we wrap
-/// it up for use within Vortex.
-/// Owned bit vector for storing boolean selection masks.
-#[derive(Clone)]
-pub struct BitVector {
-    pub(super) bits: Arc<BitArray<[usize; N_WORDS], Lsb0>>,
-    pub(super) true_count: usize,
-}
-
-impl Debug for BitVector {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("BitVector")
-            .field("true_count", &self.true_count)
-            //.field("bits", &self.bits.as_raw_slice())
-            .finish()
-    }
-}
-
-impl PartialEq for BitVector {
-    fn eq(&self, other: &Self) -> bool {
-        Arc::ptr_eq(&self.bits, &other.bits)
-            || (self.true_count == other.true_count && self.bits == other.bits)
-    }
-}
-
-impl Eq for BitVector {}
-
-impl BitVector {
-    pub fn empty() -> &'static BitVector {
-        &EMPTY
-    }
-
-    pub fn full() -> &'static BitVector {
-        &FULL
-    }
-
-    pub fn true_until(n: usize) -> Self {
-        assert!(n <= N, "Cannot create a BitVector with more than N bits");
-
-        let mut bits = Arc::new(BitArray::<[usize; N_WORDS], Lsb0>::ZERO);
-        let bits_mut = Arc::make_mut(&mut bits);
-
-        let mut word = 0;
-        let mut remaining = n;
-        while remaining >= usize::BITS as usize {
-            bits_mut.as_raw_mut_slice()[word] = usize::MAX;
-            remaining -= usize::BITS as usize;
-            word += 1;
-        }
-
-        if remaining > 0 {
-            // For LSB ordering, set the lower bits (0 to remaining-1)
-            bits_mut.as_raw_mut_slice()[word] = (1usize << remaining) - 1;
-        }
-
-        BitVector {
-            bits,
-            true_count: n,
-        }
-    }
-
-    pub fn true_count(&self) -> usize {
-        self.true_count
-    }
-
-    pub fn as_raw(&self) -> &[usize; N_WORDS] {
-        // It's actually remarkably hard to get a reference to the underlying array!
-        let raw = self.bits.as_raw_slice();
-        unsafe { &*(raw.as_ptr() as *const [usize; N_WORDS]) }
-    }
-
-    pub fn as_raw_mut(&mut self) -> &mut [usize; N_WORDS] {
-        // SAFETY: We assume that the bits are mutable and that the view is valid.
-        let raw = Arc::make_mut(&mut self.bits).as_raw_mut_slice();
-        unsafe { &mut *(raw.as_mut_ptr() as *mut [usize; N_WORDS]) }
-    }
-
-    pub fn fill_from<I>(&mut self, iter: I)
-    where
-        I: IntoIterator<Item = usize>,
-    {
-        let mut true_count = 0;
-        for (dst, word) in self.as_raw_mut().iter_mut().zip(iter) {
-            true_count += word.count_ones() as usize;
-            *dst = word;
-        }
-        self.true_count = true_count;
-    }
-
-    pub fn as_view(&self) -> BitView<'_> {
-        unsafe { BitView::new_unchecked(&self.bits, self.true_count) }
-    }
-
-    pub fn as_view_mut(&mut self) -> BitViewMut<'_> {
-        unsafe { BitViewMut::new_unchecked(Arc::make_mut(&mut self.bits), self.true_count) }
-    }
-}
-
-impl From<BitView<'_>> for BitVector {
-    fn from(value: BitView<'_>) -> Self {
-        let true_count = value.true_count();
-        let bits = Arc::new(BitArray::<[usize; N_WORDS], Lsb0>::from(*value.as_raw()));
-        BitVector { bits, true_count }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_fill_from() {
-        let mut vec = BitVector::empty().clone();
-
-        // Fill with a pattern
-        let pattern = [
-            0b1010101010101010usize,
-            0b1111000011110000usize,
-            usize::MAX,
-            0,
-        ];
-
-        vec.fill_from(pattern.iter().copied());
-
-        let raw = vec.as_raw();
-        assert_eq!(raw[0], 0b1010101010101010usize);
-        assert_eq!(raw[1], 0b1111000011110000usize);
-        assert_eq!(raw[2], usize::MAX);
-        assert_eq!(raw[3], 0);
-
-        // Check true_count is updated correctly
-        let expected_count = 0b1010101010101010usize.count_ones() as usize
-            + 0b1111000011110000usize.count_ones() as usize
-            + usize::MAX.count_ones() as usize;
-        assert_eq!(vec.true_count(), expected_count);
-    }
-
-    #[test]
-    fn test_as_view() {
-        let vec = BitVector::true_until(100);
-        let view = vec.as_view();
-
-        assert_eq!(view.true_count(), 100);
-
-        // Verify the view sees the same bits
-        let mut ones = Vec::new();
-        view.iter_ones(|idx| ones.push(idx));
-        assert_eq!(ones, (0..100).collect::<Vec<_>>());
-    }
-
-    #[test]
-    fn test_as_view_mut() {
-        let mut vec = BitVector::true_until(64);
-        {
-            let view_mut = vec.as_view_mut();
-            // BitViewMut would allow modifications
-            // This test just verifies we can create a mutable view
-            assert_eq!(view_mut.true_count(), 64);
-        }
-        assert_eq!(vec.true_count(), 64);
-    }
-
-    #[test]
-    fn test_from_bitview() {
-        // Create a BitView from raw data
-        let mut raw = [0usize; N_WORDS];
-        raw[0] = 0b11111111;
-        raw[1] = 0b11110000;
-
-        let view = BitView::new(&raw);
-        let vec = BitVector::from(view);
-
-        assert_eq!(vec.true_count(), view.true_count());
-        assert_eq!(vec.as_raw()[0], 0b11111111);
-        assert_eq!(vec.as_raw()[1], 0b11110000);
-    }
-
-    #[test]
-    fn test_lsb_ordering_verification() {
-        // Verify LSB ordering by setting specific bits
-        let vec = BitVector::true_until(5);
-        let view = vec.as_view();
-
-        // Collect which bits are set
-        let mut ones = Vec::new();
-        view.iter_ones(|idx| ones.push(idx));
-
-        // With LSB ordering, bits 0-4 should be set
-        assert_eq!(ones, vec![0, 1, 2, 3, 4]);
-    }
-
-    #[test]
-    fn test_as_raw_mut() {
-        let mut vec = BitVector::empty().clone();
-
-        // Modify through as_raw_mut
-        let raw_mut = vec.as_raw_mut();
-        raw_mut[0] = 0b1111;
-        raw_mut[2] = usize::MAX;
-
-        // Note: true_count is NOT automatically updated when using as_raw_mut
-        // This is a low-level API, so the user must manage true_count
-        vec.true_count = 4 + 64; // Update manually
-
-        assert_eq!(vec.as_raw()[0], 0b1111);
-        assert_eq!(vec.as_raw()[2], usize::MAX);
-        assert_eq!(vec.true_count(), 68);
-    }
-
-    #[test]
-    fn test_boundary_conditions() {
-        // Test various boundary values
-        let boundaries = [1, 31, 32, 33, 63, 64, 65, 127, 128, 129, N - 1, N];
-
-        for &n in &boundaries {
-            let vec = BitVector::true_until(n);
-            assert_eq!(vec.true_count(), n);
-
-            // Verify correct bits are set via view
-            let view = vec.as_view();
-            let mut ones = Vec::new();
-            view.iter_ones(|idx| ones.push(idx));
-            assert_eq!(ones.len(), n);
-            if n > 0 {
-                assert_eq!(ones[0], 0); // First bit should be 0 (LSB)
-                assert_eq!(ones[n - 1], n - 1); // Last bit should be n-1
-            }
-        }
-    }
-}
diff --git a/vortex-array/src/pipeline/bits/view_mut.rs b/vortex-array/src/pipeline/bits/view_mut.rs
deleted file mode 100644
index 80155ee2079..00000000000
--- a/vortex-array/src/pipeline/bits/view_mut.rs
+++ /dev/null
@@ -1,135 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-use bitvec::array::BitArray;
-use bitvec::order::Lsb0;
-
-use crate::pipeline::bits::BitView;
-use crate::pipeline::{N, N_WORDS};
-
-/// A mutable borrowed fixed-size bit vector of length `N` bits, represented as an array of
-/// usize words.
-/// Mutable view into a bit array for constructing selection masks.
-#[derive(Debug)]
-pub struct BitViewMut<'a> {
-    bits: &'a mut BitArray<[usize; N_WORDS], Lsb0>,
-    true_count: usize,
-}
-
-impl<'a> BitViewMut<'a> {
-    pub fn new(bits: &'a mut [usize; N_WORDS]) -> Self {
-        let true_count = bits.iter().map(|&word| word.count_ones() as usize).sum();
-        let bits: &mut BitArray<[usize; N_WORDS], Lsb0> = unsafe { std::mem::transmute(bits) };
-        BitViewMut { bits, true_count }
-    }
-
-    pub(crate) unsafe fn new_unchecked(
-        bits: &'a mut BitArray<[usize; N_WORDS], Lsb0>,
-        true_count: usize,
-    ) -> Self {
-        BitViewMut { bits, true_count }
-    }
-
-    pub fn true_count(&self) -> usize {
-        self.true_count
-    }
-
-    /// Mask the values in the mask up to the given length.
-    pub fn intersect_prefix(&mut self, mut len: usize) {
-        assert!(len <= N, "BitViewMut::truncate: length exceeds N");
-
-        let bit_slice = self.bits.as_raw_mut_slice();
-
-        let mut word = 0;
-        let mut true_count = 0;
-        while len >= usize::BITS as usize {
-            true_count += bit_slice[word].count_ones() as usize;
-            len -= usize::BITS as usize;
-            word += 1;
-        }
-
-        if len > 0 {
-            bit_slice[word] &= !(usize::MAX << len);
-            true_count += bit_slice[word].count_ones() as usize;
-            word += 1;
-        }
-
-        while word < N_WORDS {
-            bit_slice[word] = 0;
-            word += 1;
-        }
-
-        self.set_true_count(true_count);
-    }
-
-    pub fn clear(&mut self) {
-        self.bits.as_raw_mut_slice().fill(0);
-        self.set_true_count(0);
-    }
-
-    pub fn fill_with_words(&mut self, mut iter: impl Iterator<Item = u64>) {
-        let mut true_count = 0;
-
-        let dst_bytes = unsafe {
-            std::slice::from_raw_parts_mut(
-                self.bits.as_raw_mut_slice().as_mut_ptr() as *mut u64,
-                N_WORDS,
-            )
-        };
-
-        for word in 0..N / 64 {
-            if let Some(value) = iter.next() {
-                dst_bytes[word] = value;
-                true_count += value.count_ones() as usize;
-            }
-        }
-        self.set_true_count(true_count);
-    }
-
-    pub fn as_view(&self) -> BitView<'_> {
-        unsafe { BitView::new_unchecked(self.bits, self.true_count) }
-    }
-
-    pub fn as_raw_mut(&mut self) -> &mut [usize; N_WORDS] {
-        unsafe { std::mem::transmute(&mut self.bits) }
-    }
-
-    #[inline(always)]
-    fn set_true_count(&mut self, true_count: usize) {
-        self.true_count = true_count;
-        debug_assert_eq!(
-            self.true_count,
-            self.bits
-                .as_raw_slice()
-                .iter()
-                .map(|&word| word.count_ones() as usize)
-                .sum::<usize>()
-        );
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::pipeline::bits::BitVector;
-
-    #[test]
-    fn test_intersect_prefix() {
-        let mut bit_vec = BitVector::full().clone();
-
-        let mut view_mut = bit_vec.as_view_mut();
-        assert_eq!(view_mut.true_count(), N);
-
-        view_mut.intersect_prefix(N - 1);
-        assert_eq!(view_mut.true_count(), N - 1);
-
-        view_mut.intersect_prefix(64);
-        assert_eq!(view_mut.true_count(), 64);
-
-        view_mut.intersect_prefix(10);
-        assert_eq!(view_mut.true_count(), 10);
-
-        view_mut.intersect_prefix(0);
-        assert_eq!(view_mut.true_count(), 0);
-    }
-}
diff --git a/vortex-array/src/pipeline/mod.rs b/vortex-array/src/pipeline/mod.rs
index 76a79dbacf1..d2da1ff8616 100644
--- a/vortex-array/src/pipeline/mod.rs
+++ b/vortex-array/src/pipeline/mod.rs
@@ -19,28 +19,24 @@
 //! It is a work-in-progress and is not yet used in production.
 
 pub mod bits;
-mod types;
-pub mod vec;
-pub mod view;
+pub mod source_driver;
 
-use std::cell::RefCell;
-
-pub use types::*;
-use vec::VectorRef;
+use crate::Array;
+use bits::BitView;
 use vortex_error::VortexResult;
-use vortex_vector::VectorMut;
-
-use self::vec::Vector;
-use crate::pipeline::bits::BitView;
+use vortex_vector::{Vector, VectorMut};
 
 /// The number of elements in each step of a Vortex evaluation operator.
 pub const N: usize = 1024;
 
-// Number of usize words needed to store N bits
+/// Number of bytes needed to store N bits
+pub const N_BYTES: usize = N / 8;
+
+/// Number of usize words needed to store N bits
 pub const N_WORDS: usize = N / usize::BITS as usize;
 
 /// Returned by an array to indicate that it can be executed in a pipelined fashion.
-pub trait Pipelined {
+pub trait PipelinedOperator: Array {
     // Whether this operator works by mutating its first child in-place.
     //
     // If `true`, the operator is invoked with the first child's input data passed via the
@@ -50,18 +46,26 @@ pub trait Pipelined {
     //     false
     // }
 
-    /// Returns the indices of the children of this array that should be passed to the kernel as
-    /// pipelined input vectors, 1024 elements at a time.
+    /// Returns whether the nth child of this array should be passed to the kernel as a pipelined
+    /// input vector, 1024 elements at a time.
     ///
-    /// Any child not listed here will be treated as a batch input, and the full vector will be
+    /// Any child that reports `false` will be treated as a batch input, and the full vector will be
     /// computed before pipelined execution begins.
-    fn pipelined_children(&self) -> Vec<usize>;
+    fn is_pipelined_child(&self, child_idx: usize) -> bool;
 
     /// Bind the operator into a [`Kernel`] for pipelined execution.
     ///
     /// The provided [`BindContext`] can be used to obtain vector IDs for pipelined children and
     /// batch IDs for batch children. Each child can only be bound once.
-    fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult<Box<dyn Kernel>>;
+    fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult<Box<dyn OperatorKernel>>;
+}
+
+pub trait PipelinedSource: Array {
+    /// Bind the source into a [`SourceKernel`] for pipelined execution.
+    ///
+    /// The provided [`BindContext`] can be used to obtain vector IDs for pipelined children and
+    /// batch IDs for batch children. Each child can only be bound once.
+    fn bind_source(&self, ctx: &mut dyn BindContext) -> VortexResult<Box<dyn SourceKernel>>;
 }
 
 /// The context used when binding an operator for execution.
@@ -92,7 +96,7 @@ pub type VectorId = usize;
 /// the setup costs (such as DType validation, stats short-circuiting, etc.), and to make better
 /// use of CPU caches by performing all operations while the data is hot.
 ///
-/// The [`Kernel::step`] method will be invoked repeatedly to process chunks of data, [`N`] elements
+/// The [`SourceKernel::step`] method will be invoked repeatedly to process chunks of data, [`N`] elements
 /// at a time. Each invocation is passed a selection mask indicating which elements of the chunk
 /// should be written to the start of the output vector.
 ///
@@ -100,9 +104,9 @@ pub type VectorId = usize;
 /// its length will initially be set to zero. It is therefore safe to invoke unchecked writes up to
 /// `N` elements.
 ///
-/// The pipeline may invoke the `Kernel::skip` method to skip over some number of chunks of data.
+/// The pipeline may invoke the `SourceKernel::skip` method to skip over some number of chunks of data.
 /// The kernel should mutate any internal state as necessary to account for the skipped data.
-pub trait Kernel: Send {
+pub trait SourceKernel: Send {
     /// Skip over the given number of chunks of data.
     ///
     /// For example, if `n` is 3, then the kernel should skip over `3 * N` elements of input data.
@@ -117,15 +121,29 @@ pub trait Kernel: Send {
     ) -> VortexResult<()>;
 }
 
+pub trait OperatorKernel: Send {
+    /// Attempts to perform a single step of the operator, writing data to the output vector.
+    ///
+    /// The output vector has length equal to the number of valid elements in the input vectors.
+    /// This number of values should be written to the output vector.
+    fn step(&self, ctx: &KernelContext, out: &mut VectorMut) -> VortexResult<()>;
+}
+
 /// Context passed to kernels during execution, providing access to vectors.
 pub struct KernelContext {
     /// The allocated vectors for intermediate results.
-    pub(crate) vectors: Vec<RefCell<Vector>>,
+    pub(crate) vectors: Vec<Vector>,
 }
 
 impl KernelContext {
+    pub fn empty() -> Self {
+        Self {
+            vectors: Vec::new(),
+        }
+    }
+
     /// Get a vector by its ID.
-    pub fn vector(&self, vector_id: VectorId) -> VectorRef<'_> {
-        VectorRef::new(self.vectors[vector_id].borrow())
+    pub fn vector(&self, _vector_id: VectorId) -> &Vector {
+        todo!()
     }
 }
diff --git a/vortex-array/src/pipeline/source_driver.rs b/vortex-array/src/pipeline/source_driver.rs
new file mode 100644
index 00000000000..ff74ed5cf55
--- /dev/null
+++ b/vortex-array/src/pipeline/source_driver.rs
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use crate::pipeline::bits::BitView;
+use crate::pipeline::{BindContext, KernelContext, PipelinedSource, VectorId, N};
+use itertools::Itertools;
+use vortex_error::{vortex_panic, VortexResult};
+use vortex_mask::Mask;
+use vortex_vector::{Vector, VectorMut, VectorMutOps};
+
+/// Temporary driver for executing a single array in a pipelined fashion.
+pub struct PipelineSourceDriver<'a> {
+    array: &'a dyn PipelinedSource,
+}
+
+impl<'a> PipelineSourceDriver<'a> {
+    pub fn new(array: &'a dyn PipelinedSource) -> Self {
+        Self { array }
+    }
+
+    pub fn execute(&self, selection: &Mask) -> VortexResult<Vector> {
+        // First, we compute all child vectors.
+        // Since this is a pipeline source, we know that remaining children must be batch inputs,
+        // and therefore we cannot push down the selection mask.
+        let batch_inputs: Vec<_> = self
+            .array
+            .children()
+            .iter()
+            .map(|child| child.execute())
+            .try_collect()?;
+
+        // We now construct the source kernel.
+        let mut bind_ctx = PipelineSourceBindCtx {
+            batch_inputs: &batch_inputs,
+        };
+        let mut kernel = self.array.bind_source(&mut bind_ctx)?;
+        let kernel_ctx = KernelContext::empty();
+
+        // Allocate an output vector, rounding the capacity up to a multiple of N elements so that
+        // every call to `kernel.step` has at least N elements of capacity to write into.
+        let mut output = VectorMut::with_capacity(
+            self.array.dtype(),
+            selection.true_count().next_multiple_of(N),
+        );
+
+        // TODO(ngates): change behaviour based on the density of the selection mask.
+        let selection_buffer = selection.to_bit_buffer();
+        // TODO(ngates): rewrite chunks to take an arbitrary "storage type"? Or somehow copy
+        //  the chunks directly into a wider bit slice?
+        let selection_chunks = selection_buffer.chunks();
+        let mut selection_chunks_iter = selection_chunks.iter_padded();
+
+        let output_len = selection.true_count();
+
+        let mut selection_chunk = [0u64; N / u64::BITS as usize];
+
+        let mut output_chunks = vec![];
+        while output.len() < output_len {
+            // Copy the next selection chunk into place.
+            for word_idx in 0..selection_chunk.len() {
+                selection_chunk[word_idx] = selection_chunks_iter.next().unwrap_or_else(|| 0u64);
+            }
+
+            // TODO(ngates): ideally our chunks iter would use a usize...
+            let selection_chunk_usize = unsafe { std::mem::transmute(&selection_chunk) };
+            let selection = BitView::new(selection_chunk_usize);
+
+            // We know we have remaining capacity for N elements, so split off a size-N chunk.
+            let remaining_output = output.split_off(N);
+
+            kernel.step(&kernel_ctx, &selection, &mut output)?;
+            assert_eq!(
+                output.len(),
+                selection.true_count(),
+                "Kernel did not write expected number of elements"
+            );
+
+            // Set the filled chunk aside and continue with the remaining capacity; the chunks are
+            // combined back into a single vector once the loop completes.
+            output_chunks.push(output);
+            output = remaining_output;
+        }
+
+        // Combine all output chunks back into the output vector.
+        for chunk in output_chunks {
+            output.unsplit(chunk);
+        }
+
+        Ok(output.freeze())
+    }
+}
+
+struct PipelineSourceBindCtx<'a> {
+    batch_inputs: &'a [Vector],
+}
+
+impl BindContext for PipelineSourceBindCtx<'_> {
+    fn pipelined_input(&self, _child_idx: usize) -> VectorId {
+        vortex_panic!("PipelineSource cannot bind pipelined inputs");
+    }
+
+    fn batch_input(&self, child_idx: usize) -> Vector {
+        self.batch_inputs[child_idx].clone()
+    }
+}
diff --git a/vortex-array/src/pipeline/types.rs b/vortex-array/src/pipeline/types.rs
deleted file mode 100644
index aaf6c9c9a44..00000000000
--- a/vortex-array/src/pipeline/types.rs
+++ /dev/null
@@ -1,97 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-use std::fmt::{Debug, Display, Formatter};
-
-use vortex_dtype::half::f16;
-use vortex_dtype::{DType, NativePType, PType};
-use vortex_error::vortex_panic;
-use vortex_vector::binaryview::BinaryView;
-
-/// Defines the "vector type", a physical type describing the data that's held in the vector.
-///
-/// See the specific vector view types like primitive views for more details.
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
-pub enum VType {
-    Bool,
-    Primitive(PType),
-    Binary,
-}
-
-impl Display for VType {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        match self {
-            VType::Bool => write!(f, "bool"),
-            VType::Primitive(ptype) => write!(f, "{}", ptype),
-            VType::Binary => write!(f, "binary"),
-        }
-    }
-}
-
-impl VType {
-    pub fn of<T: Element>() -> Self {
-        T::vtype()
-    }
-
-    pub fn byte_width(&self) -> usize {
-        match self {
-            VType::Bool => 1,
-            VType::Primitive(ptype) => ptype.byte_width(),
-            VType::Binary => size_of::<BinaryView>(),
-        }
-    }
-}
-
-/// A trait to identify canonical vector types.
-pub trait Element: 'static + Copy + Debug + Send {
-    fn vtype() -> VType;
-}
-
-/// NOTE: for now, we have chosen to store boolean values as byte-sized booleans instead
-///  of packed into a bit mask, this is typically more efficient for SIMD compute operations.
-///  For masks, we still use bit-packed booleans.
-impl Element for bool {
-    fn vtype() -> VType {
-        VType::Bool
-    }
-}
-
-macro_rules! canonical_ptype {
-    ($T:ty) => {
-        impl Element for $T {
-            fn vtype() -> VType {
-                VType::Primitive(<$T as NativePType>::PTYPE)
-            }
-        }
-    };
-}
-
-canonical_ptype!(u8);
-canonical_ptype!(u16);
-canonical_ptype!(u32);
-canonical_ptype!(u64);
-canonical_ptype!(i8);
-canonical_ptype!(i16);
-canonical_ptype!(i32);
-canonical_ptype!(i64);
-canonical_ptype!(f16);
-canonical_ptype!(f32);
-canonical_ptype!(f64);
-
-impl Element for BinaryView {
-    fn vtype() -> VType {
-        VType::Binary
-    }
-}
-
-impl From<&DType> for VType {
-    fn from(value: &DType) -> Self {
-        match value {
-            DType::Bool(_) => VType::Bool,
-            DType::Primitive(ptype, _) => VType::Primitive(*ptype),
-            DType::Utf8(_) => VType::Binary,
-            DType::Binary(_) => VType::Binary,
-            _ => vortex_panic!("Unsupported dtype for VType: {}", value),
-        }
-    }
-}
diff --git a/vortex-array/src/pipeline/vec.rs b/vortex-array/src/pipeline/vec.rs
deleted file mode 100644
index 16e137a4772..00000000000
--- a/vortex-array/src/pipeline/vec.rs
+++ /dev/null
@@ -1,161 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-//! Vectors contain owned fixed-size canonical arrays of elements.
-//!
-
-// TODO(ngates): Currently, the data in a vector is Arc'd. We should consider whether we want the
-//  performance hit for as_mut(), or whether we want zero-copy cloning. Not clear that we ever
-//  need the clone behavior.
-
-use std::cell::{Ref, RefMut};
-use std::fmt::Debug;
-use std::ops::{Deref, DerefMut};
-
-use vortex_buffer::{Alignment, ByteBuffer, ByteBufferMut};
-
-use crate::pipeline::N;
-use crate::pipeline::bits::BitVector;
-use crate::pipeline::types::{Element, VType};
-use crate::pipeline::view::{View, ViewMut};
-
-/// A vector contains fixed-size owned data in canonical form.
-#[derive(Debug)]
-pub struct Vector {
-    /// The physical type of the vector, which defines how the elements are stored.
-    vtype: VType,
-    /// The allocated elements buffer.
-    /// Alignment is at least the size of the element type.
-    /// The capacity of the elements buffer is N * `size_of::<T>()` where T is the element type.
-    elements: ByteBufferMut,
-    /// The validity mask for the vector, indicating which elements in the buffer are valid.
-    validity: BitVector,
-    // The position of the selected values in the vector.
-    selection: Selection,
-
-    /// Additional buffers of data used by the vector, such as string data.
-    // TODO(ngates): ideally these buffers are compressed somehow? E.g. using FSST?
-    #[allow(dead_code)]
-    data: Vec<ByteBuffer>,
-}
-
-impl Vector {
-    pub fn new<T: Element>() -> Self {
-        Self::new_with_vtype(T::vtype())
-    }
-
-    pub fn new_with_vtype(vtype: VType) -> Self {
-        let mut elements = ByteBufferMut::with_capacity_aligned(
-            vtype.byte_width() * N,
-            Alignment::new(vtype.byte_width()),
-        );
-        unsafe { elements.set_len(vtype.byte_width() * N) };
-
-        Self {
-            vtype,
-            elements,
-            validity: BitVector::full().clone(),
-            selection: Selection::Prefix,
-            data: vec![],
-        }
-    }
-
-    pub fn set_selection(&mut self, selection: Selection) {
-        self.selection = selection;
-    }
-
-    pub fn as_mut_array<T: Element>(&mut self) -> &mut [T; N] {
-        assert_eq!(self.vtype, T::vtype());
-        unsafe { &mut *(self.elements.as_mut_ptr().cast::<T>().cast::<[T; N]>()) }
-    }
-
-    pub fn as_view_mut(&mut self) -> ViewMut<'_> {
-        ViewMut {
-            vtype: self.vtype,
-            elements: self.elements.as_mut_ptr().cast(),
-            validity: Some(self.validity.as_view_mut()),
-            data: vec![],
-            selection: self.selection,
-            _marker: Default::default(),
-        }
-    }
-
-    pub fn as_view(&self) -> View<'_> {
-        View {
-            vtype: self.vtype,
-            elements: self.elements.as_ptr().cast(),
-            validity: Some(self.validity.as_view()),
-            selection: self.selection,
-            data: vec![],
-            _marker: Default::default(),
-        }
-    }
-}
-
-/// A [`VectorRef`] provides a small wrapper to allow accessing a [`View`] with the same lifetime
-/// as the borrowed vector, rather than the lifetime of the [`Ref`].
-pub struct VectorRef<'a> {
-    // Use to ensure that view and borrow have the same lifetime.
-    #[allow(dead_code)]
-    borrow: Ref<'a, Vector>,
-    view: View<'a>,
-}
-
-impl<'a> VectorRef<'a> {
-    pub fn new(borrow: Ref<'a, Vector>) -> Self {
-        let view = borrow.as_view();
-        // SAFETY: we continue to hold onto the [`Ref`], so it is safe to erase the lifetime.
-        let view = unsafe { std::mem::transmute::<View<'_>, View<'a>>(view) };
-        Self { borrow, view }
-    }
-
-    pub fn as_view(&self) -> &View<'a> {
-        &self.view
-    }
-}
-
-impl<'a> Deref for VectorRef<'a> {
-    type Target = View<'a>;
-
-    fn deref(&self) -> &Self::Target {
-        &self.view
-    }
-}
-
-/// A [`VectorRefMut`] provides a small wrapper to allow accessing a [`ViewMut`] with the same
-/// lifetime as the borrowed vector, rather than the lifetime of the [`RefMut`].
-pub struct VectorRefMut<'a> {
-    // Use to ensure that view and borrow have the same lifetime.
-    #[allow(dead_code)]
-    borrow: RefMut<'a, Vector>,
-    view: ViewMut<'a>,
-}
-
-impl<'a> VectorRefMut<'a> {
-    pub fn new(mut borrow: RefMut<'a, Vector>) -> Self {
-        let view = borrow.as_view_mut();
-        // SAFETY: we continue to hold onto the [`Ref`], so it is safe to erase the lifetime.
-        let view = unsafe { std::mem::transmute::<ViewMut<'_>, ViewMut<'a>>(view) };
-        Self { borrow, view }
-    }
-}
-
-impl<'a> Deref for VectorRefMut<'a> {
-    type Target = ViewMut<'a>;
-
-    fn deref(&self) -> &Self::Target {
-        &self.view
-    }
-}
-
-impl<'a> DerefMut for VectorRefMut<'a> {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        &mut self.view
-    }
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum Selection {
-    Prefix,
-    Mask,
-}
diff --git a/vortex-array/src/pipeline/view.rs b/vortex-array/src/pipeline/view.rs
deleted file mode 100644
index 3aa6c674b31..00000000000
--- a/vortex-array/src/pipeline/view.rs
+++ /dev/null
@@ -1,225 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-use vortex_buffer::ByteBuffer;
-use vortex_error::VortexExpect;
-
-use crate::pipeline::N;
-use crate::pipeline::bits::{BitView, BitViewMut};
-use crate::pipeline::types::{Element, VType};
-use crate::pipeline::vec::Selection;
-
-pub struct View<'a> {
-    /// The physical type of the vector, which defines how the elements are stored.
-    pub(super) vtype: VType,
-    /// A pointer to the allocated elements buffer.
-    /// Alignment is at least the size of the element type.
-    /// The capacity of the elements buffer is N * `size_of::<T>()` where T is the element type.
-    pub(super) elements: *const u8,
-    /// The validity mask for the vector, indicating which elements in the buffer are valid.
-    /// This value can be `None` if the expected DType is `NonNullable`.
-    // TODO: support validity
-    #[allow(dead_code)]
-    pub(super) validity: Option<BitView<'a>>,
-
-    // Indicates where the selected elements are positioned within the vector.
-    pub(super) selection: Selection,
-
-    /// Additional buffers of data used by the vector, such as string data.
-    #[allow(dead_code)]
-    pub(super) data: Vec<ByteBuffer>,
-
-    /// Marker defining the lifetime of the contents of the vector.
-    pub(super) _marker: std::marker::PhantomData<&'a ()>,
-}
-
-impl<'a> View<'a> {
-    #[inline(always)]
-    pub fn selection(&self) -> Selection {
-        self.selection
-    }
-
-    pub fn as_array<T>(&self) -> &'a [T; N]
-    where
-        T: Element,
-    {
-        debug_assert_eq!(self.vtype, T::vtype(), "Invalid type for canonical view");
-        // SAFETY: We assume that the elements are of type T and that the view is valid.
-        unsafe { &*(self.elements.cast::<T>() as *const [T; N]) }
-    }
-
-    /// Re-interpret cast the vector into a new type where the element has the same width.
-    #[inline(always)]
-    pub fn reinterpret_as<E: Element>(&mut self) {
-        assert_eq!(
-            self.vtype.byte_width(),
-            size_of::<E>(),
-            "Cannot reinterpret {} as {}",
-            self.vtype,
-            E::vtype()
-        );
-        self.vtype = E::vtype();
-    }
-}
-
-pub struct ViewMut<'a> {
-    /// The physical type of the vector, which defines how the elements are stored.
-    pub(super) vtype: VType,
-    /// A pointer to the allocated elements buffer.
-    /// Alignment is at least the size of the element type.
-    /// The capacity of the elements buffer is N * `size_of::<T>()` where T is the element type.
-    // TODO(ngates): it would be nice to guarantee _wider_ alignment, ideally 128 bytes, so that
-    //  we can use aligned load/store instructions for wide SIMD lanes.
-    pub(super) elements: *mut u8,
-    /// The validity mask for the vector, indicating which elements in the buffer are valid.
-    /// This value can be `None` if the expected DType is `NonNullable`.
-    pub(super) validity: Option<BitViewMut<'a>>,
-
-    /// Additional buffers of data used by the vector, such as string data.
-    // TODO(ngates): ideally these buffers are compressed somehow? E.g. using FSST?
-    #[allow(dead_code)]
-    pub(super) data: Vec<ByteBuffer>,
-
-    /// The position of the selected values of this buffer.
-    /// One of:
-    /// * All - all N values are selected.
-    /// * Prefix - the first n values are selected where i is the true count of the kernel mask.
-    /// * Mask - the values are in the positions indicated by the kernel mask.
-    pub(super) selection: Selection,
-
-    /// Marker defining the lifetime of the contents of the vector.
-    pub(super) _marker: std::marker::PhantomData<&'a mut ()>,
-}
-
-impl<'a> ViewMut<'a> {
-    pub fn new<E: Element>(elements: &'a mut [E], validity: Option<BitViewMut<'a>>) -> Self {
-        assert_eq!(elements.len(), N);
-        Self {
-            vtype: E::vtype(),
-            elements: elements.as_mut_ptr().cast(),
-            validity,
-            data: vec![],
-            selection: Selection::Prefix,
-            _marker: Default::default(),
-        }
-    }
-
-    /// Re-interpret cast the vector into a new type where the element has the same width.
-    #[inline(always)]
-    pub fn reinterpret_as<E: Element>(&mut self) {
-        assert_eq!(
-            self.vtype.byte_width(),
-            size_of::<E>(),
-            "Cannot reinterpret {} as {}",
-            self.vtype,
-            E::vtype()
-        );
-        self.vtype = E::vtype();
-    }
-
-    /// Returns an immutable array of the elements in the vector.
-    #[inline(always)]
-    pub fn as_array<E: Element>(&self) -> &'a [E; N] {
-        debug_assert_eq!(self.vtype, E::vtype(), "Invalid type for canonical view");
-        unsafe { &*(self.elements.cast::<E>() as *const [E; N]) }
-    }
-
-    /// Returns a mutable array of the elements in the vector, allowing for modification.
-    #[inline(always)]
-    pub fn as_array_mut<E: Element>(&mut self) -> &'a mut [E; N] {
-        debug_assert_eq!(self.vtype, E::vtype(), "Invalid type for canonical view");
-        unsafe { &mut *(self.elements.cast::<E>() as *mut [E; N]) }
-    }
-
-    /// Access the validity mask of the vector.
-    ///
-    /// ## Panics
-    ///
-    /// Panics if the vector does not support validity, i.e. if the DType was non-nullable when
-    /// it was created.
-    pub fn validity(&mut self) -> &mut BitViewMut<'a> {
-        self.validity
-            .as_mut()
-            .vortex_expect("Vector does not support validity")
-    }
-
-    pub fn add_buffer(&mut self, buffer: ByteBuffer) {
-        self.data.push(buffer);
-    }
-
-    #[inline(always)]
-    pub fn selection(&self) -> Selection {
-        self.selection
-    }
-
-    pub fn set_selection(&mut self, selection: Selection) {
-        self.selection = selection;
-    }
-
-    /// Flatten the view by bringing the selected elements of the mask to the beginning of
-    pub fn flatten<E: Element>(&mut self, selection: &BitView<'_>) {
-        assert_eq!(
-            self.vtype,
-            E::vtype(),
-            "ViewMut::flatten_mask: type mismatch"
-        );
-
-        if matches!(self.selection, Selection::Prefix) {
-            // Nothing to do, all elements are already selected.
-            return;
-        }
-
-        match selection.true_count() {
-            0 | N => {
-                // If the mask has no true bits or all true bits, we are already flattened.
-            }
-            n if n > 3 * N / 4 => {
-                // High density: use iter_zeros to compact by removing gaps
-                let slice = self.as_array_mut::<E>();
-                let mut write_idx = 0;
-                let mut read_idx = 0;
-
-                selection.iter_zeros(|zero_idx| {
-                    // Copy elements from read_idx to zero_idx (exclusive) to write_idx
-                    let count = zero_idx - read_idx;
-                    unsafe {
-                        // SAFETY: We assume that the elements are of type E and that the view is valid.
-                        // Using memmove for potentially overlapping regions
-                        std::ptr::copy(
-                            slice.as_ptr().add(read_idx),
-                            slice.as_mut_ptr().add(write_idx),
-                            count,
-                        );
-                        write_idx += count;
-                    }
-                    read_idx = zero_idx + 1;
-                });
-
-                // Copy any remaining elements after the last zero
-                unsafe {
-                    std::ptr::copy(
-                        slice.as_ptr().add(read_idx),
-                        slice.as_mut_ptr().add(write_idx),
-                        N - read_idx,
-                    );
-                }
-            }
-            _ => {
-                let mut offset = 0;
-                let slice = self.as_array_mut::<E>();
-                selection.iter_ones(|idx| {
-                    unsafe {
-                        // SAFETY: We assume that the elements are of type E and that the view is valid.
-                        let value = *slice.get_unchecked(idx);
-                        // TODO(joe): use ptr increment (not offset).
-                        *slice.get_unchecked_mut(offset) = value;
-
-                        offset += 1;
-                    }
-                });
-            }
-        }
-
-        self.selection = Selection::Prefix
-    }
-}
diff --git a/vortex-array/src/vtable/operator.rs b/vortex-array/src/vtable/operator.rs
index 857436bfc34..fa731946dbe 100644
--- a/vortex-array/src/vtable/operator.rs
+++ b/vortex-array/src/vtable/operator.rs
@@ -1,15 +1,14 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-use vortex_error::{VortexResult, vortex_bail};
+use vortex_error::{vortex_bail, VortexResult};
 use vortex_mask::Mask;
 use vortex_vector::Vector;
 
-use crate::ArrayRef;
 use crate::array::IntoArray;
 use crate::execution::{BatchKernelRef, BindCtx, ExecutionCtx};
-use crate::pipeline::Pipelined;
 use crate::vtable::{NotSupported, VTable};
+use crate::ArrayRef;
 
 /// A vtable for the new operator-based array functionality. Eventually this vtable will be
 /// merged into the main `VTable`, but for now it is kept separate to allow for incremental
@@ -40,12 +39,6 @@ pub trait OperatorVTable<V: VTable> {
         Self::bind(array, Some(&selection.clone().into_array()), &mut ())?.execute()
     }
 
-    /// Returns an implementation of the [`Pipelined`] trait for this array, if pipelined execution
-    /// is supported.
-    fn execute_pipelined(_array: &V::Array) -> Option<&dyn Pipelined> {
-        None
-    }
-
     /// Bind the array for execution in batch mode.
     ///
     /// This function should return a [`BatchKernelRef`] that can be used to execute the array in

From 332e3fa5ae8862bc5f005b93b78bf8779950ef10 Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Fri, 7 Nov 2025 15:20:54 -0500
Subject: [PATCH 04/10] pipelined execution

Signed-off-by: Nicholas Gates <nick@nickgates.com>
---
 vortex-array/src/array/operator.rs            | 10 ---
 .../src/arrays/primitive/vtable/operator.rs   | 72 ++++++++++++++--
 vortex-array/src/pipeline/bits.rs             | 28 +-----
 vortex-array/src/pipeline/mod.rs              |  7 +-
 vortex-array/src/pipeline/source_driver.rs    | 27 ++++++
 vortex-array/src/vtable/operator.rs           |  6 ++
 vortex-buffer/src/buffer_mut.rs               | 14 +--
 vortex-mask/src/mask_mut.rs                   | 44 ++++++++--
 vortex-vector/src/primitive/generic_mut.rs    | 34 +++++++-
 vortex-vector/src/primitive/vector_mut.rs     | 85 ++++++++++++++++++-
 vortex-vector/src/vector_mut.rs               | 20 ++---
 11 files changed, 278 insertions(+), 69 deletions(-)

diff --git a/vortex-array/src/array/operator.rs b/vortex-array/src/array/operator.rs
index 81194065d6d..340997b0529 100644
--- a/vortex-array/src/array/operator.rs
+++ b/vortex-array/src/array/operator.rs
@@ -8,7 +8,6 @@ use vortex_mask::Mask;
 use vortex_vector::{vector_matches_dtype, Vector, VectorOps};
 
 use crate::execution::{BatchKernelRef, BindCtx, DummyExecutionCtx, ExecutionCtx};
-use crate::pipeline::source_driver::PipelineDriver;
 use crate::vtable::{OperatorVTable, VTable};
 use crate::{Array, ArrayAdapter, ArrayRef};
 
@@ -63,15 +62,6 @@ impl ArrayOperator for Arc<dyn Array> {
 
 impl<V: VTable> ArrayOperator for ArrayAdapter<V> {
     fn execute_batch(&self, selection: &Mask, ctx: &mut dyn ExecutionCtx) -> VortexResult<Vector> {
-        // Check to see if we should execute the array in a pipelined fashion. This is a
-        // short-circuit for now until we have a full pipeline executor, but it allows each arrow
-        // to only implement the pipeline API.
-        if let Some(pipelined) =
-            <V::OperatorVTable as OperatorVTable<V>>::execute_pipelined(&self.0)
-        {
-            PipelineDriver::new(pipelined).execute
-        }
-
         let vector =
             <V::OperatorVTable as OperatorVTable<V>>::execute_batch(&self.0, selection, ctx)?;
 
diff --git a/vortex-array/src/arrays/primitive/vtable/operator.rs b/vortex-array/src/arrays/primitive/vtable/operator.rs
index fa18e516cec..4653d3ef5b8 100644
--- a/vortex-array/src/arrays/primitive/vtable/operator.rs
+++ b/vortex-array/src/arrays/primitive/vtable/operator.rs
@@ -1,18 +1,25 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
+use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable};
+use crate::execution::{kernel, BatchKernelRef, BindCtx};
+use crate::pipeline::bits::BitView;
+use crate::pipeline::{BindContext, KernelContext, PipelinedSource, SourceKernel, N};
+use crate::vtable::{OperatorVTable, ValidityHelper};
+use crate::{ArrayRef, IntoArray};
 use vortex_buffer::Buffer;
 use vortex_compute::filter::Filter;
-use vortex_dtype::match_each_native_ptype;
+use vortex_dtype::{match_each_native_ptype, NativePType, PTypeDowncastExt};
 use vortex_error::VortexResult;
+use vortex_mask::Mask;
 use vortex_vector::primitive::PVector;
-
-use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable};
-use crate::execution::{BatchKernelRef, BindCtx, kernel};
-use crate::vtable::{OperatorVTable, ValidityHelper};
-use crate::{ArrayRef, IntoArray};
+use vortex_vector::VectorMut;
 
 impl OperatorVTable<PrimitiveVTable> for PrimitiveVTable {
+    fn as_pipelined_source(array: &PrimitiveArray) -> Option<&dyn PipelinedSource> {
+        Some(array)
+    }
+
     fn bind(
         array: &PrimitiveArray,
         selection: Option<&ArrayRef>,
@@ -61,3 +68,56 @@ impl OperatorVTable<PrimitiveVTable> for PrimitiveVTable {
         Ok(None)
     }
 }
+
+impl PipelinedSource for PrimitiveArray {
+    fn bind_source(&self, _ctx: &mut dyn BindContext) -> VortexResult<Box<dyn SourceKernel>> {
+        match_each_native_ptype!(self.ptype(), |T| {
+            let primitive_kernel = PrimitiveKernel {
+                buffer: self.buffer::<T>().clone(),
+                validity: self.validity_mask(),
+                offset: 0,
+            };
+            Ok(Box::new(primitive_kernel))
+        })
+    }
+}
+
+struct PrimitiveKernel<T: NativePType> {
+    buffer: Buffer<T>,
+    validity: Mask,
+    offset: usize,
+}
+
+impl<T: NativePType> SourceKernel for PrimitiveKernel<T> {
+    fn skip(&mut self, n: usize) {
+        self.offset += n * N;
+    }
+
+    fn step(
+        &mut self,
+        _ctx: &KernelContext,
+        selection: &BitView,
+        out: &mut VectorMut,
+    ) -> VortexResult<()> {
+        let out = out.as_primitive_mut().downcast::<T>();
+
+        // SAFETY: we know the output has sufficient capacity. We just have to append nulls
+        //  separately from copying over the elements.
+        unsafe {
+            out.validity_mut().append_n(true, selection.true_count());
+            out.elements_mut().set_len(selection.true_count());
+        }
+
+        let source = &self.buffer.as_slice()[self.offset..];
+
+        let mut out_pos = 0;
+        // `iter_slices` yields `(start, len)` runs of set bits, not `(start, end)` ranges.
+        selection.iter_slices(|(start, len)| {
+            let run = &source[start..][..len];
+            out.as_mut()[out_pos..][..len].copy_from_slice(run);
+            out_pos += len;
+        });
+
+        Ok(())
+    }
+}
diff --git a/vortex-array/src/pipeline/bits.rs b/vortex-array/src/pipeline/bits.rs
index c638806d92d..5d478952670 100644
--- a/vortex-array/src/pipeline/bits.rs
+++ b/vortex-array/src/pipeline/bits.rs
@@ -4,7 +4,7 @@
 use std::fmt::{Debug, Formatter};
 
 use bitvec::prelude::*;
-use vortex_error::{VortexError, VortexResult, vortex_err};
+use vortex_error::{vortex_err, VortexError, VortexResult};
 
 use crate::pipeline::{N, N_WORDS};
 
@@ -155,6 +155,8 @@ impl<'a> BitView<'a> {
     ///
     /// The function `f` receives a tuple `(start, len)` where `start` is the index of the first
     /// `true` bit and `len` is the number of consecutive `true` bits.
+    ///
+    /// FIXME(ngates): this code is broken.
     pub fn iter_slices<F>(&self, mut f: F)
     where
         F: FnMut((usize, usize)),
@@ -229,7 +231,6 @@ mod tests {
     use vortex_mask::Mask;
 
     use super::*;
-    use crate::pipeline::bits::BitVector;
 
     #[test]
     fn test_iter_ones_empty() {
@@ -521,29 +522,6 @@ mod tests {
         });
     }
 
-    #[test]
-    fn test_mask_and_bitview_all_true() {
-        let mask = Mask::AllTrue(5);
-
-        let vector = BitVector::true_until(5);
-
-        let view = vector.as_view();
-
-        // Collect indices from BitView
-        let mut bitview_ones = Vec::new();
-        view.iter_ones(|idx| bitview_ones.push(idx));
-
-        // Collect indices from BitView
-        let mask_ones = mask.iter_bools(|iter| {
-            iter.enumerate()
-                .filter(|(_, b)| *b)
-                .map(|(i, _)| i)
-                .collect::<Vec<_>>()
-        });
-
-        assert_eq!(bitview_ones, mask_ones);
-    }
-
     #[test]
     fn test_bitview_zeros_complement_mask() {
         // Create a pattern
diff --git a/vortex-array/src/pipeline/mod.rs b/vortex-array/src/pipeline/mod.rs
index d2da1ff8616..2c412c1d7c2 100644
--- a/vortex-array/src/pipeline/mod.rs
+++ b/vortex-array/src/pipeline/mod.rs
@@ -23,6 +23,7 @@ pub mod source_driver;
 
 use crate::Array;
 use bits::BitView;
+use std::ops::Deref;
 use vortex_error::VortexResult;
 use vortex_vector::{Vector, VectorMut};
 
@@ -60,7 +61,7 @@ pub trait PipelinedOperator: Array {
     fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult<Box<dyn OperatorKernel>>;
 }
 
-pub trait PipelinedSource: Array {
+pub trait PipelinedSource: Deref<Target = dyn Array> {
     /// Bind the operator into a [`Kernel`] for pipelined execution.
     ///
     /// The provided [`BindContext`] can be used to obtain vector IDs for pipelined children and
@@ -143,7 +144,7 @@ impl KernelContext {
     }
 
     /// Get a vector by its ID.
-    pub fn vector(&self, _vector_id: VectorId) -> &Vector {
-        todo!()
+    pub fn vector(&self, vector_id: VectorId) -> &Vector {
+        &self.vectors[vector_id]
     }
 }
diff --git a/vortex-array/src/pipeline/source_driver.rs b/vortex-array/src/pipeline/source_driver.rs
index ff74ed5cf55..74ba0a3aa59 100644
--- a/vortex-array/src/pipeline/source_driver.rs
+++ b/vortex-array/src/pipeline/source_driver.rs
@@ -103,3 +103,30 @@ impl BindContext for PipelineSourceBindCtx<'_> {
         self.batch_inputs[child_idx].clone()
     }
 }
+
+#[cfg(test)]
+mod test {
+    use crate::arrays::PrimitiveArray;
+    use crate::pipeline::source_driver::PipelineSourceDriver;
+    use crate::validity::Validity;
+    use vortex_buffer::buffer;
+    use vortex_dtype::PTypeDowncastExt;
+    use vortex_mask::Mask;
+    use vortex_vector::VectorOps;
+
+    #[test]
+    fn test_primitive() {
+        let array = PrimitiveArray::new::<u32>(buffer![0..100000u32], Validity::AllValid);
+
+        // Create a selection mask with some ranges.
+        let mask = Mask::from_iter((0..100000).map(|i| i % 30 < 20));
+
+        let out = PipelineSourceDriver::new(&array)
+            .execute(&mask)
+            .unwrap()
+            .into_primitive()
+            .downcast::<u32>();
+
+        assert_eq!(out.len(), mask.true_count());
+    }
+}
diff --git a/vortex-array/src/vtable/operator.rs b/vortex-array/src/vtable/operator.rs
index fa731946dbe..2dbf34a9002 100644
--- a/vortex-array/src/vtable/operator.rs
+++ b/vortex-array/src/vtable/operator.rs
@@ -7,6 +7,7 @@ use vortex_vector::Vector;
 
 use crate::array::IntoArray;
 use crate::execution::{BatchKernelRef, BindCtx, ExecutionCtx};
+use crate::pipeline::PipelinedSource;
 use crate::vtable::{NotSupported, VTable};
 use crate::ArrayRef;
 
@@ -39,6 +40,11 @@ pub trait OperatorVTable<V: VTable> {
         Self::bind(array, Some(&selection.clone().into_array()), &mut ())?.execute()
     }
 
+    /// Downcast this array into a [`PipelinedSource`] if it supports pipelined execution.
+    fn as_pipelined_source(_array: &V::Array) -> Option<&dyn PipelinedSource> {
+        None
+    }
+
     /// Bind the array for execution in batch mode.
     ///
     /// This function should return a [`BatchKernelRef`] that can be used to execute the array in
diff --git a/vortex-buffer/src/buffer_mut.rs b/vortex-buffer/src/buffer_mut.rs
index f0ea523b5d4..436006f7d2f 100644
--- a/vortex-buffer/src/buffer_mut.rs
+++ b/vortex-buffer/src/buffer_mut.rs
@@ -9,7 +9,7 @@ use std::ops::{Deref, DerefMut};
 
 use bytes::buf::UninitSlice;
 use bytes::{Buf, BufMut, BytesMut};
-use vortex_error::{VortexExpect, vortex_panic};
+use vortex_error::{vortex_panic, VortexExpect};
 
 use crate::debug::TruncatedDebug;
 use crate::trusted_len::TrustedLen;
@@ -338,8 +338,8 @@ impl<T> BufferMut<T> {
     ///
     /// Panics if either half would have a length that is not a multiple of the alignment.
     pub fn split_off(&mut self, at: usize) -> Self {
-        if at > self.len() {
-            vortex_panic!("Cannot split buffer of length {} at {}", self.len(), at);
+        if at > self.capacity() {
+            vortex_panic!("Cannot split buffer of capacity {} at {}", self.capacity(), at);
         }
 
         let bytes_at = at * size_of::<T>();
@@ -352,8 +352,10 @@ impl<T> BufferMut<T> {
         }
 
         let new_bytes = self.bytes.split_off(bytes_at);
-        let new_length = self.length - at;
-        self.length = at;
+
+        // Adjust the lengths, given that length may be < at
+        let new_length = self.length.saturating_sub(at);
+        self.length = self.length.min(at);
 
         BufferMut {
             bytes: new_bytes,
@@ -724,7 +726,7 @@ impl Write for ByteBufferMut {
 mod test {
     use bytes::{Buf, BufMut};
 
-    use crate::{Alignment, BufferMut, ByteBufferMut, buffer_mut};
+    use crate::{buffer_mut, Alignment, BufferMut, ByteBufferMut};
 
     #[test]
     fn capacity() {
diff --git a/vortex-mask/src/mask_mut.rs b/vortex-mask/src/mask_mut.rs
index 5a85850213b..e7e5c736ed6 100644
--- a/vortex-mask/src/mask_mut.rs
+++ b/vortex-mask/src/mask_mut.rs
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-use std::ops::Sub;
 use std::sync::Arc;
 
 use vortex_buffer::BitBufferMut;
@@ -95,6 +94,35 @@ impl MaskMut {
         }
     }
 
+    /// Set the length of the mask.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure that `new_len` does not exceed the capacity of the mask.
+    pub unsafe fn set_len(&mut self, new_len: usize) {
+        debug_assert!(new_len <= self.capacity());
+        match &mut self.0 {
+            Inner::Empty { capacity, .. } => {
+                self.0 = Inner::Constant {
+                    value: false, // Pick any value
+                    len: new_len,
+                    capacity: *capacity,
+                }
+            }
+            Inner::Constant { len, .. } => *len = new_len,
+            Inner::Builder(bits) => unsafe { bits.set_len(new_len) },
+        }
+    }
+
+    /// Returns the capacity of the mask.
+    pub fn capacity(&self) -> usize {
+        match &self.0 {
+            Inner::Empty { capacity } => *capacity,
+            Inner::Constant { capacity, .. } => *capacity,
+            Inner::Builder(bits) => bits.capacity(),
+        }
+    }
+
     /// Clears the mask.
     ///
     /// Note that this method has no effect on the allocated capacity of the mask.
@@ -207,10 +235,11 @@ impl MaskMut {
     /// values from `at` to the end, and leaving `self` with the values from
     /// the start to `at`.
     pub fn split_off(&mut self, at: usize) -> Self {
-        assert!(at <= self.len(), "split_off index out of bounds");
+        assert!(at <= self.capacity(), "split_off index out of bounds");
         match &mut self.0 {
             Inner::Empty { capacity } => {
-                let new_capacity = (*capacity).saturating_sub(at);
+                let new_capacity = *capacity - at;
+                *capacity = at;
                 Self(Inner::Empty {
                     capacity: new_capacity,
                 })
@@ -220,9 +249,12 @@ impl MaskMut {
                 len,
                 capacity,
             } => {
-                let new_len = len.sub(at);
-                *len = at;
-                let new_capacity = (*capacity).saturating_sub(at);
+                // Adjust the lengths, given that length may be < at
+                let new_len = len.saturating_sub(at);
+                let new_capacity = *capacity - at;
+                *len = (*len).min(at);
+                *capacity = at;
+
                 Self(Inner::Constant {
                     value: *value,
                     len: new_len,
diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs
index 5dbf64ae951..2525592e1ae 100644
--- a/vortex-vector/src/primitive/generic_mut.rs
+++ b/vortex-vector/src/primitive/generic_mut.rs
@@ -5,7 +5,7 @@
 
 use vortex_buffer::BufferMut;
 use vortex_dtype::NativePType;
-use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
+use vortex_error::{vortex_ensure, VortexExpect, VortexResult};
 use vortex_mask::MaskMut;
 
 use crate::primitive::PVector;
@@ -73,6 +73,38 @@ impl<T> PVectorMut<T> {
         }
     }
 
+    /// Set the length of the vector.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure that the new length does not exceed the capacity of the vector.
+    pub unsafe fn set_len(&mut self, new_len: usize) {
+        debug_assert!(new_len <= self.elements.capacity());
+        debug_assert!(new_len <= self.validity.capacity());
+        unsafe { self.elements.set_len(new_len) };
+        unsafe { self.validity.set_len(new_len) };
+    }
+
+    /// Returns a mutable reference to the elements buffer.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure that any mutations to the elements do not violate the
+    /// invariants of the vector (e.g., the length must remain consistent with the elements buffer).
+    pub unsafe fn elements_mut(&mut self) -> &mut BufferMut<T> {
+        &mut self.elements
+    }
+
+    /// Returns a mutable reference to the validity mask.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure that any mutations to the validity mask do not violate the
+    /// invariants of the vector (e.g., the length must remain consistent with the elements buffer).
+    pub unsafe fn validity_mut(&mut self) -> &mut MaskMut {
+        &mut self.validity
+    }
+
     /// Decomposes the primitive vector into its constituent parts (buffer and validity).
     pub fn into_parts(self) -> (BufferMut<T>, MaskMut) {
         (self.elements, self.validity)
diff --git a/vortex-vector/src/primitive/vector_mut.rs b/vortex-vector/src/primitive/vector_mut.rs
index 1d3a9812eeb..eef24f04707 100644
--- a/vortex-vector/src/primitive/vector_mut.rs
+++ b/vortex-vector/src/primitive/vector_mut.rs
@@ -9,7 +9,7 @@ use vortex_error::vortex_panic;
 use vortex_mask::MaskMut;
 
 use crate::primitive::{PVectorMut, PrimitiveVector};
-use crate::{VectorMutOps, match_each_pvector_mut};
+use crate::{match_each_pvector_mut, VectorMutOps};
 
 /// A mutable vector of primitive values.
 ///
@@ -142,7 +142,7 @@ impl VectorMutOps for PrimitiveVectorMut {
             (Self::F16(a), Self::F16(b)) => a.unsplit(b),
             (Self::F32(a), Self::F32(b)) => a.unsplit(b),
             (Self::F64(a), Self::F64(b)) => a.unsplit(b),
-            _ => ::vortex_error::vortex_panic!("Mismatched primitive vector types"),
+            _ => vortex_panic!("Mismatched primitive vector types"),
         }
     }
 }
@@ -276,6 +276,87 @@ impl PTypeDowncast for PrimitiveVectorMut {
     }
 }
 
+impl<'a> PTypeDowncast for &'a mut PrimitiveVectorMut {
+    type Output<T: NativePType> = &'a mut PVectorMut<T>;
+
+    fn into_u8(self) -> Self::Output<u8> {
+        match self {
+            PrimitiveVectorMut::U8(v) => v,
+            _ => vortex_panic!("Expected PrimitiveVectorMut::U8, got {self:?}"),
+        }
+    }
+
+    fn into_u16(self) -> Self::Output<u16> {
+        match self {
+            PrimitiveVectorMut::U16(v) => v,
+            _ => vortex_panic!("Expected PrimitiveVectorMut::U16, got {self:?}"),
+        }
+    }
+
+    fn into_u32(self) -> Self::Output<u32> {
+        match self {
+            PrimitiveVectorMut::U32(v) => v,
+            _ => vortex_panic!("Expected PrimitiveVectorMut::U32, got {self:?}"),
+        }
+    }
+
+    fn into_u64(self) -> Self::Output<u64> {
+        match self {
+            PrimitiveVectorMut::U64(v) => v,
+            _ => vortex_panic!("Expected PrimitiveVectorMut::U64, got {self:?}"),
+        }
+    }
+
+    fn into_i8(self) -> Self::Output<i8> {
+        match self {
+            PrimitiveVectorMut::I8(v) => v,
+            _ => vortex_panic!("Expected PrimitiveVectorMut::I8, got {self:?}"),
+        }
+    }
+
+    fn into_i16(self) -> Self::Output<i16> {
+        match self {
+            PrimitiveVectorMut::I16(v) => v,
+            _ => vortex_panic!("Expected PrimitiveVectorMut::I16, got {self:?}"),
+        }
+    }
+
+    fn into_i32(self) -> Self::Output<i32> {
+        match self {
+            PrimitiveVectorMut::I32(v) => v,
+            _ => vortex_panic!("Expected PrimitiveVectorMut::I32, got {self:?}"),
+        }
+    }
+
+    fn into_i64(self) -> Self::Output<i64> {
+        match self {
+            PrimitiveVectorMut::I64(v) => v,
+            _ => vortex_panic!("Expected PrimitiveVectorMut::I64, got {self:?}"),
+        }
+    }
+
+    fn into_f16(self) -> Self::Output<f16> {
+        match self {
+            PrimitiveVectorMut::F16(v) => v,
+            _ => vortex_panic!("Expected PrimitiveVectorMut::F16, got {self:?}"),
+        }
+    }
+
+    fn into_f32(self) -> Self::Output<f32> {
+        match self {
+            PrimitiveVectorMut::F32(v) => v,
+            _ => vortex_panic!("Expected PrimitiveVectorMut::F32, got {self:?}"),
+        }
+    }
+
+    fn into_f64(self) -> Self::Output<f64> {
+        match self {
+            PrimitiveVectorMut::F64(v) => v,
+            _ => vortex_panic!("Expected PrimitiveVectorMut::F64, got {self:?}"),
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/vortex-vector/src/vector_mut.rs b/vortex-vector/src/vector_mut.rs
index b45a0868f63..9be1f3fdfaa 100644
--- a/vortex-vector/src/vector_mut.rs
+++ b/vortex-vector/src/vector_mut.rs
@@ -18,7 +18,7 @@ use crate::listview::ListViewVectorMut;
 use crate::null::NullVectorMut;
 use crate::primitive::PrimitiveVectorMut;
 use crate::struct_::StructVectorMut;
-use crate::{Vector, VectorMutOps, match_each_vector_mut, match_vector_pair};
+use crate::{match_each_vector_mut, match_vector_pair, Vector, VectorMutOps};
 
 /// An enum over all kinds of mutable vectors, which represent fully decompressed (canonical) array
 /// data.
@@ -131,7 +131,7 @@ impl VectorMutOps for VectorMut {
 
 impl VectorMut {
     /// Returns a reference to the inner [`NullVectorMut`] if `self` is of that variant.
-    pub fn as_null(&self) -> &NullVectorMut {
+    pub fn as_null_mut(&mut self) -> &mut NullVectorMut {
         if let VectorMut::Null(v) = self {
             return v;
         }
@@ -139,7 +139,7 @@ impl VectorMut {
     }
 
     /// Returns a reference to the inner [`BoolVectorMut`] if `self` is of that variant.
-    pub fn as_bool(&self) -> &BoolVectorMut {
+    pub fn as_bool_mut(&mut self) -> &mut BoolVectorMut {
         if let VectorMut::Bool(v) = self {
             return v;
         }
@@ -147,7 +147,7 @@ impl VectorMut {
     }
 
     /// Returns a reference to the inner [`PrimitiveVectorMut`] if `self` is of that variant.
-    pub fn as_primitive(&self) -> &PrimitiveVectorMut {
+    pub fn as_primitive_mut(&mut self) -> &mut PrimitiveVectorMut {
         if let VectorMut::Primitive(v) = self {
             return v;
         }
@@ -155,7 +155,7 @@ impl VectorMut {
     }
 
     /// Returns a reference to the inner [`StringVectorMut`] if `self` is of that variant.
-    pub fn as_string(&self) -> &StringVectorMut {
+    pub fn as_string_mut(&mut self) -> &mut StringVectorMut {
         if let VectorMut::String(v) = self {
             return v;
         }
@@ -163,7 +163,7 @@ impl VectorMut {
     }
 
     /// Returns a reference to the inner [`BinaryVectorMut`] if `self` is of that variant.
-    pub fn as_binary(&self) -> &BinaryVectorMut {
+    pub fn as_binary_mut(&mut self) -> &mut BinaryVectorMut {
         if let VectorMut::Binary(v) = self {
             return v;
         }
@@ -171,7 +171,7 @@ impl VectorMut {
     }
 
     /// Returns a reference to the inner [`ListViewVectorMut`] if `self` is of that variant.
-    pub fn as_list(&self) -> &ListViewVectorMut {
+    pub fn as_list_mut(&mut self) -> &mut ListViewVectorMut {
         if let VectorMut::List(v) = self {
             return v;
         }
@@ -179,7 +179,7 @@ impl VectorMut {
     }
 
     /// Returns a reference to the inner [`FixedSizeListVectorMut`] if `self` is of that variant.
-    pub fn as_fixed_size_list(&self) -> &FixedSizeListVectorMut {
+    pub fn as_fixed_size_list_mut(&mut self) -> &mut FixedSizeListVectorMut {
         if let VectorMut::FixedSizeList(v) = self {
             return v;
         }
@@ -187,7 +187,7 @@ impl VectorMut {
     }
 
     /// Returns a reference to the inner [`StructVectorMut`] if `self` is of that variant.
-    pub fn as_struct(&self) -> &StructVectorMut {
+    pub fn as_struct_mut(&mut self) -> &mut StructVectorMut {
         if let VectorMut::Struct(v) = self {
             return v;
         }
@@ -267,9 +267,9 @@ mod tests {
     use vortex_dtype::{DecimalDType, Nullability, PType};
 
     use super::*;
-    use crate::VectorOps;
     use crate::decimal::DecimalVectorMut;
     use crate::primitive::PVectorMut;
+    use crate::VectorOps;
 
     #[test]
     fn test_with_capacity() {

From d53c68b34811243d19da2116b60d09b679bac23f Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Fri, 7 Nov 2025 17:45:51 -0500
Subject: [PATCH 05/10] pipelined execution

Signed-off-by: Nicholas Gates <nick@nickgates.com>
---
 .../src/arrays/primitive/vtable/operator.rs   |   2 +-
 .../src/pipeline/{bits.rs => bit_view.rs}     | 256 +++++++-----------
 vortex-array/src/pipeline/mod.rs              |  16 +-
 vortex-array/src/pipeline/source_driver.rs    |   2 +-
 4 files changed, 112 insertions(+), 164 deletions(-)
 rename vortex-array/src/pipeline/{bits.rs => bit_view.rs} (66%)

diff --git a/vortex-array/src/arrays/primitive/vtable/operator.rs b/vortex-array/src/arrays/primitive/vtable/operator.rs
index 4653d3ef5b8..cf3b6b5338f 100644
--- a/vortex-array/src/arrays/primitive/vtable/operator.rs
+++ b/vortex-array/src/arrays/primitive/vtable/operator.rs
@@ -3,7 +3,7 @@
 
 use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable};
 use crate::execution::{kernel, BatchKernelRef, BindCtx};
-use crate::pipeline::bits::BitView;
+use crate::pipeline::bit_view::BitView;
 use crate::pipeline::{BindContext, KernelContext, PipelinedSource, SourceKernel, N};
 use crate::vtable::{OperatorVTable, ValidityHelper};
 use crate::{ArrayRef, IntoArray};
diff --git a/vortex-array/src/pipeline/bits.rs b/vortex-array/src/pipeline/bit_view.rs
similarity index 66%
rename from vortex-array/src/pipeline/bits.rs
rename to vortex-array/src/pipeline/bit_view.rs
index 5d478952670..b23e3c8108d 100644
--- a/vortex-array/src/pipeline/bits.rs
+++ b/vortex-array/src/pipeline/bit_view.rs
@@ -3,20 +3,20 @@
 
 use std::fmt::{Debug, Formatter};
 
-use bitvec::prelude::*;
-use vortex_error::{vortex_err, VortexError, VortexResult};
+use vortex_error::VortexResult;
 
-use crate::pipeline::{N, N_WORDS};
+use crate::pipeline::{N, N_BYTES, N_WORDS};
 
 /// A borrowed fixed-size bit vector of length `N` bits, represented as an array of usize words.
 ///
-/// Internally, it uses a [`BitArray`] to store the bits, but this crate has some
-/// performance foot-guns in cases where we can lean on better assumptions, and therefore we wrap
-/// it up for use within Vortex.
-/// Read-only view into a bit array for selection masking in operator operations.
+/// This struct is designed to provide a view over a Vortex [`vortex_buffer::BitBuffer`], therefore
+/// the bit-ordering is LSB0 (least-significant-bit first).
+///
+/// Note that [`BitView`] does not support an offset. Therefore, bits are assumed to start at
+/// index `0` and end at index `N - 1`.
 #[derive(Clone, Copy)]
 pub struct BitView<'a> {
-    bits: &'a BitArray<[usize; N_WORDS], Lsb0>,
+    bits: &'a [u8; N_BYTES],
     // TODO(ngates): we may want to expose this for optimizations.
     // If set to Selection::Prefix, then all true bits are at the start of the array.
     // selection: Selection,
@@ -34,43 +34,26 @@ impl Debug for BitView<'_> {
 
 impl BitView<'static> {
     pub fn all_true() -> Self {
-        static ALL_TRUE: [usize; N_WORDS] = [usize::MAX; N_WORDS];
-        unsafe {
-            BitView::new_unchecked(
-                std::mem::transmute::<&[usize; N_WORDS], &BitArray<[usize; N_WORDS], Lsb0>>(
-                    &ALL_TRUE,
-                ),
-                N,
-            )
-        }
+        static ALL_TRUE: [u8; N_BYTES] = [u8::MAX; N_BYTES];
+        unsafe { BitView::new_unchecked(&ALL_TRUE, N) }
     }
 
     pub fn all_false() -> Self {
-        static ALL_FALSE: [usize; N_WORDS] = [0; N_WORDS];
-        unsafe {
-            BitView::new_unchecked(
-                std::mem::transmute::<&[usize; N_WORDS], &BitArray<[usize; N_WORDS], Lsb0>>(
-                    &ALL_FALSE,
-                ),
-                0,
-            )
-        }
+        static ALL_FALSE: [u8; N_BYTES] = [0; N_BYTES];
+        unsafe { BitView::new_unchecked(&ALL_FALSE, 0) }
     }
 }
 
 impl<'a> BitView<'a> {
-    pub fn new(bits: &[usize; N_WORDS]) -> Self {
-        let true_count = bits.iter().map(|&word| word.count_ones() as usize).sum();
-        let bits: &BitArray<[usize; N_WORDS], Lsb0> = unsafe {
-            std::mem::transmute::<&[usize; N_WORDS], &BitArray<[usize; N_WORDS], Lsb0>>(bits)
-        };
+    pub fn new(bits: &'a [u8; N_BYTES]) -> Self {
+        let ptr = bits.as_ptr().cast::<usize>();
+        let true_count = (0..N_WORDS)
+            .map(|idx| unsafe { ptr.add(idx).read_unaligned().count_ones() as usize })
+            .sum();
         BitView { bits, true_count }
     }
 
-    pub(crate) unsafe fn new_unchecked(
-        bits: &'a BitArray<[usize; N_WORDS], Lsb0>,
-        true_count: usize,
-    ) -> Self {
+    pub(crate) unsafe fn new_unchecked(bits: &'a [u8; N_BYTES], true_count: usize) -> Self {
         BitView { bits, true_count }
     }
 
@@ -79,6 +62,17 @@ impl<'a> BitView<'a> {
         self.true_count
     }
 
+    /// Iterate the [`BitView`] in fixed-size words.
+    ///
+    /// The words are loaded using unaligned loads to ensure correct bit ordering.
+    /// For example, bit 0 is located in `word & 1 << 0`, bit 63 is located in `word & 1 << 63`,
+    /// assuming the word size is 64 bits.
+    fn iter_words(&self) -> impl Iterator<Item = usize> + '_ {
+        let ptr = self.bits.as_ptr().cast::<usize>();
+        // We use constant N_WORDS to trigger loop unrolling.
+        (0..N_WORDS).map(move |idx| unsafe { ptr.add(idx).read_unaligned() })
+    }
+
     /// Runs the provided function `f` for each index of a `true` bit in the view.
     pub fn iter_ones<F>(&self, mut f: F)
     where
@@ -89,7 +83,7 @@ impl<'a> BitView<'a> {
             N => (0..N).for_each(&mut f),
             _ => {
                 let mut bit_idx = 0;
-                for mut raw in self.bits.into_inner() {
+                for mut raw in self.iter_words() {
                     while raw != 0 {
                         let bit_pos = raw.trailing_zeros();
                         f(bit_idx + bit_pos as usize);
@@ -116,7 +110,7 @@ impl<'a> BitView<'a> {
             }
             _ => {
                 let mut bit_idx = 0;
-                for mut raw in self.bits.into_inner() {
+                for mut raw in self.iter_words() {
                     while raw != 0 {
                         let bit_pos = raw.trailing_zeros();
                         f(bit_idx + bit_pos as usize)?;
@@ -139,7 +133,7 @@ impl<'a> BitView<'a> {
             N => {}
             _ => {
                 let mut bit_idx = 0;
-                for mut raw in self.bits.into_inner() {
+                for mut raw in self.iter_words() {
                     while raw != usize::MAX {
                         let bit_pos = raw.trailing_ones();
                         f(bit_idx + bit_pos as usize);
@@ -166,7 +160,8 @@ impl<'a> BitView<'a> {
             N => f((0, N)),
             _ => {
                 let mut bit_idx = 0;
-                for mut raw in self.bits.into_inner() {
+                for raw in self.bits {
+                    let mut raw = *raw;
                     let mut offset = 0;
                     while raw != 0 {
                         // Skip leading zeros first
@@ -192,49 +187,66 @@ impl<'a> BitView<'a> {
         }
     }
 
-    pub fn as_raw(&self) -> &[usize; N_WORDS] {
-        // It's actually remarkably hard to get a reference to the underlying array!
-        let raw = self.bits.as_raw_slice();
-        unsafe { &*(raw.as_ptr() as *const [usize; N_WORDS]) }
-    }
-}
-
-impl<'a> From<&'a [usize; N_WORDS]> for BitView<'a> {
-    fn from(value: &'a [usize; N_WORDS]) -> Self {
-        Self::new(value)
-    }
-}
-
-impl<'a> From<&'a BitArray<[usize; N_WORDS], Lsb0>> for BitView<'a> {
-    fn from(bits: &'a BitArray<[usize; N_WORDS], Lsb0>) -> Self {
-        BitView::new(unsafe {
-            std::mem::transmute::<&BitArray<[usize; N_WORDS]>, &[usize; N_WORDS]>(bits)
-        })
-    }
-}
-
-impl<'a> TryFrom<&'a BitSlice<usize, Lsb0>> for BitView<'a> {
-    type Error = VortexError;
-
-    fn try_from(value: &'a BitSlice<usize, Lsb0>) -> Result<Self, Self::Error> {
-        let bits: &BitArray<[usize; N_WORDS], Lsb0> = value
-            .try_into()
-            .map_err(|e| vortex_err!("Failed to convert BitSlice to BitArray: {}", e))?;
-        Ok(BitView::new(unsafe {
-            std::mem::transmute::<&BitArray<[usize; N_WORDS]>, &[usize; N_WORDS]>(bits)
-        }))
+    pub fn as_raw(&self) -> &[u8; N_BYTES] {
+        self.bits
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use vortex_mask::Mask;
+    use bitvec::slice::BitSlice;
+    use vortex_buffer::BitBufferMut;
 
     use super::*;
 
+    #[test]
+    fn test_bits() {
+        let mut bits = BitBufferMut::new_unset(128);
+        bits.set(1);
+        bits.set(2);
+        bits.set(3);
+        bits.set(8);
+        bits.set(64);
+        let bits = bits.freeze();
+        assert_eq!(bits.set_indices().collect::<Vec<_>>(), vec![1, 2, 3, 8, 64]);
+
+        // Can we just transmute and pass it into bitvec crate?
+        // Absolutely not is that answer.
+        let slice_u64 =
+            BitSlice::<u64>::from_slice(unsafe { std::mem::transmute(bits.inner().as_ref()) });
+        assert_ne!(
+            slice_u64.iter_ones().collect::<Vec<_>>(),
+            vec![1, 2, 3, 8, 64]
+        );
+
+        // But if we have a &[u8], we can use unaligned load to pull it into the right order.
+        unsafe {
+            let vec_usize = (0..2)
+                .map(|idx| {
+                    bits.inner()
+                        .as_ptr()
+                        .cast::<usize>()
+                        .add(idx)
+                        .read_unaligned()
+                })
+                .collect::<Vec<_>>();
+            let slice_usize = BitSlice::<usize>::from_slice(&vec_usize);
+            assert_eq!(
+                slice_usize.iter_ones().collect::<Vec<_>>(),
+                vec![1, 2, 3, 8, 64]
+            );
+        }
+
+        println!(
+            "Bits: {:08b} {:08b}",
+            bits.inner().as_ref()[0],
+            bits.inner().as_ref()[1]
+        );
+    }
+
     #[test]
     fn test_iter_ones_empty() {
-        let bits = [0usize; N_WORDS];
+        let bits = [0; N_BYTES];
         let view = BitView::new(&bits);
 
         let mut ones = Vec::new();
@@ -258,7 +270,7 @@ mod tests {
 
     #[test]
     fn test_iter_zeros_empty() {
-        let bits = [0usize; N_WORDS];
+        let bits = [0; N_BYTES];
         let view = BitView::new(&bits);
 
         let mut zeros = Vec::new();
@@ -280,7 +292,7 @@ mod tests {
 
     #[test]
     fn test_iter_ones_single_bit() {
-        let mut bits = [0usize; N_WORDS];
+        let mut bits = [0; N_BYTES];
         bits[0] = 1; // Set bit 0 (LSB)
         let view = BitView::new(&bits);
 
@@ -293,8 +305,8 @@ mod tests {
 
     #[test]
     fn test_iter_zeros_single_bit_unset() {
-        let mut bits = [usize::MAX; N_WORDS];
-        bits[0] = usize::MAX ^ 1; // Clear bit 0 (LSB)
+        let mut bits = [u8::MAX; N_BYTES];
+        bits[0] = u8::MAX ^ 1; // Clear bit 0 (LSB)
         let view = BitView::new(&bits);
 
         let mut zeros = Vec::new();
@@ -305,7 +317,7 @@ mod tests {
 
     #[test]
     fn test_iter_ones_multiple_bits_first_word() {
-        let mut bits = [0usize; N_WORDS];
+        let mut bits = [0; N_BYTES];
         bits[0] = 0b1010101; // Set bits 0, 2, 4, 6
         let view = BitView::new(&bits);
 
@@ -318,7 +330,7 @@ mod tests {
 
     #[test]
     fn test_iter_zeros_multiple_bits_first_word() {
-        let mut bits = [usize::MAX; N_WORDS];
+        let mut bits = [u8::MAX; N_BYTES];
         bits[0] = !0b1010101; // Clear bits 0, 2, 4, 6
         let view = BitView::new(&bits);
 
@@ -330,7 +342,7 @@ mod tests {
 
     #[test]
     fn test_iter_ones_across_words() {
-        let mut bits = [0usize; N_WORDS];
+        let mut bits = [0; N_BYTES];
         bits[0] = 1 << 63; // Set bit 63 of first word
         bits[1] = 1; // Set bit 0 of second word (bit 64 overall)
         bits[2] = 1 << 31; // Set bit 31 of third word (bit 159 overall)
@@ -345,7 +357,7 @@ mod tests {
 
     #[test]
     fn test_iter_zeros_across_words() {
-        let mut bits = [usize::MAX; N_WORDS];
+        let mut bits = [u8::MAX; N_BYTES];
         bits[0] = !(1 << 63); // Clear bit 63 of first word
         bits[1] = !1; // Clear bit 0 of second word (bit 64 overall)
         bits[2] = !(1 << 31); // Clear bit 31 of third word (bit 159 overall)
@@ -359,7 +371,7 @@ mod tests {
 
     #[test]
     fn test_lsb_bit_ordering() {
-        let mut bits = [0usize; N_WORDS];
+        let mut bits = [0; N_BYTES];
         bits[0] = 0b11111111; // Set bits 0-7 (LSB ordering)
         let view = BitView::new(&bits);
 
@@ -372,7 +384,7 @@ mod tests {
 
     #[test]
     fn test_iter_ones_and_zeros_complement() {
-        let mut bits = [0usize; N_WORDS];
+        let mut bits = [0; N_BYTES];
         bits[0] = 0xAAAAAAAAAAAAAAAA; // Alternating pattern
         let view = BitView::new(&bits);
 
@@ -448,11 +460,11 @@ mod tests {
         let indices = vec![0, 10, 20, 63, 64, 100, 500, 1023];
 
         // Create corresponding BitView
-        let mut bits = [0usize; N_WORDS];
+        let mut bits = [0; N_BYTES];
         for idx in &indices {
-            let word_idx = idx / 64;
-            let bit_idx = idx % 64;
-            bits[word_idx] |= 1usize << bit_idx;
+            let word_idx = idx / 8;
+            let bit_idx = idx % 8;
+            bits[word_idx] |= 1u8 << bit_idx;
         }
         let view = BitView::new(&bits);
 
@@ -470,12 +482,12 @@ mod tests {
         let slices = vec![(0, 10), (100, 110), (500, 510)];
 
         // Create corresponding BitView
-        let mut bits = [0usize; N_WORDS];
+        let mut bits = [0; N_BYTES];
         for (start, end) in &slices {
             for idx in *start..*end {
-                let word_idx = idx / 64;
-                let bit_idx = idx % 64;
-                bits[word_idx] |= 1usize << bit_idx;
+                let word_idx = idx / 8;
+                let bit_idx = idx % 8;
+                bits[word_idx] |= 1u8 << bit_idx;
             }
         }
         let view = BitView::new(&bits);
@@ -493,68 +505,4 @@ mod tests {
         assert_eq!(bitview_ones, expected_indices);
         assert_eq!(view.true_count(), expected_indices.len());
     }
-
-    #[test]
-    fn test_mask_and_bitview_iter_match() {
-        // Create a pattern with alternating bits in first word
-        let mut bits = [0usize; N_WORDS];
-        bits[0] = 0xAAAAAAAAAAAAAAAA; // Alternating 1s and 0s
-        bits[1] = 0xFF00FF00FF00FF00; // Alternating bytes
-
-        let view = BitView::new(&bits);
-
-        // Collect indices from BitView
-        let mut bitview_ones = Vec::new();
-        view.iter_ones(|idx| bitview_ones.push(idx));
-
-        // Create Mask from the same indices
-        let mask = Mask::from_indices(N, bitview_ones.clone());
-
-        // Verify the mask returns the same indices
-        mask.iter_bools(|iter| {
-            let mask_bools: Vec<bool> = iter.collect();
-
-            // Check each bit matches
-            for i in 0..N {
-                let expected = bitview_ones.contains(&i);
-                assert_eq!(mask_bools[i], expected, "Mismatch at index {}", i);
-            }
-        });
-    }
-
-    #[test]
-    fn test_bitview_zeros_complement_mask() {
-        // Create a pattern
-        let mut bits = [0usize; N_WORDS];
-        bits[0] = 0b11110000111100001111000011110000;
-
-        let view = BitView::new(&bits);
-
-        // Collect ones and zeros from BitView
-        let mut bitview_ones = Vec::new();
-        let mut bitview_zeros = Vec::new();
-        view.iter_ones(|idx| bitview_ones.push(idx));
-        view.iter_zeros(|idx| bitview_zeros.push(idx));
-
-        // Create masks for ones and zeros
-        let ones_mask = Mask::from_indices(N, bitview_ones);
-        let zeros_mask = Mask::from_indices(N, bitview_zeros);
-
-        // Verify they are complements
-        ones_mask.iter_bools(|ones_iter| {
-            zeros_mask.iter_bools(|zeros_iter| {
-                let ones_bools: Vec<bool> = ones_iter.collect();
-                let zeros_bools: Vec<bool> = zeros_iter.collect();
-
-                for i in 0..N {
-                    // Each index should be either in ones or zeros, but not both
-                    assert_ne!(
-                        ones_bools[i], zeros_bools[i],
-                        "Index {} should be in exactly one set",
-                        i
-                    );
-                }
-            });
-        });
-    }
 }
diff --git a/vortex-array/src/pipeline/mod.rs b/vortex-array/src/pipeline/mod.rs
index 2c412c1d7c2..d22e596397d 100644
--- a/vortex-array/src/pipeline/mod.rs
+++ b/vortex-array/src/pipeline/mod.rs
@@ -18,11 +18,11 @@
 //!
 //! It is a work-in-progress and is not yet used in production.
 
-pub mod bits;
+pub mod bit_view;
 pub mod source_driver;
 
 use crate::Array;
-use bits::BitView;
+use bit_view::BitView;
 use std::ops::Deref;
 use vortex_error::VortexResult;
 use vortex_vector::{Vector, VectorMut};
@@ -97,13 +97,13 @@ pub type VectorId = usize;
 /// the setup costs (such as DType validation, stats short-circuiting, etc.), and to make better
 /// use of CPU caches by performing all operations while the data is hot.
 ///
-/// The [`SourceKernel::step`] method will be invoked repeatedly to process chunks of data, [`N`] elements
-/// at a time. Each invocation is passed a selection mask indicating which elements of the chunk
-/// should be written to the start of the output vector.
+/// The [`SourceKernel::step`] method will be invoked repeatedly to process chunks of data, [`N`]
+/// elements at a time. Each invocation is passed a selection mask indicating which elements of the
+/// chunk should be written to the start of the output vector.
 ///
-/// The mutable output vector is **guaranteed** to have a capacity of at least [`N`] elements, and
-/// its length will initially be set to zero. It is therefore safe to invoke unchecked writes up to
-/// `N` elements.
+/// The mutable output vector is **guaranteed** to have a capacity of at least [`N`] elements. The
+/// caller makes no guarantee about the initial length of the output vector; and the kernel is
+/// expected to append `selection.true_count()` elements.
 ///
 /// The pipeline may invoke the `SourceKernel::skip` method to skip over some number of chunks of data.
 /// The kernel should mutate any internal state as necessary to account for the skipped data.
diff --git a/vortex-array/src/pipeline/source_driver.rs b/vortex-array/src/pipeline/source_driver.rs
index 74ba0a3aa59..27fbe87f6e9 100644
--- a/vortex-array/src/pipeline/source_driver.rs
+++ b/vortex-array/src/pipeline/source_driver.rs
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-use crate::pipeline::bits::BitView;
+use crate::pipeline::bit_view::BitView;
 use crate::pipeline::{BindContext, KernelContext, PipelinedSource, VectorId, N};
 use itertools::Itertools;
 use vortex_error::{vortex_panic, VortexResult};

From f6815c2e4f62224055900b73b3fbe4f2071db9dc Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Sat, 8 Nov 2025 18:15:55 -0500
Subject: [PATCH 06/10] pipelined execution

Signed-off-by: Nicholas Gates <nick@nickgates.com>
---
 vortex-array/src/array/operator.rs            |   4 +-
 .../src/arrays/primitive/vtable/operator.rs   |  17 +-
 vortex-array/src/pipeline/bit_view.rs         | 347 +++++++++++-------
 vortex-array/src/pipeline/mod.rs              |   6 +-
 vortex-array/src/pipeline/source_driver.rs    |  91 +++--
 vortex-array/src/vtable/operator.rs           |   4 +-
 vortex-buffer/src/buffer_mut.rs               |   4 +-
 vortex-vector/src/primitive/generic_mut.rs    |   2 +-
 vortex-vector/src/primitive/vector_mut.rs     |   2 +-
 vortex-vector/src/vector_mut.rs               |   4 +-
 10 files changed, 285 insertions(+), 196 deletions(-)

diff --git a/vortex-array/src/array/operator.rs b/vortex-array/src/array/operator.rs
index 340997b0529..0a0e0705046 100644
--- a/vortex-array/src/array/operator.rs
+++ b/vortex-array/src/array/operator.rs
@@ -3,9 +3,9 @@
 
 use std::sync::Arc;
 
-use vortex_error::{vortex_panic, VortexResult};
+use vortex_error::{VortexResult, vortex_panic};
 use vortex_mask::Mask;
-use vortex_vector::{vector_matches_dtype, Vector, VectorOps};
+use vortex_vector::{Vector, VectorOps, vector_matches_dtype};
 
 use crate::execution::{BatchKernelRef, BindCtx, DummyExecutionCtx, ExecutionCtx};
 use crate::vtable::{OperatorVTable, VTable};
diff --git a/vortex-array/src/arrays/primitive/vtable/operator.rs b/vortex-array/src/arrays/primitive/vtable/operator.rs
index cf3b6b5338f..caa15c04901 100644
--- a/vortex-array/src/arrays/primitive/vtable/operator.rs
+++ b/vortex-array/src/arrays/primitive/vtable/operator.rs
@@ -1,19 +1,20 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable};
-use crate::execution::{kernel, BatchKernelRef, BindCtx};
-use crate::pipeline::bit_view::BitView;
-use crate::pipeline::{BindContext, KernelContext, PipelinedSource, SourceKernel, N};
-use crate::vtable::{OperatorVTable, ValidityHelper};
-use crate::{ArrayRef, IntoArray};
 use vortex_buffer::Buffer;
 use vortex_compute::filter::Filter;
-use vortex_dtype::{match_each_native_ptype, NativePType, PTypeDowncastExt};
+use vortex_dtype::{NativePType, PTypeDowncastExt, match_each_native_ptype};
 use vortex_error::VortexResult;
 use vortex_mask::Mask;
-use vortex_vector::primitive::PVector;
 use vortex_vector::VectorMut;
+use vortex_vector::primitive::PVector;
+
+use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable};
+use crate::execution::{BatchKernelRef, BindCtx, kernel};
+use crate::pipeline::bit_view::BitView;
+use crate::pipeline::{BindContext, KernelContext, N, PipelinedSource, SourceKernel};
+use crate::vtable::{OperatorVTable, ValidityHelper};
+use crate::{ArrayRef, IntoArray};
 
 impl OperatorVTable<PrimitiveVTable> for PrimitiveVTable {
     fn as_pipelined_source(array: &PrimitiveArray) -> Option<&dyn PipelinedSource> {
diff --git a/vortex-array/src/pipeline/bit_view.rs b/vortex-array/src/pipeline/bit_view.rs
index b23e3c8108d..aa3129b66fc 100644
--- a/vortex-array/src/pipeline/bit_view.rs
+++ b/vortex-array/src/pipeline/bit_view.rs
@@ -1,8 +1,10 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
+use std::borrow::Cow;
 use std::fmt::{Debug, Formatter};
 
+use vortex_buffer::BitBuffer;
 use vortex_error::VortexResult;
 
 use crate::pipeline::{N, N_BYTES, N_WORDS};
@@ -14,9 +16,8 @@ use crate::pipeline::{N, N_BYTES, N_WORDS};
 ///
 /// Note that [`BitView`] does not support an offset. Therefore, bits are assumed to start at
 /// index and end at index `N - 1`.
-#[derive(Clone, Copy)]
 pub struct BitView<'a> {
-    bits: &'a [u8; N_BYTES],
+    bits: Cow<'a, [u8; N_BYTES]>,
     // TODO(ngates): we may want to expose this for optimizations.
     // If set to Selection::Prefix, then all true bits are at the start of the array.
     // selection: Selection,
@@ -45,16 +46,82 @@ impl BitView<'static> {
 }
 
 impl<'a> BitView<'a> {
+    /// Creates a [`BitView`] from raw bits, computing the true count.
     pub fn new(bits: &'a [u8; N_BYTES]) -> Self {
         let ptr = bits.as_ptr().cast::<usize>();
         let true_count = (0..N_WORDS)
             .map(|idx| unsafe { ptr.add(idx).read_unaligned().count_ones() as usize })
             .sum();
-        BitView { bits, true_count }
+        BitView {
+            bits: Cow::Borrowed(bits),
+            true_count,
+        }
+    }
+
+    /// Creates a [`BitView`] from owned raw bits.
+    pub fn new_owned(bits: [u8; N_BYTES]) -> Self {
+        let ptr = bits.as_ptr().cast::<usize>();
+        let true_count = (0..N_WORDS)
+            .map(|idx| unsafe { ptr.add(idx).read_unaligned().count_ones() as usize })
+            .sum();
+        BitView {
+            bits: Cow::Owned(bits),
+            true_count,
+        }
     }
 
+    /// Creates a [`BitView`] from raw bits and a known true count.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure that `true_count` is correct for the provided `bits`.
     pub(crate) unsafe fn new_unchecked(bits: &'a [u8; N_BYTES], true_count: usize) -> Self {
-        BitView { bits, true_count }
+        BitView {
+            bits: Cow::Borrowed(bits),
+            true_count,
+        }
+    }
+
+    /// Creates a [`BitView`] from a byte slice.
+    ///
+    /// # Panics
+    ///
+    /// If the length of the slice is not equal to `N_BYTES`.
+    pub fn from_slice(bits: &'a [u8]) -> Self {
+        assert_eq!(bits.len(), N_BYTES);
+        let bits_array = unsafe { &*(bits.as_ptr() as *const [u8; N_BYTES]) };
+        BitView::new(bits_array)
+    }
+
+    /// Creates an owned [`BitView`] whose first `n_true` bits are `true` and whose remaining
+    /// bits are `false`. Panics if `n_true` is greater than `N`.
+    pub fn with_prefix(n_true: usize) -> Self {
+        assert!(n_true <= N);
+
+        // We're going to own our own array of bits
+        let mut bits = [0u8; N_BYTES];
+
+        // All-true words first
+        let n_full_words = n_true / (usize::BITS as usize);
+        let remaining_bits = n_true % (usize::BITS as usize);
+
+        let ptr = bits.as_mut_ptr().cast::<usize>();
+
+        // Fill the all-true words
+        for word_idx in 0..n_full_words {
+            unsafe { ptr.add(word_idx).write_unaligned(usize::MAX) };
+        }
+
+        // Fill the remaining bits in the next word
+        if remaining_bits > 0 {
+            let mask = (1usize << remaining_bits) - 1;
+            unsafe { ptr.add(n_full_words).write_unaligned(mask) };
+        }
+
+        Self {
+            bits: Cow::Owned(bits),
+            true_count: n_true,
+        }
     }
 
     /// Returns the number of `true` bits in the view.
@@ -145,104 +212,160 @@ impl<'a> BitView<'a> {
         }
     }
 
-    /// Runs the provided function `f` for each range of `true` bits in the view.
-    ///
-    /// The function `f` receives a tuple `(start, len)` where `start` is the index of the first
-    /// `true` bit and `len` is the number of consecutive `true` bits.
-    ///
-    /// FIXME(ngates): this code is broken.
+    /// Runs `f` for each maximal run of consecutive `true` bits in the view, in ascending order.
+    ///
+    /// The function `f` receives a tuple `(start, len)` where `start` is the index of the first
+    /// `true` bit and `len` is the number of consecutive `true` bits.
     pub fn iter_slices<F>(&self, mut f: F)
     where
         F: FnMut((usize, usize)),
     {
-        match self.true_count {
-            0 => {}
-            N => f((0, N)),
-            _ => {
-                let mut bit_idx = 0;
-                for raw in self.bits {
-                    let mut raw = *raw;
-                    let mut offset = 0;
-                    while raw != 0 {
-                        // Skip leading zeros first
-                        let zeros = raw.leading_zeros();
-                        offset += zeros;
-                        raw <<= zeros;
+        if self.true_count == 0 {
+            return;
+        }
+
+        let mut abs_bit_offset: usize = 0; // Absolute bit index of the *current* word being processed
+        let mut slice_start_bit: usize = 0; // Absolute start index of the run of 1s being tracked
+        let mut slice_length: usize = 0; // Accumulated length of the run of 1s
+
+        for mut word in self.iter_words() {
+            match word {
+                0 => {
+                    // If a slice was being tracked, the run ends at the start of this word.
+                    if slice_length > 0 {
+                        f((slice_start_bit, slice_length));
+                        slice_length = 0;
+                    }
+                }
+                usize::MAX => {
+                    // If a slice was not already open, it starts at the beginning of this word.
+                    if slice_length == 0 {
+                        slice_start_bit = abs_bit_offset;
+                    }
+                    // Extend the length by a full word (64 bits).
+                    slice_length += usize::BITS as usize;
+                }
+                _ => {
+                    let mut bit_in_word: usize = 0; // Bits of `word` consumed so far
+                    while word != 0 {
+                        // Find the first set bit (start of a run of 1s)
+                        let zeros = word.trailing_zeros() as usize;
+
+                        // If a run was open, and we hit a zero gap, report the finished slice
+                        if slice_length > 0 && zeros > 0 {
+                            f((slice_start_bit, slice_length));
+                            slice_length = 0; // Reset state for a new slice
+                        }
+
+                        // Advance past the zeros
+                        word >>= zeros;
+                        bit_in_word += zeros;
 
-                        if offset >= 64 {
+                        if word == 0 {
                             break;
                         }
 
-                        // Count leading ones
-                        let ones = raw.leading_ones();
-                        if ones > 0 {
-                            f((bit_idx + offset as usize, ones as usize));
-                            offset += ones;
-                            raw <<= ones;
+                        // Find the contiguous ones (the length of the current run segment)
+                        let ones = word.trailing_ones() as usize;
+                        // A new run starts at the current position within this word.
+                        if slice_length == 0 {
+                            slice_start_bit = abs_bit_offset + bit_in_word;
                         }
+                        // Extend the run and advance past the ones
+                        slice_length += ones;
+                        word >>= ones;
+                        bit_in_word += ones;
                     }
-                    bit_idx += usize::BITS as usize; // Move to next word
+                    // A run ending below the word's top bit cannot continue into the next word.
+                    if slice_length > 0 && bit_in_word < usize::BITS as usize {
+                        f((slice_start_bit, slice_length));
+                        slice_length = 0;
+                    }
                 }
             }
+
+            abs_bit_offset += usize::BITS as usize;
+        }
+
+        if slice_length > 0 {
+            f((slice_start_bit, slice_length));
         }
     }
 
+    /// Returns the raw bytes backing this view.
     pub fn as_raw(&self) -> &[u8; N_BYTES] {
-        self.bits
+        self.bits.as_ref()
     }
 }
 
-#[cfg(test)]
-mod tests {
-    use bitvec::slice::BitSlice;
-    use vortex_buffer::BitBufferMut;
-
-    use super::*;
+pub trait BitViewExt {
+    /// Iterate the [`BitBuffer`] in fixed-size chunks of [`BitView`].
+    ///
+    /// The final chunk will be filled with unset padding bits if the bit buffer's length is not
+    /// a multiple of `N`.
+    ///
+    /// # Panics
+    ///
+    /// If the bit buffer's bit-offset is not zero.
+    fn iter_bit_views(&self) -> impl Iterator<Item = BitView<'_>> + '_;
+}
 
-    #[test]
-    fn test_bits() {
-        let mut bits = BitBufferMut::new_unset(128);
-        bits.set(1);
-        bits.set(2);
-        bits.set(3);
-        bits.set(8);
-        bits.set(64);
-        let bits = bits.freeze();
-        assert_eq!(bits.set_indices().collect::<Vec<_>>(), vec![1, 2, 3, 8, 64]);
-
-        // Can we just transmute and pass it into bitvec crate?
-        // Absolutely not is that answer.
-        let slice_u64 =
-            BitSlice::<u64>::from_slice(unsafe { std::mem::transmute(bits.inner().as_ref()) });
-        assert_ne!(
-            slice_u64.iter_ones().collect::<Vec<_>>(),
-            vec![1, 2, 3, 8, 64]
+impl BitViewExt for BitBuffer {
+    fn iter_bit_views(&self) -> impl Iterator<Item = BitView<'_>> + '_ {
+        assert_eq!(
+            self.offset(),
+            0,
+            "BitView iteration requires zero bit offset"
         );
+        let n_views = self.len().div_ceil(N);
+        BitViewIterator {
+            bits: self.inner().as_ref(),
+            view_idx: 0,
+            n_views,
+        }
+    }
+}
+
+struct BitViewIterator<'a> {
+    bits: &'a [u8],
+    // The index of the view to be returned next
+    view_idx: usize,
+    // The total number of views
+    n_views: usize,
+}
 
-        // But if we have a &[u8], we can use unaligned load to pull it into the right order.
-        unsafe {
-            let vec_usize = (0..2)
-                .map(|idx| {
-                    bits.inner()
-                        .as_ptr()
-                        .cast::<usize>()
-                        .add(idx)
-                        .read_unaligned()
-                })
-                .collect::<Vec<_>>();
-            let slice_usize = BitSlice::<usize>::from_slice(&vec_usize);
-            assert_eq!(
-                slice_usize.iter_ones().collect::<Vec<_>>(),
-                vec![1, 2, 3, 8, 64]
-            );
+impl<'a> Iterator for BitViewIterator<'a> {
+    type Item = BitView<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.view_idx == self.n_views {
+            return None;
         }
 
-        println!(
-            "Bits: {:08b} {:08b}",
-            bits.inner().as_ref()[0],
-            bits.inner().as_ref()[1]
-        );
+        let start_byte = self.view_idx * N_BYTES;
+        let end_byte = start_byte + N_BYTES;
+
+        let bits = if end_byte <= self.bits.len() {
+            // Full view from the original bits
+            BitView::from_slice(&self.bits[start_byte..end_byte])
+        } else {
+            // Partial view, copy to scratch
+            let remaining_bytes = self.bits.len() - start_byte;
+            let mut remaining = [0u8; N_BYTES];
+            remaining[..remaining_bytes].copy_from_slice(&self.bits[start_byte..]);
+            BitView::new_owned(remaining)
+        };
+
+        self.view_idx += 1;
+        Some(bits)
     }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::usize;
+
+    use super::*;
 
     #[test]
     fn test_iter_ones_empty() {
@@ -340,35 +463,6 @@ mod tests {
         assert_eq!(zeros, vec![0, 2, 4, 6]);
     }
 
-    #[test]
-    fn test_iter_ones_across_words() {
-        let mut bits = [0; N_BYTES];
-        bits[0] = 1 << 63; // Set bit 63 of first word
-        bits[1] = 1; // Set bit 0 of second word (bit 64 overall)
-        bits[2] = 1 << 31; // Set bit 31 of third word (bit 159 overall)
-        let view = BitView::new(&bits);
-
-        let mut ones = Vec::new();
-        view.iter_ones(|idx| ones.push(idx));
-
-        assert_eq!(ones, vec![63, 64, 159]);
-        assert_eq!(view.true_count(), 3);
-    }
-
-    #[test]
-    fn test_iter_zeros_across_words() {
-        let mut bits = [u8::MAX; N_BYTES];
-        bits[0] = !(1 << 63); // Clear bit 63 of first word
-        bits[1] = !1; // Clear bit 0 of second word (bit 64 overall)
-        bits[2] = !(1 << 31); // Clear bit 31 of third word (bit 159 overall)
-        let view = BitView::new(&bits);
-
-        let mut zeros = Vec::new();
-        view.iter_zeros(|idx| zeros.push(idx));
-
-        assert_eq!(zeros, vec![63, 64, 159]);
-    }
-
     #[test]
     fn test_lsb_bit_ordering() {
         let mut bits = [0; N_BYTES];
@@ -382,30 +476,6 @@ mod tests {
         assert_eq!(view.true_count(), 8);
     }
 
-    #[test]
-    fn test_iter_ones_and_zeros_complement() {
-        let mut bits = [0; N_BYTES];
-        bits[0] = 0xAAAAAAAAAAAAAAAA; // Alternating pattern
-        let view = BitView::new(&bits);
-
-        let mut ones = Vec::new();
-        let mut zeros = Vec::new();
-        view.iter_ones(|idx| ones.push(idx));
-        view.iter_zeros(|idx| zeros.push(idx));
-
-        // Check that ones and zeros together cover all indices
-        let mut all_indices = ones.clone();
-        all_indices.extend(&zeros);
-        all_indices.sort_unstable();
-
-        assert_eq!(all_indices, (0..N).collect::<Vec<_>>());
-
-        // Check they don't overlap
-        for one_idx in &ones {
-            assert!(!zeros.contains(one_idx));
-        }
-    }
-
     #[test]
     fn test_all_false_static() {
         let view = BitView::all_false();
@@ -505,4 +575,21 @@ mod tests {
         assert_eq!(bitview_ones, expected_indices);
         assert_eq!(view.true_count(), expected_indices.len());
     }
+
+    #[test]
+    fn test_with_prefix() {
+        assert_eq!(BitView::with_prefix(0).true_count(), 0);
+
+        // Test every non-empty prefix length, including the all-true prefix of length `N`.
+        for i in 1..=N {
+            let view = BitView::with_prefix(i);
+
+            // Collect slices (there should be one slice from 0 to n_true)
+            let mut slices = vec![];
+            view.iter_slices(|slice| slices.push(slice));
+
+            assert_eq!(slices.len(), 1);
+            assert_eq!(slices[0], (0, i));
+        }
+    }
 }
diff --git a/vortex-array/src/pipeline/mod.rs b/vortex-array/src/pipeline/mod.rs
index d22e596397d..f71605479cd 100644
--- a/vortex-array/src/pipeline/mod.rs
+++ b/vortex-array/src/pipeline/mod.rs
@@ -21,12 +21,14 @@
 pub mod bit_view;
 pub mod source_driver;
 
-use crate::Array;
-use bit_view::BitView;
 use std::ops::Deref;
+
+use bit_view::BitView;
 use vortex_error::VortexResult;
 use vortex_vector::{Vector, VectorMut};
 
+use crate::Array;
+
 /// The number of elements in each step of a Vortex evaluation operator.
 pub const N: usize = 1024;
 
diff --git a/vortex-array/src/pipeline/source_driver.rs b/vortex-array/src/pipeline/source_driver.rs
index 27fbe87f6e9..7f11ddcc370 100644
--- a/vortex-array/src/pipeline/source_driver.rs
+++ b/vortex-array/src/pipeline/source_driver.rs
@@ -1,13 +1,14 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-use crate::pipeline::bit_view::BitView;
-use crate::pipeline::{BindContext, KernelContext, PipelinedSource, VectorId, N};
 use itertools::Itertools;
-use vortex_error::{vortex_panic, VortexResult};
+use vortex_error::{VortexResult, vortex_panic};
 use vortex_mask::Mask;
 use vortex_vector::{Vector, VectorMut, VectorMutOps};
 
+use crate::pipeline::bit_view::{BitView, BitViewExt};
+use crate::pipeline::{BindContext, KernelContext, N, PipelinedSource, VectorId};
+
 /// Temporary driver for executing a single array in a pipelined fashion.
 pub struct PipelineSourceDriver<'a> {
     array: &'a dyn PipelinedSource,
@@ -40,50 +41,47 @@ impl<'a> PipelineSourceDriver<'a> {
         // `kernel.step(out)` has at least N bytes of capacity.
         let mut output = VectorMut::with_capacity(
             self.array.dtype(),
-            selection.true_count().next_multiple_of(N),
+            // We add an extra N to ensure we have enough capacity so the last chunk has 2 * N
+            // elements of capacity.
+            selection.true_count().next_multiple_of(N) + N,
         );
 
         // TODO(ngates): change behaviour based on the density of the selection mask.
-        let selection_buffer = selection.to_bit_buffer();
-        // TODO(ngates): rewrite chunks to take an arbitrary "storage type"? Or somehow copy
-        //  the chunks directly into a wider bit slice?
-        let selection_chunks = selection_buffer.chunks();
-        let mut selection_chunks_iter = selection_chunks.iter_padded();
-
-        let output_len = selection.true_count();
-
-        let mut selection_chunk = [0u64; N / u64::BITS as usize];
-
-        let mut output_chunks = vec![];
-        while output.len() < output_len {
-            // Copy the next selection chunk into place.
-            for word_idx in 0..selection_chunk.len() {
-                selection_chunk[word_idx] = selection_chunks_iter.next().unwrap_or_else(|| 0u64);
+        match selection {
+            Mask::AllTrue(_) => {
+                // Select everything, so we can just run the kernel in a tight loop.
+
+                // The number of _full_ chunks we need to process.
+                let nchunks = selection.len() / N;
+                for _ in 0..nchunks {
+                    let prev_len = output.len();
+                    kernel.step(&kernel_ctx, &BitView::all_true(), &mut output)?;
+                    debug_assert_eq!(output.len(), prev_len + N);
+                }
+
+                // Now process the final partial chunk, if any.
+                let remaining = selection.len() % N;
+                if remaining > 0 {
+                    let selection_view = BitView::with_prefix(remaining);
+
+                    let prev_len = output.len();
+                    kernel.step(&kernel_ctx, &selection_view, &mut output)?;
+                    debug_assert_eq!(output.len(), prev_len + remaining);
+                    debug_assert_eq!(output.len(), selection.len());
+                }
+            }
+            Mask::AllFalse(_) => {
+                // Select nothing, return empty output!
+            }
+            Mask::Values(values) => {
+                // Mixed selection, so we have to process in chunks.
+                let selection_bits = values.bit_buffer();
+                for selection_view in selection_bits.iter_bit_views() {
+                    let prev_len = output.len();
+                    kernel.step(&kernel_ctx, &selection_view, &mut output)?;
+                    debug_assert_eq!(output.len(), prev_len + selection_view.true_count());
+                }
             }
-
-            // TODO(ngates): ideally our chunks iter would use a usize...
-            let selection_chunk_usize = unsafe { std::mem::transmute(&selection_chunk) };
-            let selection = BitView::new(selection_chunk_usize);
-
-            // We know we have remaining capacity for N elements, so split off a size-N chunk.
-            let remaining_output = output.split_off(N);
-
-            kernel.step(&kernel_ctx, &selection, &mut output)?;
-            assert_eq!(
-                output.len(),
-                selection.true_count(),
-                "Kernel did not write expected number of elements"
-            );
-
-            // Now we un-split the output vector back onto its full size.
-            // output.unsplit(remaining_output);
-            output_chunks.push(output);
-            output = remaining_output;
-        }
-
-        // Combine all output chunks back into the output vector.
-        for chunk in output_chunks {
-            output.unsplit(chunk);
         }
 
         Ok(output.freeze())
@@ -106,14 +104,15 @@ impl BindContext for PipelineSourceBindCtx<'_> {
 
 #[cfg(test)]
 mod test {
-    use crate::arrays::PrimitiveArray;
-    use crate::pipeline::source_driver::PipelineSourceDriver;
-    use crate::validity::Validity;
     use vortex_buffer::buffer;
     use vortex_dtype::PTypeDowncastExt;
     use vortex_mask::Mask;
     use vortex_vector::VectorOps;
 
+    use crate::arrays::PrimitiveArray;
+    use crate::pipeline::source_driver::PipelineSourceDriver;
+    use crate::validity::Validity;
+
     #[test]
     fn test_primitive() {
         let array = PrimitiveArray::new::<u32>(buffer![0..100000u32], Validity::AllValid);
diff --git a/vortex-array/src/vtable/operator.rs b/vortex-array/src/vtable/operator.rs
index 2dbf34a9002..9603ca0cf16 100644
--- a/vortex-array/src/vtable/operator.rs
+++ b/vortex-array/src/vtable/operator.rs
@@ -1,15 +1,15 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-use vortex_error::{vortex_bail, VortexResult};
+use vortex_error::{VortexResult, vortex_bail};
 use vortex_mask::Mask;
 use vortex_vector::Vector;
 
+use crate::ArrayRef;
 use crate::array::IntoArray;
 use crate::execution::{BatchKernelRef, BindCtx, ExecutionCtx};
 use crate::pipeline::PipelinedSource;
 use crate::vtable::{NotSupported, VTable};
-use crate::ArrayRef;
 
 /// A vtable for the new operator-based array functionality. Eventually this vtable will be
 /// merged into the main `VTable`, but for now it is kept separate to allow for incremental
diff --git a/vortex-buffer/src/buffer_mut.rs b/vortex-buffer/src/buffer_mut.rs
index 436006f7d2f..add9ae5f7be 100644
--- a/vortex-buffer/src/buffer_mut.rs
+++ b/vortex-buffer/src/buffer_mut.rs
@@ -9,7 +9,7 @@ use std::ops::{Deref, DerefMut};
 
 use bytes::buf::UninitSlice;
 use bytes::{Buf, BufMut, BytesMut};
-use vortex_error::{vortex_panic, VortexExpect};
+use vortex_error::{VortexExpect, vortex_panic};
 
 use crate::debug::TruncatedDebug;
 use crate::trusted_len::TrustedLen;
@@ -726,7 +726,7 @@ impl Write for ByteBufferMut {
 mod test {
     use bytes::{Buf, BufMut};
 
-    use crate::{buffer_mut, Alignment, BufferMut, ByteBufferMut};
+    use crate::{Alignment, BufferMut, ByteBufferMut, buffer_mut};
 
     #[test]
     fn capacity() {
diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs
index 2525592e1ae..9a02109803f 100644
--- a/vortex-vector/src/primitive/generic_mut.rs
+++ b/vortex-vector/src/primitive/generic_mut.rs
@@ -5,7 +5,7 @@
 
 use vortex_buffer::BufferMut;
 use vortex_dtype::NativePType;
-use vortex_error::{vortex_ensure, VortexExpect, VortexResult};
+use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
 use vortex_mask::MaskMut;
 
 use crate::primitive::PVector;
diff --git a/vortex-vector/src/primitive/vector_mut.rs b/vortex-vector/src/primitive/vector_mut.rs
index eef24f04707..2666f3f9150 100644
--- a/vortex-vector/src/primitive/vector_mut.rs
+++ b/vortex-vector/src/primitive/vector_mut.rs
@@ -9,7 +9,7 @@ use vortex_error::vortex_panic;
 use vortex_mask::MaskMut;
 
 use crate::primitive::{PVectorMut, PrimitiveVector};
-use crate::{match_each_pvector_mut, VectorMutOps};
+use crate::{VectorMutOps, match_each_pvector_mut};
 
 /// A mutable vector of primitive values.
 ///
diff --git a/vortex-vector/src/vector_mut.rs b/vortex-vector/src/vector_mut.rs
index 9be1f3fdfaa..522b491de51 100644
--- a/vortex-vector/src/vector_mut.rs
+++ b/vortex-vector/src/vector_mut.rs
@@ -18,7 +18,7 @@ use crate::listview::ListViewVectorMut;
 use crate::null::NullVectorMut;
 use crate::primitive::PrimitiveVectorMut;
 use crate::struct_::StructVectorMut;
-use crate::{match_each_vector_mut, match_vector_pair, Vector, VectorMutOps};
+use crate::{Vector, VectorMutOps, match_each_vector_mut, match_vector_pair};
 
 /// An enum over all kinds of mutable vectors, which represent fully decompressed (canonical) array
 /// data.
@@ -267,9 +267,9 @@ mod tests {
     use vortex_dtype::{DecimalDType, Nullability, PType};
 
     use super::*;
+    use crate::VectorOps;
     use crate::decimal::DecimalVectorMut;
     use crate::primitive::PVectorMut;
-    use crate::VectorOps;
 
     #[test]
     fn test_with_capacity() {

From cef3f517a01779e0afb4a57df0902137fdb60fb8 Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Sat, 8 Nov 2025 18:26:11 -0500
Subject: [PATCH 07/10] pipelined execution

Signed-off-by: Nicholas Gates <nick@nickgates.com>
---
 vortex-array/src/array/operator.rs             | 12 ++++++++++--
 .../src/arrays/primitive/vtable/operator.rs    | 18 +++++++++---------
 vortex-array/src/pipeline/bit_view.rs          |  2 ++
 vortex-array/src/pipeline/source_driver.rs     |  6 +++---
 vortex-buffer/src/buffer_mut.rs                | 11 ++++++++---
 5 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/vortex-array/src/array/operator.rs b/vortex-array/src/array/operator.rs
index 0a0e0705046..74731a71ce5 100644
--- a/vortex-array/src/array/operator.rs
+++ b/vortex-array/src/array/operator.rs
@@ -3,11 +3,12 @@
 
 use std::sync::Arc;
 
-use vortex_error::{VortexResult, vortex_panic};
+use vortex_error::{vortex_panic, VortexResult};
 use vortex_mask::Mask;
-use vortex_vector::{Vector, VectorOps, vector_matches_dtype};
+use vortex_vector::{vector_matches_dtype, Vector, VectorOps};
 
 use crate::execution::{BatchKernelRef, BindCtx, DummyExecutionCtx, ExecutionCtx};
+use crate::pipeline::source_driver::PipelineSourceDriver;
 use crate::vtable::{OperatorVTable, VTable};
 use crate::{Array, ArrayAdapter, ArrayRef};
 
@@ -62,6 +63,13 @@ impl ArrayOperator for Arc<dyn Array> {
 
 impl<V: VTable> ArrayOperator for ArrayAdapter<V> {
     fn execute_batch(&self, selection: &Mask, ctx: &mut dyn ExecutionCtx) -> VortexResult<Vector> {
+        // Check if the array is a pipeline source, and if so use the single-node driver for now.
+        if let Some(pipeline_source) =
+            <V::OperatorVTable as OperatorVTable<V>>::as_pipelined_source(&self.0)
+        {
+            return PipelineSourceDriver::new(pipeline_source).execute(selection);
+        }
+
         let vector =
             <V::OperatorVTable as OperatorVTable<V>>::execute_batch(&self.0, selection, ctx)?;
 
diff --git a/vortex-array/src/arrays/primitive/vtable/operator.rs b/vortex-array/src/arrays/primitive/vtable/operator.rs
index caa15c04901..d4b74821bbf 100644
--- a/vortex-array/src/arrays/primitive/vtable/operator.rs
+++ b/vortex-array/src/arrays/primitive/vtable/operator.rs
@@ -3,16 +3,16 @@
 
 use vortex_buffer::Buffer;
 use vortex_compute::filter::Filter;
-use vortex_dtype::{NativePType, PTypeDowncastExt, match_each_native_ptype};
+use vortex_dtype::{match_each_native_ptype, NativePType, PTypeDowncastExt};
 use vortex_error::VortexResult;
 use vortex_mask::Mask;
-use vortex_vector::VectorMut;
 use vortex_vector::primitive::PVector;
+use vortex_vector::{VectorMut, VectorMutOps};
 
 use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable};
-use crate::execution::{BatchKernelRef, BindCtx, kernel};
+use crate::execution::{kernel, BatchKernelRef, BindCtx};
 use crate::pipeline::bit_view::BitView;
-use crate::pipeline::{BindContext, KernelContext, N, PipelinedSource, SourceKernel};
+use crate::pipeline::{BindContext, KernelContext, PipelinedSource, SourceKernel, N};
 use crate::vtable::{OperatorVTable, ValidityHelper};
 use crate::{ArrayRef, IntoArray};
 
@@ -106,16 +106,16 @@ impl<T: NativePType> SourceKernel for PrimitiveKernel<T> {
         //  separately from copying over the elements.
         unsafe {
             out.validity_mut().append_n(true, selection.true_count());
-            out.elements_mut().set_len(selection.true_count());
+            let prev_len = out.len();
+            out.elements_mut()
+                .set_len(prev_len + selection.true_count());
         }
 
         let source = &self.buffer.as_slice()[self.offset..];
 
         let mut out_pos = 0;
-        selection.iter_slices(|(start, end)| {
-            print!("Slicing {} to {}\n", start, end);
-            let len = end - start;
-            out.as_mut()[out_pos..][..len].copy_from_slice(&source[start..end]);
+        selection.iter_slices(|(start, len)| {
+            out.as_mut()[out_pos..][..len].copy_from_slice(&source[start..][..len]);
             out_pos += len;
         });
 
diff --git a/vortex-array/src/pipeline/bit_view.rs b/vortex-array/src/pipeline/bit_view.rs
index aa3129b66fc..76189e53f92 100644
--- a/vortex-array/src/pipeline/bit_view.rs
+++ b/vortex-array/src/pipeline/bit_view.rs
@@ -214,6 +214,8 @@ impl<'a> BitView<'a> {
 
     pub fn iter_slices<F>(&self, mut f: F)
     where
+        // FIXME(ngates): I have repeatedly assumed this to be a (start, end) slice, not a
+        //  (start, len)... I think we should wrap this in a struct to avoid confusion.
         F: FnMut((usize, usize)),
     {
         if self.true_count == 0 {
diff --git a/vortex-array/src/pipeline/source_driver.rs b/vortex-array/src/pipeline/source_driver.rs
index 7f11ddcc370..04e7eda0a79 100644
--- a/vortex-array/src/pipeline/source_driver.rs
+++ b/vortex-array/src/pipeline/source_driver.rs
@@ -2,14 +2,14 @@
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
 use itertools::Itertools;
-use vortex_error::{VortexResult, vortex_panic};
+use vortex_error::{vortex_panic, VortexResult};
 use vortex_mask::Mask;
 use vortex_vector::{Vector, VectorMut, VectorMutOps};
 
 use crate::pipeline::bit_view::{BitView, BitViewExt};
-use crate::pipeline::{BindContext, KernelContext, N, PipelinedSource, VectorId};
+use crate::pipeline::{BindContext, KernelContext, PipelinedSource, VectorId, N};
 
-/// Temporary driver for executing a single array in a pipelined fashion.
+/// Temporary driver for executing a single source array in a pipelined fashion.
 pub struct PipelineSourceDriver<'a> {
     array: &'a dyn PipelinedSource,
 }
diff --git a/vortex-buffer/src/buffer_mut.rs b/vortex-buffer/src/buffer_mut.rs
index add9ae5f7be..c00e573da52 100644
--- a/vortex-buffer/src/buffer_mut.rs
+++ b/vortex-buffer/src/buffer_mut.rs
@@ -9,7 +9,7 @@ use std::ops::{Deref, DerefMut};
 
 use bytes::buf::UninitSlice;
 use bytes::{Buf, BufMut, BytesMut};
-use vortex_error::{VortexExpect, vortex_panic};
+use vortex_error::{vortex_panic, VortexExpect};
 
 use crate::debug::TruncatedDebug;
 use crate::trusted_len::TrustedLen;
@@ -241,10 +241,15 @@ impl<T> BufferMut<T> {
         }
     }
 
+    /// Sets the length of the buffer.
+    ///
     /// # Safety
-    /// The caller must ensure that the buffer was properly initialized up to `len`.
+    ///
+    /// The caller must ensure that there is sufficient capacity in the buffer and that the values
+    /// are valid up to `len`.
     #[inline]
     pub unsafe fn set_len(&mut self, len: usize) {
+        debug_assert!(len <= self.capacity());
         unsafe { self.bytes.set_len(len * size_of::<T>()) };
         self.length = len;
     }
@@ -726,7 +731,7 @@ impl Write for ByteBufferMut {
 mod test {
     use bytes::{Buf, BufMut};
 
-    use crate::{Alignment, BufferMut, ByteBufferMut, buffer_mut};
+    use crate::{buffer_mut, Alignment, BufferMut, ByteBufferMut};
 
     #[test]
     fn capacity() {

From fe16812dc1b32b0f4b1b15220b73c654d33abf77 Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Sat, 8 Nov 2025 21:22:40 -0500
Subject: [PATCH 08/10] pipelined execution

Signed-off-by: Nicholas Gates <nick@nickgates.com>
---
 vortex-array/src/array/operator.rs            |   4 +-
 .../src/arrays/primitive/vtable/operator.rs   | 101 ++++++++++++++----
 vortex-array/src/pipeline/bit_view.rs         |  43 +++++---
 vortex-array/src/pipeline/mod.rs              |  19 +++-
 vortex-array/src/pipeline/source_driver.rs    |   4 +-
 vortex-buffer/src/buffer_mut.rs               |   4 +-
 vortex-mask/src/mask_mut.rs                   |   4 +
 7 files changed, 136 insertions(+), 43 deletions(-)

diff --git a/vortex-array/src/array/operator.rs b/vortex-array/src/array/operator.rs
index 74731a71ce5..8d7daae5a33 100644
--- a/vortex-array/src/array/operator.rs
+++ b/vortex-array/src/array/operator.rs
@@ -3,9 +3,9 @@
 
 use std::sync::Arc;
 
-use vortex_error::{vortex_panic, VortexResult};
+use vortex_error::{VortexResult, vortex_panic};
 use vortex_mask::Mask;
-use vortex_vector::{vector_matches_dtype, Vector, VectorOps};
+use vortex_vector::{Vector, VectorOps, vector_matches_dtype};
 
 use crate::execution::{BatchKernelRef, BindCtx, DummyExecutionCtx, ExecutionCtx};
 use crate::pipeline::source_driver::PipelineSourceDriver;
diff --git a/vortex-array/src/arrays/primitive/vtable/operator.rs b/vortex-array/src/arrays/primitive/vtable/operator.rs
index d4b74821bbf..60063f0d39a 100644
--- a/vortex-array/src/arrays/primitive/vtable/operator.rs
+++ b/vortex-array/src/arrays/primitive/vtable/operator.rs
@@ -1,18 +1,20 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-use vortex_buffer::Buffer;
+use vortex_buffer::{BitBuffer, Buffer};
 use vortex_compute::filter::Filter;
-use vortex_dtype::{match_each_native_ptype, NativePType, PTypeDowncastExt};
+use vortex_dtype::{NativePType, PTypeDowncastExt, match_each_native_ptype};
 use vortex_error::VortexResult;
-use vortex_mask::Mask;
 use vortex_vector::primitive::PVector;
 use vortex_vector::{VectorMut, VectorMutOps};
 
 use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable};
-use crate::execution::{kernel, BatchKernelRef, BindCtx};
-use crate::pipeline::bit_view::BitView;
-use crate::pipeline::{BindContext, KernelContext, PipelinedSource, SourceKernel, N};
+use crate::execution::{BatchKernelRef, BindCtx, kernel};
+use crate::pipeline::bit_view::{BitSlice, BitView};
+use crate::pipeline::{
+    AllNullSourceKernel, BindContext, KernelContext, N, PipelinedSource, SourceKernel,
+};
+use crate::validity::Validity;
 use crate::vtable::{OperatorVTable, ValidityHelper};
 use crate::{ArrayRef, IntoArray};
 
@@ -71,25 +73,81 @@ impl OperatorVTable<PrimitiveVTable> for PrimitiveVTable {
 }
 
 impl PipelinedSource for PrimitiveArray {
-    fn bind_source(&self, _ctx: &mut dyn BindContext) -> VortexResult<Box<dyn SourceKernel>> {
-        match_each_native_ptype!(self.ptype(), |T| {
-            let primitive_kernel = PrimitiveKernel {
-                buffer: self.buffer::<T>().clone(),
-                validity: self.validity_mask(),
-                offset: 0,
-            };
-            Ok(Box::new(primitive_kernel))
-        })
+    fn bind_source(&self, ctx: &mut dyn BindContext) -> VortexResult<Box<dyn SourceKernel>> {
+        match self.validity() {
+            Validity::NonNullable | Validity::AllValid => {
+                match_each_native_ptype!(self.ptype(), |T| {
+                    let primitive_kernel = NonNullablePrimitiveKernel {
+                        buffer: self.buffer::<T>(),
+                        offset: 0,
+                    };
+                    Ok(Box::new(primitive_kernel))
+                })
+            }
+            Validity::AllInvalid => Ok(Box::new(AllNullSourceKernel)),
+            Validity::Array(_) => {
+                let validity = ctx.batch_input(0).into_bool();
+                // Validity is non-nullable, so we extract the inner bit buffer.
+                let (validity, _) = validity.into_parts();
+
+                match_each_native_ptype!(self.ptype(), |T| {
+                    let primitive_kernel = NullablePrimitiveKernel {
+                        buffer: self.buffer::<T>(),
+                        validity,
+                        offset: 0,
+                    };
+                    Ok(Box::new(primitive_kernel))
+                })
+            }
+        }
+    }
+}
+
+struct NonNullablePrimitiveKernel<T: NativePType> {
+    buffer: Buffer<T>,
+    offset: usize,
+}
+
+impl<T: NativePType> SourceKernel for NonNullablePrimitiveKernel<T> {
+    fn skip(&mut self, n: usize) {
+        self.offset += n * N;
+    }
+
+    fn step(
+        &mut self,
+        _ctx: &KernelContext,
+        selection: &BitView,
+        out: &mut VectorMut,
+    ) -> VortexResult<()> {
+        let out = out.as_primitive_mut().downcast::<T>();
+
+        // SAFETY: we know the output has sufficient capacity.
+        unsafe {
+            out.validity_mut().append_n(true, selection.true_count());
+            let prev_len = out.len();
+            out.elements_mut()
+                .set_len(prev_len + selection.true_count());
+        }
+
+        let source = &self.buffer.as_slice()[self.offset..];
+        let mut out_pos = 0;
+        selection.iter_slices(|BitSlice { start, len }| {
+            out.as_mut()[out_pos..][..len].copy_from_slice(&source[start..][..len]);
+            out_pos += len;
+        });
+
+        Ok(())
     }
 }
 
-struct PrimitiveKernel<T: NativePType> {
+struct NullablePrimitiveKernel<T: NativePType> {
     buffer: Buffer<T>,
-    validity: Mask,
+    #[allow(dead_code)] // TODO(ngates): implement appending validity bits
+    validity: BitBuffer,
     offset: usize,
 }
 
-impl<T: NativePType> SourceKernel for PrimitiveKernel<T> {
+impl<T: NativePType> SourceKernel for NullablePrimitiveKernel<T> {
     fn skip(&mut self, n: usize) {
         self.offset += n * N;
     }
@@ -114,9 +172,14 @@ impl<T: NativePType> SourceKernel for PrimitiveKernel<T> {
         let source = &self.buffer.as_slice()[self.offset..];
 
         let mut out_pos = 0;
-        selection.iter_slices(|(start, len)| {
+        selection.iter_slices(|BitSlice { start, len }| {
+            // Copy over the elements.
             out.as_mut()[out_pos..][..len].copy_from_slice(&source[start..][..len]);
             out_pos += len;
+
+            // Append the validity bits.
+            let _validity = unsafe { out.validity_mut() };
+            todo!("Append validity bits correctly and optimally!");
         });
 
         Ok(())
diff --git a/vortex-array/src/pipeline/bit_view.rs b/vortex-array/src/pipeline/bit_view.rs
index 76189e53f92..9e59dc89819 100644
--- a/vortex-array/src/pipeline/bit_view.rs
+++ b/vortex-array/src/pipeline/bit_view.rs
@@ -212,11 +212,13 @@ impl<'a> BitView<'a> {
         }
     }
 
+    /// Runs the provided function `f` for each range of `true` bits in the view.
+    ///
+    /// The function `f` receives a [`BitSlice`] containing the inclusive `start` bit as well as
+    /// the length.
     pub fn iter_slices<F>(&self, mut f: F)
     where
-        // FIXME(ngates): I have repeatedly assumed this to be a (start, end) slice, not a
-        //  (start, len)... I think we should wrap this in a struct to avoid confusion.
-        F: FnMut((usize, usize)),
+        F: FnMut(BitSlice),
     {
         if self.true_count == 0 {
             return;
@@ -231,7 +233,10 @@ impl<'a> BitView<'a> {
                 0 => {
                     // If a slice was being tracked, the run ends at the start of this word.
                     if slice_length > 0 {
-                        f((slice_start_bit, slice_length));
+                        f(BitSlice {
+                            start: slice_start_bit,
+                            len: slice_length,
+                        });
                         slice_length = 0;
                     }
                 }
@@ -250,7 +255,10 @@ impl<'a> BitView<'a> {
 
                         // If a run was open, and we hit a zero gap, report the finished slice
                         if slice_length > 0 && zeros > 0 {
-                            f((slice_start_bit, slice_length));
+                            f(BitSlice {
+                                start: slice_start_bit,
+                                len: slice_length,
+                            });
                             slice_length = 0; // Reset state for a new slice
                         }
 
@@ -284,22 +292,26 @@ impl<'a> BitView<'a> {
         }
 
         if slice_length > 0 {
-            f((slice_start_bit, slice_length));
+            f(BitSlice {
+                start: slice_start_bit,
+                len: slice_length,
+            });
         }
     }
 
-    /// Runs the provided function `f` for each range of `true` bits in the view.
-    ///
-    /// The function `f` receives a tuple `(start, len)` where `start` is the index of the first
-    /// `true` bit and `len` is the number of consecutive `true` bits.
-    ///
-    /// FIXME(ngates): this code is broken.
-
     pub fn as_raw(&self) -> &[u8; N_BYTES] {
         self.bits.as_ref()
     }
 }
 
+/// A slice of bits within a [`BitBuffer`].
+///
+/// We use this struct to avoid the common mistake of assuming a slice is a (start, end) range; it is (start, len).
+pub struct BitSlice {
+    pub start: usize,
+    pub len: usize,
+}
+
 pub trait BitViewExt {
     /// Iterate the [`BitBuffer`] in fixed-size chunks of [`BitView`].
     ///
@@ -319,7 +331,7 @@ impl BitViewExt for BitBuffer {
             0,
             "BitView iteration requires zero bit offset"
         );
-        let n_views = (self.len() + N - 1) / N;
+        let n_views = self.len().div_ceil(N);
         BitViewIterator {
             bits: self.inner().as_ref(),
             view_idx: 0,
@@ -365,8 +377,6 @@ impl<'a> Iterator for BitViewIterator<'a> {
 
 #[cfg(test)]
 mod tests {
-    use std::usize;
-
     use super::*;
 
     #[test]
@@ -591,7 +601,6 @@ mod tests {
             view.iter_slices(|slice| slices.push(slice));
 
             assert_eq!(slices.len(), 1);
-            assert_eq!(slices[0], (0, i));
         }
     }
 }
diff --git a/vortex-array/src/pipeline/mod.rs b/vortex-array/src/pipeline/mod.rs
index f71605479cd..ad8ca8d210b 100644
--- a/vortex-array/src/pipeline/mod.rs
+++ b/vortex-array/src/pipeline/mod.rs
@@ -25,7 +25,7 @@ use std::ops::Deref;
 
 use bit_view::BitView;
 use vortex_error::VortexResult;
-use vortex_vector::{Vector, VectorMut};
+use vortex_vector::{Vector, VectorMut, VectorMutOps};
 
 use crate::Array;
 
@@ -150,3 +150,20 @@ impl KernelContext {
         &self.vectors[vector_id]
     }
 }
+
+/// A general implementation of a source kernel that produces all null values.
+pub struct AllNullSourceKernel;
+
+impl SourceKernel for AllNullSourceKernel {
+    fn skip(&mut self, _n: usize) {}
+
+    fn step(
+        &mut self,
+        _ctx: &KernelContext,
+        selection: &BitView,
+        out: &mut VectorMut,
+    ) -> VortexResult<()> {
+        out.append_nulls(selection.true_count());
+        Ok(())
+    }
+}
diff --git a/vortex-array/src/pipeline/source_driver.rs b/vortex-array/src/pipeline/source_driver.rs
index 04e7eda0a79..7515e603ec5 100644
--- a/vortex-array/src/pipeline/source_driver.rs
+++ b/vortex-array/src/pipeline/source_driver.rs
@@ -2,12 +2,12 @@
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
 use itertools::Itertools;
-use vortex_error::{vortex_panic, VortexResult};
+use vortex_error::{VortexResult, vortex_panic};
 use vortex_mask::Mask;
 use vortex_vector::{Vector, VectorMut, VectorMutOps};
 
 use crate::pipeline::bit_view::{BitView, BitViewExt};
-use crate::pipeline::{BindContext, KernelContext, PipelinedSource, VectorId, N};
+use crate::pipeline::{BindContext, KernelContext, N, PipelinedSource, VectorId};
 
 /// Temporary driver for executing a single source array in a pipelined fashion.
 pub struct PipelineSourceDriver<'a> {
diff --git a/vortex-buffer/src/buffer_mut.rs b/vortex-buffer/src/buffer_mut.rs
index c00e573da52..f4ba91bbfef 100644
--- a/vortex-buffer/src/buffer_mut.rs
+++ b/vortex-buffer/src/buffer_mut.rs
@@ -9,7 +9,7 @@ use std::ops::{Deref, DerefMut};
 
 use bytes::buf::UninitSlice;
 use bytes::{Buf, BufMut, BytesMut};
-use vortex_error::{vortex_panic, VortexExpect};
+use vortex_error::{VortexExpect, vortex_panic};
 
 use crate::debug::TruncatedDebug;
 use crate::trusted_len::TrustedLen;
@@ -731,7 +731,7 @@ impl Write for ByteBufferMut {
 mod test {
     use bytes::{Buf, BufMut};
 
-    use crate::{buffer_mut, Alignment, BufferMut, ByteBufferMut};
+    use crate::{Alignment, BufferMut, ByteBufferMut, buffer_mut};
 
     #[test]
     fn capacity() {
diff --git a/vortex-mask/src/mask_mut.rs b/vortex-mask/src/mask_mut.rs
index e7e5c736ed6..adb2d86ae17 100644
--- a/vortex-mask/src/mask_mut.rs
+++ b/vortex-mask/src/mask_mut.rs
@@ -95,6 +95,10 @@ impl MaskMut {
     }
 
     /// Set the length of the mask.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure that `new_len` is less than the capacity of the mask.
     pub unsafe fn set_len(&mut self, new_len: usize) {
         debug_assert!(new_len < self.capacity());
         match &mut self.0 {

From 54ae1e43412e0ef74655b8beb6800d99662baf52 Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Mon, 10 Nov 2025 08:44:38 -0500
Subject: [PATCH 09/10] pipelined execution

Signed-off-by: Nicholas Gates <nick@nickgates.com>
---
 .../src/arrays/primitive/vtable/operator.rs   | 128 +-----------------
 vortex-array/src/pipeline/mod.rs              |  61 ++++-----
 vortex-array/src/pipeline/source_driver.rs    |  37 +----
 vortex-array/src/vtable/operator.rs           |  16 ++-
 4 files changed, 47 insertions(+), 195 deletions(-)

diff --git a/vortex-array/src/arrays/primitive/vtable/operator.rs b/vortex-array/src/arrays/primitive/vtable/operator.rs
index 60063f0d39a..fa18e516cec 100644
--- a/vortex-array/src/arrays/primitive/vtable/operator.rs
+++ b/vortex-array/src/arrays/primitive/vtable/operator.rs
@@ -1,28 +1,18 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-use vortex_buffer::{BitBuffer, Buffer};
+use vortex_buffer::Buffer;
 use vortex_compute::filter::Filter;
-use vortex_dtype::{NativePType, PTypeDowncastExt, match_each_native_ptype};
+use vortex_dtype::match_each_native_ptype;
 use vortex_error::VortexResult;
 use vortex_vector::primitive::PVector;
-use vortex_vector::{VectorMut, VectorMutOps};
 
 use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable};
 use crate::execution::{BatchKernelRef, BindCtx, kernel};
-use crate::pipeline::bit_view::{BitSlice, BitView};
-use crate::pipeline::{
-    AllNullSourceKernel, BindContext, KernelContext, N, PipelinedSource, SourceKernel,
-};
-use crate::validity::Validity;
 use crate::vtable::{OperatorVTable, ValidityHelper};
 use crate::{ArrayRef, IntoArray};
 
 impl OperatorVTable<PrimitiveVTable> for PrimitiveVTable {
-    fn as_pipelined_source(array: &PrimitiveArray) -> Option<&dyn PipelinedSource> {
-        Some(array)
-    }
-
     fn bind(
         array: &PrimitiveArray,
         selection: Option<&ArrayRef>,
@@ -71,117 +61,3 @@ impl OperatorVTable<PrimitiveVTable> for PrimitiveVTable {
         Ok(None)
     }
 }
-
-impl PipelinedSource for PrimitiveArray {
-    fn bind_source(&self, ctx: &mut dyn BindContext) -> VortexResult<Box<dyn SourceKernel>> {
-        match self.validity() {
-            Validity::NonNullable | Validity::AllValid => {
-                match_each_native_ptype!(self.ptype(), |T| {
-                    let primitive_kernel = NonNullablePrimitiveKernel {
-                        buffer: self.buffer::<T>(),
-                        offset: 0,
-                    };
-                    Ok(Box::new(primitive_kernel))
-                })
-            }
-            Validity::AllInvalid => Ok(Box::new(AllNullSourceKernel)),
-            Validity::Array(_) => {
-                let validity = ctx.batch_input(0).into_bool();
-                // Validity is non-nullable, so we extract the inner bit buffer.
-                let (validity, _) = validity.into_parts();
-
-                match_each_native_ptype!(self.ptype(), |T| {
-                    let primitive_kernel = NullablePrimitiveKernel {
-                        buffer: self.buffer::<T>(),
-                        validity,
-                        offset: 0,
-                    };
-                    Ok(Box::new(primitive_kernel))
-                })
-            }
-        }
-    }
-}
-
-struct NonNullablePrimitiveKernel<T: NativePType> {
-    buffer: Buffer<T>,
-    offset: usize,
-}
-
-impl<T: NativePType> SourceKernel for NonNullablePrimitiveKernel<T> {
-    fn skip(&mut self, n: usize) {
-        self.offset += n * N;
-    }
-
-    fn step(
-        &mut self,
-        _ctx: &KernelContext,
-        selection: &BitView,
-        out: &mut VectorMut,
-    ) -> VortexResult<()> {
-        let out = out.as_primitive_mut().downcast::<T>();
-
-        // SAFETY: we know the output has sufficient capacity.
-        unsafe {
-            out.validity_mut().append_n(true, selection.true_count());
-            let prev_len = out.len();
-            out.elements_mut()
-                .set_len(prev_len + selection.true_count());
-        }
-
-        let source = &self.buffer.as_slice()[self.offset..];
-        let mut out_pos = 0;
-        selection.iter_slices(|BitSlice { start, len }| {
-            out.as_mut()[out_pos..][..len].copy_from_slice(&source[start..][..len]);
-            out_pos += len;
-        });
-
-        Ok(())
-    }
-}
-
-struct NullablePrimitiveKernel<T: NativePType> {
-    buffer: Buffer<T>,
-    #[allow(dead_code)] // TODO(ngates): implement appending validity bits
-    validity: BitBuffer,
-    offset: usize,
-}
-
-impl<T: NativePType> SourceKernel for NullablePrimitiveKernel<T> {
-    fn skip(&mut self, n: usize) {
-        self.offset += n * N;
-    }
-
-    fn step(
-        &mut self,
-        _ctx: &KernelContext,
-        selection: &BitView,
-        out: &mut VectorMut,
-    ) -> VortexResult<()> {
-        let out = out.as_primitive_mut().downcast::<T>();
-
-        // SAFETY: we know the output has sufficient capacity. We just have to append nulls
-        //  separately from copying over the elements.
-        unsafe {
-            out.validity_mut().append_n(true, selection.true_count());
-            let prev_len = out.len();
-            out.elements_mut()
-                .set_len(prev_len + selection.true_count());
-        }
-
-        let source = &self.buffer.as_slice()[self.offset..];
-
-        let mut out_pos = 0;
-        selection.iter_slices(|BitSlice { start, len }| {
-            // Copy over the elements.
-            out.as_mut()[out_pos..][..len].copy_from_slice(&source[start..][..len]);
-            out_pos += len;
-
-            // Append the validity bits.
-            let _validity = unsafe { out.validity_mut() };
-            todo!("Append validity bits correctly and optimally!");
-        });
-
-        Ok(())
-    }
-}
diff --git a/vortex-array/src/pipeline/mod.rs b/vortex-array/src/pipeline/mod.rs
index ad8ca8d210b..3f76522bd6f 100644
--- a/vortex-array/src/pipeline/mod.rs
+++ b/vortex-array/src/pipeline/mod.rs
@@ -1,23 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-//! Vortex crate containing vectorized operator processing.
-//!
-//! This module contains experiments into pipelined data processing within Vortex.
-//!
-//! Arrays (and eventually Layouts) will be convertible into a [`Kernel`] that can then be
-//! exported into a [`ViewMut`] one chunk of [`N`] elements at a time. This allows us to keep
-//! compute largely within the L1 cache, as well as to write out canonical data into externally
-//! provided buffers.
-//!
-//! Each chunk is represented in a canonical physical form, as determined by the logical
-//! [`vortex_dtype::DType`] of the array. This provides a predicate base on which to perform
-//! compute. Unlike DuckDB and other vectorized systems, we force a single canonical representation
-//! instead of supporting multiple encodings because compute push-down is applied a priori to the
-//! logical representation.
-//!
-//! It is a work-in-progress and is not yet used in production.
-
 pub mod bit_view;
 pub mod source_driver;
 
@@ -38,8 +21,11 @@ pub const N_BYTES: usize = N / 8;
 /// Number of usize words needed to store N bits
 pub const N_WORDS: usize = N / usize::BITS as usize;
 
-/// Returned by an array to indicate that it can be executed in a pipelined fashion.
-pub trait PipelinedOperator: Array {
+/// Indicates that an array supports acting as a transformation node in a pipelined execution.
+///
+/// That is, it has one or more child arrays for which each input element produces a single output
+/// element. See [`PipelineSource`] for nodes that have zero pipelined children.
+pub trait PipelineTransform: Deref<Target = dyn Array> {
     // Whether this operator works by mutating its first child in-place.
     //
     // If `true`, the operator is invoked with the first child's input data passed via the
@@ -56,36 +42,39 @@ pub trait PipelinedOperator: Array {
     /// computed before pipelined execution begins.
     fn is_pipelined_child(&self, child_idx: usize) -> bool;
 
-    /// Bind the operator into a [`Kernel`] for pipelined execution.
+    /// Bind the operator into a [`TransformKernel`] for pipelined execution.
     ///
     /// The provided [`BindContext`] can be used to obtain vector IDs for pipelined children and
     /// batch IDs for batch children. Each child can only be bound once.
-    fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult<Box<dyn OperatorKernel>>;
+    fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult<Box<dyn TransformKernel>>;
 }
 
-pub trait PipelinedSource: Deref<Target = dyn Array> {
-    /// Bind the operator into a [`Kernel`] for pipelined execution.
+/// Indicates that an array supports acting as a source node in a pipelined execution.
+pub trait PipelineSource: Deref<Target = dyn Array> {
+    /// Bind the operator into a [`SourceKernel`] for pipelined execution.
     ///
     /// The provided [`BindContext`] can be used to obtain vector IDs for pipelined children and
     /// batch IDs for batch children. Each child can only be bound once.
-    fn bind_source(&self, ctx: &mut dyn BindContext) -> VortexResult<Box<dyn SourceKernel>>;
+    fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult<Box<dyn SourceKernel>>;
 }
 
 /// The context used when binding an operator for execution.
 pub trait BindContext {
     /// Returns a [`VectorId`] that can be passed to the [`KernelContext`] within the body of
-    /// the [`Kernel`] to access the given child as a pipelined input vector.
+    /// the kernel to access the given child as a pipelined input vector.
     ///
     /// # Panics
     ///
-    /// If the child index requested here was not listed in [`Pipelined::pipelined_children`].
+    /// If the child index requested here was not marked as a pipelined child in
+    /// [`PipelineTransform::is_pipelined_child`].
     fn pipelined_input(&self, child_idx: usize) -> VectorId;
 
     /// Returns the batch input vector for the given child.
     ///
     /// # Panics
     ///
-    /// If the child index requested here was listed in [`Pipelined::pipelined_children`].
+    /// If the child index requested here was marked as a pipelined child in
+    /// [`PipelineTransform::is_pipelined_child`].
     fn batch_input(&self, child_idx: usize) -> Vector;
 }
 
@@ -115,7 +104,12 @@ pub trait SourceKernel: Send {
     /// For example, if `n` is 3, then the kernel should skip over `3 * N` elements of input data.
     fn skip(&mut self, n: usize);
 
-    /// Attempts to perform a single step of the operator, writing data to the output vector.
+    /// Attempts to perform a single step of the operator, appending data to the output vector.
+    ///
+    /// The provided selection mask indicates which elements of the current chunk should be
+    /// appended to the output vector.
+    ///
+    /// The provided output vector is guaranteed to have at least `N` elements of capacity.
     fn step(
         &mut self,
         ctx: &KernelContext,
@@ -124,12 +118,13 @@ pub trait SourceKernel: Send {
     ) -> VortexResult<()>;
 }
 
-pub trait OperatorKernel: Send {
-    /// Attempts to perform a single step of the operator, writing data to the output vector.
+pub trait TransformKernel: Send {
+    /// Attempts to perform a single step of the operator, appending data to the output vector.
+    ///
+    /// The input vectors can be accessed via the provided `KernelContext`.
     ///
-    /// The output vector has length equal to the number of valid elements in the input vectors.
-    /// This number of values should be written to the output vector.
-    fn step(&self, ctx: &KernelContext, out: &mut VectorMut) -> VortexResult<()>;
+    /// The provided output vector is guaranteed to have at least `N` elements of capacity.
+    fn step(&mut self, ctx: &KernelContext, out: &mut VectorMut) -> VortexResult<()>;
 }
 
 /// Context passed to kernels during execution, providing access to vectors.
diff --git a/vortex-array/src/pipeline/source_driver.rs b/vortex-array/src/pipeline/source_driver.rs
index 7515e603ec5..63b97ff849d 100644
--- a/vortex-array/src/pipeline/source_driver.rs
+++ b/vortex-array/src/pipeline/source_driver.rs
@@ -7,15 +7,15 @@ use vortex_mask::Mask;
 use vortex_vector::{Vector, VectorMut, VectorMutOps};
 
 use crate::pipeline::bit_view::{BitView, BitViewExt};
-use crate::pipeline::{BindContext, KernelContext, N, PipelinedSource, VectorId};
+use crate::pipeline::{BindContext, KernelContext, N, PipelineSource, VectorId};
 
 /// Temporary driver for executing a single source array in a pipelined fashion.
 pub struct PipelineSourceDriver<'a> {
-    array: &'a dyn PipelinedSource,
+    array: &'a dyn PipelineSource,
 }
 
 impl<'a> PipelineSourceDriver<'a> {
-    pub fn new(array: &'a dyn PipelinedSource) -> Self {
+    pub fn new(array: &'a dyn PipelineSource) -> Self {
         Self { array }
     }
 
@@ -34,7 +34,7 @@ impl<'a> PipelineSourceDriver<'a> {
         let mut bind_ctx = PipelineSourceBindCtx {
             batch_inputs: &batch_inputs,
         };
-        let mut kernel = self.array.bind_source(&mut bind_ctx)?;
+        let mut kernel = self.array.bind(&mut bind_ctx)?;
         let kernel_ctx = KernelContext::empty();
 
         // Allocate an output vector, with up to N bytes of padding to ensure every call to
@@ -46,7 +46,6 @@ impl<'a> PipelineSourceDriver<'a> {
             selection.true_count().next_multiple_of(N) + N,
         );
 
-        // TODO(ngates): change behaviour based on the density of the selection mask.
         match selection {
             Mask::AllTrue(_) => {
                 // Select everything, so we can just run the kernel in a tight loop.
@@ -101,31 +100,3 @@ impl BindContext for PipelineSourceBindCtx<'_> {
         self.batch_inputs[child_idx].clone()
     }
 }
-
-#[cfg(test)]
-mod test {
-    use vortex_buffer::buffer;
-    use vortex_dtype::PTypeDowncastExt;
-    use vortex_mask::Mask;
-    use vortex_vector::VectorOps;
-
-    use crate::arrays::PrimitiveArray;
-    use crate::pipeline::source_driver::PipelineSourceDriver;
-    use crate::validity::Validity;
-
-    #[test]
-    fn test_primitive() {
-        let array = PrimitiveArray::new::<u32>(buffer![0..100000u32], Validity::AllValid);
-
-        // Create a selection mask with some ranges.
-        let mask = Mask::from_iter((0..100000).map(|i| i % 30 < 20));
-
-        let out = PipelineSourceDriver::new(&array)
-            .execute(&mask)
-            .unwrap()
-            .into_primitive()
-            .downcast::<u32>();
-
-        assert_eq!(out.len(), mask.true_count());
-    }
-}
diff --git a/vortex-array/src/vtable/operator.rs b/vortex-array/src/vtable/operator.rs
index 9603ca0cf16..2edb26d0213 100644
--- a/vortex-array/src/vtable/operator.rs
+++ b/vortex-array/src/vtable/operator.rs
@@ -8,7 +8,7 @@ use vortex_vector::Vector;
 use crate::ArrayRef;
 use crate::array::IntoArray;
 use crate::execution::{BatchKernelRef, BindCtx, ExecutionCtx};
-use crate::pipeline::PipelinedSource;
+use crate::pipeline::{PipelineSource, PipelineTransform};
 use crate::vtable::{NotSupported, VTable};
 
 /// A vtable for the new operator-based array functionality. Eventually this vtable will be
@@ -40,8 +40,10 @@ pub trait OperatorVTable<V: VTable> {
         Self::bind(array, Some(&selection.clone().into_array()), &mut ())?.execute()
     }
 
-    /// Downcast this array into a [`PipelinedSource`] if it supports pipelined execution.
-    fn as_pipelined_source(_array: &V::Array) -> Option<&dyn PipelinedSource> {
+    /// Downcast this array into a [`PipelineNode`] if it supports pipelined execution.
+    ///
+    /// Each node is either a source node or a transformation node.
+    fn pipeline_node(_array: &V::Array) -> Option<PipelineNode<'_>> {
         None
     }
 
@@ -102,6 +104,14 @@ pub trait OperatorVTable<V: VTable> {
     }
 }
 
+/// An enum over the types of pipeline nodes.
+pub enum PipelineNode<'a> {
+    /// This node is a source node in a pipeline.
+    Source(&'a dyn PipelineSource),
+    /// This node is a transformation node in a pipeline.
+    Transform(&'a dyn PipelineTransform),
+}
+
 impl<V: VTable> OperatorVTable<V> for NotSupported {
     fn bind(
         array: &V::Array,

From 7a65acb3c5bde2ee5337e8033e4305ac5efa9787 Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Mon, 10 Nov 2025 08:53:09 -0500
Subject: [PATCH 10/10] pipelined execution

Signed-off-by: Nicholas Gates <nick@nickgates.com>
---
 vortex-array/src/array/operator.rs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/vortex-array/src/array/operator.rs b/vortex-array/src/array/operator.rs
index 8d7daae5a33..7bb348cb63b 100644
--- a/vortex-array/src/array/operator.rs
+++ b/vortex-array/src/array/operator.rs
@@ -9,7 +9,7 @@ use vortex_vector::{Vector, VectorOps, vector_matches_dtype};
 
 use crate::execution::{BatchKernelRef, BindCtx, DummyExecutionCtx, ExecutionCtx};
 use crate::pipeline::source_driver::PipelineSourceDriver;
-use crate::vtable::{OperatorVTable, VTable};
+use crate::vtable::{OperatorVTable, PipelineNode, VTable};
 use crate::{Array, ArrayAdapter, ArrayRef};
 
 /// Array functions as provided by the `OperatorVTable`.
@@ -63,11 +63,12 @@ impl ArrayOperator for Arc<dyn Array> {
 
 impl<V: VTable> ArrayOperator for ArrayAdapter<V> {
     fn execute_batch(&self, selection: &Mask, ctx: &mut dyn ExecutionCtx) -> VortexResult<Vector> {
-        // Check if the array is a pipeline source, and if so use the single-node driver for now.
-        if let Some(pipeline_source) =
-            <V::OperatorVTable as OperatorVTable<V>>::as_pipelined_source(&self.0)
+        // If the array is a pipeline source node, run it with the single-node source driver.
+        if let Some(pipeline_node) =
+            <V::OperatorVTable as OperatorVTable<V>>::pipeline_node(&self.0)
+            && let PipelineNode::Source(source) = pipeline_node
         {
-            return PipelineSourceDriver::new(pipeline_source).execute(selection);
+            return PipelineSourceDriver::new(source).execute(selection);
         }
 
         let vector =