From 5baacc5dde93a01280d9f2712dcfd00d2ec1706f Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Tue, 15 Jul 2025 21:04:01 -0400 Subject: [PATCH 01/11] update vortex-fastlanes to remove generic_const_expr Signed-off-by: Andrew Duffy --- encodings/fastlanes/src/delta/compress.rs | 38 ++++++++++------------- encodings/fastlanes/src/lib.rs | 1 - 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/encodings/fastlanes/src/delta/compress.rs b/encodings/fastlanes/src/delta/compress.rs index 22752d40a6d..e22b54c2496 100644 --- a/encodings/fastlanes/src/delta/compress.rs +++ b/encodings/fastlanes/src/delta/compress.rs @@ -2,7 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use arrayref::{array_mut_ref, array_ref}; -use fastlanes::{Delta, Transpose}; +use fastlanes::{Delta, FastLanes, Transpose}; use num_traits::{WrappingAdd, WrappingSub}; use vortex_array::ToCanonical; use vortex_array::arrays::PrimitiveArray; @@ -20,7 +20,8 @@ pub fn delta_compress(array: &PrimitiveArray) -> VortexResult<(PrimitiveArray, P // Compress the filled array let (bases, deltas) = match_each_unsigned_integer_ptype!(array.ptype(), |T| { - let (bases, deltas) = compress_primitive(array.as_slice::()); + const LANES: usize = T::LANES; + let (bases, deltas) = compress_primitive::(array.as_slice::()); let (base_validity, delta_validity) = if array.validity().nullability() != Nullability::NonNullable { (Validity::AllValid, Validity::AllValid) @@ -37,12 +38,9 @@ pub fn delta_compress(array: &PrimitiveArray) -> VortexResult<(PrimitiveArray, P Ok((bases, deltas)) } -fn compress_primitive( +fn compress_primitive( array: &[T], -) -> (Buffer, Buffer) -where - [(); T::LANES]:, -{ +) -> (Buffer, Buffer) { // How many fastlanes vectors we will process. let num_chunks = array.len() / 1024; @@ -66,9 +64,9 @@ where let delta_len = deltas.len(); unsafe { deltas.set_len(delta_len + 1024); - Delta::delta( + Delta::delta::( &transposed, - &*(transposed[0..T::LANES].as_ptr() as *const [_; T::LANES]), + &*(transposed[0..T::LANES].as_ptr().cast()), array_mut_ref![deltas[delta_len..], 0, 1024], ); } @@ -101,8 +99,10 @@ pub fn delta_decompress(array: &DeltaArray) -> VortexResult { let bases = array.bases().to_primitive()?; let deltas = array.deltas().to_primitive()?; let decoded = match_each_unsigned_integer_ptype!(deltas.ptype(), |T| { + const LANES: usize = T::LANES; + PrimitiveArray::new( - decompress_primitive::(bases.as_slice(), deltas.as_slice()), + decompress_primitive::(bases.as_slice(), deltas.as_slice()), array.validity().clone(), ) }); @@ -114,19 +114,13 @@ pub fn delta_decompress(array: &DeltaArray) -> VortexResult { // TODO(ngates): can we re-use the deltas buffer for the result? Might be tricky given the // traversal ordering, but possibly doable. -fn decompress_primitive( +fn decompress_primitive( bases: &[T], deltas: &[T], -) -> Buffer -where - [(); T::LANES]:, -{ +) -> Buffer { // How many fastlanes vectors we will process. let num_chunks = deltas.len() / 1024; - // How long each base vector will be. - let lanes = T::LANES; - // Allocate a result array. let mut output = BufferMut::with_capacity(deltas.len()); @@ -139,9 +133,9 @@ where let chunk: &[T; 1024] = array_ref![deltas, start_elem, 1024]; // Initialize the base vector for this chunk - Delta::undelta( + Delta::undelta::( chunk, - unsafe { &*(bases[i * lanes..(i + 1) * lanes].as_ptr() as *const [_; T::LANES]) }, + unsafe { &*(bases[i * LANES..(i + 1) * LANES].as_ptr().cast()) }, &mut transposed, ); @@ -156,8 +150,8 @@ where let remainder_size = deltas.len() % 1024; if remainder_size > 0 { let chunk = &deltas[num_chunks * 1024..]; - assert_eq!(bases.len(), num_chunks * lanes + 1); - let mut base_scalar = bases[num_chunks * lanes]; + assert_eq!(bases.len(), num_chunks * LANES + 1); + let mut base_scalar = bases[num_chunks * LANES]; for next_diff in chunk { let next = next_diff.wrapping_add(&base_scalar); output.push(next); diff --git a/encodings/fastlanes/src/lib.rs b/encodings/fastlanes/src/lib.rs index a3afafe9b89..5bb881484e4 100644 --- a/encodings/fastlanes/src/lib.rs +++ b/encodings/fastlanes/src/lib.rs @@ -3,7 +3,6 @@ #![allow(incomplete_features)] #![allow(clippy::cast_possible_truncation)] -#![feature(generic_const_exprs)] pub use bitpacking::*; pub use delta::*; From 59c7cad5b1434a620f535da9eb361dddf1f8c75a Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Tue, 15 Jul 2025 21:56:40 -0400 Subject: [PATCH 02/11] use extend_trusted, make our own TrustedLen trait that mirrors nightly Signed-off-by: Andrew Duffy --- .github/actions/setup-rust/action.yml | 2 +- .github/workflows/ci.yml | 2 + .github/workflows/fuzz.yml | 2 + encodings/alp/src/alp/mod.rs | 2 +- .../src/bitpacking/compute/filter.rs | 4 +- encodings/fastlanes/src/delta/compress.rs | 2 +- encodings/fsst/src/canonical.rs | 4 +- encodings/zstd/src/array.rs | 5 +- rust-toolchain.toml | 2 +- rustfmt.toml | 1 - vortex-array/src/arrays/chunked/decode.rs | 2 +- vortex-array/src/arrays/decimal/serde.rs | 2 +- vortex-array/src/builders/varbinview.rs | 10 +- vortex-buffer/src/buffer.rs | 43 ++++++- vortex-buffer/src/buffer_mut.rs | 62 +++++++++- vortex-buffer/src/lib.rs | 4 +- vortex-buffer/src/spec_extend.rs | 111 ------------------ vortex-buffer/src/trusted_len.rs | 61 ++++++++++ 18 files changed, 184 insertions(+), 137 deletions(-) delete mode 100644 vortex-buffer/src/spec_extend.rs create mode 100644 vortex-buffer/src/trusted_len.rs diff --git a/.github/actions/setup-rust/action.yml b/.github/actions/setup-rust/action.yml index c069fcd04f7..214ac45a419 100644 --- a/.github/actions/setup-rust/action.yml +++ b/.github/actions/setup-rust/action.yml @@ -24,7 +24,7 @@ runs: with: toolchain: "${{ steps.rust-version.outputs.version }}" targets: "${{inputs.targets || ''}}" - components: clippy, rustfmt, miri, llvm-tools-preview + components: clippy, rustfmt - name: Rust Dependency Cache uses: Swatinem/rust-cache@v2 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b135d51f6a3..e8eaf2d0085 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -484,6 +484,8 @@ jobs: with: submodules: "recursive" - uses: ./.github/actions/setup-rust + with: + rust-version: nightly - uses: taiki-e/install-action@v2 with: tool: nextest@0.9.98 diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 65fb33bb97e..22f766e9105 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -24,6 +24,8 @@ jobs: with: submodules: "recursive" - uses: ./.github/actions/setup-rust + with: + rust-version: nightly - name: Install cargo fuzz run: cargo install --locked cargo-fuzz - name: Restore corpus diff --git a/encodings/alp/src/alp/mod.rs b/encodings/alp/src/alp/mod.rs index 1fadb1bf52d..d71d09b031c 100644 --- a/encodings/alp/src/alp/mod.rs +++ b/encodings/alp/src/alp/mod.rs @@ -216,7 +216,7 @@ fn encode_chunk_unchecked( // encode the chunk, counting the number of patches let mut chunk_patch_count = 0; - encoded_output.extend(chunk.iter().map(|&v| { + encoded_output.extend_trusted(chunk.iter().map(|&v| { let encoded = T::encode_single_unchecked(v, exp); let decoded = T::decode_single(encoded, exp); let neq = !decoded.is_eq(v) as usize; diff --git a/encodings/fastlanes/src/bitpacking/compute/filter.rs b/encodings/fastlanes/src/bitpacking/compute/filter.rs index 8e2e983c731..f44adec7dfd 100644 --- a/encodings/fastlanes/src/bitpacking/compute/filter.rs +++ b/encodings/fastlanes/src/bitpacking/compute/filter.rs @@ -133,14 +133,14 @@ fn filter_indices( let dst: &mut [T] = mem::transmute(dst); BitPacking::unchecked_unpack(bit_width, packed, dst); } - values.extend( + values.extend_trusted( indices_within_chunk .iter() .map(|&idx| unsafe { unpacked.get_unchecked(idx).assume_init() }), ); } else { // Otherwise, unpack each element individually. - values.extend(indices_within_chunk.iter().map(|&idx| unsafe { + values.extend_trusted(indices_within_chunk.iter().map(|&idx| unsafe { BitPacking::unchecked_unpack_single(bit_width, packed, idx) })); } diff --git a/encodings/fastlanes/src/delta/compress.rs b/encodings/fastlanes/src/delta/compress.rs index e22b54c2496..3749cb2fc3f 100644 --- a/encodings/fastlanes/src/delta/compress.rs +++ b/encodings/fastlanes/src/delta/compress.rs @@ -58,7 +58,7 @@ fn compress_primitive for FSSTVTable { fsst_into_varbin_view(array.decompressor(), array, builder.completed_block_count())?; builder.push_buffer_and_adjusted_views( - view.buffers().iter().cloned(), - view.views().iter().cloned(), + &view.buffers(), + &view.views(), array.validity_mask()?, ); Ok(()) diff --git a/encodings/zstd/src/array.rs b/encodings/zstd/src/array.rs index 9d01393ff71..c8ec08e2126 100644 --- a/encodings/zstd/src/array.rs +++ b/encodings/zstd/src/array.rs @@ -115,8 +115,9 @@ fn collect_valid_vbv(vbv: &VarBinViewArray) -> VortexResult<(ByteBuffer, Vec(()) })??; diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 3dbbfe73489..f7a1097d298 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,4 +1,4 @@ [toolchain] -channel = "nightly-2025-02-24" +channel = "1.88.0" components = ["rust-src", "rustfmt", "clippy"] profile = "minimal" diff --git a/rustfmt.toml b/rustfmt.toml index a25926915c2..d479a9a08ec 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -2,7 +2,6 @@ condense_wildcard_suffixes = true format_macro_matchers = true format_macro_bodies = true group_imports = "StdExternalCrate" -unstable_features = true use_field_init_shorthand = true imports_granularity = "Module" style_edition="2024" diff --git a/vortex-array/src/arrays/chunked/decode.rs b/vortex-array/src/arrays/chunked/decode.rs index 25d60a7852f..d24ce25a7f8 100644 --- a/vortex-array/src/arrays/chunked/decode.rs +++ b/vortex-array/src/arrays/chunked/decode.rs @@ -111,7 +111,7 @@ fn pack_lists( let adjustment_from_previous = *offsets .last() .ok_or_else(|| vortex_err!("List offsets must have at least one element"))?; - offsets.extend( + offsets.extend_trusted( offsets_arr .as_slice::() .iter() diff --git a/vortex-array/src/arrays/decimal/serde.rs b/vortex-array/src/arrays/decimal/serde.rs index e43a96ed7e4..2198816bc68 100644 --- a/vortex-array/src/arrays/decimal/serde.rs +++ b/vortex-array/src/arrays/decimal/serde.rs @@ -108,7 +108,7 @@ mod tests { // Concat into a single buffer let mut concat = ByteBufferMut::empty(); for buf in out { - concat.extend(buf.as_ref()); + concat.extend_from_slice(buf.as_ref()); } let concat = concat.freeze(); diff --git a/vortex-array/src/builders/varbinview.rs b/vortex-array/src/builders/varbinview.rs index ab99040a9d0..abca3aa37f7 100644 --- a/vortex-array/src/builders/varbinview.rs +++ b/vortex-array/src/builders/varbinview.rs @@ -4,7 +4,7 @@ use std::any::Any; use std::cmp::max; -use vortex_buffer::{BufferMut, ByteBuffer, ByteBufferMut}; +use vortex_buffer::{Buffer, BufferMut, ByteBuffer, ByteBufferMut}; use vortex_dtype::{DType, Nullability}; use vortex_error::{VortexExpect, VortexResult}; use vortex_mask::Mask; @@ -107,14 +107,14 @@ impl VarBinViewBuilder { // the view length. pub fn push_buffer_and_adjusted_views( &mut self, - buffer: impl IntoIterator, - views: impl IntoIterator, + buffer: &[ByteBuffer], + views: &Buffer, validity_mask: Mask, ) { self.flush_in_progress(); - self.completed.extend(buffer); - self.views_builder.extend(views); + self.completed.extend(buffer.iter().cloned()); + self.views_builder.extend_trusted(views.iter().copied()); self.push_only_validity_mask(validity_mask); debug_assert_eq!(self.null_buffer_builder.len(), self.views_builder.len()) diff --git a/vortex-buffer/src/buffer.rs b/vortex-buffer/src/buffer.rs index f75baebf7f4..a08de325950 100644 --- a/vortex-buffer/src/buffer.rs +++ b/vortex-buffer/src/buffer.rs @@ -177,8 +177,10 @@ impl Buffer { } /// Returns an iterator over the buffer of elements of type T. - pub fn iter(&self) -> impl Iterator + '_ { - self.as_slice().iter() + pub fn iter(&self) -> Iter<'_, T> { + Iter { + inner: self.as_slice().iter(), + } } /// Returns a slice of self for the provided range. @@ -398,6 +400,43 @@ impl Buffer { } } +/// An iterator over Buffer elements. +/// +/// This is an analog to the `std::slice::Iter` type. +pub struct Iter<'a, T> { + inner: std::slice::Iter<'a, T>, +} + +impl<'a, T> Iterator for Iter<'a, T> { + type Item = &'a T; + + fn next(&mut self) -> Option { + self.inner.next() + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } + + fn count(self) -> usize { + self.inner.count() + } + + fn last(self) -> Option { + self.inner.last() + } + + fn nth(&mut self, n: usize) -> Option { + self.inner.nth(n) + } +} + +impl ExactSizeIterator for Iter<'_, T> { + fn len(&self) -> usize { + self.inner.len() + } +} + impl Debug for Buffer { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct(&format!("Buffer<{}>", type_name::())) diff --git a/vortex-buffer/src/buffer_mut.rs b/vortex-buffer/src/buffer_mut.rs index 94e4b6a276f..a81e7e9e818 100644 --- a/vortex-buffer/src/buffer_mut.rs +++ b/vortex-buffer/src/buffer_mut.rs @@ -12,7 +12,7 @@ use bytes::{Buf, BufMut, BytesMut}; use vortex_error::{VortexExpect, vortex_panic}; use crate::debug::TruncatedDebug; -use crate::spec_extend::SpecExtend; +use crate::trusted_len::TrustedLen; use crate::{Alignment, Buffer, ByteBufferMut}; /// A mutable buffer that maintains a runtime-defined alignment through resizing operations. @@ -418,10 +418,66 @@ impl AsMut<[T]> for BufferMut { } } +impl BufferMut { + fn extend_iter(&mut self, mut iter: impl Iterator) { + // Attempt to reserve enough memory up-front, although this is only a lower bound. + let (lower, _) = iter.size_hint(); + self.reserve(lower); + + let remaining = self.capacity() - self.len(); + + let begin: *const T = self.bytes.spare_capacity_mut().as_mut_ptr().cast(); + let mut dst: *mut T = begin.cast_mut(); + for _ in 0..remaining { + if let Some(item) = iter.next() { + unsafe { + // SAFETY: We know we have enough capacity to write the item. + dst.write(item); + // Note. we used to have dst.add(iteration).write(item), here. + // however this was much slower than just incrementing dst. + dst = dst.add(1); + } + } else { + break; + } + } + + // TODO(joe): replace with ptr_sub when stable + let length = self.len() + unsafe { dst.byte_offset_from(begin) as usize / size_of::() }; + unsafe { self.set_len(length) }; + + // Append remaining elements + iter.for_each(|item| self.push(item)); + } + + /// An unsafe variant of the `Extend` trait and its `extend` method that receives what the + /// caller guarantees to be an iterator with a trusted upper bound. + pub fn extend_trusted>(&mut self, iter: I) { + // Reserve all memory upfront since it's an exact upper bound + let (_, high) = iter.size_hint(); + self.reserve(high.vortex_expect("TrustedLen iterator didn't have valid upper bound")); + + let begin: *const T = self.bytes.spare_capacity_mut().as_mut_ptr().cast(); + let mut dst: *mut T = begin.cast_mut(); + iter.for_each(|item| { + unsafe { + // SAFETY: We know we have enough capacity to write the item. + dst.write(item); + // Note. we used to have dst.add(iteration).write(item), here. + // however this was much slower than just incrementing dst. + dst = dst.add(1); + } + }); + // TODO(joe): replace with ptr_sub when stable + let length = self.len() + unsafe { dst.byte_offset_from(begin) as usize / size_of::() }; + unsafe { self.set_len(length) }; + } +} + impl Extend for BufferMut { #[inline] fn extend>(&mut self, iter: I) { - >::spec_extend(self, iter.into_iter()) + self.extend_iter(iter.into_iter()) } } @@ -431,7 +487,7 @@ where { #[inline] fn extend>(&mut self, iter: I) { - self.spec_extend(iter.into_iter()) + self.extend_iter(iter.into_iter().copied()) } } diff --git a/vortex-buffer/src/lib.rs b/vortex-buffer/src/lib.rs index ecd89be75ff..2c1bc0d50b2 100644 --- a/vortex-buffer/src/lib.rs +++ b/vortex-buffer/src/lib.rs @@ -2,8 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors #![deny(missing_docs)] -#![feature(min_specialization)] -#![feature(trusted_len)] //! A library for working with custom aligned buffers of sized values. //! @@ -67,8 +65,8 @@ mod debug; mod macros; #[cfg(feature = "memmap2")] mod memmap2; -mod spec_extend; mod string; +mod trusted_len; /// An immutable buffer of u8. pub type ByteBuffer = Buffer; diff --git a/vortex-buffer/src/spec_extend.rs b/vortex-buffer/src/spec_extend.rs deleted file mode 100644 index 9dba2216a72..00000000000 --- a/vortex-buffer/src/spec_extend.rs +++ /dev/null @@ -1,111 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::iter::TrustedLen; -use std::slice; - -use vortex_error::VortexExpect; - -use crate::BufferMut; - -impl BufferMut { - fn extend_iter>(&mut self, mut iter: I) { - // Attempt to reserve enough memory up-front, although this is only a lower bound. - let (lower, _) = iter.size_hint(); - self.reserve(lower); - - let remaining = self.capacity() - self.len(); - - let begin: *const T = self.bytes.spare_capacity_mut().as_mut_ptr().cast(); - let mut dst: *mut T = begin.cast_mut(); - for _ in 0..remaining { - if let Some(item) = iter.next() { - unsafe { - // SAFETY: We know we have enough capacity to write the item. - dst.write(item); - // Note. we used to have dst.add(iteration).write(item), here. - // however this was much slower than just incrementing dst. - dst = dst.add(1); - } - } else { - break; - } - } - - // TODO(joe): replace with ptr_sub when stable - let length = self.len() + unsafe { dst.byte_offset_from(begin) as usize / size_of::() }; - unsafe { self.set_len(length) }; - - // Append remaining elements - iter.for_each(|item| self.push(item)); - } - - fn extend_trusted>(&mut self, iter: I) { - // Reserve all memory upfront since it's an exact upper bound - let (_, high) = iter.size_hint(); - self.reserve(high.vortex_expect("TrustedLen iterator didn't have valid upper bound")); - - let begin: *const T = self.bytes.spare_capacity_mut().as_mut_ptr().cast(); - let mut dst: *mut T = begin.cast_mut(); - iter.for_each(|item| { - unsafe { - // SAFETY: We know we have enough capacity to write the item. - dst.write(item); - // Note. we used to have dst.add(iteration).write(item), here. - // however this was much slower than just incrementing dst. - dst = dst.add(1); - } - }); - // TODO(joe): replace with ptr_sub when stable - let length = self.len() + unsafe { dst.byte_offset_from(begin) as usize / size_of::() }; - unsafe { self.set_len(length) }; - } -} - -/// Specialization trait used for BufferMut::extend -pub(super) trait SpecExtend { - #[track_caller] - fn spec_extend(&mut self, iter: I); -} - -impl SpecExtend for BufferMut -where - I: Iterator, -{ - #[track_caller] - default fn spec_extend(&mut self, iter: I) { - self.extend_iter(iter) - } -} - -impl SpecExtend for BufferMut -where - I: TrustedLen, -{ - #[track_caller] - default fn spec_extend(&mut self, iterator: I) { - self.extend_trusted(iterator) - } -} - -impl<'a, T: 'a, I> SpecExtend<&'a T, I> for BufferMut -where - I: Iterator, - T: Clone, -{ - #[track_caller] - default fn spec_extend(&mut self, iterator: I) { - self.spec_extend(iterator.cloned()) - } -} - -impl<'a, T: 'a> SpecExtend<&'a T, slice::Iter<'a, T>> for BufferMut -where - T: Copy, -{ - #[track_caller] - fn spec_extend(&mut self, iterator: slice::Iter<'a, T>) { - let slice = iterator.as_slice(); - self.extend_from_slice(slice); - } -} diff --git a/vortex-buffer/src/trusted_len.rs b/vortex-buffer/src/trusted_len.rs new file mode 100644 index 00000000000..28b094beea9 --- /dev/null +++ b/vortex-buffer/src/trusted_len.rs @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +/// Trait for all types which have a known upper-bound. +/// +/// Functions that receive a `TrustedLen` iterator can assume that it's `size_hint` is exact, +/// and can pre-allocate memory, unroll loops, or otherwise optimize their implementations +/// accordingly. +/// +/// # Safety +/// +/// The type which implements this trait must provide an exact `Some` upper-bound for its +/// `size_hint` method. Failure to do so can trigger undefined behavior in users of the trait. +pub unsafe trait TrustedLen: Iterator {} + +macro_rules! impl_for_range { + ($($typ:ty),*) => { + $( + unsafe impl TrustedLen for std::ops::Range<$typ> {} + )* + }; +} + +impl_for_range!(u8, u16, u32, u64, i8, i16, i32, i64, usize); + +// std::slice related types +unsafe impl TrustedLen for std::slice::Iter<'_, T> {} + +unsafe impl TrustedLen for std::slice::IterMut<'_, T> {} + +// Iterator types +unsafe impl TrustedLen for std::iter::Map +where + I: TrustedLen, + F: FnMut(I::Item) -> B, +{ +} + +unsafe impl TrustedLen for std::iter::Skip where I: TrustedLen {} + +unsafe impl<'a, I, T: 'a> TrustedLen for std::iter::Copied +where + I: TrustedLen, + T: Copy, +{ +} + +unsafe impl<'a, I, T: 'a> TrustedLen for std::iter::Cloned +where + I: TrustedLen, + T: Clone, +{ +} + +unsafe impl TrustedLen for std::vec::IntoIter {} + +// Arrays +unsafe impl TrustedLen for std::array::IntoIter {} + +// Buffer +unsafe impl TrustedLen for crate::Iter<'_, T> {} From ae937b5a3ee4c33205a23cca4b129e317205bd8c Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 16 Jul 2025 09:30:34 -0400 Subject: [PATCH 03/11] fix regression for chunked_varbinview_canonical_into Signed-off-by: Andrew Duffy --- vortex-array/src/builders/varbinview.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-array/src/builders/varbinview.rs b/vortex-array/src/builders/varbinview.rs index abca3aa37f7..afc88d656fa 100644 --- a/vortex-array/src/builders/varbinview.rs +++ b/vortex-array/src/builders/varbinview.rs @@ -190,7 +190,7 @@ impl ArrayBuilder for VarBinViewBuilder { let buffers_offset = u32::try_from(self.completed.len())?; self.completed.extend_from_slice(array.buffers()); - self.views_builder.extend( + self.views_builder.extend_trusted( array .views() .iter() From bc09918180552052e36499eeca4e1f0659c76dd2 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 16 Jul 2025 10:32:47 -0400 Subject: [PATCH 04/11] cleanup a bunch of stuff Signed-off-by: Andrew Duffy --- Cargo.lock | 1 - Cargo.toml | 6 +- clippy.toml | 12 +- encodings/fastlanes/src/lib.rs | 1 - vortex-array/Cargo.toml | 5 - vortex-array/build.rs | 10 -- vortex-array/src/arrays/primitive/accessor.rs | 2 +- .../src/arrays/primitive/compute/take/mod.rs | 4 +- vortex-array/src/arrays/varbin/accessor.rs | 2 +- vortex-array/src/lib.rs | 3 +- vortex-dtype/src/struct_.rs | 4 +- vortex-error/src/lib.rs | 161 +++++++++--------- vortex-error/src/python.rs | 2 +- 13 files changed, 102 insertions(+), 111 deletions(-) delete mode 100644 vortex-array/build.rs diff --git a/Cargo.lock b/Cargo.lock index 579685a44e0..8e83657c902 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6213,7 +6213,6 @@ dependencies = [ "rstest", "rstest_reuse", "rustc-hash", - "rustversion", "serde", "static_assertions", "vortex-array", diff --git a/Cargo.toml b/Cargo.toml index a5ba4b326f7..8daf6f4f34e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,7 +49,7 @@ keywords = ["vortex"] license = "Apache-2.0" readme = "README.md" repository = "https://github.com/spiraldb/vortex" -rust-version = "1.86" +rust-version = "1.87" version = "0.1.0" [workspace.dependencies] @@ -238,6 +238,10 @@ redundant_lifetimes = "deny" unsafe_op_in_unsafe_fn = "deny" unused_lifetimes = "deny" unused_qualifications = "deny" +unexpected_cfgs = { level = "deny", check-cfg = [ + "cfg(codspeed)", + "cfg(vortex_nightly)", +] } warnings = "deny" [workspace.lints.clippy] diff --git a/clippy.toml b/clippy.toml index 2b87cc93526..a32b320a660 100644 --- a/clippy.toml +++ b/clippy.toml @@ -2,10 +2,12 @@ allow-dbg-in-tests = true allow-expect-in-tests = true allow-unwrap-in-tests = true single-char-binding-names-threshold = 2 -# We prefer using parking_lot for improved ergonomics and performance. -disallowed-types = ["std::collections::HashMap", "std::collections::HashSet", "std::sync::Mutex", "std::sync::RwLock"] +disallowed-types = [ + { path = "std::collections::HashMap", reason = "Use the HashMap in vortex_utils::aliases for better performance" }, + { path = "std::collections::HashSet", reason = "Use the HashSet in vortex_utils::aliases for better performance" }, + { path = "std::sync::Mutex", reason = "Prefer using parking_lot Mutex for improved ergonomics and performance" }, + { path = "std::sync::RwLock", reason = "Prefer using parking_lot RwLock for improved ergonomics and performance" }] disallowed-methods = [ - # It uses the default hasher and is very easy to just inline with a faster implementation - "itertools::Itertools::counts" - ] + { path = "itertools::Itertools::counts", reason = "It uses the default hasher which is slow for primitives. Just inline the loop for better performance." } +] diff --git a/encodings/fastlanes/src/lib.rs b/encodings/fastlanes/src/lib.rs index 5bb881484e4..4209310639d 100644 --- a/encodings/fastlanes/src/lib.rs +++ b/encodings/fastlanes/src/lib.rs @@ -1,7 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -#![allow(incomplete_features)] #![allow(clippy::cast_possible_truncation)] pub use bitpacking::*; diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index 93146157b18..3a536599409 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -16,9 +16,6 @@ version = { workspace = true } [lints] workspace = true -[build-dependencies] -rustversion = { workspace = true } - [dependencies] arbitrary = { workspace = true, optional = true } arcref = { workspace = true } @@ -69,8 +66,6 @@ arbitrary = [ "vortex-scalar/arbitrary", ] canonical_counter = [] -# should only be enabled on nightly compiler -nightly = [] test-harness = ["dep:goldenfile", "dep:rstest", "dep:rstest_reuse"] [dev-dependencies] diff --git a/vortex-array/build.rs b/vortex-array/build.rs deleted file mode 100644 index 32936142ae6..00000000000 --- a/vortex-array/build.rs +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -#[rustversion::nightly] -fn main() { - println!("cargo:rustc-cfg=nightly"); -} - -#[rustversion::not(nightly)] -fn main() {} diff --git a/vortex-array/src/arrays/primitive/accessor.rs b/vortex-array/src/arrays/primitive/accessor.rs index 884f287f5c7..19a1e990449 100644 --- a/vortex-array/src/arrays/primitive/accessor.rs +++ b/vortex-array/src/arrays/primitive/accessor.rs @@ -15,7 +15,7 @@ use crate::vtable::ValidityHelper; impl ArrayAccessor for PrimitiveArray { fn with_iterator(&self, f: F) -> VortexResult where - F: for<'a> FnOnce(&mut (dyn Iterator>)) -> R, + F: for<'a> FnOnce(&mut dyn Iterator>) -> R, { match self.validity() { Validity::NonNullable | Validity::AllValid => { diff --git a/vortex-array/src/arrays/primitive/compute/take/mod.rs b/vortex-array/src/arrays/primitive/compute/take/mod.rs index 25a0cafe2d7..de585471928 100644 --- a/vortex-array/src/arrays/primitive/compute/take/mod.rs +++ b/vortex-array/src/arrays/primitive/compute/take/mod.rs @@ -4,7 +4,7 @@ #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] mod avx2; -#[cfg(feature = "nightly")] +#[cfg(vortex_nightly)] mod portable; use std::sync::LazyLock; @@ -25,7 +25,7 @@ use crate::{Array, ArrayRef, IntoArray, ToCanonical, register_kernel}; // and runtime feature detection to infer the best kernel for the platform. static PRIMITIVE_TAKE_KERNEL: LazyLock<&'static dyn TakeImpl> = LazyLock::new(|| { cfg_if::cfg_if! { - if #[cfg(feature = "nightly")] { + if #[cfg(vortex_nightly)] { // nightly codepath: use portable_simd kernel &portable::TakeKernelPortableSimd } else if #[cfg(target_arch = "x86_64")] { diff --git a/vortex-array/src/arrays/varbin/accessor.rs b/vortex-array/src/arrays/varbin/accessor.rs index 097c8efcede..6837ecf69e0 100644 --- a/vortex-array/src/arrays/varbin/accessor.rs +++ b/vortex-array/src/arrays/varbin/accessor.rs @@ -15,7 +15,7 @@ use crate::vtable::ValidityHelper; impl ArrayAccessor<[u8]> for VarBinArray { fn with_iterator(&self, f: F) -> VortexResult where - F: for<'a> FnOnce(&mut (dyn Iterator>)) -> R, + F: for<'a> FnOnce(&mut dyn Iterator>) -> R, { let offsets = self.offsets().to_primitive()?; let validity = self.validity(); diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index 105786f8043..a8b22155399 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +#![cfg_attr(vortex_nightly, feature(portable_simd))] //! Vortex crate containing core logic for encoding and memory representation of [arrays](ArrayRef). //! //! At the heart of Vortex are [arrays](ArrayRef) and [encodings](EncodingRef). @@ -11,8 +12,6 @@ //! Every data type recognized by Vortex also has a canonical physical encoding format, which //! arrays can be [canonicalized](Canonical) into for ease of access in compute functions. -#![cfg_attr(feature = "nightly", feature(portable_simd))] - pub use array::*; pub use canonical::*; pub use context::*; diff --git a/vortex-dtype/src/struct_.rs b/vortex-dtype/src/struct_.rs index 409e034b513..8122a17dfaf 100644 --- a/vortex-dtype/src/struct_.rs +++ b/vortex-dtype/src/struct_.rs @@ -304,7 +304,7 @@ impl StructFields { .names .iter() .enumerate() - .filter(|&(i, _)| (i != index)) + .filter(|&(i, _)| i != index) .map(|(_, name)| name.clone()) .collect::(); @@ -313,7 +313,7 @@ impl StructFields { .dtypes .iter() .enumerate() - .filter(|&(i, _)| (i != index)) + .filter(|&(i, _)| i != index) .map(|(_, dtype)| dtype.clone()) .collect::>(); diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index 0765f963644..e3f4fe75d37 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -68,71 +68,74 @@ impl From for VortexError { } } +const _: () = { + assert!(size_of::() < 128); +}; /// The top-level error type for Vortex. #[non_exhaustive] pub enum VortexError { /// A wrapped generic error - Generic(Box, Backtrace), + Generic(Box, Box), /// An index is out of bounds. - OutOfBounds(usize, usize, usize, Backtrace), + OutOfBounds(usize, usize, usize, Box), /// An error occurred while executing a compute kernel. - ComputeError(ErrString, Backtrace), + ComputeError(ErrString, Box), /// An invalid argument was provided. - InvalidArgument(ErrString, Backtrace), + InvalidArgument(ErrString, Box), /// The system has reached an invalid state, - InvalidState(ErrString, Backtrace), + InvalidState(ErrString, Box), /// An error occurred while serializing or deserializing. - InvalidSerde(ErrString, Backtrace), + InvalidSerde(ErrString, Box), /// An unimplemented function was called. - NotImplemented(ErrString, ErrString, Backtrace), + NotImplemented(ErrString, ErrString, Box), /// A type mismatch occurred. - MismatchedTypes(ErrString, ErrString, Backtrace), + MismatchedTypes(ErrString, ErrString, Box), /// An assertion failed. - AssertionFailed(ErrString, Backtrace), + AssertionFailed(ErrString, Box), /// A wrapper for other errors, carrying additional context. Context(ErrString, Box), /// A wrapper for shared errors that require cloning. Shared(Arc), /// A wrapper for errors from the Arrow library. - ArrowError(arrow_schema::ArrowError, Backtrace), + ArrowError(arrow_schema::ArrowError, Box), /// A wrapper for errors from the FlatBuffers library. #[cfg(feature = "flatbuffers")] - FlatBuffersError(flatbuffers::InvalidFlatbuffer, Backtrace), + FlatBuffersError(flatbuffers::InvalidFlatbuffer, Box), /// A wrapper for formatting errors. - FmtError(fmt::Error, Backtrace), + FmtError(fmt::Error, Box), /// A wrapper for IO errors. - IOError(io::Error, Backtrace), + IOError(io::Error, Box), /// A wrapper for UTF-8 conversion errors. - Utf8Error(std::str::Utf8Error, Backtrace), + Utf8Error(std::str::Utf8Error, Box), /// A wrapper for errors from the Parquet library. #[cfg(feature = "parquet")] - ParquetError(parquet::errors::ParquetError, Backtrace), + ParquetError(parquet::errors::ParquetError, Box), /// A wrapper for errors from the standard library when converting a slice to an array. - TryFromSliceError(std::array::TryFromSliceError, Backtrace), + TryFromSliceError(std::array::TryFromSliceError, Box), /// A wrapper for errors from the Object Store library. #[cfg(feature = "object_store")] - ObjectStore(object_store::Error, Backtrace), + ObjectStore(object_store::Error, Box), /// A wrapper for errors from the Jiff library. - JiffError(jiff::Error, Backtrace), + JiffError(jiff::Error, Box), /// A wrapper for Tokio join error. #[cfg(feature = "tokio")] - JoinError(tokio::task::JoinError, Backtrace), + JoinError(tokio::task::JoinError, Box), /// A wrapper for URL parsing errors. - UrlError(url::ParseError, Backtrace), + UrlError(url::ParseError, Box), /// Wrap errors for fallible integer casting. - TryFromInt(TryFromIntError, Backtrace), + TryFromInt(TryFromIntError, Box), /// Wrap serde and serde json errors #[cfg(feature = "serde")] - SerdeJsonError(serde_json::Error, Backtrace), + SerdeJsonError(serde_json::Error, Box), /// Wrap prost encode error #[cfg(feature = "prost")] - ProstEncodeError(prost::EncodeError, Backtrace), + ProstEncodeError(prost::EncodeError, Box), /// Wrap prost decode error #[cfg(feature = "prost")] - ProstDecodeError(prost::DecodeError, Backtrace), + ProstDecodeError(prost::DecodeError, Box), /// Wrap prost unknown enum value #[cfg(feature = "prost")] - ProstUnknownEnumValue(prost::UnknownEnumValue, Backtrace), + ProstUnknownEnumValue(prost::UnknownEnumValue, Box), } impl VortexError { @@ -146,103 +149,100 @@ impl Display for VortexError { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { VortexError::Generic(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } VortexError::OutOfBounds(idx, start, stop, backtrace) => { write!( f, - "index {} out of bounds from {} to {}\nBacktrace:\n{}", - idx, start, stop, backtrace + "index {idx} out of bounds from {start} to {stop}\nBacktrace:\n{backtrace}", ) } VortexError::ComputeError(msg, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", msg, backtrace) + write!(f, "{msg}\nBacktrace:\n{backtrace}") } VortexError::InvalidArgument(msg, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", msg, backtrace) + write!(f, "{msg}\nBacktrace:\n{backtrace}") } VortexError::InvalidState(msg, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", msg, backtrace) + write!(f, "{msg}\nBacktrace:\n{backtrace}") } VortexError::InvalidSerde(msg, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", msg, backtrace) + write!(f, "{msg}\nBacktrace:\n{backtrace}") } VortexError::NotImplemented(func, by_whom, backtrace) => { write!( f, - "function {} not implemented for {}\nBacktrace:\n{}", - func, by_whom, backtrace + "function {func} not implemented for {by_whom}\nBacktrace:\n{backtrace}", ) } VortexError::MismatchedTypes(expected, actual, backtrace) => { write!( f, - "expected type: {} but instead got {}\nBacktrace:\n{}", - expected, actual, backtrace + "expected type: {expected} but instead got {actual}\nBacktrace:\n{backtrace}", ) } VortexError::AssertionFailed(msg, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", msg, backtrace) + write!(f, "{msg}\nBacktrace:\n{backtrace}") } VortexError::Context(msg, inner) => { - write!(f, "{}: {}", msg, inner) + write!(f, "{msg}: {inner}") } VortexError::Shared(inner) => Display::fmt(inner, f), VortexError::ArrowError(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } #[cfg(feature = "flatbuffers")] VortexError::FlatBuffersError(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } VortexError::FmtError(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } VortexError::IOError(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } VortexError::Utf8Error(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } #[cfg(feature = "parquet")] VortexError::ParquetError(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } VortexError::TryFromSliceError(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } #[cfg(feature = "object_store")] VortexError::ObjectStore(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } VortexError::JiffError(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } #[cfg(feature = "tokio")] VortexError::JoinError(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } VortexError::UrlError(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } VortexError::TryFromInt(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } #[cfg(feature = "serde")] VortexError::SerdeJsonError(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } #[cfg(feature = "prost")] VortexError::ProstEncodeError(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } #[cfg(feature = "prost")] VortexError::ProstDecodeError(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } #[cfg(feature = "prost")] VortexError::ProstUnknownEnumValue(err, backtrace) => { - write!(f, "{}\nBacktrace:\n{}", err, backtrace) + write!(f, "{err}\nBacktrace:\n{backtrace}") } } } @@ -364,7 +364,10 @@ impl VortexExpect for Option { #[inline(always)] fn vortex_expect(self, msg: &str) -> Self::Output { self.unwrap_or_else(|| { - let err = VortexError::AssertionFailed(msg.to_string().into(), Backtrace::capture()); + let err = VortexError::AssertionFailed( + msg.to_string().into(), + Box::new(Backtrace::capture()), + ); vortex_panic!(err) }) } @@ -377,37 +380,37 @@ macro_rules! vortex_err { use std::backtrace::Backtrace; let err_string = format!($($tts)*); $crate::__private::must_use( - $crate::VortexError::AssertionFailed(err_string.into(), Backtrace::capture()) + $crate::VortexError::AssertionFailed(err_string.into(), Box::new(Backtrace::capture())) ) }}; (IOError: $($tts:tt)*) => {{ use std::backtrace::Backtrace; $crate::__private::must_use( - $crate::VortexError::IOError(err_string.into(), Backtrace::capture()) + $crate::VortexError::IOError(err_string.into(), Box::new(Backtrace::capture())) ) }}; (OutOfBounds: $idx:expr, $start:expr, $stop:expr) => {{ use std::backtrace::Backtrace; $crate::__private::must_use( - $crate::VortexError::OutOfBounds($idx, $start, $stop, Backtrace::capture()) + $crate::VortexError::OutOfBounds($idx, $start, $stop, Box::new(Backtrace::capture())) ) }}; (NotImplemented: $func:expr, $by_whom:expr) => {{ use std::backtrace::Backtrace; $crate::__private::must_use( - $crate::VortexError::NotImplemented($func.into(), format!("{}", $by_whom).into(), Backtrace::capture()) + $crate::VortexError::NotImplemented($func.into(), format!("{}", $by_whom).into(), Box::new(Backtrace::capture())) ) }}; (MismatchedTypes: $expected:literal, $actual:expr) => {{ use std::backtrace::Backtrace; $crate::__private::must_use( - $crate::VortexError::MismatchedTypes($expected.into(), $actual.to_string().into(), Backtrace::capture()) + $crate::VortexError::MismatchedTypes($expected.into(), $actual.to_string().into(), Box::new(Backtrace::capture())) ) }}; (MismatchedTypes: $expected:expr, $actual:expr) => {{ use std::backtrace::Backtrace; $crate::__private::must_use( - $crate::VortexError::MismatchedTypes($expected.to_string().into(), $actual.to_string().into(), Backtrace::capture()) + $crate::VortexError::MismatchedTypes($expected.to_string().into(), $actual.to_string().into(), Box::new(Backtrace::capture())) ) }}; (Context: $msg:literal, $err:expr) => {{ @@ -418,7 +421,7 @@ macro_rules! vortex_err { ($variant:ident: $fmt:literal $(, $arg:expr)* $(,)?) => {{ use std::backtrace::Backtrace; $crate::__private::must_use( - $crate::VortexError::$variant(format!($fmt, $($arg),*).into(), Backtrace::capture()) + $crate::VortexError::$variant(format!($fmt, $($arg),*).into(), Box::new(Backtrace::capture())) ) }}; ($variant:ident: $err:expr $(,)?) => { @@ -476,105 +479,105 @@ macro_rules! vortex_panic { impl From for VortexError { fn from(value: arrow_schema::ArrowError) -> Self { - VortexError::ArrowError(value, Backtrace::capture()) + VortexError::ArrowError(value, Box::new(Backtrace::capture())) } } #[cfg(feature = "flatbuffers")] impl From for VortexError { fn from(value: flatbuffers::InvalidFlatbuffer) -> Self { - VortexError::FlatBuffersError(value, Backtrace::capture()) + VortexError::FlatBuffersError(value, Box::new(Backtrace::capture())) } } impl From for VortexError { fn from(value: fmt::Error) -> Self { - VortexError::FmtError(value, Backtrace::capture()) + VortexError::FmtError(value, Box::new(Backtrace::capture())) } } impl From for VortexError { fn from(value: io::Error) -> Self { - VortexError::IOError(value, Backtrace::capture()) + VortexError::IOError(value, Box::new(Backtrace::capture())) } } impl From for VortexError { fn from(value: std::str::Utf8Error) -> Self { - VortexError::Utf8Error(value, Backtrace::capture()) + VortexError::Utf8Error(value, Box::new(Backtrace::capture())) } } #[cfg(feature = "parquet")] impl From for VortexError { fn from(value: parquet::errors::ParquetError) -> Self { - VortexError::ParquetError(value, Backtrace::capture()) + VortexError::ParquetError(value, Box::new(Backtrace::capture())) } } impl From for VortexError { fn from(value: std::array::TryFromSliceError) -> Self { - VortexError::TryFromSliceError(value, Backtrace::capture()) + VortexError::TryFromSliceError(value, Box::new(Backtrace::capture())) } } #[cfg(feature = "object_store")] impl From for VortexError { fn from(value: object_store::Error) -> Self { - VortexError::ObjectStore(value, Backtrace::capture()) + VortexError::ObjectStore(value, Box::new(Backtrace::capture())) } } impl From for VortexError { fn from(value: jiff::Error) -> Self { - VortexError::JiffError(value, Backtrace::capture()) + VortexError::JiffError(value, Box::new(Backtrace::capture())) } } #[cfg(feature = "tokio")] impl From for VortexError { fn from(value: tokio::task::JoinError) -> Self { - VortexError::JoinError(value, Backtrace::capture()) + VortexError::JoinError(value, Box::new(Backtrace::capture())) } } impl From for VortexError { fn from(value: url::ParseError) -> Self { - VortexError::UrlError(value, Backtrace::capture()) + VortexError::UrlError(value, Box::new(Backtrace::capture())) } } impl From for VortexError { fn from(value: TryFromIntError) -> Self { - VortexError::TryFromInt(value, Backtrace::capture()) + VortexError::TryFromInt(value, Box::new(Backtrace::capture())) } } #[cfg(feature = "serde")] impl From for VortexError { fn from(value: serde_json::Error) -> Self { - VortexError::SerdeJsonError(value, Backtrace::capture()) + VortexError::SerdeJsonError(value, Box::new(Backtrace::capture())) } } #[cfg(feature = "prost")] impl From for VortexError { fn from(value: prost::EncodeError) -> Self { - VortexError::ProstEncodeError(value, Backtrace::capture()) + VortexError::ProstEncodeError(value, Box::new(Backtrace::capture())) } } #[cfg(feature = "prost")] impl From for VortexError { fn from(value: prost::DecodeError) -> Self { - VortexError::ProstDecodeError(value, Backtrace::capture()) + VortexError::ProstDecodeError(value, Box::new(Backtrace::capture())) } } #[cfg(feature = "prost")] impl From for VortexError { fn from(value: prost::UnknownEnumValue) -> Self { - VortexError::ProstUnknownEnumValue(value, Backtrace::capture()) + VortexError::ProstUnknownEnumValue(value, Box::new(Backtrace::capture())) } } @@ -593,6 +596,6 @@ pub mod __private { impl From> for VortexError { fn from(_value: PoisonError) -> Self { // We don't include the value since it may be sensitive. - Self::InvalidState("Lock poisoned".into(), Backtrace::capture()) + Self::InvalidState("Lock poisoned".into(), Box::new(Backtrace::capture())) } } diff --git a/vortex-error/src/python.rs b/vortex-error/src/python.rs index 4d57859944b..6d3dad0af5c 100644 --- a/vortex-error/src/python.rs +++ b/vortex-error/src/python.rs @@ -19,6 +19,6 @@ impl From for PyErr { impl From for VortexError { fn from(value: PyErr) -> Self { - VortexError::InvalidArgument(value.to_string().into(), Backtrace::capture()) + VortexError::InvalidArgument(value.to_string().into(), Box::new(Backtrace::capture())) } } From d63db3fae479880e41e21bc0fdb828148666348d Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 16 Jul 2025 11:04:16 -0400 Subject: [PATCH 05/11] more cleaning Signed-off-by: Andrew Duffy --- encodings/fsst/src/canonical.rs | 4 +- encodings/sequence/src/array.rs | 3 +- fuzz/fuzz_targets/array_ops.rs | 7 ++- fuzz/fuzz_targets/file_io.rs | 1 + fuzz/src/error.rs | 71 +++++++++++++++++++++----- fuzz/src/lib.rs | 3 +- rustfmt.toml | 3 +- vortex-array/src/array/mod.rs | 4 +- vortex-expr/src/transform/partition.rs | 2 +- 9 files changed, 75 insertions(+), 23 deletions(-) diff --git a/encodings/fsst/src/canonical.rs b/encodings/fsst/src/canonical.rs index 352cec80e4b..9a5804ee238 100644 --- a/encodings/fsst/src/canonical.rs +++ b/encodings/fsst/src/canonical.rs @@ -26,8 +26,8 @@ impl CanonicalVTable for FSSTVTable { fsst_into_varbin_view(array.decompressor(), array, builder.completed_block_count())?; builder.push_buffer_and_adjusted_views( - &view.buffers(), - &view.views(), + view.buffers(), + view.views(), array.validity_mask()?, ); Ok(()) diff --git a/encodings/sequence/src/array.rs b/encodings/sequence/src/array.rs index dd1a69cfbc0..6e714c87cab 100644 --- a/encodings/sequence/src/array.rs +++ b/encodings/sequence/src/array.rs @@ -61,8 +61,7 @@ impl SequenceArray { Self::try_last(base, multiplier, ptype, length).map_err(|e| { e.with_context(format!( - "final value not expressible, base = {:?}, multiplier = {:?}, len = {} ", - base, multiplier, length + "final value not expressible, base = {base:?}, multiplier = {multiplier:?}, len = {length} ", )) })?; diff --git a/fuzz/fuzz_targets/array_ops.rs b/fuzz/fuzz_targets/array_ops.rs index 2d202bd0180..0518d621f01 100644 --- a/fuzz/fuzz_targets/array_ops.rs +++ b/fuzz/fuzz_targets/array_ops.rs @@ -2,7 +2,9 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors #![no_main] -#![allow(clippy::unwrap_used)] +#![allow(clippy::unwrap_used, clippy::result_large_err)] + +use std::backtrace::Backtrace; use libfuzzer_sys::{Corpus, fuzz_target}; use vortex_array::arrays::{ @@ -108,6 +110,7 @@ fn assert_search_sorted( side, search_result, step, + Backtrace::capture(), )) } else { Ok(()) @@ -123,6 +126,7 @@ fn assert_array_eq(lhs: &ArrayRef, rhs: &ArrayRef, step: usize) -> VortexFuzzRes lhs.to_array(), rhs.to_array(), step, + Backtrace::capture(), )); } for idx in 0..lhs.len() { @@ -137,6 +141,7 @@ fn assert_array_eq(lhs: &ArrayRef, rhs: &ArrayRef, step: usize) -> VortexFuzzRes lhs.clone(), rhs.clone(), step, + Backtrace::capture(), )); } } diff --git a/fuzz/fuzz_targets/file_io.rs b/fuzz/fuzz_targets/file_io.rs index cbee3cffc2f..a75125e193c 100644 --- a/fuzz/fuzz_targets/file_io.rs +++ b/fuzz/fuzz_targets/file_io.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors #![no_main] +#![allow(clippy::result_large_err)] use arrow_buffer::BooleanBuffer; use arrow_ord::ord::make_comparator; diff --git a/fuzz/src/error.rs b/fuzz/src/error.rs index 14c713cb947..d8376109d8c 100644 --- a/fuzz/src/error.rs +++ b/fuzz/src/error.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::backtrace::Backtrace; +use std::error::Error; use std::fmt; use std::fmt::{Debug, Display, Formatter}; @@ -13,9 +15,8 @@ fn tree_display(arr: &ArrayRef) -> impl Display { arr.display_tree() } -#[derive(thiserror::Error)] +#[non_exhaustive] pub enum VortexFuzzError { - #[error("Expected to find {0} at {1} in {tree} from {3} but instead found it at {4} in step {5}", tree = tree_display(.2))] SearchSortedError( Scalar, SearchResult, @@ -23,20 +24,14 @@ pub enum VortexFuzzError { SearchSortedSide, SearchResult, usize, + Backtrace, ), - #[error("{0} != {1} at index {2}, lhs is {lhs_tree} rhs is {rhs_tree} in step {5}",lhs_tree = tree_display(.3), rhs_tree = tree_display(.4))] - ArrayNotEqual(Scalar, Scalar, usize, ArrayRef, ArrayRef, usize), + ArrayNotEqual(Scalar, Scalar, usize, ArrayRef, ArrayRef, usize, Backtrace), - #[error("LHS len {0} != RHS len {1}, lhs is {lhs_tree} rhs is {rhs_tree} in step {4}", lhs_tree = tree_display(.2), rhs_tree = tree_display(.3))] - LengthMismatch(usize, usize, ArrayRef, ArrayRef, usize), + LengthMismatch(usize, usize, ArrayRef, ArrayRef, usize, Backtrace), - #[error(transparent)] - VortexError( - #[from] - #[backtrace] - VortexError, - ), + VortexError(VortexError, Backtrace), } impl Debug for VortexFuzzError { @@ -45,4 +40,56 @@ impl Debug for VortexFuzzError { } } +impl Display for VortexFuzzError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + VortexFuzzError::SearchSortedError( + a, + expected, + array, + from, + actual, + step, + backtrace, + ) => { + write!( + f, + "Expected to find {a} at {expected} in {} from {from} but instead found it at {actual} in step {step}\nBacktrace:\n{backtrace}", + tree_display(array), + ) + } + VortexFuzzError::ArrayNotEqual(expected, actual, idx, lhs, rhs, step, backtrace) => { + write!( + f, + "{expected} != {actual} at index {idx}, lhs is {} rhs is {} in step {step}\nBacktrace:\n{backtrace}", + tree_display(lhs), + tree_display(rhs), + ) + } + VortexFuzzError::LengthMismatch(lhs_len, rhs_len, lhs, rhs, step, backtrace) => { + write!( + f, + "LHS len {lhs_len} != RHS len {rhs_len}, lhs is {} rhs is {} in step {step}\nBacktrace:\n{backtrace}", + tree_display(lhs), + tree_display(rhs), + ) + } + VortexFuzzError::VortexError(err, backtrace) => { + write!(f, "{err}\nBacktrace:\n{backtrace}") + } + } + } +} + +impl Error for VortexFuzzError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + match self { + VortexFuzzError::SearchSortedError(..) => None, + VortexFuzzError::ArrayNotEqual(..) => None, + VortexFuzzError::LengthMismatch(..) => None, + VortexFuzzError::VortexError(err, ..) => Some(err), + } + } +} + pub type VortexFuzzResult = Result; diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs index 5a3661a3d21..86e9bba465c 100644 --- a/fuzz/src/lib.rs +++ b/fuzz/src/lib.rs @@ -1,7 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -#![feature(error_generic_member_access)] +// VortexFuzzError is quite large, but we don't care about the performance impact for fuzzing. +#![allow(clippy::result_large_err)] mod array; pub mod error; diff --git a/rustfmt.toml b/rustfmt.toml index d479a9a08ec..62bd702f181 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -2,6 +2,7 @@ condense_wildcard_suffixes = true format_macro_matchers = true format_macro_bodies = true group_imports = "StdExternalCrate" +unstable_features = true use_field_init_shorthand = true imports_granularity = "Module" -style_edition="2024" +style_edition = "2024" diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index 0c4bb452027..607b634e731 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -437,9 +437,7 @@ impl Array for ArrayAdapter { let result_nbytes = result.nbytes(); assert!( result_nbytes <= nbytes, - "optimize() made the array larger: {} bytes -> {} bytes", - nbytes, - result_nbytes + "optimize() made the array larger: {nbytes} bytes -> {result_nbytes} bytes", ); } diff --git a/vortex-expr/src/transform/partition.rs b/vortex-expr/src/transform/partition.rs index 2f0a2b8d93d..a80194a7497 100644 --- a/vortex-expr/src/transform/partition.rs +++ b/vortex-expr/src/transform/partition.rs @@ -145,7 +145,7 @@ impl<'a, A: Annotation + Display> StructFieldExpressionSplitter<'a, A> { /// Each annotation may be associated with multiple sub-expressions, so we need to /// a unique name for each sub-expression. fn field_name(annotation: &A, idx: usize) -> FieldName { - format!("{}_{}", annotation, idx).into() + format!("{annotation}_{idx}").into() } } From 4f8a1e2c74a5a0e6f83d139ddacdd3db3e453f64 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 16 Jul 2025 11:10:21 -0400 Subject: [PATCH 06/11] fix toolchain setup for miri/sanitizer tasks Signed-off-by: Andrew Duffy --- .github/actions/setup-rust/action.yml | 10 ++++++++-- .github/workflows/ci.yml | 7 ++++++- vortex-expr/src/transform/partition.rs | 4 ++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/.github/actions/setup-rust/action.yml b/.github/actions/setup-rust/action.yml index 214ac45a419..4f8fb6e6618 100644 --- a/.github/actions/setup-rust/action.yml +++ b/.github/actions/setup-rust/action.yml @@ -2,6 +2,12 @@ name: "Setup Rust" description: "Toolchain setup and Initial compilation" inputs: + toolchain: + description: "optional override for the toolchain version (e.g. nightly)" + required: false + components: + description: "optional override for the components to install for the step (e.g. clippy, rustfmt, miri)" + required: false targets: description: "optional targets override (e.g. wasm32-unknown-unknown)" required: false @@ -22,9 +28,9 @@ runs: uses: dtolnay/rust-toolchain@master if: steps.rustup-cache.outputs.cache-hit != 'true' with: - toolchain: "${{ steps.rust-version.outputs.version }}" + toolchain: "${{ inputs.toolchain || steps.rust-version.outputs.version }}" targets: "${{inputs.targets || ''}}" - components: clippy, rustfmt + components: "${{ inputs.components || 'clippy, rustfmt' }}" - name: Rust Dependency Cache uses: Swatinem/rust-cache@v2 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e8eaf2d0085..1c13c42597d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -364,6 +364,9 @@ jobs: with: submodules: "recursive" - uses: ./.github/actions/setup-rust + with: + toolchain: nightly + components: "rust-src, rustfmt, clippy, llvm-tools-preview" - name: Install nextest uses: taiki-e/install-action@v2 with: @@ -463,6 +466,7 @@ jobs: - uses: actions/checkout@v4 - uses: ./.github/actions/setup-rust with: + toolchain: nightly targets: "wasm32-wasip1" - name: Setup Wasmer uses: wasmerio/setup-wasmer@v3.1 @@ -485,7 +489,8 @@ jobs: submodules: "recursive" - uses: ./.github/actions/setup-rust with: - rust-version: nightly + toolchain: nightly + components: "rust-src, rustfmt, clippy, miri" - uses: taiki-e/install-action@v2 with: tool: nextest@0.9.98 diff --git a/vortex-expr/src/transform/partition.rs b/vortex-expr/src/transform/partition.rs index a80194a7497..f83fdb01244 100644 --- a/vortex-expr/src/transform/partition.rs +++ b/vortex-expr/src/transform/partition.rs @@ -395,10 +395,10 @@ mod tests { let part_a = partitioned.find_partition(&"a".into()).unwrap(); let expected_a = pack([("a_0", col("a"))], NonNullable); - assert_eq!(part_a, &expected_a, "{} {}", part_a, expected_a); + assert_eq!(part_a, &expected_a, "{part_a} {expected_a}"); let part_b = partitioned.find_partition(&"b".into()).unwrap(); let expected_b = pack([("b_0", pack([("b", col("b"))], NonNullable))], NonNullable); - assert_eq!(part_b, &expected_b, "{} {}", part_b, expected_b); + assert_eq!(part_b, &expected_b, "{part_b} {expected_b}"); } } From 104fd44265cb5077350a540534e8a9da825a47c7 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 16 Jul 2025 11:35:24 -0400 Subject: [PATCH 07/11] clippy for vortex-duckdb Signed-off-by: Andrew Duffy --- vortex-duckdb/src/duckdb/connection.rs | 9 +++++---- vortex-duckdb/src/duckdb/data_chunk.rs | 4 ++-- vortex-duckdb/src/duckdb/database.rs | 4 ++-- vortex-duckdb/src/duckdb/expr.rs | 14 +++++++++----- vortex-duckdb/src/duckdb/query_result.rs | 2 +- vortex-duckdb/src/duckdb/table_filter.rs | 12 ++++++++---- vortex-duckdb/src/duckdb/table_function/mod.rs | 2 +- vortex-duckdb/src/duckdb/vector.rs | 3 ++- vortex-duckdb/src/vortex_e2e_tests.rs | 7 +++---- 9 files changed, 33 insertions(+), 24 deletions(-) diff --git a/vortex-duckdb/src/duckdb/connection.rs b/vortex-duckdb/src/duckdb/connection.rs index 6909257b1f6..00b91e33594 100644 --- a/vortex-duckdb/src/duckdb/connection.rs +++ b/vortex-duckdb/src/duckdb/connection.rs @@ -20,7 +20,7 @@ impl Connection { pub fn connect(db: &Database) -> VortexResult { let mut ptr: cpp::duckdb_connection = ptr::null_mut(); duckdb_try!( - unsafe { cpp::duckdb_connect(db.as_ptr(), &mut ptr) }, + unsafe { cpp::duckdb_connect(db.as_ptr(), &raw mut ptr) }, "Failed to connect to DuckDB database" ); Ok(unsafe { Self::own(ptr) }) @@ -32,11 +32,12 @@ impl Connection { let query_cstr = std::ffi::CString::new(query).map_err(|_| vortex_err!("Invalid query string"))?; - let status = unsafe { cpp::duckdb_query(self.as_ptr(), query_cstr.as_ptr(), &mut result) }; + let status = + unsafe { cpp::duckdb_query(self.as_ptr(), query_cstr.as_ptr(), &raw mut result) }; if status != cpp::duckdb_state::DuckDBSuccess { let error_msg = unsafe { - let error_ptr = cpp::duckdb_result_error(&mut result); + let error_ptr = cpp::duckdb_result_error(&raw mut result); if error_ptr.is_null() { "Unknown DuckDB error".to_string() } else { @@ -44,7 +45,7 @@ impl Connection { } }; - unsafe { cpp::duckdb_destroy_result(&mut result) }; + unsafe { cpp::duckdb_destroy_result(&raw mut result) }; return Err(vortex_err!("Failed to execute query: {}", error_msg)); } diff --git a/vortex-duckdb/src/duckdb/data_chunk.rs b/vortex-duckdb/src/duckdb/data_chunk.rs index fd9d7b5b17b..c9095865d2d 100644 --- a/vortex-duckdb/src/duckdb/data_chunk.rs +++ b/vortex-duckdb/src/duckdb/data_chunk.rs @@ -61,7 +61,7 @@ impl TryFrom<&DataChunk> for String { let mut err: duckdb_vx_error = ptr::null_mut(); #[cfg(debug_assertions)] unsafe { - cpp::duckdb_data_chunk_verify(value.as_ptr(), &mut err); + cpp::duckdb_data_chunk_verify(value.as_ptr(), &raw mut err); if !err.is_null() { vortex_bail!( "{}", @@ -69,7 +69,7 @@ impl TryFrom<&DataChunk> for String { ) } }; - let debug = unsafe { cpp::duckdb_data_chunk_to_string(value.as_ptr(), &mut err) }; + let debug = unsafe { cpp::duckdb_data_chunk_to_string(value.as_ptr(), &raw mut err) }; if !err.is_null() { vortex_bail!("{}", unsafe { CStr::from_ptr(cpp::duckdb_vx_error_value(err)).to_string_lossy() diff --git a/vortex-duckdb/src/duckdb/database.rs b/vortex-duckdb/src/duckdb/database.rs index dc3b10aecb7..d8fc52589bc 100644 --- a/vortex-duckdb/src/duckdb/database.rs +++ b/vortex-duckdb/src/duckdb/database.rs @@ -22,7 +22,7 @@ impl Database { pub fn open_in_memory() -> VortexResult { let mut ptr: cpp::duckdb_database = ptr::null_mut(); duckdb_try!( - unsafe { cpp::duckdb_open(ptr::null(), &mut ptr) }, + unsafe { cpp::duckdb_open(ptr::null(), &raw mut ptr) }, "Failed to open in-memory DuckDB database" ); Ok(unsafe { Self::own(ptr) }) @@ -41,7 +41,7 @@ impl Database { let mut ptr: cpp::duckdb_database = ptr::null_mut(); duckdb_try!( - unsafe { cpp::duckdb_open(path_cstr.as_ptr(), &mut ptr) }, + unsafe { cpp::duckdb_open(path_cstr.as_ptr(), &raw mut ptr) }, "Failed to open DuckDB database at path: {}", path_str ); diff --git a/vortex-duckdb/src/duckdb/expr.rs b/vortex-duckdb/src/duckdb/expr.rs index 71a2d595345..69c0077c63e 100644 --- a/vortex-duckdb/src/duckdb/expr.rs +++ b/vortex-duckdb/src/duckdb/expr.rs @@ -50,7 +50,9 @@ impl Expression { children_count: 0, type_: cpp::DUCKDB_VX_EXPR_TYPE::DUCKDB_VX_EXPR_TYPE_INVALID, }; - unsafe { cpp::duckdb_vx_expr_get_bound_conjunction(self.as_ptr(), &mut out) }; + unsafe { + cpp::duckdb_vx_expr_get_bound_conjunction(self.as_ptr(), &raw mut out) + }; let children = unsafe { std::slice::from_raw_parts(out.children, out.children_count) }; @@ -66,7 +68,9 @@ impl Expression { right: ptr::null_mut(), type_: cpp::DUCKDB_VX_EXPR_TYPE::DUCKDB_VX_EXPR_TYPE_INVALID, }; - unsafe { cpp::duckdb_vx_expr_get_bound_comparison(self.as_ptr(), &mut out) }; + unsafe { + cpp::duckdb_vx_expr_get_bound_comparison(self.as_ptr(), &raw mut out) + }; ExpressionClass::BoundComparison(BoundComparison { left: unsafe { Expression::borrow(out.left) }, @@ -83,7 +87,7 @@ impl Expression { upper_inclusive: false, }; unsafe { - cpp::duckdb_vx_expr_get_bound_between(self.as_ptr(), &mut out); + cpp::duckdb_vx_expr_get_bound_between(self.as_ptr(), &raw mut out); } ExpressionClass::BoundBetween(BoundBetween { @@ -100,7 +104,7 @@ impl Expression { children_count: 0, type_: cpp::DUCKDB_VX_EXPR_TYPE::DUCKDB_VX_EXPR_TYPE_INVALID, }; - unsafe { cpp::duckdb_vx_expr_get_bound_operator(self.as_ptr(), &mut out) }; + unsafe { cpp::duckdb_vx_expr_get_bound_operator(self.as_ptr(), &raw mut out) }; let children = unsafe { std::slice::from_raw_parts(out.children, out.children_count) }; @@ -117,7 +121,7 @@ impl Expression { scalar_function: ptr::null_mut(), bind_info: ptr::null_mut(), }; - unsafe { cpp::duckdb_vx_expr_get_bound_function(self.as_ptr(), &mut out) }; + unsafe { cpp::duckdb_vx_expr_get_bound_function(self.as_ptr(), &raw mut out) }; let children = unsafe { std::slice::from_raw_parts(out.children, out.children_count) }; diff --git a/vortex-duckdb/src/duckdb/query_result.rs b/vortex-duckdb/src/duckdb/query_result.rs index 8e5bc3581a3..cfa814849cc 100644 --- a/vortex-duckdb/src/duckdb/query_result.rs +++ b/vortex-duckdb/src/duckdb/query_result.rs @@ -16,7 +16,7 @@ wrapper! { |ptr: &mut *mut cpp::duckdb_result| { if !ptr.is_null() { unsafe { - cpp::duckdb_destroy_result(&mut **ptr); + cpp::duckdb_destroy_result(&raw mut **ptr); drop(Box::from_raw(*ptr)); } } diff --git a/vortex-duckdb/src/duckdb/table_filter.rs b/vortex-duckdb/src/duckdb/table_filter.rs index 07c1f21b14a..918f68e13e0 100644 --- a/vortex-duckdb/src/duckdb/table_filter.rs +++ b/vortex-duckdb/src/duckdb/table_filter.rs @@ -20,7 +20,11 @@ impl TableFilterSet { let mut filter_set: duckdb_vx_table_filter = ptr::null_mut(); let column_index = unsafe { - cpp::duckdb_vx_table_filter_set_get(self.as_ptr(), index.as_usize(), &mut filter_set) + cpp::duckdb_vx_table_filter_set_get( + self.as_ptr(), + index.as_usize(), + &raw mut filter_set, + ) }; if filter_set.is_null() { @@ -68,7 +72,7 @@ impl TableFilter { value: ptr::null_mut(), comparison_type: cpp::DUCKDB_VX_EXPR_TYPE::DUCKDB_VX_EXPR_TYPE_INVALID, }; - unsafe { cpp::duckdb_vx_table_filter_get_constant(self.as_ptr(), &mut out) }; + unsafe { cpp::duckdb_vx_table_filter_get_constant(self.as_ptr(), &raw mut out) }; TableFilterClass::ConstantComparison(ConstantComparison { value: unsafe { Value::borrow(out.value) }, @@ -86,7 +90,7 @@ impl TableFilter { children: ptr::null_mut(), children_count: 0, }; - unsafe { cpp::duckdb_vx_table_filter_get_conjunction_or(self.as_ptr(), &mut out) }; + unsafe { cpp::duckdb_vx_table_filter_get_conjunction_or(self.as_ptr(), &raw mut out) }; TableFilterClass::ConjunctionOr(Conjunction { children: unsafe { @@ -100,7 +104,7 @@ impl TableFilter { children_count: 0, }; unsafe { - cpp::duckdb_vx_table_filter_get_conjunction_and(self.as_ptr(), &mut out) + cpp::duckdb_vx_table_filter_get_conjunction_and(self.as_ptr(), &raw mut out) }; TableFilterClass::ConjunctionAnd(Conjunction { diff --git a/vortex-duckdb/src/duckdb/table_function/mod.rs b/vortex-duckdb/src/duckdb/table_function/mod.rs index 14cabcbf250..1e76a78a6fc 100644 --- a/vortex-duckdb/src/duckdb/table_function/mod.rs +++ b/vortex-duckdb/src/duckdb/table_function/mod.rs @@ -136,7 +136,7 @@ impl Connection { }; duckdb_try!( - unsafe { cpp::duckdb_vx_tfunc_register(self.as_ptr(), &vtab) }, + unsafe { cpp::duckdb_vx_tfunc_register(self.as_ptr(), &raw const vtab) }, "Failed to register table function '{}'", name.to_string_lossy() ); diff --git a/vortex-duckdb/src/duckdb/vector.rs b/vortex-duckdb/src/duckdb/vector.rs index 56626846ffd..29cdd458bba 100644 --- a/vortex-duckdb/src/duckdb/vector.rs +++ b/vortex-duckdb/src/duckdb/vector.rs @@ -141,7 +141,8 @@ impl Vector { pub fn try_to_string(&self, len: u64) -> VortexResult { let mut err: duckdb_vx_error = ptr::null_mut(); - let debug = unsafe { cpp::duckdb_vector_to_string(self.as_ptr(), len.as_u64(), &mut err) }; + let debug = + unsafe { cpp::duckdb_vector_to_string(self.as_ptr(), len.as_u64(), &raw mut err) }; if !err.is_null() { vortex_bail!("{}", unsafe { CStr::from_ptr(cpp::duckdb_vx_error_value(err)).to_string_lossy() diff --git a/vortex-duckdb/src/vortex_e2e_tests.rs b/vortex-duckdb/src/vortex_e2e_tests.rs index 9910a16ef8d..b94f5fb4ca4 100644 --- a/vortex-duckdb/src/vortex_e2e_tests.rs +++ b/vortex-duckdb/src/vortex_e2e_tests.rs @@ -56,7 +56,7 @@ mod tests { { let conn = database_connection(); let file_path = tmp_file.path().to_string_lossy(); - let formatted_query = query.replace('?', &format!("'{}'", file_path)); + let formatted_query = query.replace('?', &format!("'{file_path}'")); let result = conn.query(&formatted_query).unwrap(); result.get::(col_idx, 0).unwrap() @@ -68,7 +68,7 @@ mod tests { { let conn = database_connection(); let file_path = tmp_file.path().to_string_lossy(); - let formatted_query = query.replace('?', &format!("'{}'", file_path)); + let formatted_query = query.replace('?', &format!("'{file_path}'")); let result = conn.query(&formatted_query)?; @@ -249,8 +249,7 @@ mod tests { let conn = database_connection(); let result = conn .query(&format!( - "SELECT SUM(numbers) FROM vortex_scan('{}')", - glob_pattern + "SELECT SUM(numbers) FROM vortex_scan('{glob_pattern}')", )) .unwrap(); From c08ea403354b0021479948c00115bb1eb63dc44c Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 16 Jul 2025 11:46:10 -0400 Subject: [PATCH 08/11] more Signed-off-by: Andrew Duffy --- .github/workflows/ci.yml | 6 +++--- bench-vortex/src/datasets/mod.rs | 2 +- bench-vortex/src/engines/ddb/mod.rs | 4 ++-- bench-vortex/src/tpch/tpch_benchmark.rs | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1c13c42597d..2df793d7880 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -470,7 +470,7 @@ jobs: targets: "wasm32-wasip1" - name: Setup Wasmer uses: wasmerio/setup-wasmer@v3.1 - - run: cargo -Zbuild-std=panic_abort,std build --target wasm32-wasip1 + - run: cargo +nightly -Zbuild-std=panic_abort,std build --target wasm32-wasip1 working-directory: ./wasm-test - run: wasmer run ./target/wasm32-wasip1/debug/wasm-test.wasm working-directory: ./wasm-test @@ -498,10 +498,10 @@ jobs: # For now, we only run Miri against known "fiddly" crates. if: false run: | - cargo miri nextest run --no-fail-fast --workspace --exclude vortex-file --exclude vortex-layout \ + cargo +nightly miri nextest run --no-fail-fast --workspace --exclude vortex-file --exclude vortex-layout \ --exclude vortex-fsst --exclude vortex-array --exclude vortex-dtype --exclude vortex-expr \ --exclude vortex-scalar --exclude vortex-duckdb --exclude vortex-ffi --exclude bench-vortex \ - --exclude vortex-python --exclude vortex-jni + --exclude vortex-python --exclude vortex-jni --exclude vortex-fuzz - name: Run Miri run: cargo miri nextest run --no-fail-fast -p vortex-buffer -p vortex-ffi diff --git a/bench-vortex/src/datasets/mod.rs b/bench-vortex/src/datasets/mod.rs index ca783a7ad5c..fccfe7a4d92 100644 --- a/bench-vortex/src/datasets/mod.rs +++ b/bench-vortex/src/datasets/mod.rs @@ -106,7 +106,7 @@ impl BenchmarkDataset { } pub fn format_path(&self, format: Format, base_url: &Url) -> Result { - Ok(base_url.join(&format!("{}/", format))?) + Ok(base_url.join(&format!("{format}/"))?) } pub async fn register_tables( diff --git a/bench-vortex/src/engines/ddb/mod.rs b/bench-vortex/src/engines/ddb/mod.rs index 44f2aed584c..fa78c7f8999 100644 --- a/bench-vortex/src/engines/ddb/mod.rs +++ b/bench-vortex/src/engines/ddb/mod.rs @@ -66,7 +66,7 @@ impl DuckDBCtx { /// Execute DuckDB queries for benchmarks using the internal connection pub fn execute_query(&self, query: &str) -> Result<(Duration, usize)> { - trace!("execute duckdb query: {}", query); + trace!("execute duckdb query: {query}"); let time_instant = Instant::now(); let result = self.connection.query(query)?; let query_time = time_instant.elapsed(); @@ -104,7 +104,7 @@ impl DuckDBCtx { // Generate and execute table registration commands let commands = self.generate_table_commands(&effective_url, extension, dataset, object); self.execute_query(&commands)?; - trace!("Executing table registration commands: {}", commands); + trace!("Executing table registration commands: {commands}"); Ok(()) } diff --git a/bench-vortex/src/tpch/tpch_benchmark.rs b/bench-vortex/src/tpch/tpch_benchmark.rs index 6d0848be231..9028ba1d395 100644 --- a/bench-vortex/src/tpch/tpch_benchmark.rs +++ b/bench-vortex/src/tpch/tpch_benchmark.rs @@ -286,7 +286,7 @@ impl TpcHBenchmark { ChangeTag::Insert => "+", ChangeTag::Equal => " ", }; - print!("{}{}", sign, change); + print!("{sign}{change}"); } eprintln!("query output does not match the reference for {query_name}"); From 3f099f4e66f61f3f4ff44e4380aade3db45d5510 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 16 Jul 2025 11:52:57 -0400 Subject: [PATCH 09/11] try harder Signed-off-by: Andrew Duffy --- .github/workflows/ci.yml | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2df793d7880..390c0b68acb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -314,7 +314,7 @@ jobs: - name: Rust Tests if: ${{ matrix.suite == 'tests' }} run: | - cargo nextest run --locked --workspace --all-features --no-fail-fast + cargo +nightly nextest run --locked --workspace --all-features --no-fail-fast - name: Run TPC-H if: ${{ matrix.suite == 'tpc-h' }} run: | @@ -374,7 +374,7 @@ jobs: - name: Rust Tests # vortex-duckdb-ext currently fails linking for cargo test targets. run: | - cargo nextest run --locked --workspace --all-features --no-fail-fast --target x86_64-unknown-linux-gnu + cargo +nightly nextest run --locked --workspace --all-features --no-fail-fast --target x86_64-unknown-linux-gnu build-java: name: "Java" @@ -494,16 +494,8 @@ jobs: - uses: taiki-e/install-action@v2 with: tool: nextest@0.9.98 - - name: Run all tests with Miri - # For now, we only run Miri against known "fiddly" crates. - if: false - run: | - cargo +nightly miri nextest run --no-fail-fast --workspace --exclude vortex-file --exclude vortex-layout \ - --exclude vortex-fsst --exclude vortex-array --exclude vortex-dtype --exclude vortex-expr \ - --exclude vortex-scalar --exclude vortex-duckdb --exclude vortex-ffi --exclude bench-vortex \ - --exclude vortex-python --exclude vortex-jni --exclude vortex-fuzz - name: Run Miri - run: cargo miri nextest run --no-fail-fast -p vortex-buffer -p vortex-ffi + run: cargo +nightly miri nextest run --no-fail-fast -p vortex-buffer -p vortex-ffi generated-files: name: "Check generated proto/fbs files are up to date" From 43a2f0bc3b2e3a5c136e1a20fd40a112927a2ed7 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 16 Jul 2025 12:10:47 -0400 Subject: [PATCH 10/11] wasm Signed-off-by: Andrew Duffy --- vortex-expr/src/transform/annotations.rs | 2 +- vortex-expr/src/transform/partition.rs | 19 ++----------------- wasm-test/Cargo.lock | 23 ++++++++++++++--------- 3 files changed, 17 insertions(+), 27 deletions(-) diff --git a/vortex-expr/src/transform/annotations.rs b/vortex-expr/src/transform/annotations.rs index 01c3824dd9c..7c555f092f2 100644 --- a/vortex-expr/src/transform/annotations.rs +++ b/vortex-expr/src/transform/annotations.rs @@ -35,7 +35,7 @@ pub type Annotations<'a, A> = HashMap<&'a ExprRef, HashSet>; pub fn descendent_annotations( expr: &ExprRef, annotate: A, -) -> Annotations { +) -> Annotations<'_, A::Annotation> { let mut visitor = AnnotationVisitor { annotations: Default::default(), annotate, diff --git a/vortex-expr/src/transform/partition.rs b/vortex-expr/src/transform/partition.rs index f83fdb01244..696ee6f1d93 100644 --- a/vortex-expr/src/transform/partition.rs +++ b/vortex-expr/src/transform/partition.rs @@ -12,8 +12,8 @@ use crate::transform::annotations::{ Annotation, AnnotationFn, Annotations, descendent_annotations, }; use crate::transform::simplify_typed::simplify_typed; -use crate::traversal::{FoldDown, FoldUp, FolderMut, MutNodeVisitor, Node, TransformResult}; -use crate::{ExprRef, GetItemVTable, get_item, pack, root}; +use crate::traversal::{FoldDown, FoldUp, FolderMut, Node}; +use crate::{ExprRef, get_item, pack, root}; /// Partition an expression into sub-expressions that are uniquely associated with an annotation. /// A root expression is also returned that can be used to recombine the results of the partitions @@ -197,21 +197,6 @@ impl FolderMut for StructFieldExpressionSplitter<'_, A> } } -pub(crate) struct ReplaceAccessesWithChild(Vec); - -impl MutNodeVisitor for ReplaceAccessesWithChild { - type NodeTy = ExprRef; - - fn visit_up(&mut self, node: Self::NodeTy) -> VortexResult> { - if let Some(item) = node.as_opt::() { - if self.0.contains(item.field()) { - return Ok(TransformResult::yes(item.child().clone())); - } - } - Ok(TransformResult::no(node)) - } -} - #[cfg(test)] mod tests { use vortex_dtype::Nullability::NonNullable; diff --git a/wasm-test/Cargo.lock b/wasm-test/Cargo.lock index 156163cb028..e5db791bdc7 100644 --- a/wasm-test/Cargo.lock +++ b/wasm-test/Cargo.lock @@ -155,6 +155,9 @@ name = "arrow-schema" version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af7686986a3bf2254c9fb130c623cdcb2f8e1f15763e7c71c310f0834da3d292" +dependencies = [ + "bitflags", +] [[package]] name = "arrow-select" @@ -458,9 +461,9 @@ dependencies = [ [[package]] name = "fastlanes" -version = "0.1.8" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0504746b2d3b8249642d675325a6de910351f5aefc909a08450b2e239b169fd2" +checksum = "f9504b2eddaa58bb69cf825baa2f46c4e97a5891dd1f167eab788d9c22692aa5" dependencies = [ "arrayref", "const_for", @@ -1296,9 +1299,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" dependencies = [ "bytes", "prost-derive", @@ -1306,9 +1309,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" dependencies = [ "anyhow", "itertools", @@ -1319,9 +1322,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" dependencies = [ "prost", ] @@ -1738,7 +1741,6 @@ dependencies = [ "prost", "rand 0.9.1", "rustc-hash", - "rustversion", "serde", "static_assertions", "vortex-buffer", @@ -1774,6 +1776,7 @@ dependencies = [ "vortex-mask", "vortex-runend", "vortex-scalar", + "vortex-sequence", "vortex-sparse", "vortex-utils", "vortex-zigzag", @@ -1976,6 +1979,7 @@ dependencies = [ "once_cell", "parking_lot", "paste", + "pco", "pin-project-lite", "prost", "sketches-ddsketch", @@ -1989,6 +1993,7 @@ dependencies = [ "vortex-flatbuffers", "vortex-mask", "vortex-metrics", + "vortex-pco", "vortex-scalar", "vortex-sequence", "vortex-utils", From afc822f8d5374502b5de1bc835a029f7a681874c Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 16 Jul 2025 12:12:22 -0400 Subject: [PATCH 11/11] components Signed-off-by: Andrew Duffy --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 390c0b68acb..5ddf07b4075 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -468,6 +468,7 @@ jobs: with: toolchain: nightly targets: "wasm32-wasip1" + components: "rust-src" - name: Setup Wasmer uses: wasmerio/setup-wasmer@v3.1 - run: cargo +nightly -Zbuild-std=panic_abort,std build --target wasm32-wasip1