diff --git a/bench-vortex/src/realnest/gharchive.rs b/bench-vortex/src/realnest/gharchive.rs
index 020469a0dea..ae4008ff141 100644
--- a/bench-vortex/src/realnest/gharchive.rs
+++ b/bench-vortex/src/realnest/gharchive.rs
@@ -34,7 +34,7 @@ fn raw_json_url(hour: usize) -> String {
 }
 
 const QUERIES: &[&str] = &[
-    "select * from events where payload.ref = 'refs/heads/main'",
+    "select count(*) from events where payload.ref = 'refs/heads/main'",
     "select distinct repo.name from events where repo.name like 'spiraldb/%'",
     "select distinct org.id as org_id from events order by org_id limit 100",
     "select actor.login, count() as freq from events group by actor.login order by freq desc limit 10",
@@ -253,5 +253,6 @@ pub async fn register_table(
         .await?;
     let listing_table = Arc::new(ListingTable::try_new(config)?);
     session.register_table("events", listing_table)?;
+    info!("finished registering table for GHARCHIVE: {base_url}");
     Ok(())
 }
diff --git a/vortex-btrblocks/src/rle.rs b/vortex-btrblocks/src/rle.rs
index c6004642585..a6260342d31 100644
--- a/vortex-btrblocks/src/rle.rs
+++ b/vortex-btrblocks/src/rle.rs
@@ -8,8 +8,6 @@ use vortex_array::arrays::PrimitiveArray;
 use vortex_array::{ArrayRef, IntoArray, ToCanonical};
 use vortex_error::VortexResult;
 use vortex_fastlanes::RLEArray;
-#[cfg(feature = "unstable_encodings")]
-use {crate::Compressor, crate::integer::IntCode};
 
 use crate::integer::IntCompressor;
 use crate::{CompressorStats, Scheme, estimate_compression_ratio_with_sampling};
@@ -114,17 +112,19 @@ where
         &new_excludes,
     )?;
 
+    // NOTE(aduffy): this encoding appears to be faulty, and was causing Undefined Behavior
+    // checks to trigger in the gharchive benchmark dataset decompression.
     // Delta in an unstable encoding, once we deem it stable we can switch over to this always.
-    #[cfg(feature = "unstable_encodings")]
-    // For indices and offsets, we always use integer compression without dictionary encoding.
-    let compressed_indices = try_compress_delta(
-        &rle_array.indices().to_primitive().narrow()?,
-        is_sample,
-        allowed_cascading - 1,
-        &[],
-    )?;
-
-    #[cfg(not(feature = "unstable_encodings"))]
+    // #[cfg(feature = "unstable_encodings")]
+    // // For indices and offsets, we always use integer compression without dictionary encoding.
+    // let compressed_indices = try_compress_delta(
+    //     &rle_array.indices().to_primitive().narrow()?,
+    //     is_sample,
+    //     allowed_cascading - 1,
+    //     &[],
+    // )?;
+
+    // #[cfg(not(feature = "unstable_encodings"))]
     let compressed_indices = IntCompressor::compress_no_dict(
         &rle_array.indices().to_primitive().narrow()?,
         is_sample,
@@ -154,20 +154,20 @@ where
     }
 }
 
-#[cfg(feature = "unstable_encodings")]
-fn try_compress_delta(
-    primitive_array: &PrimitiveArray,
-    is_sample: bool,
-    allowed_cascading: usize,
-    excludes: &[IntCode],
-) -> VortexResult<ArrayRef> {
-    use vortex_fastlanes::{DeltaArray, delta_compress};
-
-    let (bases, deltas) = delta_compress(primitive_array)?;
-    let compressed_bases = IntCompressor::compress(&bases, is_sample, allowed_cascading, excludes)?;
-    let compressed_deltas =
-        IntCompressor::compress_no_dict(&deltas, is_sample, allowed_cascading, excludes)?;
-
-    DeltaArray::try_from_delta_compress_parts(compressed_bases, compressed_deltas)
-        .map(DeltaArray::into_array)
-}
+// #[cfg(feature = "unstable_encodings")]
+// fn try_compress_delta(
+//     primitive_array: &PrimitiveArray,
+//     is_sample: bool,
+//     allowed_cascading: usize,
+//     excludes: &[IntCode],
+// ) -> VortexResult<ArrayRef> {
+//     use vortex_fastlanes::{DeltaArray, delta_compress};
+//
+//     let (bases, deltas) = delta_compress(primitive_array)?;
+//     let compressed_bases = IntCompressor::compress(&bases, is_sample, allowed_cascading, excludes)?;
+//     let compressed_deltas =
+//         IntCompressor::compress_no_dict(&deltas, is_sample, allowed_cascading, excludes)?;
+//
+//     DeltaArray::try_from_delta_compress_parts(compressed_bases, compressed_deltas)
+//         .map(DeltaArray::into_array)
+// }
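For anyone wanting to exercise the changed query list locally, a minimal sketch of driving QUERIES against the table that register_table sets up. run_gharchive_queries is a hypothetical helper written for this note, and it assumes `session` is the DataFusion SessionContext implied by the ListingTable/register_table calls above:

use datafusion::error::Result;
use datafusion::prelude::SessionContext;

// Hypothetical driver: runs each benchmark query against the "events" table
// that `register_table` has already registered on this SessionContext.
async fn run_gharchive_queries(session: &SessionContext) -> Result<()> {
    for query in QUERIES {
        // `sql` plans the query; `collect` executes it and buffers the results.
        let batches = session.sql(query).await?.collect().await?;
        println!("{query}: {} result batches", batches.len());
    }
    Ok(())
}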