bench-vortex/src/realnest/gharchive.rs (3 changes: 2 additions & 1 deletion)
@@ -34,7 +34,7 @@ fn raw_json_url(hour: usize) -> String {
 }
 
 const QUERIES: &[&str] = &[
-    "select * from events where payload.ref = 'refs/heads/main'",
+    "select count(*) from events where payload.ref = 'refs/heads/main'",
     "select distinct repo.name from events where repo.name like 'spiraldb/%'",
     "select distinct org.id as org_id from events order by org_id limit 100",
     "select actor.login, count() as freq from events group by actor.login order by freq desc limit 10",
@@ -253,5 +253,6 @@ pub async fn register_table(
         .await?;
     let listing_table = Arc::new(ListingTable::try_new(config)?);
     session.register_table("events", listing_table)?;
+    info!("finished registering table for GHARCHIVE: {base_url}");
     Ok(())
 }
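
Switching the first benchmark query from `select *` to `select count(*)` lets the engine answer it as a pure aggregate instead of materializing every nested row of `events`. Below is a minimal sketch of how the rewritten query would run once `register_table` completes, assuming `session` is a DataFusion `SessionContext` (which the `ListingTable` registration implies); the `run_count_query` helper is hypothetical:

```rust
use datafusion::error::Result;
use datafusion::prelude::SessionContext;

// Hypothetical harness: assumes the PR's `register_table` has already
// registered the GHArchive listing table as `events` on this context.
async fn run_count_query(session: &SessionContext) -> Result<()> {
    // The rewritten first benchmark query: a pure aggregate, so the engine
    // no longer has to materialize every nested row as `select *` did.
    let df = session
        .sql("select count(*) from events where payload.ref = 'refs/heads/main'")
        .await?;
    for batch in df.collect().await? {
        println!("{batch:?}");
    }
    Ok(())
}
```
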
vortex-btrblocks/src/rle.rs (58 changes: 29 additions & 29 deletions)
@@ -8,8 +8,6 @@ use vortex_array::arrays::PrimitiveArray;
 use vortex_array::{ArrayRef, IntoArray, ToCanonical};
 use vortex_error::VortexResult;
 use vortex_fastlanes::RLEArray;
-#[cfg(feature = "unstable_encodings")]
-use {crate::Compressor, crate::integer::IntCode};
 
 use crate::integer::IntCompressor;
 use crate::{CompressorStats, Scheme, estimate_compression_ratio_with_sampling};
@@ -114,17 +112,19 @@
         &new_excludes,
     )?;
 
+    // NOTE(aduffy): this encoding appears to be faulty, and was causing Undefined Behavior
+    // checks to trigger in the gharchive benchmark dataset decompression.
+    // Delta is an unstable encoding; once we deem it stable we can switch over to this always.
-    #[cfg(feature = "unstable_encodings")]
-    // For indices and offsets, we always use integer compression without dictionary encoding.
-    let compressed_indices = try_compress_delta(
-        &rle_array.indices().to_primitive().narrow()?,
-        is_sample,
-        allowed_cascading - 1,
-        &[],
-    )?;
-
-    #[cfg(not(feature = "unstable_encodings"))]
+    // #[cfg(feature = "unstable_encodings")]
+    // // For indices and offsets, we always use integer compression without dictionary encoding.
+    // let compressed_indices = try_compress_delta(
+    //     &rle_array.indices().to_primitive().narrow()?,
+    //     is_sample,
+    //     allowed_cascading - 1,
+    //     &[],
+    // )?;
 
+    // #[cfg(not(feature = "unstable_encodings"))]
     let compressed_indices = IntCompressor::compress_no_dict(
         &rle_array.indices().to_primitive().narrow()?,
         is_sample,
@@ -154,20 +154,20 @@
     }
 }
 
-#[cfg(feature = "unstable_encodings")]
-fn try_compress_delta(
-    primitive_array: &PrimitiveArray,
-    is_sample: bool,
-    allowed_cascading: usize,
-    excludes: &[IntCode],
-) -> VortexResult<ArrayRef> {
-    use vortex_fastlanes::{DeltaArray, delta_compress};
-
-    let (bases, deltas) = delta_compress(primitive_array)?;
-    let compressed_bases = IntCompressor::compress(&bases, is_sample, allowed_cascading, excludes)?;
-    let compressed_deltas =
-        IntCompressor::compress_no_dict(&deltas, is_sample, allowed_cascading, excludes)?;
-
-    DeltaArray::try_from_delta_compress_parts(compressed_bases, compressed_deltas)
-        .map(DeltaArray::into_array)
-}
+// #[cfg(feature = "unstable_encodings")]
+// fn try_compress_delta(
+//     primitive_array: &PrimitiveArray,
+//     is_sample: bool,
+//     allowed_cascading: usize,
+//     excludes: &[IntCode],
+// ) -> VortexResult<ArrayRef> {
+//     use vortex_fastlanes::{DeltaArray, delta_compress};
+//
+//     let (bases, deltas) = delta_compress(primitive_array)?;
+//     let compressed_bases = IntCompressor::compress(&bases, is_sample, allowed_cascading, excludes)?;
+//     let compressed_deltas =
+//         IntCompressor::compress_no_dict(&deltas, is_sample, allowed_cascading, excludes)?;
+//
+//     DeltaArray::try_from_delta_compress_parts(compressed_bases, compressed_deltas)
+//         .map(DeltaArray::into_array)
+// }
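
For context on what the disabled path was doing: an RLE array's indices and offsets are non-decreasing run positions, which is exactly the shape delta encoding exploits, since consecutive gaps bit-pack into far fewer bits than the absolute positions. The sketch below is a toy scalar version of the bases-plus-deltas split that `delta_compress` performs, written against plain vectors rather than Vortex arrays (the function name is hypothetical, and the real FastLanes kernel is block- and SIMD-oriented):

```rust
/// Toy, scalar version of a bases-plus-deltas split on plain vectors.
/// Illustrative only: `split_bases_deltas` is a hypothetical name, not the
/// Vortex/FastLanes API.
fn split_bases_deltas(values: &[u32]) -> (u32, Vec<u32>) {
    let base = values.first().copied().unwrap_or(0);
    // Run positions are non-decreasing, so every delta is non-negative and
    // small relative to the absolute offsets, which is what makes the
    // downstream bit-packing effective.
    let deltas = values.windows(2).map(|w| w[1] - w[0]).collect();
    (base, deltas)
}

fn main() {
    // Run-end offsets for four runs covering a 1200-element array.
    let offsets = [0u32, 300, 650, 1200];
    let (base, deltas) = split_bases_deltas(&offsets);
    assert_eq!(base, 0);
    assert_eq!(deltas, [300, 350, 550]);
    println!("base={base}, deltas={deltas:?}");
}
```

Until the undefined behavior in the decode path is tracked down, routing the indices through `compress_no_dict` is the conservative trade: it likely gives up some ratio on long monotone index sequences, but it keeps decompression on the well-tested integer path.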