10 changes: 8 additions & 2 deletions .github/actions/setup-rust/action.yml
@@ -2,6 +2,12 @@ name: "Setup Rust"
description: "Toolchain setup and Initial compilation"

inputs:
toolchain:
description: "optional override for the toolchain version (e.g. nightly)"
required: false
components:
description: "optional override for the components to install for the step (e.g. clippy, rustfmt, miri)"
required: false
targets:
description: "optional targets override (e.g. wasm32-unknown-unknown)"
required: false
@@ -22,9 +28,9 @@ runs:
uses: dtolnay/rust-toolchain@master
if: steps.rustup-cache.outputs.cache-hit != 'true'
with:
toolchain: "${{ steps.rust-version.outputs.version }}"
toolchain: "${{ inputs.toolchain || steps.rust-version.outputs.version }}"
targets: "${{inputs.targets || ''}}"
components: clippy, rustfmt, miri, llvm-tools-preview
components: "${{ inputs.components || 'clippy, rustfmt' }}"

- name: Rust Dependency Cache
uses: Swatinem/rust-cache@v2
24 changes: 12 additions & 12 deletions .github/workflows/ci.yml
@@ -314,7 +314,7 @@ jobs:
- name: Rust Tests
if: ${{ matrix.suite == 'tests' }}
run: |
cargo nextest run --locked --workspace --all-features --no-fail-fast
cargo +nightly nextest run --locked --workspace --all-features --no-fail-fast
- name: Run TPC-H
if: ${{ matrix.suite == 'tpc-h' }}
run: |
@@ -364,14 +364,17 @@ jobs:
with:
submodules: "recursive"
- uses: ./.github/actions/setup-rust
with:
toolchain: nightly
components: "rust-src, rustfmt, clippy, llvm-tools-preview"
- name: Install nextest
uses: taiki-e/install-action@v2
with:
tool: nextest@0.9.98
- name: Rust Tests
# vortex-duckdb-ext currently fails linking for cargo test targets.
run: |
cargo nextest run --locked --workspace --all-features --no-fail-fast --target x86_64-unknown-linux-gnu
cargo +nightly nextest run --locked --workspace --all-features --no-fail-fast --target x86_64-unknown-linux-gnu
Contributor: interesting that this uses nightly, I thought nextest would be fine on stable

Contributor (Author): This is only for the Rust testing with ASAN job. That requires the unstable `-Z sanitizer` flag.
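To unpack that answer: rustc enables sanitizers via `-Z` flags (typically `RUSTFLAGS="-Zsanitizer=address"`), and `-Z` flags are only accepted by nightly toolchains, hence the `+nightly` pin. A minimal sketch, assuming the standard RUSTFLAGS wiring (the env setup lives outside this hunk), of the kind of bug the instrumented run catches:

```rust
// Build with: RUSTFLAGS="-Zsanitizer=address" cargo +nightly build
// ASAN aborts this program with a heap-use-after-free report.
fn main() {
    let dangling: *const u8;
    {
        let boxed = Box::new(42u8);
        dangling = &*boxed as *const u8;
    } // `boxed` is dropped and its heap allocation freed here
    let _value = unsafe { *dangling }; // read of freed memory: ASAN fires
}
```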


build-java:
name: "Java"
@@ -463,10 +466,12 @@ jobs:
- uses: actions/checkout@v4
- uses: ./.github/actions/setup-rust
with:
toolchain: nightly
targets: "wasm32-wasip1"
components: "rust-src"
- name: Setup Wasmer
uses: wasmerio/setup-wasmer@v3.1
- run: cargo -Zbuild-std=panic_abort,std build --target wasm32-wasip1
- run: cargo +nightly -Zbuild-std=panic_abort,std build --target wasm32-wasip1
working-directory: ./wasm-test
- run: wasmer run ./target/wasm32-wasip1/debug/wasm-test.wasm
working-directory: ./wasm-test
@@ -484,19 +489,14 @@ jobs:
with:
submodules: "recursive"
- uses: ./.github/actions/setup-rust
with:
toolchain: nightly
components: "rust-src, rustfmt, clippy, miri"
- uses: taiki-e/install-action@v2
with:
tool: nextest@0.9.98
- name: Run all tests with Miri
# For now, we only run Miri against known "fiddly" crates.
if: false
run: |
cargo miri nextest run --no-fail-fast --workspace --exclude vortex-file --exclude vortex-layout \
--exclude vortex-fsst --exclude vortex-array --exclude vortex-dtype --exclude vortex-expr \
--exclude vortex-scalar --exclude vortex-duckdb --exclude vortex-ffi --exclude bench-vortex \
--exclude vortex-python --exclude vortex-jni
- name: Run Miri
run: cargo miri nextest run --no-fail-fast -p vortex-buffer -p vortex-ffi
run: cargo +nightly miri nextest run --no-fail-fast -p vortex-buffer -p vortex-ffi
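As with the ASAN job above, the `+nightly` pin is required because Miri is a nightly-only component. For context, a minimal sketch (not from this repo) of the class of undefined behavior Miri's interpreter catches in these tests:

```rust
// Run with: cargo +nightly miri run
// Miri halts with an uninitialized-memory diagnostic on the line below.
use std::mem::MaybeUninit;

fn main() {
    let uninit: MaybeUninit<u32> = MaybeUninit::uninit();
    // UB: claiming uninitialized bytes are a valid u32. A normal build may
    // appear to work; under Miri this is a hard error.
    let _value = unsafe { uninit.assume_init() };
}
```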

generated-files:
name: "Check generated proto/fbs files are up to date"
2 changes: 2 additions & 0 deletions .github/workflows/fuzz.yml
@@ -24,6 +24,8 @@ jobs:
with:
submodules: "recursive"
- uses: ./.github/actions/setup-rust
with:
toolchain: nightly
- name: Install cargo fuzz
run: cargo install --locked cargo-fuzz
- name: Restore corpus
1 change: 0 additions & 1 deletion Cargo.lock


6 changes: 5 additions & 1 deletion Cargo.toml
@@ -49,7 +49,7 @@ keywords = ["vortex"]
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/spiraldb/vortex"
rust-version = "1.86"
rust-version = "1.87"
version = "0.1.0"

[workspace.dependencies]
@@ -238,6 +238,10 @@ redundant_lifetimes = "deny"
unsafe_op_in_unsafe_fn = "deny"
unused_lifetimes = "deny"
unused_qualifications = "deny"
unexpected_cfgs = { level = "deny", check-cfg = [
"cfg(codspeed)",
"cfg(vortex_nightly)",
] }
warnings = "deny"

[workspace.lints.clippy]
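For context on the new `unexpected_cfgs` entry: `check-cfg` declares the custom cfg names the build may legitimately set, so the (denied) lint fires only on unknown ones, turning a typo in a gate into a build failure instead of silently dead code. A minimal sketch using the cfg name from this diff (the function itself is illustrative):

```rust
// Active when built with RUSTFLAGS="--cfg vortex_nightly"; otherwise the
// stable fallback below is compiled. Because Cargo.toml declares
// `cfg(vortex_nightly)` under check-cfg, a misspelled gate such as
// #[cfg(vortex_nightlly)] would trip the denied unexpected_cfgs lint.
#[cfg(vortex_nightly)]
pub fn simd_kernel() -> &'static str {
    "nightly path"
}

#[cfg(not(vortex_nightly))]
pub fn simd_kernel() -> &'static str {
    "stable fallback"
}
```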
2 changes: 1 addition & 1 deletion bench-vortex/src/datasets/mod.rs
@@ -106,7 +106,7 @@ impl BenchmarkDataset {
}

pub fn format_path(&self, format: Format, base_url: &Url) -> Result<Url> {
Ok(base_url.join(&format!("{}/", format))?)
Ok(base_url.join(&format!("{format}/"))?)
}

pub async fn register_tables(
4 changes: 2 additions & 2 deletions bench-vortex/src/engines/ddb/mod.rs
@@ -66,7 +66,7 @@ impl DuckDBCtx {

/// Execute DuckDB queries for benchmarks using the internal connection
pub fn execute_query(&self, query: &str) -> Result<(Duration, usize)> {
trace!("execute duckdb query: {}", query);
trace!("execute duckdb query: {query}");
let time_instant = Instant::now();
let result = self.connection.query(query)?;
let query_time = time_instant.elapsed();
@@ -104,7 +104,7 @@ impl DuckDBCtx {
// Generate and execute table registration commands
let commands = self.generate_table_commands(&effective_url, extension, dataset, object);
self.execute_query(&commands)?;
trace!("Executing table registration commands: {}", commands);
trace!("Executing table registration commands: {commands}");

Ok(())
}
2 changes: 1 addition & 1 deletion bench-vortex/src/tpch/tpch_benchmark.rs
@@ -286,7 +286,7 @@ impl TpcHBenchmark {
ChangeTag::Insert => "+",
ChangeTag::Equal => " ",
};
print!("{}{}", sign, change);
print!("{sign}{change}");
}

eprintln!("query output does not match the reference for {query_name}");
12 changes: 7 additions & 5 deletions clippy.toml
@@ -2,10 +2,12 @@ allow-dbg-in-tests = true
allow-expect-in-tests = true
allow-unwrap-in-tests = true
single-char-binding-names-threshold = 2
# We prefer using parking_lot for improved ergonomics and performance.
disallowed-types = ["std::collections::HashMap", "std::collections::HashSet", "std::sync::Mutex", "std::sync::RwLock"]
disallowed-types = [
{ path = "std::collections::HashMap", reason = "Use the HashMap in vortex_utils::aliases for better performance" },
{ path = "std::collections::HashSet", reason = "Use the HashSet in vortex_utils::aliases for better performance" },
{ path = "std::sync::Mutex", reason = "Prefer using parking_lot Mutex for improved ergonomics and performance" },
{ path = "std::sync::RwLock", reason = "Prefer using parking_lot RwLock for improved ergonomics and performance" }]

disallowed-methods = [
# It uses the default hasher and is very easy to just inline with a faster implementation
"itertools::Itertools::counts"
]
{ path = "itertools::Itertools::counts", reason = "It uses the default hasher which is slow for primitives. Just inline the loop for better performance." }
]
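The structured entries also change what developers see: clippy now prints each `reason` alongside the violation. A short sketch of a conforming use site, assuming `vortex_utils::aliases` re-exports a faster-hashing `HashMap` (the exact re-export path is an assumption here); it also inlines the counting loop as the `counts` reason suggests:

```rust
// Assumed re-export; clippy.toml points here instead of std::collections.
use vortex_utils::aliases::hash_map::HashMap;

/// Count occurrences without itertools::Itertools::counts, which is
/// disallowed above because it always uses the default (slower) hasher.
fn value_counts(values: &[u32]) -> HashMap<u32, usize> {
    let mut counts = HashMap::default();
    for v in values {
        *counts.entry(*v).or_insert(0) += 1;
    }
    counts
}
```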
2 changes: 1 addition & 1 deletion encodings/alp/src/alp/mod.rs
@@ -216,7 +216,7 @@ fn encode_chunk_unchecked<T: ALPFloat>(

// encode the chunk, counting the number of patches
let mut chunk_patch_count = 0;
encoded_output.extend(chunk.iter().map(|&v| {
encoded_output.extend_trusted(chunk.iter().map(|&v| {
let encoded = T::encode_single_unchecked(v, exp);
let decoded = T::decode_single(encoded, exp);
let neq = !decoded.is_eq(v) as usize;
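`extend` goes through the general `Extend` machinery, which re-checks capacity per item; `extend_trusted` (used here and in the fastlanes and zstd hunks below) is the buffer fast path for iterators whose reported length is exact, as a slice's `iter().map(...)` is. A toy model of the assumed contract:

```rust
/// Toy sketch of extend_trusted semantics: the caller guarantees the
/// iterator's size_hint upper bound is exact, so one reserve up front
/// covers every push and no per-item growth checks are needed.
fn extend_trusted<T>(buf: &mut Vec<T>, iter: impl Iterator<Item = T>) {
    let (low, high) = iter.size_hint();
    debug_assert_eq!(Some(low), high, "length must be exactly known");
    buf.reserve(low);
    for item in iter {
        buf.push(item); // cannot reallocate, thanks to the reserve above
    }
}
```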
4 changes: 2 additions & 2 deletions encodings/fastlanes/src/bitpacking/compute/filter.rs
@@ -133,14 +133,14 @@ fn filter_indices<T: NativePType + BitPacking + ArrowNativeType>(
let dst: &mut [T] = mem::transmute(dst);
BitPacking::unchecked_unpack(bit_width, packed, dst);
}
values.extend(
values.extend_trusted(
indices_within_chunk
.iter()
.map(|&idx| unsafe { unpacked.get_unchecked(idx).assume_init() }),
);
} else {
// Otherwise, unpack each element individually.
values.extend(indices_within_chunk.iter().map(|&idx| unsafe {
values.extend_trusted(indices_within_chunk.iter().map(|&idx| unsafe {
BitPacking::unchecked_unpack_single(bit_width, packed, idx)
}));
}
40 changes: 17 additions & 23 deletions encodings/fastlanes/src/delta/compress.rs
@@ -2,7 +2,7 @@
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use arrayref::{array_mut_ref, array_ref};
use fastlanes::{Delta, Transpose};
use fastlanes::{Delta, FastLanes, Transpose};
use num_traits::{WrappingAdd, WrappingSub};
use vortex_array::ToCanonical;
use vortex_array::arrays::PrimitiveArray;
@@ -20,7 +20,8 @@ pub fn delta_compress(array: &PrimitiveArray) -> VortexResult<(PrimitiveArray, P

// Compress the filled array
let (bases, deltas) = match_each_unsigned_integer_ptype!(array.ptype(), |T| {
let (bases, deltas) = compress_primitive(array.as_slice::<T>());
const LANES: usize = T::LANES;
let (bases, deltas) = compress_primitive::<T, LANES>(array.as_slice::<T>());
let (base_validity, delta_validity) =
if array.validity().nullability() != Nullability::NonNullable {
(Validity::AllValid, Validity::AllValid)
@@ -37,12 +38,9 @@
Ok((bases, deltas))
}

fn compress_primitive<T: NativePType + Delta + Transpose + WrappingSub>(
fn compress_primitive<T: NativePType + Delta + Transpose + WrappingSub, const LANES: usize>(
array: &[T],
) -> (Buffer<T>, Buffer<T>)
where
[(); T::LANES]:,
{
) -> (Buffer<T>, Buffer<T>) {
// How many fastlanes vectors we will process.
let num_chunks = array.len() / 1024;

@@ -60,15 +58,15 @@ where
Transpose::transpose(chunk, &mut transposed);

// Initialize and store the base vector for each chunk
bases.extend(&transposed[0..T::LANES]);
bases.extend_from_slice(&transposed[0..T::LANES]);

deltas.reserve(1024);
let delta_len = deltas.len();
unsafe {
deltas.set_len(delta_len + 1024);
Delta::delta(
Delta::delta::<LANES>(
&transposed,
&*(transposed[0..T::LANES].as_ptr() as *const [_; T::LANES]),
&*(transposed[0..T::LANES].as_ptr().cast()),
array_mut_ref![deltas[delta_len..], 0, 1024],
);
}
@@ -101,8 +99,10 @@ pub fn delta_decompress(array: &DeltaArray) -> VortexResult<PrimitiveArray> {
let bases = array.bases().to_primitive()?;
let deltas = array.deltas().to_primitive()?;
let decoded = match_each_unsigned_integer_ptype!(deltas.ptype(), |T| {
const LANES: usize = T::LANES;

PrimitiveArray::new(
decompress_primitive::<T>(bases.as_slice(), deltas.as_slice()),
decompress_primitive::<T, LANES>(bases.as_slice(), deltas.as_slice()),
array.validity().clone(),
)
});
@@ -114,19 +114,13 @@

// TODO(ngates): can we re-use the deltas buffer for the result? Might be tricky given the
// traversal ordering, but possibly doable.
fn decompress_primitive<T: NativePType + Delta + Transpose + WrappingAdd>(
fn decompress_primitive<T: NativePType + Delta + Transpose + WrappingAdd, const LANES: usize>(
bases: &[T],
deltas: &[T],
) -> Buffer<T>
where
[(); T::LANES]:,
{
) -> Buffer<T> {
// How many fastlanes vectors we will process.
let num_chunks = deltas.len() / 1024;

// How long each base vector will be.
let lanes = T::LANES;

// Allocate a result array.
let mut output = BufferMut::with_capacity(deltas.len());

@@ -139,9 +133,9 @@ where
let chunk: &[T; 1024] = array_ref![deltas, start_elem, 1024];

// Initialize the base vector for this chunk
Delta::undelta(
Delta::undelta::<LANES>(
chunk,
unsafe { &*(bases[i * lanes..(i + 1) * lanes].as_ptr() as *const [_; T::LANES]) },
unsafe { &*(bases[i * LANES..(i + 1) * LANES].as_ptr().cast()) },
&mut transposed,
);

@@ -156,8 +150,8 @@ where
let remainder_size = deltas.len() % 1024;
if remainder_size > 0 {
let chunk = &deltas[num_chunks * 1024..];
assert_eq!(bases.len(), num_chunks * lanes + 1);
let mut base_scalar = bases[num_chunks * lanes];
assert_eq!(bases.len(), num_chunks * LANES + 1);
let mut base_scalar = bases[num_chunks * LANES];
for next_diff in chunk {
let next = next_diff.wrapping_add(&base_scalar);
output.push(next);
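The shape of this refactor: `where [(); T::LANES]:` relied on the unstable `generic_const_exprs` feature (removed from `lib.rs` below), so the lane count is now threaded through as an explicit `const LANES: usize` parameter. That works because `match_each_unsigned_integer_ptype!` expands each arm with a concrete `T`, making `T::LANES` an ordinary constant at the call site. A minimal sketch of the pattern (the trait and values are illustrative stand-ins for fastlanes):

```rust
trait Lanes {
    const LANES: usize;
}
impl Lanes for u32 {
    const LANES: usize = 32; // illustrative, not the real fastlanes value
}

// No `where [(); T::LANES]:` bound needed: LANES arrives as a parameter.
fn compress<T: Lanes + Copy, const LANES: usize>(chunk: &[T]) -> Vec<T> {
    chunk[..LANES].to_vec() // stand-in for the per-lane base extraction
}

// Mirrors what the ptype-matching macro expands to for a concrete type:
fn dispatch(chunk: &[u32]) -> Vec<u32> {
    const LANES: usize = <u32 as Lanes>::LANES;
    compress::<u32, LANES>(chunk)
}
```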
2 changes: 0 additions & 2 deletions encodings/fastlanes/src/lib.rs
@@ -1,9 +1,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#![allow(incomplete_features)]
#![allow(clippy::cast_possible_truncation)]
#![feature(generic_const_exprs)]

pub use bitpacking::*;
pub use delta::*;
4 changes: 2 additions & 2 deletions encodings/fsst/src/canonical.rs
@@ -26,8 +26,8 @@ impl CanonicalVTable<FSSTVTable> for FSSTVTable {
fsst_into_varbin_view(array.decompressor(), array, builder.completed_block_count())?;

builder.push_buffer_and_adjusted_views(
view.buffers().iter().cloned(),
view.views().iter().cloned(),
view.buffers(),
view.views(),
array.validity_mask()?,
);
Ok(())
3 changes: 1 addition & 2 deletions encodings/sequence/src/array.rs
@@ -61,8 +61,7 @@ impl SequenceArray {

Self::try_last(base, multiplier, ptype, length).map_err(|e| {
e.with_context(format!(
"final value not expressible, base = {:?}, multiplier = {:?}, len = {} ",
base, multiplier, length
"final value not expressible, base = {base:?}, multiplier = {multiplier:?}, len = {length} ",
))
})?;

5 changes: 3 additions & 2 deletions encodings/zstd/src/array.rs
@@ -115,8 +115,9 @@ fn collect_valid_vbv(vbv: &VarBinViewArray) -> VortexResult<(ByteBuffer, Vec<usi
for value in iterator.flatten() {
value_byte_indices.push(buffer.len());
// here's where we write the string lengths
buffer.extend(ViewLen::try_from(value.len())?.to_le_bytes());
buffer.extend(value);
buffer
.extend_trusted(ViewLen::try_from(value.len())?.to_le_bytes().into_iter());
buffer.extend_from_slice(value);
}
Ok::<_, VortexError>(())
})??;
7 changes: 6 additions & 1 deletion fuzz/fuzz_targets/array_ops.rs
@@ -2,7 +2,9 @@
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#![no_main]
#![allow(clippy::unwrap_used)]
#![allow(clippy::unwrap_used, clippy::result_large_err)]

use std::backtrace::Backtrace;

use libfuzzer_sys::{Corpus, fuzz_target};
use vortex_array::arrays::{
@@ -108,6 +110,7 @@ fn assert_search_sorted(
side,
search_result,
step,
Backtrace::capture(),
))
} else {
Ok(())
@@ -123,6 +126,7 @@ fn assert_array_eq(lhs: &ArrayRef, rhs: &ArrayRef, step: usize) -> VortexFuzzRes
lhs.to_array(),
rhs.to_array(),
step,
Backtrace::capture(),
));
}
for idx in 0..lhs.len() {
@@ -137,6 +141,7 @@
lhs.clone(),
rhs.clone(),
step,
Backtrace::capture(),
));
}
}
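Threading `Backtrace::capture()` through each failure variant means the fuzz report can point at the assertion site rather than just the libfuzzer frame. A minimal sketch of the assumed error shape (the real fuzz error variants are not shown in this hunk); note that `capture()` only collects frames when `RUST_BACKTRACE` or `RUST_LIB_BACKTRACE` is set, while `force_capture()` ignores those variables:

```rust
use std::backtrace::Backtrace;

// Illustrative stand-in for the fuzz error type in this target.
#[derive(Debug)]
enum FuzzError {
    ArrayMismatch { step: usize, backtrace: Backtrace },
}

fn assert_arrays_eq(equal: bool, step: usize) -> Result<(), FuzzError> {
    if equal {
        Ok(())
    } else {
        Err(FuzzError::ArrayMismatch {
            step,
            backtrace: Backtrace::capture(), // cheap no-op unless enabled
        })
    }
}
```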