Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions encodings/zstd/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,10 @@ vortex-vector = { workspace = true }
zstd = { workspace = true }

[dev-dependencies]
divan = { workspace = true }
rstest = { workspace = true }
vortex-array = { workspace = true, features = ["test-harness"] }

[[bench]]
name = "listview_rebuild"
harness = false
36 changes: 36 additions & 0 deletions encodings/zstd/benches/listview_rebuild.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#![allow(clippy::unwrap_used)]

use divan::Bencher;
use vortex_array::IntoArray;
use vortex_array::arrays::{ListViewArray, ListViewRebuildMode, VarBinViewArray};
use vortex_array::validity::Validity;
use vortex_buffer::Buffer;
use vortex_zstd::ZstdArray;

#[divan::bench]
fn rebuild_naive(bencher: Bencher) {
let dudes = VarBinViewArray::from_iter_str(["Washington", "Adams", "Jefferson", "Madison"])
.into_array();
let dudes = ZstdArray::from_array(dudes, 9, 1024).unwrap().into_array();

let offsets = std::iter::repeat_n(0u32, 1024)
.collect::<Buffer<u32>>()
.into_array();
let sizes = [0u64, 1, 2, 3, 4]
.into_iter()
.cycle()
.take(1024)
.collect::<Buffer<u64>>()
.into_array();

let list_view = ListViewArray::new(dudes, offsets, sizes, Validity::NonNullable);

bencher.bench_local(|| list_view.rebuild(ListViewRebuildMode::MakeZeroCopyToList))
}

fn main() {
divan::main()
}
81 changes: 63 additions & 18 deletions vortex-array/src/arrays/listview/rebuild.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use num_traits::FromPrimitive;
use vortex_buffer::BufferMut;
use vortex_dtype::{IntegerPType, Nullability, match_each_integer_ptype};
use vortex_error::VortexExpect;
use vortex_scalar::Scalar;

use crate::arrays::ListViewArray;
use crate::builders::{ArrayBuilder, ListViewBuilder};
use crate::arrays::{ChunkedArray, ListViewArray};
use crate::vtable::ValidityHelper;
use crate::{Array, compute};
use crate::{Array, IntoArray, ToCanonical, compute};

/// Modes for rebuilding a [`ListViewArray`].
pub enum ListViewRebuildMode {
Expand Down Expand Up @@ -74,35 +74,80 @@ impl ListViewArray {
let offsets_ptype = self.offsets().dtype().as_ptype();
let sizes_ptype = self.sizes().dtype().as_ptype();

match_each_integer_ptype!(offsets_ptype, |O| {
match_each_integer_ptype!(sizes_ptype, |S| { self.naive_rebuild::<O, S>() })
match_each_integer_ptype!(sizes_ptype, |S| {
match offsets_ptype {
PType::U8 => self.naive_rebuild::<u8, u32, S>(),
PType::U16 => self.naive_rebuild::<u16, u32, S>(),
PType::U32 => self.naive_rebuild::<u32, u32, S>(),
PType::U64 => self.naive_rebuild::<u64, u64, S>(),
PType::I8 => self.naive_rebuild::<i8, i32, S>(),
PType::I16 => self.naive_rebuild::<i16, i32, S>(),
PType::I32 => self.naive_rebuild::<i32, i32, S>(),
PType::I64 => self.naive_rebuild::<i64, i64, S>(),
_ => unreachable!("invalid offsets PType"),
}
})
}

/// The inner function for `rebuild_zero_copy_to_list`, which naively rebuilds a `ListViewArray`
/// via `append_scalar`.
fn naive_rebuild<O: IntegerPType, S: IntegerPType>(&self) -> ListViewArray {
fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
&self,
) -> ListViewArray {
let element_dtype = self
.dtype()
.as_list_element_opt()
.vortex_expect("somehow had a canonical list that was not a list");

let mut builder = ListViewBuilder::<O, S>::with_capacity(
element_dtype.clone(),
self.dtype().nullability(),
self.elements().len(),
self.len(),
);
// Upfront canonicalize the list elements, we're going to be doing a lot of
// slicing with them.
let elements_canonical = self.elements().to_canonical().into_array();
let offsets_canonical = self.offsets().to_primitive();
let sizes_canonical = self.sizes().to_primitive();

let offsets_canonical = offsets_canonical.as_slice::<O>();
let sizes_canonical = sizes_canonical.as_slice::<S>();

let mut offsets = BufferMut::<NewOffset>::with_capacity(self.len());
let mut sizes = BufferMut::<S>::with_capacity(self.len());

let mut chunks = Vec::with_capacity(self.len());

let mut n_elements = NewOffset::zero();

for i in 0..self.len() {
let list = self.scalar_at(i);
for index in 0..self.len() {
if !self.is_valid(index) {
offsets.push(offsets.last().copied().unwrap_or_default());
sizes.push(S::zero());
continue;
}

builder
.append_scalar(&list)
.vortex_expect("was unable to extend the `ListViewBuilder`")
let offset = offsets_canonical[index];
let size = sizes_canonical[index];

let start = offset.as_();
let stop = start + size.as_();

chunks.push(elements_canonical.slice(start..stop));
offsets.push(n_elements);
sizes.push(size);

n_elements += num_traits::cast(size).vortex_expect("cast");
}

builder.finish_into_listview()
let offsets = offsets.into_array();
let sizes = sizes.into_array();

// SAFETY: all chunks were sliced from the same array so have same DType.
let elements =
unsafe { ChunkedArray::new_unchecked(chunks, element_dtype.as_ref().clone()) };

ListViewArray::new(
elements.to_canonical().into_array(),
offsets,
sizes,
self.validity.clone(),
)
}

/// Rebuilds a [`ListViewArray`] by trimming any unused / unreferenced leading and trailing
Expand Down
21 changes: 0 additions & 21 deletions vortex-array/src/builders/listview.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,6 @@ impl<O: IntegerPType, S: IntegerPType> ListViewBuilder<O, S> {
elements_capacity: usize,
capacity: usize,
) -> Self {
// Validate that size type's maximum value fits within offset type's maximum value.
// Since offsets are non-negative, we only need to check max values.
assert!(
S::max_value_as_u64() <= O::max_value_as_u64(),
"Size type {:?} (max offset {}) must fit within offset type {:?} (max offset {})",
S::PTYPE,
S::max_value_as_u64(),
O::PTYPE,
O::max_value_as_u64()
);

let elements_builder = builder_with_capacity(&element_dtype, elements_capacity);

let offsets_builder =
Expand Down Expand Up @@ -628,14 +617,4 @@ mod tests {
.contains("null value to non-nullable")
);
}

#[test]
#[should_panic(
expected = "Size type I32 (max offset 2147483647) must fit within offset type I16 (max offset 32767)"
)]
fn test_error_invalid_type_combination() {
let dtype: Arc<DType> = Arc::new(I32.into());
// This should panic because i32 (4 bytes) cannot fit within i16 (2 bytes).
let _builder = ListViewBuilder::<i16, i32>::with_capacity(dtype, NonNullable, 0, 0);
}
}
Loading