Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support list in the fuzzer (only for implemented actions) #1735

Merged
merged 23 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ cargo-fuzz = true

[dependencies]
libfuzzer-sys = { workspace = true }
arrow-buffer = { workspace = true }
vortex-array = { workspace = true, features = ["arbitrary"] }
vortex-buffer = { workspace = true }
vortex-dtype = { workspace = true, features = ["arbitrary"] }
Expand Down
4 changes: 3 additions & 1 deletion fuzz/fuzz_targets/array_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
use libfuzzer_sys::{fuzz_target, Corpus};
use vortex_array::aliases::hash_set::HashSet;
use vortex_array::array::{
BoolEncoding, PrimitiveEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding,
BoolEncoding, ListEncoding, PrimitiveEncoding, StructEncoding, VarBinEncoding,
VarBinViewEncoding,
};
use vortex_array::compute::{
filter, scalar_at, search_sorted, slice, take, SearchResult, SearchSortedSide,
Expand Down Expand Up @@ -48,6 +49,7 @@ fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus {
&VarBinViewEncoding,
&BoolEncoding,
&StructEncoding,
&ListEncoding,
])
.contains(&current_array.encoding())
{
Expand Down
11 changes: 11 additions & 0 deletions fuzz/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ use vortex_buffer::Buffer;
use vortex_dtype::{match_each_native_ptype, DType};
use vortex_error::VortexExpect;

use crate::take::take_canonical_array;

pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData {
let validity = if array.dtype().is_nullable() {
let validity_buff = array
Expand Down Expand Up @@ -83,6 +85,15 @@ pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData {
.unwrap()
.into_array()
}
DType::List(..) => {
let mut indices = Vec::new();
for (idx, bool) in filter.iter().enumerate() {
if *bool {
indices.push(idx);
}
}
take_canonical_array(array, &indices)
}
_ => unreachable!("Not a canonical array"),
}
}
39 changes: 36 additions & 3 deletions fuzz/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,18 @@ mod take;

use std::fmt::Debug;
use std::iter;
use std::ops::Range;
use std::ops::{Range, RangeInclusive};

use libfuzzer_sys::arbitrary::Error::EmptyChoose;
use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured};
pub use sort::sort_canonical_array;
use vortex_array::aliases::hash_set::HashSet;
use vortex_array::array::ListEncoding;
use vortex_array::compute::{scalar_at, FilterMask, SearchResult, SearchSortedSide};
use vortex_array::{ArrayDType, ArrayData, IntoArrayData};
use vortex_array::encoding::{Encoding, EncodingRef};
use vortex_array::{
ArrayChildrenIterator, ArrayDType, ArrayData, IntoArrayData, NamedTreeCollector, ToArrayData,
};
use vortex_buffer::Buffer;
use vortex_sampling_compressor::SamplingCompressor;
use vortex_scalar::arbitrary::random_scalar;
Expand Down Expand Up @@ -64,10 +69,13 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction {
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
let array = ArrayData::arbitrary(u)?;
let mut current_array = array.clone();

let valid_actions = actions_for_array(&current_array);

let mut actions = Vec::new();
let action_count = u.int_in_range(1..=4)?;
for _ in 0..action_count {
actions.push(match u.int_in_range(0..=4)? {
actions.push(match random_value_from_list(u, valid_actions.as_slice())? {
0 => {
if actions
.last()
Expand Down Expand Up @@ -164,3 +172,28 @@ fn random_vec_in_range(u: &mut Unstructured<'_>, min: usize, max: usize) -> Resu
})
.collect::<Result<Vec<_>>>()
}

/// Draws one entry of `vec` using entropy taken from `u` and returns it by value.
///
/// Any error reported by `Unstructured::choose_iter` is propagated to the caller.
fn random_value_from_list(u: &mut Unstructured<'_>, vec: &[usize]) -> Result<usize> {
    let picked = u.choose_iter(vec)?;
    Ok(*picked)
}

/// Indices of every fuzz action; `actions_for_encoding` narrows this set for list arrays.
const ALL_ACTIONS: RangeInclusive<usize> = 0..=4;

/// Returns the set of action indices that may be applied to an array of the given encoding.
///
/// Every encoding other than `ListEncoding` gets all of `ALL_ACTIONS`; list arrays are
/// restricted to the actions that are implemented for them.
fn actions_for_encoding(encoding: EncodingRef) -> HashSet<usize> {
    if ListEncoding::ID != encoding.id() {
        return ALL_ACTIONS.collect();
    }

    // Lists only support compress (0), slice (1) and filter (4).
    let list_actions: [usize; 3] = [0, 1, 4];
    list_actions.iter().copied().collect()
}

/// Computes the action indices that are valid for `array`.
///
/// Starts from all of `ALL_ACTIONS` and, for each array yielded by
/// `ArrayChildrenIterator::new(array.to_array())`, keeps only the actions that
/// `actions_for_encoding` allows for that array's encoding.
fn actions_for_array(array: &ArrayData) -> Vec<usize> {
ArrayChildrenIterator::new(array.to_array())
.into_iter()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this can be just array.depth_first_traversal()

.map(|child| actions_for_encoding(child.encoding()))
.fold(ALL_ACTIONS.collect::<Vec<_>>(), |mut acc, actions| {
acc.retain(|a| actions.contains(a));
acc
})
}
6 changes: 6 additions & 0 deletions fuzz/src/search_sorted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,12 @@ pub fn search_sorted_canonical_array(
.collect::<Vec<_>>();
scalar_vals.search_sorted(&scalar.cast(array.dtype()).unwrap(), side)
}
DType::List(..) => {
let scalar_vals = (0..array.len())
.map(|i| scalar_at(array, i).unwrap())
.collect::<Vec<_>>();
scalar_vals.search_sorted(&scalar.cast(array.dtype()).unwrap(), side)
}
_ => unreachable!("Not a canonical array"),
}
}
45 changes: 38 additions & 7 deletions fuzz/src/slice.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use arrow_buffer::ArrowNativeType;
use vortex_array::accessor::ArrayAccessor;
use vortex_array::array::{BoolArray, PrimitiveArray, StructArray, VarBinViewArray};
use vortex_array::array::{BoolArray, ListArray, PrimitiveArray, StructArray, VarBinViewArray};
use vortex_array::validity::{ArrayValidity, Validity};
use vortex_array::variants::StructArrayTrait;
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};
use vortex_dtype::{match_each_native_ptype, DType};
use vortex_array::variants::{PrimitiveArrayTrait, StructArrayTrait};
use vortex_array::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant};
use vortex_dtype::{match_each_native_ptype, DType, NativePType};
use vortex_error::VortexExpect;

pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> ArrayData {
Expand All @@ -28,10 +29,12 @@ pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> Ar
.vortex_expect("Validity length cannot mismatch")
.into_array()
}
DType::Primitive(p, _) => match_each_native_ptype!(p, |$P| {
DType::Primitive(p, _) => {
let primitive_array = array.clone().into_primitive().unwrap();
PrimitiveArray::new(primitive_array.buffer::<$P>().slice(start..stop), validity).into_array()
}),
match_each_native_ptype!(p, |$P| {
PrimitiveArray::new(primitive_array.buffer::<$P>().slice(start..stop), validity).into_array()
})
}
DType::Utf8(_) | DType::Binary(_) => {
let utf8 = array.clone().into_varbinview().unwrap();
let values = utf8
Expand All @@ -55,6 +58,34 @@ pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> Ar
.unwrap()
.into_array()
}
DType::List(..) => {
robert3005 marked this conversation as resolved.
Show resolved Hide resolved
let list_array = array.clone().into_list().unwrap();
let offsets = slice_canonical_array(&list_array.offsets(), start, stop + 1)
.into_primitive()
.unwrap();

let elements = slice_canonical_array(
&list_array.elements(),
offsets.get_as_cast::<u64>(0) as usize,
offsets.get_as_cast::<u64>(offsets.len() - 1) as usize,
);
let offsets = match_each_native_ptype!(offsets.ptype(), |$P| {
shift_offsets::<$P>(offsets)
})
.into_array();
ListArray::try_new(elements, offsets, validity)
.unwrap()
.into_array()
}
_ => unreachable!("Not a canonical array"),
}
}

/// Rebases `offsets` so that the first offset becomes zero.
///
/// Each value has the first value subtracted from it and the results are collected into a
/// new `PrimitiveArray`. An empty input is returned unchanged.
fn shift_offsets<O: NativePType + ArrowNativeType>(offsets: PrimitiveArray) -> PrimitiveArray {
    if offsets.is_empty() {
        return offsets;
    }

    let values = offsets.as_slice::<O>();
    let base = values[0];
    let shifted: Vec<O> = values.iter().map(|&offset| offset - base).collect();
    PrimitiveArray::from_iter(shifted)
}
12 changes: 11 additions & 1 deletion fuzz/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,17 @@ pub fn sort_canonical_array(array: &ArrayData) -> ArrayData {
});
take_canonical_array(array, &sort_indices)
}
_ => unreachable!("Not a canonical array"),
DType::List(..) => {
let mut sort_indices = (0..array.len()).collect::<Vec<_>>();
sort_indices.sort_by(|a, b| {
scalar_at(array, *a)
.unwrap()
.partial_cmp(&scalar_at(array, *b).unwrap())
.unwrap()
});
take_canonical_array(array, &sort_indices)
}
a => unreachable!("Not a canonical array {:?}", a),
}
}

Expand Down
44 changes: 34 additions & 10 deletions fuzz/src/take.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
use arrow_buffer::ArrowNativeType;
use vortex_array::accessor::ArrayAccessor;
use vortex_array::array::{BoolArray, PrimitiveArray, StructArray, VarBinViewArray};
use vortex_array::builders::{builder_with_capacity, ArrayBuilderExt};
use vortex_array::compute::scalar_at;
use vortex_array::validity::{ArrayValidity, Validity};
use vortex_array::variants::StructArrayTrait;
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};
use vortex_buffer::Buffer;
use vortex_dtype::{match_each_native_ptype, DType};
use vortex_dtype::{match_each_native_ptype, DType, NativePType};
use vortex_error::VortexExpect;

pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData {
Expand All @@ -31,16 +34,12 @@ pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData {
.vortex_expect("Validity length cannot mismatch")
.into_array()
}
DType::Primitive(p, _) => match_each_native_ptype!(p, |$P| {
DType::Primitive(p, _) => {
let primitive_array = array.clone().into_primitive().unwrap();
let vec_values = primitive_array
.as_slice::<$P>()
.iter()
.copied()
.collect::<Vec<_>>();
PrimitiveArray::new(indices.iter().map(|i| vec_values[*i]).collect::<Buffer<$P>>(), validity)
.into_array()
}),
match_each_native_ptype!(p, |$P| {
take_primitive::<$P>(primitive_array, validity, indices)
})
}
DType::Utf8(_) | DType::Binary(_) => {
let utf8 = array.clone().into_varbinview().unwrap();
let values = utf8
Expand Down Expand Up @@ -68,6 +67,31 @@ pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData {
.unwrap()
.into_array()
}
DType::List(..) => {
let mut builder = builder_with_capacity(array.dtype(), indices.len());
for idx in indices {
builder
.append_scalar(&scalar_at(array, *idx).unwrap())
.unwrap();
}
builder.finish().unwrap()
}
_ => unreachable!("Not a canonical array"),
}
}

/// Gathers the values of `primitive_array` at the given `indices` into a new primitive array.
///
/// The values are read as `T`; the result is built from the gathered values together with
/// the caller-supplied `validity`. Panics if an index is out of bounds for the source values.
fn take_primitive<T: NativePType + ArrowNativeType>(
    primitive_array: PrimitiveArray,
    validity: Validity,
    indices: &[usize],
) -> ArrayData {
    let source = primitive_array.as_slice::<T>();
    let gathered = indices
        .iter()
        .map(|&idx| source[idx])
        .collect::<Buffer<T>>();
    PrimitiveArray::new(gathered, validity).into_array()
}
26 changes: 16 additions & 10 deletions vortex-array/src/array/arbitrary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ fn random_array(u: &mut Unstructured, dtype: &DType, len: Option<usize>) -> Resu
.vortex_unwrap()
.into_array())
}
DType::List(ldt, n) => random_list(u, ldt, n),
DType::List(ldt, n) => random_list(u, ldt, n, chunk_len),
DType::Extension(..) => {
todo!("Extension arrays are not implemented")
}
Expand All @@ -106,14 +106,19 @@ fn random_array(u: &mut Unstructured, dtype: &DType, len: Option<usize>) -> Resu
}
}

fn random_list(u: &mut Unstructured, ldt: &Arc<DType>, n: &Nullability) -> Result<ArrayData> {
fn random_list(
u: &mut Unstructured,
ldt: &Arc<DType>,
n: &Nullability,
chunk_len: Option<usize>,
) -> Result<ArrayData> {
match u.int_in_range(0..=5)? {
0 => random_list_offset::<i16>(u, ldt, n),
1 => random_list_offset::<i32>(u, ldt, n),
2 => random_list_offset::<i64>(u, ldt, n),
3 => random_list_offset::<u16>(u, ldt, n),
4 => random_list_offset::<u32>(u, ldt, n),
5 => random_list_offset::<u64>(u, ldt, n),
0 => random_list_offset::<i16>(u, ldt, n, chunk_len),
1 => random_list_offset::<i32>(u, ldt, n, chunk_len),
2 => random_list_offset::<i64>(u, ldt, n, chunk_len),
3 => random_list_offset::<u16>(u, ldt, n, chunk_len),
4 => random_list_offset::<u32>(u, ldt, n, chunk_len),
5 => random_list_offset::<u64>(u, ldt, n, chunk_len),
_ => unreachable!("int_in_range returns a value in the above range"),
}
}
Expand All @@ -122,14 +127,15 @@ fn random_list_offset<O>(
u: &mut Unstructured,
ldt: &Arc<DType>,
n: &Nullability,
chunk_len: Option<usize>,
) -> Result<ArrayData>
where
O: PrimInt + NativePType,
Scalar: From<O>,
usize: AsPrimitive<O>,
{
let list_len = u.int_in_range(0..=20)?;
let mut builder = ListBuilder::<O>::with_capacity(ldt.clone(), *n, 1);
let list_len = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
let mut builder = ListBuilder::<O>::with_capacity(ldt.clone(), *n, 10);
for _ in 0..list_len {
if matches!(n, Nullability::Nullable) || u.arbitrary::<bool>()? {
let elem_len = u.int_in_range(0..=20)?;
Expand Down
21 changes: 20 additions & 1 deletion vortex-array/src/array/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,14 +225,15 @@ impl ListArray {
mod test {
use std::sync::Arc;

use arrow_buffer::BooleanBuffer;
use vortex_dtype::Nullability;
use vortex_dtype::Nullability::NonNullable;
use vortex_dtype::PType::I32;
use vortex_scalar::Scalar;

use crate::array::list::ListArray;
use crate::array::PrimitiveArray;
use crate::compute::scalar_at;
use crate::compute::{filter, scalar_at, FilterMask};
use crate::validity::Validity;
use crate::{ArrayLen, IntoArrayData};

Expand Down Expand Up @@ -301,4 +302,22 @@ mod test {
scalar_at(&list_from_iter, 1).unwrap()
);
}

#[test]
fn test_simple_list_filter() {
    // Three lists: [None, 2], [3, 4] and [5].
    let elements = PrimitiveArray::from_option_iter([None, Some(2), Some(3), Some(4), Some(5)]);
    let offsets = PrimitiveArray::from_iter([0, 2, 4, 5]);
    let validity = Validity::AllValid;

    let list = ListArray::try_new(elements.into_array(), offsets.into_array(), validity)
        .unwrap()
        .into_array();

    // Drop the first list and keep the remaining two.
    let mask = FilterMask::from(BooleanBuffer::from(vec![false, true, true]));

    // Unwrap rather than `assert!(is_ok())` so a failure reports the underlying error.
    let filtered = filter(&list, mask).unwrap();

    // Two mask entries are set, so exactly two lists must survive.
    assert_eq!(filtered.len(), 2);
}
}
Loading
Loading