diff --git a/Cargo.toml b/Cargo.toml index 9a9a531db29..c996600fb14 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,8 @@ lexical-core = { version = "0.8", optional = true } # We need to Hash values before sending them to an hasher. This # crate provides HashMap that assumes pre-hashed values. hash_hasher = "^2.0.3" +# For SIMD utf8 validation +simdutf8 = "0.1.3" csv = { version = "^1.1", optional = true } regex = { version = "^1.3", optional = true } @@ -68,15 +70,13 @@ strength_reduce = { version = "0.2", optional = true } # For instruction multiversioning multiversion = { version = "0.6.1", optional = true } -# For SIMD utf8 validation -simdutf8 = "0.1.3" - [dev-dependencies] -rand = "0.8" criterion = "0.3" flate2 = "1" doc-comment = "0.3" crossbeam-channel = "0.5.1" +# used to run formal property testing +proptest = { version = "1", default_features = false, features = ["std"] } [package.metadata.docs.rs] features = ["full"] diff --git a/tests/it/array/binary/mutable.rs b/tests/it/array/binary/mutable.rs index c42431b15ee..6763a9d4984 100644 --- a/tests/it/array/binary/mutable.rs +++ b/tests/it/array/binary/mutable.rs @@ -1,4 +1,4 @@ -use arrow2::array::{Array, BinaryArray, MutableBinaryArray}; +use arrow2::array::{BinaryArray, MutableBinaryArray}; use arrow2::bitmap::Bitmap; #[test] diff --git a/tests/it/array/utf8/mutable.rs b/tests/it/array/utf8/mutable.rs index 20087010743..cde2a9eeafe 100644 --- a/tests/it/array/utf8/mutable.rs +++ b/tests/it/array/utf8/mutable.rs @@ -1,4 +1,4 @@ -use arrow2::array::{Array, MutableUtf8Array, Utf8Array}; +use arrow2::array::{MutableUtf8Array, Utf8Array}; use arrow2::bitmap::Bitmap; use arrow2::buffer::MutableBuffer; use arrow2::datatypes::DataType; diff --git a/tests/it/bitmap/bitmap_ops.rs b/tests/it/bitmap/bitmap_ops.rs index 12080438761..18eda8e3ab9 100644 --- a/tests/it/bitmap/bitmap_ops.rs +++ b/tests/it/bitmap/bitmap_ops.rs @@ -1,12 +1,16 @@ +use proptest::prelude::*; + use arrow2::bitmap::Bitmap; -#[test] -fn 
not_random() { - let iter = (0..100).map(|x| x % 7 == 0); - let iter_not = iter.clone().map(|x| !x); +use crate::bitmap::bitmap_strategy; - let bitmap: Bitmap = iter.collect(); - let expected: Bitmap = iter_not.collect(); +proptest! { + /// Asserts that !bitmap equals all bits flipped + #[test] + #[cfg_attr(miri, ignore)] // miri and proptest do not work well :( + fn not(bitmap in bitmap_strategy()) { + let not_bitmap: Bitmap = bitmap.iter().map(|x| !x).collect(); - assert_eq!(!&bitmap, expected); + assert_eq!(!&bitmap, not_bitmap); + } } diff --git a/tests/it/bitmap/mod.rs b/tests/it/bitmap/mod.rs index 78d47d604d3..3b415d0a6c2 100644 --- a/tests/it/bitmap/mod.rs +++ b/tests/it/bitmap/mod.rs @@ -3,8 +3,28 @@ mod immutable; mod mutable; mod utils; +use proptest::prelude::*; + use arrow2::{bitmap::Bitmap, buffer::MutableBuffer}; +/// Returns a strategy of an arbitrary sliced [`Bitmap`] of size up to 1000 +pub(crate) fn bitmap_strategy() -> impl Strategy<Value = Bitmap> { + prop::collection::vec(any::<bool>(), 1..1000) + .prop_flat_map(|vec| { + let len = vec.len(); + (Just(vec), 0..len) + }) + .prop_flat_map(|(vec, index)| { + let len = vec.len(); + (Just(vec), Just(index), 0..len - index) + }) + .prop_flat_map(|(vec, index, len)| { + let bitmap = Bitmap::from(&vec); + let bitmap = bitmap.slice(index, len); + Just(bitmap) + }) +} + fn create_bitmap<P: AsRef<[u8]>>(bytes: P, len: usize) -> Bitmap { let buffer = MutableBuffer::<u8>::from(bytes.as_ref()); Bitmap::from_u8_buffer(buffer, len) diff --git a/tests/it/bitmap/utils/mod.rs b/tests/it/bitmap/utils/mod.rs index 4392ca4510c..e92f31124ca 100644 --- a/tests/it/bitmap/utils/mod.rs +++ b/tests/it/bitmap/utils/mod.rs @@ -1,5 +1,9 @@ +use proptest::prelude::*; + use arrow2::bitmap::utils::*; +use crate::bitmap::bitmap_strategy; + mod bit_chunks_exact; mod chunk_iter; mod iterator; @@ -67,3 +71,13 @@ fn count_zeros_1() { let input: &[u8] = &[73, 146, 36, 73, 146, 36, 73, 146, 36, 73, 146, 36, 9]; assert_eq!(count_zeros(input, 10, 90), 60); } + +proptest! 
{ + /// Asserts that `Bitmap::null_count` equals the number of unset bits + #[test] + #[cfg_attr(miri, ignore)] // miri and proptest do not work well :( + fn null_count(bitmap in bitmap_strategy()) { + let sum_of_sets: usize = (0..bitmap.len()).map(|x| (!bitmap.get_bit(x)) as usize).sum(); + assert_eq!(bitmap.null_count(), sum_of_sets); + } +} diff --git a/tests/it/bitmap/utils/slice_iterator.rs b/tests/it/bitmap/utils/slice_iterator.rs index ea60940c41f..f9d9e409cec 100644 --- a/tests/it/bitmap/utils/slice_iterator.rs +++ b/tests/it/bitmap/utils/slice_iterator.rs @@ -1,27 +1,34 @@ -use rand::distributions::{Bernoulli, Uniform}; -use rand::prelude::StdRng; -use rand::Rng; -use rand::SeedableRng; +use proptest::prelude::*; use arrow2::bitmap::utils::SlicesIterator; use arrow2::bitmap::Bitmap; -#[test] -fn check_invariant() { - let values = (0..8).map(|i| i % 2 != 0).collect::<Bitmap>(); - let iter = SlicesIterator::new(&values); +use crate::bitmap::bitmap_strategy; - let slots = iter.slots(); +proptest! 
{ + /// Asserts that: + /// * `slots` is the number of set bits in the bitmap + /// * the sum of the lens of the slices equals `slots` + /// * each item on each slice is set + #[test] + #[cfg_attr(miri, ignore)] // miri and proptest do not work well :( + fn check_invariants(bitmap in bitmap_strategy()) { + let iter = SlicesIterator::new(&bitmap); - let slices = iter.collect::<Vec<_>>(); + let slots = iter.slots(); - assert_eq!(slices, vec![(1, 1), (3, 1), (5, 1), (7, 1)]); + assert_eq!(bitmap.len() - bitmap.null_count(), slots); - let mut sum = 0; - for (_, len) in slices { - sum += len; + let slices = iter.collect::<Vec<_>>(); + let mut sum = 0; + for (start, len) in slices { + sum += len; + for i in start..(start+len) { + assert!(bitmap.get_bit(i)); + } + } + assert_eq!(sum, slots); } - assert_eq!(sum, slots); } #[test] @@ -142,28 +149,3 @@ fn remainder_1() { let chunks = iter.collect::<Vec<_>>(); assert_eq!(chunks, vec![(2, 1), (4, 1)]); } - -#[test] -fn filter_slices() { - let mut rng = StdRng::seed_from_u64(42); - let length = 500; - - let mask: Bitmap = (0..length) - .map(|_| { - let v: bool = (&mut rng).sample(Bernoulli::new(0.5).unwrap()); - v - }) - .collect(); - - for offset in 100usize..(length - 1) { - let len = (&mut rng).sample(Uniform::new(0, length - offset)); - let mask_s = mask.clone().slice(offset, len); - - let iter = SlicesIterator::new(&mask_s); - iter.for_each(|(start, slice_len)| { - if start + slice_len > len { - panic!("Fail") - } - }); - } -}