From 0e7e892f308535bd79316312ced9834caee0adda Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Mon, 5 Sep 2022 13:01:11 +0800 Subject: [PATCH] extend test, fix estimate --- fastfield_codecs/src/blockwise_linear.rs | 2 +- fastfield_codecs/src/lib.rs | 22 +++++++++++++++++----- fastfield_codecs/src/line.rs | 4 ++-- fastfield_codecs/src/linear.rs | 6 +++--- src/fastfield/mod.rs | 2 +- 5 files changed, 24 insertions(+), 12 deletions(-) diff --git a/fastfield_codecs/src/blockwise_linear.rs b/fastfield_codecs/src/blockwise_linear.rs index e8d8177044..32dcaf0056 100644 --- a/fastfield_codecs/src/blockwise_linear.rs +++ b/fastfield_codecs/src/blockwise_linear.rs @@ -116,7 +116,7 @@ impl FastFieldCodec for BlockwiseLinearCodec { } let estimated_bit_width = first_chunk .iter() - .map(|el| (((el + 1) as f32 * 1.5) * 2.0) as u64) + .map(|el| ((el + 1) as f32 * 3.0) as u64) .map(compute_num_bits) .max() .unwrap(); diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index b15cf16797..92cf6961a3 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -284,14 +284,14 @@ mod tests { let data: VecColumn = data.as_slice().into(); let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap(); - assert_le!(linear_interpol_estimation, 0.04); + assert_le!(linear_interpol_estimation, 0.01); let multi_linear_interpol_estimation = BlockwiseLinearCodec::estimate(&data).unwrap(); assert_le!(multi_linear_interpol_estimation, 0.2); - assert_le!(linear_interpol_estimation, multi_linear_interpol_estimation); + assert_lt!(linear_interpol_estimation, multi_linear_interpol_estimation); let bitpacked_estimation = BitpackedCodec::estimate(&data).unwrap(); - assert_le!(linear_interpol_estimation, bitpacked_estimation); + assert_lt!(linear_interpol_estimation, bitpacked_estimation); } #[test] fn estimation_test_bad_interpolation_case() { @@ -299,11 +299,23 @@ mod tests { let data: VecColumn = data.into(); let linear_interpol_estimation = 
LinearCodec::estimate(&data).unwrap(); - assert_le!(linear_interpol_estimation, 0.32); + assert_le!(linear_interpol_estimation, 0.34); let bitpacked_estimation = BitpackedCodec::estimate(&data).unwrap(); - assert_le!(bitpacked_estimation, linear_interpol_estimation); + assert_lt!(bitpacked_estimation, linear_interpol_estimation); + } + + #[test] + fn estimation_prefer_bitpacked() { + let data: &[u64] = &[10, 10, 10, 10]; + + let data: VecColumn = data.into(); + let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap(); + + let bitpacked_estimation = BitpackedCodec::estimate(&data).unwrap(); + assert_lt!(bitpacked_estimation, linear_interpol_estimation); } + #[test] fn estimation_test_bad_interpolation_case_monotonically_increasing() { let mut data: Vec<u64> = (200..=20000_u64).collect(); diff --git a/fastfield_codecs/src/line.rs b/fastfield_codecs/src/line.rs index c263b5d312..3cdaa7c88b 100644 --- a/fastfield_codecs/src/line.rs +++ b/fastfield_codecs/src/line.rs @@ -72,7 +72,7 @@ impl Line { Self::train_from(ys, sample_positions.iter().cloned()) } - // Same as train, but the intercept is only estimated from provided sample positions + // Intercept is only computed from provided positions fn train_from(ys: &dyn Column, positions: impl Iterator<Item = u64>) -> Self { let num_vals = if let Some(num_vals) = NonZeroU64::new(ys.num_vals() - 1) { num_vals @@ -164,7 +164,7 @@ mod tests { /// This function operates translation over the data for better coverage. 
#[track_caller] fn test_line_interpol_with_translation(ys: &[u64], expected: Option<u64>) { - let mut translations = vec![0, 100, u64::MAX, u64::MAX - 1]; + let mut translations = vec![0, 100, u64::MAX / 2, u64::MAX, u64::MAX - 1]; translations.extend_from_slice(ys); for translation in translations { let translated_ys: Vec<u64> = ys diff --git a/fastfield_codecs/src/linear.rs b/fastfield_codecs/src/linear.rs index 3081e2754f..dc411b7681 100644 --- a/fastfield_codecs/src/linear.rs +++ b/fastfield_codecs/src/linear.rs @@ -159,7 +159,7 @@ impl FastFieldCodec for LinearCodec { .max() .unwrap_or(0); - let num_bits = estimated_bit_width as u64 * fastfield_accessor.num_vals() as u64; + let num_bits = (estimated_bit_width as u64 * fastfield_accessor.num_vals() as u64) + 64; let num_bits_uncompressed = 64 * fastfield_accessor.num_vals(); Some(num_bits as f32 / num_bits_uncompressed as f32) } @@ -182,8 +182,8 @@ mod tests { let (estimate, actual_compression) = create_and_validate(&data, "simple monotonically large").unwrap(); - assert!(actual_compression < 0.03); - assert!(estimate < 0.04); + assert_le!(actual_compression, 0.001); + assert_le!(estimate, 0.02); } #[test] diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 29f93e064a..be4713c7b6 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -401,7 +401,7 @@ mod tests { // assert_eq!(file.len(), 17710 as usize); //bitpacked size // assert_eq!(file.len(), 10175_usize); // linear interpol size // assert_eq!(file.len(), 75_usize); // linear interpol size after calc improvement - //assert_eq!(file.len(), 1325_usize); // linear interpol size after switching to int based + // assert_eq!(file.len(), 1325_usize); // linear interpol size after switching to int based assert_eq!(file.len(), 62_usize); // linear interpol size after switching to int based, off // by one fix