From 0e7e892f308535bd79316312ced9834caee0adda Mon Sep 17 00:00:00 2001
From: Pascal Seitz <pascal.seitz@gmail.com>
Date: Mon, 5 Sep 2022 13:01:11 +0800
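Subject: [PATCH] extend test, fix estimate

Add a constant 64 bits to the size computed by LinearCodec::estimate, and
add the estimation_prefer_bitpacked test, which asserts that the bitpacked
estimate is lower than the linear one for a small constant column.

Also:
* adjust the bounds in the estimation tests and make the codec
  comparisons strict (assert_le! -> assert_lt!)
* fold the `* 1.5 * 2.0` factor in the blockwise linear estimate into a
  single `* 3.0`
* add u64::MAX / 2 to the translations used by the line interpolation tests
* comment touch-ups in line.rs and src/fastfield/mod.rs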
---
 fastfield_codecs/src/blockwise_linear.rs |  2 +-
 fastfield_codecs/src/lib.rs              | 22 +++++++++++++++++-----
 fastfield_codecs/src/line.rs             |  4 ++--
 fastfield_codecs/src/linear.rs           |  6 +++---
 src/fastfield/mod.rs                     |  2 +-
 5 files changed, 24 insertions(+), 12 deletions(-)
diff --git a/fastfield_codecs/src/blockwise_linear.rs b/fastfield_codecs/src/blockwise_linear.rs
index e8d8177044..32dcaf0056 100644
--- a/fastfield_codecs/src/blockwise_linear.rs
+++ b/fastfield_codecs/src/blockwise_linear.rs
@@ -116,7 +116,7 @@ impl FastFieldCodec for BlockwiseLinearCodec {
         }
         let estimated_bit_width = first_chunk
             .iter()
-            .map(|el| (((el + 1) as f32 * 1.5) * 2.0) as u64)
+            .map(|el| ((el + 1) as f32 * 3.0) as u64)
             .map(compute_num_bits)
             .max()
             .unwrap();
diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs
index b15cf16797..92cf6961a3 100644
--- a/fastfield_codecs/src/lib.rs
+++ b/fastfield_codecs/src/lib.rs
@@ -284,14 +284,14 @@ mod tests {
         let data: VecColumn = data.as_slice().into();
 
         let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap();
-        assert_le!(linear_interpol_estimation, 0.04);
+        assert_le!(linear_interpol_estimation, 0.01);
 
         let multi_linear_interpol_estimation = BlockwiseLinearCodec::estimate(&data).unwrap();
         assert_le!(multi_linear_interpol_estimation, 0.2);
-        assert_le!(linear_interpol_estimation, multi_linear_interpol_estimation);
+        assert_lt!(linear_interpol_estimation, multi_linear_interpol_estimation);
 
         let bitpacked_estimation = BitpackedCodec::estimate(&data).unwrap();
-        assert_le!(linear_interpol_estimation, bitpacked_estimation);
+        assert_lt!(linear_interpol_estimation, bitpacked_estimation);
     }
     #[test]
     fn estimation_test_bad_interpolation_case() {
@@ -299,11 +299,23 @@ mod tests {
 
         let data: VecColumn = data.into();
         let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap();
-        assert_le!(linear_interpol_estimation, 0.32);
+        assert_le!(linear_interpol_estimation, 0.34);
 
         let bitpacked_estimation = BitpackedCodec::estimate(&data).unwrap();
-        assert_le!(bitpacked_estimation, linear_interpol_estimation);
+        assert_lt!(bitpacked_estimation, linear_interpol_estimation);
+    }
+
+    #[test]
+    fn estimation_prefer_bitpacked() {
+        let data: &[u64] = &[10, 10, 10, 10];
+
+        let data: VecColumn = data.into();
+        let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap();
+
+        let bitpacked_estimation = BitpackedCodec::estimate(&data).unwrap();
+        assert_lt!(bitpacked_estimation, linear_interpol_estimation);
     }
+
     #[test]
     fn estimation_test_bad_interpolation_case_monotonically_increasing() {
         let mut data: Vec<u64> = (200..=20000_u64).collect();
diff --git a/fastfield_codecs/src/line.rs b/fastfield_codecs/src/line.rs
index c263b5d312..3cdaa7c88b 100644
--- a/fastfield_codecs/src/line.rs
+++ b/fastfield_codecs/src/line.rs
@@ -72,7 +72,7 @@ impl Line {
         Self::train_from(ys, sample_positions.iter().cloned())
     }
 
-    // Same as train, but the intercept is only estimated from provided sample positions
+    // Same as `train`, but the intercept is computed only from the provided positions.
     fn train_from(ys: &dyn Column, positions: impl Iterator<Item = u64>) -> Self {
         let num_vals = if let Some(num_vals) = NonZeroU64::new(ys.num_vals() - 1) {
             num_vals
@@ -164,7 +164,7 @@ mod tests {
     /// This function operates translation over the data for better coverage.
     #[track_caller]
     fn test_line_interpol_with_translation(ys: &[u64], expected: Option<u64>) {
-        let mut translations = vec![0, 100, u64::MAX, u64::MAX - 1];
+        let mut translations = vec![0, 100, u64::MAX / 2, u64::MAX, u64::MAX - 1];
         translations.extend_from_slice(ys);
         for translation in translations {
             let translated_ys: Vec<u64> = ys
diff --git a/fastfield_codecs/src/linear.rs b/fastfield_codecs/src/linear.rs
index 3081e2754f..dc411b7681 100644
--- a/fastfield_codecs/src/linear.rs
+++ b/fastfield_codecs/src/linear.rs
@@ -159,7 +159,7 @@ impl FastFieldCodec for LinearCodec {
             .max()
             .unwrap_or(0);
 
-        let num_bits = estimated_bit_width as u64 * fastfield_accessor.num_vals() as u64;
+        let num_bits = (estimated_bit_width as u64 * fastfield_accessor.num_vals() as u64) + 64;
         let num_bits_uncompressed = 64 * fastfield_accessor.num_vals();
         Some(num_bits as f32 / num_bits_uncompressed as f32)
     }
@@ -182,8 +182,8 @@ mod tests {
         let (estimate, actual_compression) =
             create_and_validate(&data, "simple monotonically large").unwrap();
 
-        assert!(actual_compression < 0.03);
-        assert!(estimate < 0.04);
+        assert_le!(actual_compression, 0.001);
+        assert_le!(estimate, 0.02);
     }
 
     #[test]
diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs
index 29f93e064a..be4713c7b6 100644
--- a/src/fastfield/mod.rs
+++ b/src/fastfield/mod.rs
@@ -401,7 +401,7 @@ mod tests {
         // assert_eq!(file.len(), 17710 as usize); //bitpacked size
         // assert_eq!(file.len(), 10175_usize); // linear interpol size
         // assert_eq!(file.len(), 75_usize); // linear interpol size after calc improvement
-        //assert_eq!(file.len(), 1325_usize); // linear interpol size after switching to int based
+        // assert_eq!(file.len(), 1325_usize); // linear interpol size after switching to int based
         assert_eq!(file.len(), 62_usize); // linear interpol size after switching to int based, off
                                           // by one fix