Merge pull request #16 from marshallpierce/better-fix-for-overflow-with-large-unit_magnitude

jonhoo · web-flow · commit 19766045a514 · 2017-02-20T14:17:18.000-05:00
Limit unit magnitude + precision to fit into 63 bits.
diff --git a/src/lib.rs b/src/lib.rs
@@ -628,9 +628,13 @@ impl<T: Counter> Histogram<T> {
         let sub_bucket_count = 1_u32 << (sub_bucket_count_magnitude as u32);
 
         if unit_magnitude + sub_bucket_count_magnitude > 63 {
-            // Cannot represent shifted sub bucket count in 64 bits.
-            // This will cause an infinite loop when calculating number of buckets
-            return Err("Cannot represent significant figures' worth of measurements beyond lowest value");
+            // sub_bucket_count << unit_magnitude can't be represented in a u64. Technically it
+            // still sort of works if the two magnitudes sum to 64: you can represent all but the
+            // last number in the shifted sub_bucket_count. However, the utility of such a
+            // histogram vs ones whose magnitude sum fits in 63 bits is debatable, and it makes
+            // it harder to work through the logic. Sums larger than 64 are totally broken as
+            // leading_zero_count_base would go negative.
+            return Err("Cannot represent sigfig worth of values beyond low");
         };
 
         let sub_bucket_half_count = sub_bucket_count / 2;
@@ -1241,6 +1245,7 @@ impl<T: Counter> Histogram<T> {
 
     /// Find the number of buckets needed such that `value` is representable.
     fn buckets_to_cover(&self, value: u64) -> u8 {
+        // Shift won't overflow because sub_bucket_count_magnitude + unit_magnitude <= 63.
         // the k'th bucket can express from 0 * 2^k to sub_bucket_count * 2^k in units of 2^k
         let mut smallest_untrackable_value = (self.sub_bucket_count as u64) << self.unit_magnitude;
 
diff --git a/src/tests/index_calculation.rs b/src/tests/index_calculation.rs
@@ -0,0 +1,153 @@
+use super::Histogram;
+use tests::helpers::histo64;
+
+#[test]
+fn unit_magnitude_0_index_calculations() {
+    let h = histo64(1_u64, 1_u64 << 32, 3);
+    assert_eq!(2048, h.sub_bucket_count);
+    assert_eq!(0, h.unit_magnitude);
+    // sub_bucket_count = 2^11; 2^11 << 22 = 2^33 is > the max of 2^32, so 23 buckets (0..=22)
+    assert_eq!(23, h.bucket_count);
+
+    // first half of first bucket
+    assert_eq!(0, h.bucket_for(3));
+    assert_eq!(3, h.sub_bucket_for(3, 0));
+
+    // second half of first bucket
+    assert_eq!(0, h.bucket_for(1024 + 3));
+    assert_eq!(1024 + 3, h.sub_bucket_for(1024 + 3, 0));
+
+    // second bucket (top half)
+    assert_eq!(1, h.bucket_for(2048 + 3 * 2));
+    // counting by 2s, starting at halfway through the bucket
+    assert_eq!(1024 + 3, h.sub_bucket_for(2048 + 3 * 2, 1));
+
+    // third bucket (top half)
+    assert_eq!(2, h.bucket_for((2048 << 1) + 3 * 4));
+    // counting by 4s, starting at halfway through the bucket
+    assert_eq!(1024 + 3, h.sub_bucket_for((2048 << 1) + 3 * 4, 2));
+
+    // past last bucket -- not near u64::max_value(), so should still calculate ok.
+    assert_eq!(23, h.bucket_for((2048_u64 << 22) + 3 * (1 << 23)));
+    assert_eq!(1024 + 3, h.sub_bucket_for((2048_u64 << 22) + 3 * (1 << 23), 23));
+}
+
+#[test]
+fn unit_magnitude_4_index_calculations() {
+    let h = histo64(1_u64 << 12, 1_u64 << 32, 3);
+    assert_eq!(2048, h.sub_bucket_count);
+    assert_eq!(12, h.unit_magnitude);
+    // sub_bucket_count = 2^11. With the unit magnitude shift of 12, it's 2^23. 2^23 << 10 = 2^33
+    // is > the max of 2^32, so 11 buckets (0..=10) total
+    assert_eq!(11, h.bucket_count);
+    let unit = 1_u64 << 12;
+
+    // below lowest value
+    assert_eq!(0, h.bucket_for(3));
+    assert_eq!(0, h.sub_bucket_for(3, 0));
+
+    // first half of first bucket
+    assert_eq!(0, h.bucket_for(3 * unit));
+    assert_eq!(3, h.sub_bucket_for(3 * unit, 0));
+
+    // second half of first bucket
+    // sub_bucket_half_count's worth of units, plus 3 more
+    assert_eq!(0, h.bucket_for(unit * (1024 + 3)));
+    assert_eq!(1024 + 3, h.sub_bucket_for(unit * (1024 + 3), 0));
+
+    // second bucket (top half), bucket scale = unit << 1.
+    // Middle of bucket is (sub_bucket_half_count = 2^10) of bucket scale, = unit << 11.
+    // Add on 3 of bucket scale.
+    assert_eq!(1, h.bucket_for((unit << 11) + 3 * (unit << 1)));
+    assert_eq!(1024 + 3, h.sub_bucket_for((unit << 11) + 3 * (unit << 1), 1));
+
+    // third bucket (top half), bucket scale = unit << 2.
+    // Middle of bucket is (sub_bucket_half_count = 2^10) of bucket scale, = unit << 12.
+    // Add on 3 of bucket scale.
+    assert_eq!(2, h.bucket_for((unit << 12) + 3 * (unit << 2)));
+    assert_eq!(1024 + 3, h.sub_bucket_for((unit << 12) + 3 * (unit << 2), 2));
+
+    // past last bucket -- not near u64::max_value(), so should still calculate ok.
+    assert_eq!(11, h.bucket_for((unit << 21) + 3 * (unit << 11)));
+    assert_eq!(1024 + 3, h.sub_bucket_for((unit << 21) + 3 * (unit << 11), 11));
+}
+
+#[test]
+fn unit_magnitude_52_sub_bucket_magnitude_11_index_calculations() {
+    // maximum unit magnitude for this precision
+    let h = histo64(1_u64 << 52, u64::max_value(), 3);
+    assert_eq!(2048, h.sub_bucket_count);
+    assert_eq!(52, h.unit_magnitude);
+    // sub_bucket_count = 2^11. With unit magnitude shift, it's 2^63. 1 more bucket to (almost)
+    // reach 2^64.
+    assert_eq!(2, h.bucket_count);
+    assert_eq!(1, h.leading_zero_count_base);
+    let unit = 1_u64 << 52;
+
+    // below lowest value
+    assert_eq!(0, h.bucket_for(3));
+    assert_eq!(0, h.sub_bucket_for(3, 0));
+
+    // first half of first bucket
+    assert_eq!(0, h.bucket_for(3 * unit));
+    assert_eq!(3, h.sub_bucket_for(3 * unit, 0));
+
+    // second half of first bucket
+    // sub_bucket_half_count's worth of units, plus 3 more
+    assert_eq!(0, h.bucket_for(unit * (1024 + 3)));
+    assert_eq!(1024 + 3, h.sub_bucket_for(unit * (1024 + 3), 0));
+
+    // end of second half of first bucket (last sub-bucket)
+    assert_eq!(0, h.bucket_for(unit * 1024 + 1023 * unit));
+    assert_eq!(1024 + 1023, h.sub_bucket_for(unit * 1024 + 1023 * unit, 0));
+
+    // second bucket (top half), bucket scale = unit << 1.
+    // Middle of bucket is (sub_bucket_half_count = 2^10) of bucket scale, = unit << 11.
+    // Add on 3 of bucket scale.
+    assert_eq!(1, h.bucket_for((unit << 11) + 3 * (unit << 1)));
+    assert_eq!(1024 + 3, h.sub_bucket_for((unit << 11) + 3 * (unit << 1), 1));
+
+    // upper half of second bucket, last slot
+    assert_eq!(1, h.bucket_for(u64::max_value()));
+    assert_eq!(1024 + 1023, h.sub_bucket_for(u64::max_value(), 1));
+}
+
+#[test]
+fn unit_magnitude_53_sub_bucket_magnitude_11_throws() {
+    assert_eq!("Cannot represent sigfig worth of values beyond low",
+        Histogram::<u64>::new_with_bounds(1_u64 << 53, 1_u64 << 63, 3).unwrap_err());
+}
+
+#[test]
+fn unit_magnitude_55_sub_bucket_magnitude_8_ok() {
+    let h = histo64(1_u64 << 55, 1_u64 << 63, 2);
+    assert_eq!(256, h.sub_bucket_count);
+    assert_eq!(55, h.unit_magnitude);
+    // sub_bucket_count = 2^8. With unit magnitude shift, it's 2^63.
+    assert_eq!(2, h.bucket_count);
+
+    // below lowest value
+    assert_eq!(0, h.bucket_for(3));
+    assert_eq!(0, h.sub_bucket_for(3, 0));
+
+    // upper half of second bucket, last slot
+    assert_eq!(1, h.bucket_for(u64::max_value()));
+    assert_eq!(128 + 127, h.sub_bucket_for(u64::max_value(), 1));
+}
+
+#[test]
+fn unit_magnitude_62_sub_bucket_magnitude_1_ok() {
+    let h = histo64(1_u64 << 62, 1_u64 << 63, 0);
+    assert_eq!(2, h.sub_bucket_count);
+    assert_eq!(62, h.unit_magnitude);
+    // sub_bucket_count = 2^1. With unit magnitude shift, it's 2^63.
+    assert_eq!(2, h.bucket_count);
+
+    // below lowest value
+    assert_eq!(0, h.bucket_for(3));
+    assert_eq!(0, h.sub_bucket_for(3, 0));
+
+    // upper half of second bucket, last slot
+    assert_eq!(1, h.bucket_for(u64::max_value()));
+    assert_eq!(1, h.sub_bucket_for(u64::max_value(), 1));
+}
diff --git a/src/tests/tests.rs b/src/tests/tests.rs
@@ -4,6 +4,8 @@ use super::Histogram;
 mod helpers;
 #[path = "init.rs"]
 mod init;
+#[path = "index_calculation.rs"]
+mod index_calculation;
 
 #[test]
 fn new_err_high_not_double_low() {