From 3d6f699b5e9f12b35ce506f32215ac3dd8dd80c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20St=C3=A4ber?= Date: Sun, 30 Jan 2022 21:38:06 +0100 Subject: [PATCH 1/4] CKMS Quantiles: Fix test and disallow corner cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fabian Stäber --- .../io/prometheus/client/CKMSQuantiles.java | 12 ++-- .../java/io/prometheus/client/Summary.java | 4 +- .../prometheus/client/CKMSQuantilesTest.java | 57 ++++++------------- 3 files changed, 22 insertions(+), 51 deletions(-) diff --git a/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java b/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java index 7fe6cff3b..140c41696 100644 --- a/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java +++ b/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java @@ -129,14 +129,10 @@ final class CKMSQuantiles { * @param quantiles The targeted quantiles, can be empty. */ CKMSQuantiles(Quantile[] quantiles) { - // hard-coded epsilon of 0.1% to determine the batch size, and default epsilon in case of empty quantiles - double pointOnePercent = 0.001; if (quantiles.length == 0) { // we need at least one for this algorithm to work - this.quantiles = new Quantile[1]; - this.quantiles[0] = new Quantile(0.5, pointOnePercent / 2); - } else { - this.quantiles = quantiles; + throw new IllegalArgumentException("quantiles cannot be empty"); } + this.quantiles = quantiles; // section 5.1 Methods - Batch. // This is hardcoded to 500, which corresponds to an epsilon of 0.1%. @@ -431,8 +427,8 @@ static class Quantile { * @param epsilon the desired error for this quantile, between 0 and 1. */ Quantile(double quantile, double epsilon) { - if (quantile < 0 || quantile > 1.0) throw new IllegalArgumentException("Quantile must be between 0 and 1"); - if (epsilon < 0 || epsilon > 1.0) throw new IllegalArgumentException("Epsilon must be between 0 and 1"); + if (quantile <= 0 || quantile >= 1.0) throw new IllegalArgumentException("Quantile must be between 0 and 1"); + if (epsilon <= 0 || epsilon >= 1.0) throw new IllegalArgumentException("Epsilon must be between 0 and 1"); this.quantile = quantile; this.epsilon = epsilon; diff --git a/simpleclient/src/main/java/io/prometheus/client/Summary.java b/simpleclient/src/main/java/io/prometheus/client/Summary.java index 34b123ba0..886358cd9 100644 --- a/simpleclient/src/main/java/io/prometheus/client/Summary.java +++ b/simpleclient/src/main/java/io/prometheus/client/Summary.java @@ -99,10 +99,10 @@ public static class Builder extends SimpleCollector.Builder { private int ageBuckets = 5; public Builder quantile(double quantile, double error) { - if (quantile < 0.0 || quantile > 1.0) { + if (quantile <= 0.0 || quantile >= 1.0) { throw new IllegalArgumentException("Quantile " + quantile + " invalid: Expected number between 0.0 and 1.0."); } - if (error < 0.0 || error > 1.0) { + if (error <= 0.0 || error >= 1.0) { throw new IllegalArgumentException("Error " + error + " invalid: Expected number between 0.0 and 1.0."); } quantiles.add(new Quantile(quantile, error)); diff --git a/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java b/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java index 63a281007..7e5f273c9 100644 --- a/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java +++ b/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java @@ -25,37 +25,6 @@ public void testGetOnEmptyValues() { assertEquals(Double.NaN, ckms.get(0), 0); } - @Test - public void testGetWhenNoQuantilesAreDefined() { - CKMSQuantiles ckms = new CKMSQuantiles(new Quantile[]{}); - assertEquals(Double.NaN, ckms.get(0), 0); - } - - @Test - public void testInsertWhenNoQuantilesAreDefined() { - CKMSQuantiles ckms = new CKMSQuantiles(new Quantile[]{}); - ckms.insert(1.0); - ckms.insert(2.0); - ckms.insert(3.0); - assertEquals(1.0, ckms.get(0), 0); - assertEquals(2.0, ckms.get(0.5), 0); - assertEquals(3.0, ckms.get(1), 0); - } - - @Test - public void testCompressWhenBufferSize500Reached() { - CKMSQuantiles ckms = new CKMSQuantiles(new Quantile[]{}); - List input = makeSequence(1, 499); - - for (double v : input) { - ckms.insert(v); - } - assertEquals("No compress should be triggered", 0, ckms.samples.size()); - - ckms.insert(500); - assertEquals(500, ckms.samples.size()); - } - @Test public void testGet() { List quantiles = new ArrayList(); @@ -80,20 +49,19 @@ public void testGet() { @Test public void testGetWithAMillionElements() { List quantiles = new ArrayList(); - quantiles.add(new Quantile(0.0, 0.01)); + quantiles.add(new Quantile(0.01, 0.001)); quantiles.add(new Quantile(0.10, 0.01)); quantiles.add(new Quantile(0.90, 0.001)); quantiles.add(new Quantile(0.95, 0.02)); quantiles.add(new Quantile(0.99, 0.001)); final int elemCount = 1000000; - double[] shuffle = new double[elemCount]; - for (int i = 0; i < shuffle.length; i++) { - shuffle[i] = i + 1; + List shuffle = new ArrayList(elemCount); + for (int i = 0; i < elemCount; i++) { + shuffle.add(i+1.0); } Random rand = new Random(0); - - Collections.shuffle(Arrays.asList(shuffle), rand); + Collections.shuffle(shuffle, rand); CKMSQuantiles ckms = new CKMSQuantiles( quantiles.toArray(new Quantile[]{})); @@ -102,14 +70,21 @@ public void testGetWithAMillionElements() { ckms.insert(v); } // given the linear distribution, we set the delta equal to the εn value for this quantile - assertEquals(0.1 * elemCount, ckms.get(0.1), 0.01 * elemCount); - assertEquals(0.9 * elemCount, ckms.get(0.9), 0.001 * elemCount); - assertEquals(0.95 * elemCount, ckms.get(0.95), 0.02 * elemCount); - assertEquals(0.99 * elemCount, ckms.get(0.99), 0.001 * elemCount); + assertRank(elemCount, ckms.get(0.01), 0.01, 0.001); + assertRank(elemCount, ckms.get(0.1), 0.1, 0.01); + assertRank(elemCount, ckms.get(0.9), 0.9, 0.001); + assertRank(elemCount, ckms.get(0.95), 0.95, 0.02); + assertRank(elemCount, ckms.get(0.99), 0.99, 0.001); assertTrue("sample size should be way below 1_000_000", ckms.samples.size() < 1000); } + private void assertRank(int elemCount, double actual, double quantile, double epsilon) { + double lowerBound = elemCount * (quantile - epsilon); + double upperBound = elemCount * (quantile + epsilon); + assertTrue("quantile=" + quantile + ", actual=" + actual + ", lowerBound=" + lowerBound, actual >= lowerBound); + assertTrue("quantile=" + quantile + ", actual=" + actual + ", upperBound=" + upperBound, actual <= upperBound); + } @Test public void testGetGaussian() { From 60319f45411401535a981be145c949b3a40d9787 Mon Sep 17 00:00:00 2001 From: Jens Date: Mon, 31 Jan 2022 21:39:44 +0100 Subject: [PATCH 2/4] Allow p0 and p100 queries The last element is always preserved. By skipping the first element in samples, we can also answer the p0 query. Signed-off-by: Jens --- .../java/io/prometheus/client/CKMSQuantiles.java | 16 +++++++++++++--- .../io/prometheus/client/CKMSQuantilesTest.java | 6 ++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java b/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java index 140c41696..982b63eb4 100644 --- a/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java +++ b/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java @@ -180,6 +180,13 @@ public double get(double q) { return Double.NaN; } + // short-circuit min (p0) and max (p100) queries + if (q == 0.0) { + return samples.getFirst().value; + } else if (q == 1.0) { + return samples.getLast().value; + } + // Straightforward implementation of Output(q). // Paper Section 3.1 on true rank: let r_i = Sum_{j=1}^{i−1} g_j int currentRank = 0; @@ -200,8 +207,8 @@ public double get(double q) { } } - // edge case of wanting max value - return samples.getLast().value; + // The iterator is exhausted, return the last value. + return cur.value; } /** @@ -328,6 +335,9 @@ private void compress() { ListIterator it = samples.listIterator(); + // Preserve the first element (min) so that q=0.0 can be looked up + it.next(); + Item prev; Item next = it.next(); @@ -427,7 +437,7 @@ static class Quantile { * @param epsilon the desired error for this quantile, between 0 and 1. */ Quantile(double quantile, double epsilon) { - if (quantile <= 0 || quantile >= 1.0) throw new IllegalArgumentException("Quantile must be between 0 and 1"); + if (quantile < 0 || quantile > 1.0) throw new IllegalArgumentException("Quantile must be between 0 and 1"); if (epsilon <= 0 || epsilon >= 1.0) throw new IllegalArgumentException("Epsilon must be between 0 and 1"); this.quantile = quantile; diff --git a/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java b/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java index 7e5f273c9..ca8accbbe 100644 --- a/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java +++ b/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java @@ -49,11 +49,12 @@ public void testGet() { @Test public void testGetWithAMillionElements() { List quantiles = new ArrayList(); - quantiles.add(new Quantile(0.01, 0.001)); + quantiles.add(new Quantile(0.0, 0.001)); quantiles.add(new Quantile(0.10, 0.01)); quantiles.add(new Quantile(0.90, 0.001)); quantiles.add(new Quantile(0.95, 0.02)); quantiles.add(new Quantile(0.99, 0.001)); + quantiles.add(new Quantile(1.0, 0.001)); final int elemCount = 1000000; List shuffle = new ArrayList(elemCount); @@ -70,11 +71,12 @@ public void testGetWithAMillionElements() { ckms.insert(v); } // given the linear distribution, we set the delta equal to the εn value for this quantile - assertRank(elemCount, ckms.get(0.01), 0.01, 0.001); + assertRank(elemCount, ckms.get(0.0), 0.0, 0.001); assertRank(elemCount, ckms.get(0.1), 0.1, 0.01); assertRank(elemCount, ckms.get(0.9), 0.9, 0.001); assertRank(elemCount, ckms.get(0.95), 0.95, 0.02); assertRank(elemCount, ckms.get(0.99), 0.99, 0.001); + assertRank(elemCount, ckms.get(1.0), 1.0, 0.001); assertTrue("sample size should be way below 1_000_000", ckms.samples.size() < 1000); } From 56ce7cc80693661f35105ccfa3e1a53936de897f Mon Sep 17 00:00:00 2001 From: Jens Date: Mon, 31 Jan 2022 22:48:03 +0100 Subject: [PATCH 3/4] Determine buffer size given an epsilon Signed-off-by: Jens --- .../io/prometheus/client/CKMSQuantiles.java | 14 +++- .../prometheus/client/CKMSQuantilesTest.java | 76 ++++++++++++++----- 2 files changed, 69 insertions(+), 21 deletions(-) diff --git a/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java b/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java index 982b63eb4..7131be963 100644 --- a/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java +++ b/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java @@ -134,12 +134,22 @@ final class CKMSQuantiles { } this.quantiles = quantiles; + // section 5.1 Methods - Batch. + // This is hardcoded to 500, which corresponds to an epsilon of 0.1%. - this.insertThreshold = 500; + // Benchmarks showed that this size has a good performance on the Arrays.sort method used in insertBatch() + // Larger values grow too much in time to sort. + int threshold = 500; + for (Quantile q : quantiles) { + // Find a smaller threshold to cater for scenarios where this is required, e.g., large epsilon. + threshold = Math.min(threshold, (int) (1 / (2 * q.epsilon))); + } + + this.insertThreshold = threshold; // create a buffer with size equal to threshold - this.buffer = new double[insertThreshold]; + this.buffer = new double[threshold]; // Initialize empty items this.samples = new LinkedList(); diff --git a/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java b/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java index ca8accbbe..49294f58c 100644 --- a/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java +++ b/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java @@ -6,7 +6,10 @@ import org.apache.commons.math3.random.RandomGenerator; import org.junit.Test; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; import static org.junit.Assert.*; @@ -33,7 +36,7 @@ public void testGet() { quantiles.add(new Quantile(0.95, 0.01)); quantiles.add(new Quantile(0.99, 0.01)); - List input = makeSequence(1, 100); + List input = setupInput(100); CKMSQuantiles ckms = new CKMSQuantiles( quantiles.toArray(new Quantile[]{})); for (double v : input) { @@ -57,12 +60,7 @@ public void testGetWithAMillionElements() { quantiles.add(new Quantile(1.0, 0.001)); final int elemCount = 1000000; - List shuffle = new ArrayList(elemCount); - for (int i = 0; i < elemCount; i++) { - shuffle.add(i+1.0); - } - Random rand = new Random(0); - Collections.shuffle(shuffle, rand); + List shuffle = setupInput(elemCount); CKMSQuantiles ckms = new CKMSQuantiles( quantiles.toArray(new Quantile[]{})); @@ -81,6 +79,57 @@ public void testGetWithAMillionElements() { assertTrue("sample size should be way below 1_000_000", ckms.samples.size() < 1000); } + @Test + public void testGetWithASingleQuantile() { + List quantiles = new ArrayList(); + quantiles.add(new Quantile(0.95, 0.02)); + + final int elemCount = 1000000; + List shuffle = setupInput(elemCount); + + CKMSQuantiles ckms = new CKMSQuantiles( + quantiles.toArray(new Quantile[]{})); + + for (double v : shuffle) { + ckms.insert(v); + } + // given the linear distribution, we set the delta equal to the εn value for this quantile + assertRank(elemCount, ckms.get(0.95), 0.95, 0.02); + + assertTrue("sample size should be way below 1_000_000", ckms.samples.size() < 1000); + } + + @Test + public void testReturnMinAndMaxWithoutPassingTheQuantile() { + List quantiles = new ArrayList(); + quantiles.add(new Quantile(0.95, 0.02)); + + final int elemCount = 1000000; + List shuffle = setupInput(elemCount); + + CKMSQuantiles ckms = new CKMSQuantiles( + quantiles.toArray(new Quantile[]{})); + + for (double v : shuffle) { + ckms.insert(v); + } + // given the linear distribution, we set the delta equal to the εn value for this quantile + assertRank(elemCount, ckms.get(0), 0.0, 0.001); + assertRank(elemCount, ckms.get(1), 1.0, 0.001); + + assertTrue("sample size should be way below 1_000_000", ckms.samples.size() < 1000); + } + + private List setupInput(int elemCount) { + List shuffle = new ArrayList(elemCount); + for (int i = 0; i < elemCount; i++) { + shuffle.add(i + 1.0); + } + Random rand = new Random(0); + Collections.shuffle(shuffle, rand); + return shuffle; + } + private void assertRank(int elemCount, double actual, double quantile, double epsilon) { double lowerBound = elemCount * (quantile - epsilon); double upperBound = elemCount * (quantile + epsilon); @@ -159,15 +208,4 @@ public void checkBounds() { // subtract and divide by 2, assuming that the increase is linear in this small epsilon. return Math.abs(upperBound - lowerBound) / 2; } - - /** - * In Java 8 we could use IntStream - */ - List makeSequence(int begin, int end) { - List ret = new ArrayList(end - begin + 1); - for (int i = begin; i <= end; i++) { - ret.add((double) i); - } - return ret; - } } From 6fcfd6fc752bae398d0089223eb01f55ed77a729 Mon Sep 17 00:00:00 2001 From: Jens Date: Tue, 1 Feb 2022 21:15:14 +0100 Subject: [PATCH 4/4] Correctly implement algorithm Increase the currentRank variable if an item is inserted Signed-off-by: Jens --- .../java/io/prometheus/client/CKMSQuantiles.java | 16 +++++++++------- .../io/prometheus/client/CKMSQuantilesTest.java | 9 +++------ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java b/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java index 7131be963..20e918123 100644 --- a/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java +++ b/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java @@ -317,6 +317,8 @@ private void insertBatch() { it.add(newItem); count++; item = newItem; + // Note that if the new value v is inserted before vi then ri increases by 1. + currentRank += item.g; } // reset buffered items to 0. @@ -414,7 +416,14 @@ public String toString() { } /** + * Data class for Targeted quantile: T = {(φ_j , ε_j )} * + * Rather than requesting the same ε for all quantiles (the uniform case) + * or ε scaled by φ (the biased case), one might specify an arbitrary set + * of quantiles and the desired errors of ε for each in the form (φj , εj ). + * For example, input to the targeted quantiles problem might be {(0.5, 0.1), (0.2, 0.05), (0.9, 0.01)}, + * meaning that the median should be returned with 10% error, the 20th percentile with 5% error, + * and the 90th percentile with 1%. */ static class Quantile { /** @@ -435,13 +444,6 @@ static class Quantile { final double v; /** - * Targeted quantile: T = {(φ_j , ε_j )} - * Rather than requesting the same ε for all quantiles (the uniform case) - * or ε scaled by φ (the biased case), one might specify an arbitrary set - * of quantiles and the desired errors of ε for each in the form (φj , εj ). - * For example, input to the targeted quantiles problem might be {(0.5, 0.1), (0.2, 0.05), (0.9, 0.01)}, - * meaning that the median should be returned with 10% error, the 20th percentile with 5% error, - * and the 90th percentile with 1%. * * @param quantile the quantile between 0 and 1 * @param epsilon the desired error for this quantile, between 0 and 1. diff --git a/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java b/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java index 49294f58c..021b64b55 100644 --- a/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java +++ b/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java @@ -79,12 +79,13 @@ public void testGetWithAMillionElements() { assertTrue("sample size should be way below 1_000_000", ckms.samples.size() < 1000); } + @Test public void testGetWithASingleQuantile() { List quantiles = new ArrayList(); quantiles.add(new Quantile(0.95, 0.02)); - final int elemCount = 1000000; + final int elemCount = 100; List shuffle = setupInput(elemCount); CKMSQuantiles ckms = new CKMSQuantiles( @@ -95,8 +96,6 @@ public void testGetWithASingleQuantile() { } // given the linear distribution, we set the delta equal to the εn value for this quantile assertRank(elemCount, ckms.get(0.95), 0.95, 0.02); - - assertTrue("sample size should be way below 1_000_000", ckms.samples.size() < 1000); } @Test @@ -104,7 +103,7 @@ public void testReturnMinAndMaxWithoutPassingTheQuantile() { List quantiles = new ArrayList(); quantiles.add(new Quantile(0.95, 0.02)); - final int elemCount = 1000000; + final int elemCount = 1000; List shuffle = setupInput(elemCount); CKMSQuantiles ckms = new CKMSQuantiles( @@ -116,8 +115,6 @@ public void testReturnMinAndMaxWithoutPassingTheQuantile() { // given the linear distribution, we set the delta equal to the εn value for this quantile assertRank(elemCount, ckms.get(0), 0.0, 0.001); assertRank(elemCount, ckms.get(1), 1.0, 0.001); - - assertTrue("sample size should be way below 1_000_000", ckms.samples.size() < 1000); } private List setupInput(int elemCount) {