Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve shortcomings in the first iteration of CKMS #759

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 36 additions & 18 deletions simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java
Original file line number Diff line number Diff line change
Expand Up @@ -129,21 +129,27 @@ final class CKMSQuantiles {
* @param quantiles The targeted quantiles, can be empty.
*/
CKMSQuantiles(Quantile[] quantiles) {
// hard-coded epsilon of 0.1% to determine the batch size, and default epsilon in case of empty quantiles
double pointOnePercent = 0.001;
if (quantiles.length == 0) { // we need at least one for this algorithm to work
this.quantiles = new Quantile[1];
this.quantiles[0] = new Quantile(0.5, pointOnePercent / 2);
} else {
this.quantiles = quantiles;
throw new IllegalArgumentException("quantiles cannot be empty");
}
this.quantiles = quantiles;


// section 5.1 Methods - Batch.

// This is hardcoded to 500, which corresponds to an epsilon of 0.1%.
this.insertThreshold = 500;
// Benchmarks showed that this size has a good performance on the Arrays.sort method used in insertBatch()
// Larger values grow too much in time to sort.
int threshold = 500;
for (Quantile q : quantiles) {
// Find a smaller threshold to cater for scenarios where this is required, e.g., large epsilon.
threshold = Math.min(threshold, (int) (1 / (2 * q.epsilon)));
}

this.insertThreshold = threshold;

// create a buffer with size equal to threshold
this.buffer = new double[insertThreshold];
this.buffer = new double[threshold];

// Initialize empty items
this.samples = new LinkedList<Item>();
Expand Down Expand Up @@ -184,6 +190,13 @@ public double get(double q) {
return Double.NaN;
}

// short-circuit min (p0) and max (p100) queries
if (q == 0.0) {
return samples.getFirst().value;
} else if (q == 1.0) {
return samples.getLast().value;
}

// Straightforward implementation of Output(q).
// Paper Section 3.1 on true rank: let r_i = Sum_{j=1}^{i−1} g_j
int currentRank = 0;
Expand All @@ -204,8 +217,8 @@ public double get(double q) {
}
}

// edge case of wanting max value
return samples.getLast().value;
// The iterator is exhausted, return the last value.
return cur.value;
}

/**
Expand Down Expand Up @@ -304,6 +317,8 @@ private void insertBatch() {
it.add(newItem);
count++;
item = newItem;
// Note that if the new value v is inserted before vi then ri increases by 1.
currentRank += item.g;
}

// reset buffered items to 0.
Expand Down Expand Up @@ -332,6 +347,9 @@ private void compress() {

ListIterator<Item> it = samples.listIterator();

// Preserve the first element (min) so that q=0.0 can be looked up
it.next();

Item prev;
Item next = it.next();

Expand Down Expand Up @@ -398,7 +416,14 @@ public String toString() {
}

/**
* Data class for Targeted quantile: T = {(φ_j , ε_j )}
*
* Rather than requesting the same ε for all quantiles (the uniform case)
* or ε scaled by φ (the biased case), one might specify an arbitrary set
* of quantiles and the desired error ε for each, in the form (φ_j, ε_j).
* For example, input to the targeted quantiles problem might be {(0.5, 0.1), (0.2, 0.05), (0.9, 0.01)},
* meaning that the median should be returned with 10% error, the 20th percentile with 5% error,
* and the 90th percentile with 1%.
*/
static class Quantile {
/**
Expand All @@ -419,20 +444,13 @@ static class Quantile {
final double v;

/**
* Targeted quantile: T = {(φ_j , ε_j )}
* Rather than requesting the same ε for all quantiles (the uniform case)
* or ε scaled by φ (the biased case), one might specify an arbitrary set
* of quantiles and the desired errors of ε for each in the form (φj , εj ).
* For example, input to the targeted quantiles problem might be {(0.5, 0.1), (0.2, 0.05), (0.9, 0.01)},
* meaning that the median should be returned with 10% error, the 20th percentile with 5% error,
* and the 90th percentile with 1%.
*
* @param quantile the quantile between 0 and 1
* @param epsilon the desired error for this quantile, between 0 and 1.
*/
Quantile(double quantile, double epsilon) {
if (quantile < 0 || quantile > 1.0) throw new IllegalArgumentException("Quantile must be between 0 and 1");
if (epsilon < 0 || epsilon > 1.0) throw new IllegalArgumentException("Epsilon must be between 0 and 1");
if (epsilon <= 0 || epsilon >= 1.0) throw new IllegalArgumentException("Epsilon must be between 0 and 1");

this.quantile = quantile;
this.epsilon = epsilon;
Expand Down
4 changes: 2 additions & 2 deletions simpleclient/src/main/java/io/prometheus/client/Summary.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,10 @@ public static class Builder extends SimpleCollector.Builder<Builder, Summary> {
private int ageBuckets = 5;

public Builder quantile(double quantile, double error) {
if (quantile < 0.0 || quantile > 1.0) {
if (quantile <= 0.0 || quantile >= 1.0) {
throw new IllegalArgumentException("Quantile " + quantile + " invalid: Expected number between 0.0 and 1.0.");
}
if (error < 0.0 || error > 1.0) {
if (error <= 0.0 || error >= 1.0) {
throw new IllegalArgumentException("Error " + error + " invalid: Expected number between 0.0 and 1.0.");
}
quantiles.add(new Quantile(quantile, error));
Expand Down
122 changes: 67 additions & 55 deletions simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
import org.apache.commons.math3.random.RandomGenerator;
import org.junit.Test;

import java.util.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;

import static org.junit.Assert.*;

Expand All @@ -25,37 +28,6 @@ public void testGetOnEmptyValues() {
assertEquals(Double.NaN, ckms.get(0), 0);
}

@Test
public void testGetWhenNoQuantilesAreDefined() {
CKMSQuantiles ckms = new CKMSQuantiles(new Quantile[]{});
assertEquals(Double.NaN, ckms.get(0), 0);
}

@Test
public void testInsertWhenNoQuantilesAreDefined() {
CKMSQuantiles ckms = new CKMSQuantiles(new Quantile[]{});
ckms.insert(1.0);
ckms.insert(2.0);
ckms.insert(3.0);
assertEquals(1.0, ckms.get(0), 0);
assertEquals(2.0, ckms.get(0.5), 0);
assertEquals(3.0, ckms.get(1), 0);
}

@Test
public void testCompressWhenBufferSize500Reached() {
CKMSQuantiles ckms = new CKMSQuantiles(new Quantile[]{});
List<Double> input = makeSequence(1, 499);

for (double v : input) {
ckms.insert(v);
}
assertEquals("No compress should be triggered", 0, ckms.samples.size());

ckms.insert(500);
assertEquals(500, ckms.samples.size());
}

@Test
public void testGet() {
List<Quantile> quantiles = new ArrayList<Quantile>();
Expand All @@ -64,7 +36,7 @@ public void testGet() {
quantiles.add(new Quantile(0.95, 0.01));
quantiles.add(new Quantile(0.99, 0.01));

List<Double> input = makeSequence(1, 100);
List<Double> input = setupInput(100);
CKMSQuantiles ckms = new CKMSQuantiles(
quantiles.toArray(new Quantile[]{}));
for (double v : input) {
Expand All @@ -80,20 +52,41 @@ public void testGet() {
@Test
public void testGetWithAMillionElements() {
List<Quantile> quantiles = new ArrayList<Quantile>();
quantiles.add(new Quantile(0.0, 0.01));
quantiles.add(new Quantile(0.0, 0.001));
quantiles.add(new Quantile(0.10, 0.01));
quantiles.add(new Quantile(0.90, 0.001));
quantiles.add(new Quantile(0.95, 0.02));
quantiles.add(new Quantile(0.99, 0.001));
quantiles.add(new Quantile(1.0, 0.001));

final int elemCount = 1000000;
double[] shuffle = new double[elemCount];
for (int i = 0; i < shuffle.length; i++) {
shuffle[i] = i + 1;
List<Double> shuffle = setupInput(elemCount);

CKMSQuantiles ckms = new CKMSQuantiles(
quantiles.toArray(new Quantile[]{}));

for (double v : shuffle) {
ckms.insert(v);
}
Random rand = new Random(0);
// given the linear distribution, we set the delta equal to the εn value for this quantile
assertRank(elemCount, ckms.get(0.0), 0.0, 0.001);
assertRank(elemCount, ckms.get(0.1), 0.1, 0.01);
assertRank(elemCount, ckms.get(0.9), 0.9, 0.001);
assertRank(elemCount, ckms.get(0.95), 0.95, 0.02);
assertRank(elemCount, ckms.get(0.99), 0.99, 0.001);
assertRank(elemCount, ckms.get(1.0), 1.0, 0.001);

assertTrue("sample size should be way below 1_000_000", ckms.samples.size() < 1000);
}


Collections.shuffle(Arrays.asList(shuffle), rand);
@Test
public void testGetWithASingleQuantile() {
List<Quantile> quantiles = new ArrayList<Quantile>();
quantiles.add(new Quantile(0.95, 0.02));

final int elemCount = 100;
List<Double> shuffle = setupInput(elemCount);

CKMSQuantiles ckms = new CKMSQuantiles(
quantiles.toArray(new Quantile[]{}));
Expand All @@ -102,14 +95,44 @@ public void testGetWithAMillionElements() {
ckms.insert(v);
}
// given the linear distribution, we set the delta equal to the εn value for this quantile
assertEquals(0.1 * elemCount, ckms.get(0.1), 0.01 * elemCount);
assertEquals(0.9 * elemCount, ckms.get(0.9), 0.001 * elemCount);
assertEquals(0.95 * elemCount, ckms.get(0.95), 0.02 * elemCount);
assertEquals(0.99 * elemCount, ckms.get(0.99), 0.001 * elemCount);
assertRank(elemCount, ckms.get(0.95), 0.95, 0.02);
}

assertTrue("sample size should be way below 1_000_000", ckms.samples.size() < 1000);
/**
 * Only the 0.95 quantile is targeted, yet q=0 (minimum) and q=1 (maximum)
 * must still be answerable from the summary.
 */
@Test
public void testReturnMinAndMaxWithoutPassingTheQuantile() {
    final int elemCount = 1000;
    final Quantile[] targets = new Quantile[]{new Quantile(0.95, 0.02)};
    final CKMSQuantiles ckms = new CKMSQuantiles(targets);

    // Feed the shuffled sequence 1..elemCount into the estimator.
    final List<Double> input = setupInput(elemCount);
    for (double value : input) {
        ckms.insert(value);
    }

    // The inputs are 1..elemCount, so min and max are checked against
    // rank 0 and rank elemCount with a tolerance of 0.1% of elemCount.
    final double minimum = ckms.get(0);
    final double maximum = ckms.get(1);
    assertRank(elemCount, minimum, 0.0, 0.001);
    assertRank(elemCount, maximum, 1.0, 0.001);
}

/**
 * Produces the values 1.0, 2.0, ..., {@code elemCount} in a shuffled order.
 * The shuffle is driven by a {@link Random} seeded with 0, so repeated calls
 * with the same count return the same permutation.
 *
 * @param elemCount how many values to generate
 * @return a shuffled list holding each generated value exactly once
 */
private List<Double> setupInput(int elemCount) {
    final List<Double> values = new ArrayList<Double>(elemCount);
    for (int produced = 0; produced < elemCount; produced++) {
        values.add(1.0 + produced);
    }
    // Fixed seed keeps the test input reproducible between runs.
    Collections.shuffle(values, new Random(0));
    return values;
}

/**
 * Asserts that {@code actual} lies within
 * {@code [elemCount * (quantile - epsilon), elemCount * (quantile + epsilon)]}.
 *
 * @param elemCount number of elements used to scale the bounds
 * @param actual    the value to check
 * @param quantile  the quantile the value was requested for
 * @param epsilon   the permitted deviation, expressed as a fraction of elemCount
 */
private void assertRank(int elemCount, double actual, double quantile, double epsilon) {
    final double lowerBound = elemCount * (quantile - epsilon);
    final double upperBound = elemCount * (quantile + epsilon);

    // Both failure messages share the same leading description.
    final String described = "quantile=" + quantile + ", actual=" + actual;

    final boolean atOrAboveLower = actual >= lowerBound;
    assertTrue(described + ", lowerBound=" + lowerBound, atOrAboveLower);

    final boolean atOrBelowUpper = actual <= upperBound;
    assertTrue(described + ", upperBound=" + upperBound, atOrBelowUpper);
}

@Test
public void testGetGaussian() {
Expand Down Expand Up @@ -182,15 +205,4 @@ public void checkBounds() {
// subtract and divide by 2, assuming that the increase is linear in this small epsilon.
return Math.abs(upperBound - lowerBound) / 2;
}

/**
* In Java 8 we could use IntStream
*/
List<Double> makeSequence(int begin, int end) {
List<Double> ret = new ArrayList<Double>(end - begin + 1);
for (int i = begin; i <= end; i++) {
ret.add((double) i);
}
return ret;
}
}