Skip to content

Commit

Permalink
Improve stats accuracy at low percentile
Browse files Browse the repository at this point in the history
And corresponding test
  • Loading branch information
ldemailly committed Jul 4, 2017
1 parent 1410a5c commit debb027
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 7 deletions.
6 changes: 6 additions & 0 deletions test/Stats_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,13 @@ TEST(StatsTest, HistogramPercentile) {
std::cout << i << "% at " << h.calcPercentile(i) << std::endl;
}
EXPECT_EQ(501, h.getAverage());
EXPECT_EQ(1, h.calcPercentile(-1));
EXPECT_EQ(1, h.calcPercentile(0));
EXPECT_EQ(1.045, h.calcPercentile(0.1));
EXPECT_EQ(1.45, h.calcPercentile(1));
EXPECT_EQ(2.8, h.calcPercentile(4));
EXPECT_EQ(10, h.calcPercentile(20));
EXPECT_EQ(250.25, h.calcPercentile(20.1));
EXPECT_EQ(550, h.calcPercentile(50));
EXPECT_EQ(775, h.calcPercentile(75));
EXPECT_EQ(1000.5, h.calcPercentile(90));
Expand Down
19 changes: 13 additions & 6 deletions util/Stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,20 @@ int32_t* initHistLookup() {
static const int32_t* kVal2Bucket = initHistLookup();

void Counter::record(int64_t v) {
int64_t newCount = ++count_; // atomic_add(count_, 1);
// atomic_add(sum_, v);
int64_t newCount = ++count_;
sum_ += v;
if (newCount == 1) {
min_ = max_ = v;
realMin_ = min_ = max_ = v;
}
// this can actually miss/revert the min/max (race condition)
// don't count 0 in the min - makes for a slightly more interesting min
// don't count 0s in the min - makes for a slightly more interesting min
// in most cases
if (v && ((min_ == 0) || (v < min_))) {
min_ = v;
}
// real min (can be 0):
if (v < realMin_ ) {
realMin_ = v;
}
if (v > max_) {
max_ = v;
}
Expand Down Expand Up @@ -176,7 +178,7 @@ double Histogram::calcPercentile(double percentile) const {
return getMax();
}
if (percentile <= 0) {
return 0;
return getRealMin();
}
// Initial value of prev should in theory be offset_
// but if the data is wrong (smaller than offset - eg 'negative') that
Expand All @@ -185,6 +187,7 @@ double Histogram::calcPercentile(double percentile) const {
int64_t total = 0;
const int64_t ctrTotal = getCount();
const int64_t ctrMax = getMax();
const int64_t ctrMin = getRealMin();
double prevPerc = 0;
double perc = 0;
bool found = false;
Expand Down Expand Up @@ -213,6 +216,10 @@ double Histogram::calcPercentile(double percentile) const {
cur = ctrMax;
perc = 100.; // can't be removed
}
// Fix up the min too to never return < min and increase low p accuracy
if (prev < ctrMin) {
prev = ctrMin;
}
return (prev + (percentile - prevPerc) * (cur - prev) / (perc - prevPerc));
}

Expand Down
8 changes: 7 additions & 1 deletion util/Stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ class Counter : crashifcopied {
public:
explicit Counter(double printScale = 1.0)
: count_(0),
realMin_(0),
min_(0),
max_(0),
sum_(0),
Expand All @@ -161,6 +162,11 @@ class Counter : crashifcopied {
return count_;
}

// Includes 0s
inline int64_t getRealMin() const {
return realMin_;
}
// Excludes 0
inline int64_t getMin() const {
return min_;
}
Expand Down Expand Up @@ -209,7 +215,7 @@ class Counter : crashifcopied {
void merge(const Counter& c);

private:
int64_t count_, min_, max_, sum_, sumOfSquares_;
int64_t count_, realMin_, min_, max_, sum_, sumOfSquares_;
double printScale_;
};

Expand Down

0 comments on commit debb027

Please sign in to comment.