Skip to content
This repository was archived by the owner on Aug 23, 2023. It is now read-only.

Add removeAbovePercentile and removeBelowPercentile functions #992

Merged
merged 6 commits into from
Aug 15, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docs/graphite.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ See also:


| Function name and signature | Alias | Metrictank |
| -------------------------------------------------------------- | ----------- | ---------- |
| -------------------------------------------------------------- | ------------ | ---------- |
| absolute | | No |
| aggregate | | No |
| aggregateLine | | No |
Expand Down Expand Up @@ -136,9 +136,9 @@ See also:
| randomWalkFunction | randomWalk | No |
| rangeOfSeries(seriesList) series | | Stable |
| reduceSeries | reduce | No |
| removeAbovePercentile | | No |
| removeAbovePercentile(seriesList, n) seriesList | | No |
| removeAboveValue(seriesList, n) seriesList | | Stable |
| removeBelowPercentile | | No |
| removeBelowPercentile(seriesList, n) seriesList | | No |
| removeBelowValue(seriesList, n) seriesList | | Stable |
| removeBetweenPercentile | | No |
| removeEmptySeries | | No |
Expand Down
105 changes: 105 additions & 0 deletions expr/func_removeabovebelowpercentile.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package expr

import (
"fmt"
"math"
"sort"

schema "gopkg.in/raintank/schema.v1"

"github.com/grafana/metrictank/api/models"
)

// FuncRemoveAboveBelowPercentile implements both the removeAbovePercentile
// and removeBelowPercentile graphite functions; the `above` flag selects
// which of the two variants an instance applies.
type FuncRemoveAboveBelowPercentile struct {
	in    GraphiteFunc // expression producing the input series
	n     float64      // percentile cutoff, validated by NonNegativePercent
	above bool         // true: removeAbovePercentile, false: removeBelowPercentile
}

// NewRemoveAboveBelowPercentileConstructor returns a factory for the shared
// removeAbovePercentile/removeBelowPercentile implementation. The above
// argument selects which variant the produced instances apply.
func NewRemoveAboveBelowPercentileConstructor(above bool) func() GraphiteFunc {
	construct := func() GraphiteFunc {
		f := FuncRemoveAboveBelowPercentile{above: above}
		return &f
	}
	return construct
}

// Signature declares the function's arguments: an input series list and a
// float "n" (the percentile, validated by NonNegativePercent), returning a
// single series list.
func (s *FuncRemoveAboveBelowPercentile) Signature() ([]Arg, []Arg) {

	return []Arg{
		ArgSeriesList{val: &s.in},
		ArgFloat{key: "n", val: &s.n, validator: []Validator{NonNegativePercent}},
	}, []Arg{ArgSeriesList{}}
}

// Context returns the request context unchanged: this function does not
// alter what data needs to be fetched for its inputs.
func (s *FuncRemoveAboveBelowPercentile) Context(context Context) Context {
	return context
}

// Exec applies removeAbovePercentile (when s.above) or removeBelowPercentile
// to every input series: each point strictly above (resp. below) the series'
// own nth-percentile value is replaced with NaN. Each output series gets an
// updated Target/QueryPatt reflecting the applied function and a copied Tags
// map with an added "nPercentile" tag.
func (s *FuncRemoveAboveBelowPercentile) Exec(cache map[Req][]models.Series) ([]models.Series, error) {
	series, err := s.in.Exec(cache)
	if err != nil {
		return nil, err
	}

	if len(series) == 0 {
		return series, nil
	}

	var output []models.Series

	// will be reused for each getPercentileValue call
	sortedDatapointVals := make([]float64, 0, len(series[0].Datapoints))
	for _, serie := range series {
		// rename the series to reflect the applied function
		if s.above {
			serie.Target = fmt.Sprintf("removeAbovePercentile(%s, %g)", serie.Target, s.n)
		} else {
			serie.Target = fmt.Sprintf("removeBelowPercentile(%s, %g)", serie.Target, s.n)
		}
		serie.QueryPatt = serie.Target

		// copy the tags map before adding "nPercentile" so the input
		// series' map is not mutated
		newTags := make(map[string]string, len(serie.Tags)+1)
		for k, v := range serie.Tags {
			newTags[k] = v
		}
		newTags["nPercentile"] = fmt.Sprintf("%g", s.n)
		serie.Tags = newTags

		// the percentile is computed per series, over its non-NaN values
		percentile := getPercentileValue(serie.Datapoints, s.n, sortedDatapointVals)

		// build the output points in a pooled slice; points beyond the
		// cutoff have their value replaced with NaN, timestamps are kept
		out := pointSlicePool.Get().([]schema.Point)
		for _, p := range serie.Datapoints {
			if s.above {
				if p.Val > percentile {
					p.Val = math.NaN()
				}
			} else {
				if p.Val < percentile {
					p.Val = math.NaN()
				}
			}
			out = append(out, p)
		}
		serie.Datapoints = out
		output = append(output, serie)
	}

	// register the outputs in the cache — presumably so the pooled point
	// slices can be reclaimed when the request finishes; TODO confirm
	// against the other expr functions
	cache[Req{}] = append(cache[Req{}], output...)

	return output, nil
}

// sortedDatapointVals is an empty slice to be used for sorting datapoints.
// n must be > 0. if n > 100, the largest value is returned.
func getPercentileValue(datapoints []schema.Point, n float64, sortedDatapointVals []float64) float64 {
sortedDatapointVals = sortedDatapointVals[:0]
for _, p := range datapoints {
if !math.IsNaN(p.Val) {
sortedDatapointVals = append(sortedDatapointVals, p.Val)
}
}

sort.Float64s(sortedDatapointVals)

index := math.Min(math.Ceil(n/100.0*float64(len(sortedDatapointVals)+1)), float64(len(sortedDatapointVals))) - 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this correspond to the method used by graphite? it looks different.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I just simplified it.


return sortedDatapointVals[int(index)]
}
272 changes: 272 additions & 0 deletions expr/func_removeabovebelowpercentile_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
package expr

import (
"math"
"math/rand"
"strconv"
"testing"

"github.com/grafana/metrictank/api/models"
"github.com/grafana/metrictank/test"
"gopkg.in/raintank/schema.v1"
)

// TestRemoveAbovePercentileSingleAllNonNull runs removeAbovePercentile with
// n=60 over the shared fixture series a, b, c and d and verifies that points
// above each series' own 60th percentile are replaced with NaN while all
// other points (and all timestamps) are preserved.
func TestRemoveAbovePercentileSingleAllNonNull(t *testing.T) {
	testRemoveAboveBelowPercentile(
		"removeAbovePercentile",
		true,
		60,
		[]models.Series{
			{
				Interval:   10,
				QueryPatt:  "abcd",
				Target:     "a",
				Datapoints: getCopy(a),
			},
			{
				Interval:   10,
				QueryPatt:  "abcd",
				Target:     "b",
				Datapoints: getCopy(b),
			},
			{
				Interval:   10,
				QueryPatt:  "abcd",
				Target:     "c",
				Datapoints: getCopy(c),
			},
			{
				Interval:   10,
				QueryPatt:  "abcd",
				Target:     "d",
				Datapoints: getCopy(d),
			},
		},
		[]models.Series{
			{
				Interval:  10,
				QueryPatt: "removeAbovePercentile(a, 60)",
				Datapoints: []schema.Point{
					{Val: 0, Ts: 10},
					{Val: 0, Ts: 20},
					{Val: 5.5, Ts: 30},
					{Val: math.NaN(), Ts: 40},
					{Val: math.NaN(), Ts: 50},
					{Val: math.NaN(), Ts: 60},
				},
			},
			{
				Interval:  10,
				QueryPatt: "removeAbovePercentile(b, 60)",
				Datapoints: []schema.Point{
					{Val: 0, Ts: 10},
					{Val: math.MaxFloat64, Ts: 20},
					{Val: math.MaxFloat64 - 20, Ts: 30},
					{Val: math.NaN(), Ts: 40},
					{Val: 1234567890, Ts: 50},
					{Val: math.NaN(), Ts: 60},
				},
			},
			{
				Interval:  10,
				QueryPatt: "removeAbovePercentile(c, 60)",
				Datapoints: []schema.Point{
					{Val: 0, Ts: 10},
					{Val: 0, Ts: 20},
					{Val: 1, Ts: 30},
					{Val: 2, Ts: 40},
					{Val: 3, Ts: 50},
					{Val: math.NaN(), Ts: 60},
				},
			},
			{
				Interval:  10,
				QueryPatt: "removeAbovePercentile(d, 60)",
				Datapoints: []schema.Point{
					{Val: 0, Ts: 10},
					{Val: 33, Ts: 20},
					{Val: 199, Ts: 30},
					{Val: 29, Ts: 40},
					{Val: 80, Ts: 50},
					{Val: math.NaN(), Ts: 60},
				},
			},
		},
		t,
	)
}

// TestRemoveBelowPercentileSingleAllNonNull runs removeBelowPercentile with
// n=50 over the shared fixture series a, b, c and d and verifies that points
// below each series' own 50th percentile are replaced with NaN while all
// other points (and all timestamps) are preserved.
func TestRemoveBelowPercentileSingleAllNonNull(t *testing.T) {
	testRemoveAboveBelowPercentile(
		"removeBelowPercentile",
		false,
		50,
		[]models.Series{
			{
				Interval:   10,
				QueryPatt:  "abcd",
				Target:     "a",
				Datapoints: getCopy(a),
			},
			{
				Interval:   10,
				QueryPatt:  "abcd",
				Target:     "b",
				Datapoints: getCopy(b),
			},
			{
				Interval:   10,
				QueryPatt:  "abcd",
				Target:     "c",
				Datapoints: getCopy(c),
			},
			{
				Interval:   10,
				QueryPatt:  "abcd",
				Target:     "d",
				Datapoints: getCopy(d),
			},
		},
		[]models.Series{
			{
				Interval:  10,
				QueryPatt: "removeBelowPercentile(a, 50)",
				Datapoints: []schema.Point{
					{Val: math.NaN(), Ts: 10},
					{Val: math.NaN(), Ts: 20},
					{Val: 5.5, Ts: 30},
					{Val: math.NaN(), Ts: 40},
					{Val: math.NaN(), Ts: 50},
					{Val: 1234567890, Ts: 60},
				},
			},
			{
				Interval:  10,
				QueryPatt: "removeBelowPercentile(b, 50)",
				Datapoints: []schema.Point{
					{Val: math.NaN(), Ts: 10},
					{Val: math.MaxFloat64, Ts: 20},
					{Val: math.MaxFloat64 - 20, Ts: 30},
					{Val: math.NaN(), Ts: 40},
					{Val: math.NaN(), Ts: 50},
					{Val: math.NaN(), Ts: 60},
				},
			},
			{
				Interval:  10,
				QueryPatt: "removeBelowPercentile(c, 50)",
				Datapoints: []schema.Point{
					{Val: math.NaN(), Ts: 10},
					{Val: math.NaN(), Ts: 20},
					{Val: math.NaN(), Ts: 30},
					{Val: 2, Ts: 40},
					{Val: 3, Ts: 50},
					{Val: 4, Ts: 60},
				},
			},
			{
				Interval:  10,
				QueryPatt: "removeBelowPercentile(d, 50)",
				Datapoints: []schema.Point{
					{Val: math.NaN(), Ts: 10},
					{Val: math.NaN(), Ts: 20},
					{Val: 199, Ts: 30},
					{Val: math.NaN(), Ts: 40},
					{Val: 80, Ts: 50},
					{Val: 250, Ts: 60},
				},
			},
		},
		t,
	)
}

// testRemoveAboveBelowPercentile executes the removeAbovePercentile (above
// true) or removeBelowPercentile (above false) variant with percentile n
// over the input series and compares the result against out. Two NaN point
// values are treated as equal; QueryPatt, point values and timestamps must
// all match. name is only used in failure messages.
func testRemoveAboveBelowPercentile(name string, above bool, n float64, in []models.Series, out []models.Series, t *testing.T) {
	t.Helper()
	f := NewRemoveAboveBelowPercentileConstructor(above)()
	f.(*FuncRemoveAboveBelowPercentile).in = NewMock(in)
	f.(*FuncRemoveAboveBelowPercentile).n = n
	gots, err := f.Exec(make(map[Req][]models.Series))
	if err != nil {
		t.Fatalf("case %q (%f): err should be nil. got %q", name, n, err)
	}
	if len(gots) != len(out) {
		// message previously said "isNonNull", a copy-paste leftover from
		// another function's test helper
		t.Fatalf("case %q (%f): len output expected %d, got %d", name, n, len(out), len(gots))
	}
	for i, g := range gots {
		exp := out[i]
		if g.QueryPatt != exp.QueryPatt {
			t.Fatalf("case %q (%f): expected target %q, got %q", name, n, exp.QueryPatt, g.QueryPatt)
		}
		if len(g.Datapoints) != len(exp.Datapoints) {
			t.Fatalf("case %q (%f) len output expected %d, got %d", name, n, len(exp.Datapoints), len(g.Datapoints))
		}
		for j, p := range g.Datapoints {
			bothNaN := math.IsNaN(p.Val) && math.IsNaN(exp.Datapoints[j].Val)
			if (bothNaN || p.Val == exp.Datapoints[j].Val) && p.Ts == exp.Datapoints[j].Ts {
				continue
			}
			t.Fatalf("case %q (%f): output point %d - expected %v got %v", name, n, j, exp.Datapoints[j], p)
		}
	}
}
// The benchmarks below exercise removeAbove/BelowPercentile over 10k-point
// series, varying the number of series (1, 10, 100, 1000) and whether none,
// the odd-indexed half, or all of the series contain null (NaN) values.
func BenchmarkRemoveAboveBelowPercentile10k_1NoNulls(b *testing.B) {
	benchmarkRemoveAboveBelowPercentile(b, 1, test.RandFloats10k, test.RandFloats10k)
}
func BenchmarkRemoveAboveBelowPercentile10k_10NoNulls(b *testing.B) {
	benchmarkRemoveAboveBelowPercentile(b, 10, test.RandFloats10k, test.RandFloats10k)
}
func BenchmarkRemoveAboveBelowPercentile10k_100NoNulls(b *testing.B) {
	benchmarkRemoveAboveBelowPercentile(b, 100, test.RandFloats10k, test.RandFloats10k)
}
func BenchmarkRemoveAboveBelowPercentile10k_1000NoNulls(b *testing.B) {
	benchmarkRemoveAboveBelowPercentile(b, 1000, test.RandFloats10k, test.RandFloats10k)
}
func BenchmarkRemoveAboveBelowPercentile10k_1SomeSeriesHalfNulls(b *testing.B) {
	benchmarkRemoveAboveBelowPercentile(b, 1, test.RandFloats10k, test.RandFloatsWithNulls10k)
}
func BenchmarkRemoveAboveBelowPercentile10k_10SomeSeriesHalfNulls(b *testing.B) {
	benchmarkRemoveAboveBelowPercentile(b, 10, test.RandFloats10k, test.RandFloatsWithNulls10k)
}
func BenchmarkRemoveAboveBelowPercentile10k_100SomeSeriesHalfNulls(b *testing.B) {
	benchmarkRemoveAboveBelowPercentile(b, 100, test.RandFloats10k, test.RandFloatsWithNulls10k)
}
func BenchmarkRemoveAboveBelowPercentile10k_1000SomeSeriesHalfNulls(b *testing.B) {
	benchmarkRemoveAboveBelowPercentile(b, 1000, test.RandFloats10k, test.RandFloatsWithNulls10k)
}
func BenchmarkRemoveAboveBelowPercentile10k_1AllSeriesHalfNulls(b *testing.B) {
	benchmarkRemoveAboveBelowPercentile(b, 1, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
}
func BenchmarkRemoveAboveBelowPercentile10k_10AllSeriesHalfNulls(b *testing.B) {
	benchmarkRemoveAboveBelowPercentile(b, 10, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
}
func BenchmarkRemoveAboveBelowPercentile10k_100AllSeriesHalfNulls(b *testing.B) {
	benchmarkRemoveAboveBelowPercentile(b, 100, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
}
func BenchmarkRemoveAboveBelowPercentile10k_1000AllSeriesHalfNulls(b *testing.B) {
	benchmarkRemoveAboveBelowPercentile(b, 1000, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
}
// benchmarkRemoveAboveBelowPercentile builds numSeries input series
// (even-indexed ones get datapoints from fn0, odd-indexed ones from fn1)
// and then, per iteration, runs a randomly chosen above/below variant with
// a random percentile in [1,100], storing the result in the package-level
// sink to defeat dead-code elimination.
func benchmarkRemoveAboveBelowPercentile(b *testing.B, numSeries int, fn0, fn1 func() []schema.Point) {
	var input []models.Series
	for i := 0; i < numSeries; i++ {
		gen := fn0
		if i%2 != 0 {
			gen = fn1
		}
		input = append(input, models.Series{
			QueryPatt:  strconv.Itoa(i),
			Datapoints: gen(),
		})
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		fn := NewRemoveAboveBelowPercentileConstructor(rand.Int()%2 == 0)()
		impl := fn.(*FuncRemoveAboveBelowPercentile)
		impl.in = NewMock(input)
		impl.n = float64(rand.Int()%100 + 1)
		got, err := fn.Exec(make(map[Req][]models.Series))
		if err != nil {
			b.Fatalf("%s", err)
		}
		results = got
	}
}
Loading