Skip to content
This repository was archived by the owner on Aug 23, 2023. It is now read-only.

Commit 3afbcd6

Browse files
authoredAug 15, 2018
Merge pull request #992 from bloomberg/removeAboveBelowPercentile
Add removeAbovePercentile and removeBelowPercentile functions
2 parents e933e23 + ddc1e0f commit 3afbcd6

5 files changed

+390
-3
lines changed
 

‎docs/graphite.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ See also:
3535

3636

3737
| Function name and signature | Alias | Metrictank |
38-
| -------------------------------------------------------------- | ----------- | ---------- |
38+
| -------------------------------------------------------------- | ------------ | ---------- |
3939
| absolute | | No |
4040
| aggregate | | No |
4141
| aggregateLine | | No |
@@ -136,9 +136,9 @@ See also:
136136
| randomWalkFunction | randomWalk | No |
137137
| rangeOfSeries(seriesList) series | | Stable |
138138
| reduceSeries | reduce | No |
139-
| removeAbovePercentile | | No |
139+
| removeAbovePercentile(seriesList, n) seriesList | | Stable |
140140
| removeAboveValue(seriesList, n) seriesList | | Stable |
141-
| removeBelowPercentile | | No |
141+
| removeBelowPercentile(seriesList, n) seriesList | | Stable |
142142
| removeBelowValue(seriesList, n) seriesList | | Stable |
143143
| removeBetweenPercentile | | No |
144144
| removeEmptySeries | | No |
+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
package expr
2+
3+
import (
4+
"fmt"
5+
"math"
6+
"sort"
7+
8+
schema "gopkg.in/raintank/schema.v1"
9+
10+
"github.com/grafana/metrictank/api/models"
11+
)
12+
13+
type FuncRemoveAboveBelowPercentile struct {
14+
in GraphiteFunc
15+
n float64
16+
above bool
17+
}
18+
19+
func NewRemoveAboveBelowPercentileConstructor(above bool) func() GraphiteFunc {
20+
return func() GraphiteFunc {
21+
return &FuncRemoveAboveBelowPercentile{above: above}
22+
}
23+
}
24+
25+
func (s *FuncRemoveAboveBelowPercentile) Signature() ([]Arg, []Arg) {
26+
27+
return []Arg{
28+
ArgSeriesList{val: &s.in},
29+
ArgFloat{key: "n", val: &s.n, validator: []Validator{NonNegativePercent}},
30+
}, []Arg{ArgSeriesList{}}
31+
}
32+
33+
func (s *FuncRemoveAboveBelowPercentile) Context(context Context) Context {
34+
return context
35+
}
36+
37+
func (s *FuncRemoveAboveBelowPercentile) Exec(cache map[Req][]models.Series) ([]models.Series, error) {
38+
series, err := s.in.Exec(cache)
39+
if err != nil {
40+
return nil, err
41+
}
42+
43+
if len(series) == 0 {
44+
return series, nil
45+
}
46+
47+
var output []models.Series
48+
49+
// will be reused for each getPercentileValue call
50+
sortedDatapointVals := make([]float64, 0, len(series[0].Datapoints))
51+
for _, serie := range series {
52+
if s.above {
53+
serie.Target = fmt.Sprintf("removeAbovePercentile(%s, %g)", serie.Target, s.n)
54+
} else {
55+
serie.Target = fmt.Sprintf("removeBelowPercentile(%s, %g)", serie.Target, s.n)
56+
}
57+
serie.QueryPatt = serie.Target
58+
59+
newTags := make(map[string]string, len(serie.Tags)+1)
60+
for k, v := range serie.Tags {
61+
newTags[k] = v
62+
}
63+
newTags["nPercentile"] = fmt.Sprintf("%g", s.n)
64+
serie.Tags = newTags
65+
66+
percentile := getPercentileValue(serie.Datapoints, s.n, sortedDatapointVals)
67+
68+
out := pointSlicePool.Get().([]schema.Point)
69+
for _, p := range serie.Datapoints {
70+
if s.above {
71+
if p.Val > percentile {
72+
p.Val = math.NaN()
73+
}
74+
} else {
75+
if p.Val < percentile {
76+
p.Val = math.NaN()
77+
}
78+
}
79+
out = append(out, p)
80+
}
81+
serie.Datapoints = out
82+
output = append(output, serie)
83+
}
84+
85+
cache[Req{}] = append(cache[Req{}], output...)
86+
87+
return output, nil
88+
}
89+
90+
// sortedDatapointVals is an empty slice to be used for sorting datapoints.
91+
// n must be > 0. if n > 100, the largest value is returned.
92+
func getPercentileValue(datapoints []schema.Point, n float64, sortedDatapointVals []float64) float64 {
93+
sortedDatapointVals = sortedDatapointVals[:0]
94+
for _, p := range datapoints {
95+
if !math.IsNaN(p.Val) {
96+
sortedDatapointVals = append(sortedDatapointVals, p.Val)
97+
}
98+
}
99+
100+
sort.Float64s(sortedDatapointVals)
101+
102+
index := math.Min(math.Ceil(n/100.0*float64(len(sortedDatapointVals)+1)), float64(len(sortedDatapointVals))) - 1
103+
104+
return sortedDatapointVals[int(index)]
105+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
package expr
2+
3+
import (
4+
"math"
5+
"math/rand"
6+
"strconv"
7+
"testing"
8+
9+
"github.com/grafana/metrictank/api/models"
10+
"github.com/grafana/metrictank/test"
11+
"gopkg.in/raintank/schema.v1"
12+
)
13+
14+
func TestRemoveAbovePercentileSingleAllNonNull(t *testing.T) {
15+
testRemoveAboveBelowPercentile(
16+
"removeAbovePercentile",
17+
true,
18+
60,
19+
[]models.Series{
20+
{
21+
Interval: 10,
22+
QueryPatt: "abcd",
23+
Target: "a",
24+
Datapoints: getCopy(a),
25+
},
26+
{
27+
Interval: 10,
28+
QueryPatt: "abcd",
29+
Target: "b",
30+
Datapoints: getCopy(b),
31+
},
32+
{
33+
Interval: 10,
34+
QueryPatt: "abcd",
35+
Target: "c",
36+
Datapoints: getCopy(c),
37+
},
38+
{
39+
Interval: 10,
40+
QueryPatt: "abcd",
41+
Target: "d",
42+
Datapoints: getCopy(d),
43+
},
44+
},
45+
[]models.Series{
46+
{
47+
Interval: 10,
48+
QueryPatt: "removeAbovePercentile(a, 60)",
49+
Datapoints: []schema.Point{
50+
{Val: 0, Ts: 10},
51+
{Val: 0, Ts: 20},
52+
{Val: 5.5, Ts: 30},
53+
{Val: math.NaN(), Ts: 40},
54+
{Val: math.NaN(), Ts: 50},
55+
{Val: math.NaN(), Ts: 60},
56+
},
57+
},
58+
{
59+
Interval: 10,
60+
QueryPatt: "removeAbovePercentile(b, 60)",
61+
Datapoints: []schema.Point{
62+
{Val: 0, Ts: 10},
63+
{Val: math.MaxFloat64, Ts: 20},
64+
{Val: math.MaxFloat64 - 20, Ts: 30},
65+
{Val: math.NaN(), Ts: 40},
66+
{Val: 1234567890, Ts: 50},
67+
{Val: math.NaN(), Ts: 60},
68+
},
69+
},
70+
{
71+
Interval: 10,
72+
QueryPatt: "removeAbovePercentile(c, 60)",
73+
Datapoints: []schema.Point{
74+
{Val: 0, Ts: 10},
75+
{Val: 0, Ts: 20},
76+
{Val: 1, Ts: 30},
77+
{Val: 2, Ts: 40},
78+
{Val: 3, Ts: 50},
79+
{Val: math.NaN(), Ts: 60},
80+
},
81+
},
82+
{
83+
Interval: 10,
84+
QueryPatt: "removeAbovePercentile(d, 60)",
85+
Datapoints: []schema.Point{
86+
{Val: 0, Ts: 10},
87+
{Val: 33, Ts: 20},
88+
{Val: 199, Ts: 30},
89+
{Val: 29, Ts: 40},
90+
{Val: 80, Ts: 50},
91+
{Val: math.NaN(), Ts: 60},
92+
},
93+
},
94+
},
95+
t,
96+
)
97+
}
98+
99+
func TestRemoveBelowPercentileSingleAllNonNull(t *testing.T) {
100+
testRemoveAboveBelowPercentile(
101+
"removeBelowPercentile",
102+
false,
103+
50,
104+
[]models.Series{
105+
{
106+
Interval: 10,
107+
QueryPatt: "abcd",
108+
Target: "a",
109+
Datapoints: getCopy(a),
110+
},
111+
{
112+
Interval: 10,
113+
QueryPatt: "abcd",
114+
Target: "b",
115+
Datapoints: getCopy(b),
116+
},
117+
{
118+
Interval: 10,
119+
QueryPatt: "abcd",
120+
Target: "c",
121+
Datapoints: getCopy(c),
122+
},
123+
{
124+
Interval: 10,
125+
QueryPatt: "abcd",
126+
Target: "d",
127+
Datapoints: getCopy(d),
128+
},
129+
},
130+
[]models.Series{
131+
{
132+
Interval: 10,
133+
QueryPatt: "removeBelowPercentile(a, 50)",
134+
Datapoints: []schema.Point{
135+
{Val: math.NaN(), Ts: 10},
136+
{Val: math.NaN(), Ts: 20},
137+
{Val: 5.5, Ts: 30},
138+
{Val: math.NaN(), Ts: 40},
139+
{Val: math.NaN(), Ts: 50},
140+
{Val: 1234567890, Ts: 60},
141+
},
142+
},
143+
{
144+
Interval: 10,
145+
QueryPatt: "removeBelowPercentile(b, 50)",
146+
Datapoints: []schema.Point{
147+
{Val: math.NaN(), Ts: 10},
148+
{Val: math.MaxFloat64, Ts: 20},
149+
{Val: math.MaxFloat64 - 20, Ts: 30},
150+
{Val: math.NaN(), Ts: 40},
151+
{Val: math.NaN(), Ts: 50},
152+
{Val: math.NaN(), Ts: 60},
153+
},
154+
},
155+
{
156+
Interval: 10,
157+
QueryPatt: "removeBelowPercentile(c, 50)",
158+
Datapoints: []schema.Point{
159+
{Val: math.NaN(), Ts: 10},
160+
{Val: math.NaN(), Ts: 20},
161+
{Val: math.NaN(), Ts: 30},
162+
{Val: 2, Ts: 40},
163+
{Val: 3, Ts: 50},
164+
{Val: 4, Ts: 60},
165+
},
166+
},
167+
{
168+
Interval: 10,
169+
QueryPatt: "removeBelowPercentile(d, 50)",
170+
Datapoints: []schema.Point{
171+
{Val: math.NaN(), Ts: 10},
172+
{Val: math.NaN(), Ts: 20},
173+
{Val: 199, Ts: 30},
174+
{Val: math.NaN(), Ts: 40},
175+
{Val: 80, Ts: 50},
176+
{Val: 250, Ts: 60},
177+
},
178+
},
179+
},
180+
t,
181+
)
182+
}
183+
184+
func testRemoveAboveBelowPercentile(name string, above bool, n float64, in []models.Series, out []models.Series, t *testing.T) {
185+
f := NewRemoveAboveBelowPercentileConstructor(above)()
186+
f.(*FuncRemoveAboveBelowPercentile).in = NewMock(in)
187+
f.(*FuncRemoveAboveBelowPercentile).n = n
188+
gots, err := f.Exec(make(map[Req][]models.Series))
189+
if err != nil {
190+
t.Fatalf("case %q (%f): err should be nil. got %q", name, n, err)
191+
}
192+
if len(gots) != len(out) {
193+
t.Fatalf("case %q (%f): isNonNull len output expected %d, got %d", name, n, len(out), len(gots))
194+
}
195+
for i, g := range gots {
196+
exp := out[i]
197+
if g.QueryPatt != exp.QueryPatt {
198+
t.Fatalf("case %q (%f): expected target %q, got %q", name, n, exp.QueryPatt, g.QueryPatt)
199+
}
200+
if len(g.Datapoints) != len(exp.Datapoints) {
201+
t.Fatalf("case %q (%f) len output expected %d, got %d", name, n, len(exp.Datapoints), len(g.Datapoints))
202+
}
203+
for j, p := range g.Datapoints {
204+
bothNaN := math.IsNaN(p.Val) && math.IsNaN(exp.Datapoints[j].Val)
205+
if (bothNaN || p.Val == exp.Datapoints[j].Val) && p.Ts == exp.Datapoints[j].Ts {
206+
continue
207+
}
208+
t.Fatalf("case %q (%f): output point %d - expected %v got %v", name, n, j, exp.Datapoints[j], p)
209+
}
210+
}
211+
}
212+
func BenchmarkRemoveAboveBelowPercentile10k_1NoNulls(b *testing.B) {
213+
benchmarkRemoveAboveBelowPercentile(b, 1, test.RandFloats10k, test.RandFloats10k)
214+
}
215+
func BenchmarkRemoveAboveBelowPercentile10k_10NoNulls(b *testing.B) {
216+
benchmarkRemoveAboveBelowPercentile(b, 10, test.RandFloats10k, test.RandFloats10k)
217+
}
218+
func BenchmarkRemoveAboveBelowPercentile10k_100NoNulls(b *testing.B) {
219+
benchmarkRemoveAboveBelowPercentile(b, 100, test.RandFloats10k, test.RandFloats10k)
220+
}
221+
func BenchmarkRemoveAboveBelowPercentile10k_1000NoNulls(b *testing.B) {
222+
benchmarkRemoveAboveBelowPercentile(b, 1000, test.RandFloats10k, test.RandFloats10k)
223+
}
224+
func BenchmarkRemoveAboveBelowPercentile10k_1SomeSeriesHalfNulls(b *testing.B) {
225+
benchmarkRemoveAboveBelowPercentile(b, 1, test.RandFloats10k, test.RandFloatsWithNulls10k)
226+
}
227+
func BenchmarkRemoveAboveBelowPercentile10k_10SomeSeriesHalfNulls(b *testing.B) {
228+
benchmarkRemoveAboveBelowPercentile(b, 10, test.RandFloats10k, test.RandFloatsWithNulls10k)
229+
}
230+
func BenchmarkRemoveAboveBelowPercentile10k_100SomeSeriesHalfNulls(b *testing.B) {
231+
benchmarkRemoveAboveBelowPercentile(b, 100, test.RandFloats10k, test.RandFloatsWithNulls10k)
232+
}
233+
func BenchmarkRemoveAboveBelowPercentile10k_1000SomeSeriesHalfNulls(b *testing.B) {
234+
benchmarkRemoveAboveBelowPercentile(b, 1000, test.RandFloats10k, test.RandFloatsWithNulls10k)
235+
}
236+
func BenchmarkRemoveAboveBelowPercentile10k_1AllSeriesHalfNulls(b *testing.B) {
237+
benchmarkRemoveAboveBelowPercentile(b, 1, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
238+
}
239+
func BenchmarkRemoveAboveBelowPercentile10k_10AllSeriesHalfNulls(b *testing.B) {
240+
benchmarkRemoveAboveBelowPercentile(b, 10, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
241+
}
242+
func BenchmarkRemoveAboveBelowPercentile10k_100AllSeriesHalfNulls(b *testing.B) {
243+
benchmarkRemoveAboveBelowPercentile(b, 100, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
244+
}
245+
func BenchmarkRemoveAboveBelowPercentile10k_1000AllSeriesHalfNulls(b *testing.B) {
246+
benchmarkRemoveAboveBelowPercentile(b, 1000, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
247+
}
248+
func benchmarkRemoveAboveBelowPercentile(b *testing.B, numSeries int, fn0, fn1 func() []schema.Point) {
249+
var input []models.Series
250+
for i := 0; i < numSeries; i++ {
251+
series := models.Series{
252+
QueryPatt: strconv.Itoa(i),
253+
}
254+
if i%2 == 0 {
255+
series.Datapoints = fn0()
256+
} else {
257+
series.Datapoints = fn1()
258+
}
259+
input = append(input, series)
260+
}
261+
b.ResetTimer()
262+
for i := 0; i < b.N; i++ {
263+
f := NewRemoveAboveBelowPercentileConstructor(rand.Int()%2 == 0)()
264+
f.(*FuncRemoveAboveBelowPercentile).in = NewMock(input)
265+
f.(*FuncRemoveAboveBelowPercentile).n = float64(rand.Int()%100 + 1)
266+
got, err := f.Exec(make(map[Req][]models.Series))
267+
if err != nil {
268+
b.Fatalf("%s", err)
269+
}
270+
results = got
271+
}
272+
}

‎expr/funcs.go

+2
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,9 @@ func init() {
8282
"nonNegativeDerivative": {NewNonNegativeDerivative, true},
8383
"perSecond": {NewPerSecond, true},
8484
"rangeOfSeries": {NewAggregateConstructor("rangeOf", crossSeriesRange), true},
85+
"removeAbovePercentile": {NewRemoveAboveBelowPercentileConstructor(true), true},
8586
"removeAboveValue": {NewRemoveAboveBelowValueConstructor(true), true},
87+
"removeBelowPercentile": {NewRemoveAboveBelowPercentileConstructor(false), true},
8688
"removeBelowValue": {NewRemoveAboveBelowValueConstructor(false), true},
8789
"scale": {NewScale, true},
8890
"scaleToSeconds": {NewScaleToSeconds, true},

‎expr/validator.go

+8
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99

1010
var ErrIntPositive = errors.New("integer must be positive")
1111
var ErrInvalidAggFunc = errors.New("Invalid aggregation func")
12+
var ErrNonNegativePercent = errors.New("The requested percent is required to be greater than 0")
1213

1314
// Validator is a function to validate an input
1415
type Validator func(e *expr) error
@@ -44,3 +45,10 @@ func IsOperator(e *expr) error {
4445
}
4546
return errors.New("Unsupported operator: " + e.str)
4647
}
48+
49+
func NonNegativePercent(e *expr) error {
50+
if e.float < 0 || e.int < 0 {
51+
return ErrNonNegativePercent
52+
}
53+
return nil
54+
}

0 commit comments

Comments
 (0)
This repository has been archived.