Skip to content
This repository was archived by the owner on Aug 23, 2023. It is now read-only.

Commit 85def60

Browse files
authored
Merge pull request #1165 from bloomberg/GBT_perf
groupByTags Performance improvements + fix setting consolidator per group
2 parents 58c4fb1 + 56ded2e commit 85def60

8 files changed

+121
-45
lines changed

expr/func_alias.go

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ func (s *FuncAlias) Exec(cache map[Req][]models.Series) ([]models.Series, error)
3232
for i := range series {
3333
series[i].Target = s.alias
3434
series[i].QueryPatt = s.alias
35+
series[i].Tags["name"] = s.alias
3536
}
3637
return series, nil
3738
}

expr/func_alias_test.go

+3
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ func testAlias(name string, in []models.Series, out []models.Series, t *testing.
8989
if o.Target != g.Target {
9090
t.Fatalf("case %q: expected target %q, got %q", name, o.Target, g.Target)
9191
}
92+
if o.Target != g.Tags["name"] {
93+
t.Fatalf("case %q: expected target to match name tag but target = %q, tag = %q", name, o.Target, g.Tags["name"])
94+
}
9295
if len(o.Datapoints) != len(g.Datapoints) {
9396
t.Fatalf("case %q: len output expected %d, got %d", name, len(o.Datapoints), len(g.Datapoints))
9497
}

expr/func_aliasbynode.go

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ func (s *FuncAliasByNode) Exec(cache map[Req][]models.Series) ([]models.Series,
3333
n := aggKey(serie, s.nodes)
3434
series[i].Target = n
3535
series[i].QueryPatt = n
36+
series[i].Tags["name"] = n
3637
}
3738
return series, nil
3839
}

expr/func_aliassub.go

+1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ func (s *FuncAliasSub) Exec(cache map[Req][]models.Series) ([]models.Series, err
4848
name := s.search.ReplaceAllString(metric, replace)
4949
series[i].Target = name
5050
series[i].QueryPatt = name
51+
series[i].Tags["name"] = name
5152
}
5253
return series, err
5354
}

expr/func_aliassub_test.go

+3
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ func TestAliasSub(t *testing.T) {
6464
if o != g.Target {
6565
t.Fatalf("case %d: expected target %q, got %q", i, o, g.Target)
6666
}
67+
if o != g.Tags["name"] {
68+
t.Fatalf("case %d: expected name tag %q, got %q", i, o, g.Tags["name"])
69+
}
6770
}
6871
}
6972
}

expr/func_groupbytags.go

+9-6
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"bytes"
55
"errors"
66
"sort"
7-
"strings"
87

98
"github.com/grafana/metrictank/api/models"
109
"github.com/raintank/schema"
@@ -38,6 +37,10 @@ func (s *FuncGroupByTags) Exec(cache map[Req][]models.Series) ([]models.Series,
3837
return nil, err
3938
}
4039

40+
if len(series) == 0 {
41+
return series, nil
42+
}
43+
4144
if len(s.tags) == 0 {
4245
return nil, errors.New("No tags specified")
4346
}
@@ -59,7 +62,7 @@ func (s *FuncGroupByTags) Exec(cache map[Req][]models.Series) ([]models.Series,
5962
if !useName {
6063
// if all series have the same name, name becomes one of our tags
6164
for _, serie := range series {
62-
thisName := strings.Split(serie.Target, ";")[0]
65+
thisName := serie.Tags["name"]
6366
if nameReplace == "" {
6467
nameReplace = thisName
6568
} else if nameReplace != thisName {
@@ -75,12 +78,10 @@ func (s *FuncGroupByTags) Exec(cache map[Req][]models.Series) ([]models.Series,
7578
// First pass - group our series together by key
7679
var buffer bytes.Buffer
7780
for _, serie := range series {
78-
name := strings.SplitN(serie.Target, ";", 2)[0]
79-
8081
buffer.Reset()
8182

8283
if useName {
83-
buffer.WriteString(name)
84+
buffer.WriteString(serie.Tags["name"])
8485
} else {
8586
buffer.WriteString(nameReplace)
8687
}
@@ -106,7 +107,8 @@ func (s *FuncGroupByTags) Exec(cache map[Req][]models.Series) ([]models.Series,
106107

107108
// Now, for each key perform the requested aggregation
108109
for name, groupSeries := range groups {
109-
cons, queryCons := summarizeCons(series)
110+
cons, queryCons := summarizeCons(groupSeries)
111+
110112
newSeries := models.Series{
111113
Target: name,
112114
QueryPatt: name,
@@ -117,6 +119,7 @@ func (s *FuncGroupByTags) Exec(cache map[Req][]models.Series) ([]models.Series,
117119
newSeries.SetTags()
118120

119121
newSeries.Datapoints = pointSlicePool.Get().([]schema.Point)
122+
120123
aggFunc(groupSeries, &newSeries.Datapoints)
121124
cache[Req{}] = append(cache[Req{}], newSeries)
122125

expr/func_groupbytags_test.go

+101-39
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ func TestGroupByTagsSingleGroupByName(t *testing.T) {
133133
getModel("name1", sumabc),
134134
}
135135

136-
testGroupByTags("MultipleSeriesMultipleResultsMultipleNamesMoreTags", in, out, "sum", []string{"name"}, nil, t)
136+
testGroupByTags("SingleGroupByName", in, out, "sum", []string{"name"}, nil, t)
137137
}
138138

139139
func TestGroupByTagsMultipleGroupByName(t *testing.T) {
@@ -148,7 +148,7 @@ func TestGroupByTagsMultipleGroupByName(t *testing.T) {
148148
getModel("name2", sumcd),
149149
}
150150

151-
testGroupByTags("MultipleSeriesMultipleResultsMultipleNamesMoreTags", in, out, "sum", []string{"name"}, nil, t)
151+
testGroupByTags("MultipleGroupByName", in, out, "sum", []string{"name"}, nil, t)
152152
}
153153

154154
func TestGroupByTagsMultipleSeriesMissingTag(t *testing.T) {
@@ -163,7 +163,7 @@ func TestGroupByTagsMultipleSeriesMissingTag(t *testing.T) {
163163
getModel("name2;missingTag=;tag1=val1_1", sumcd),
164164
}
165165

166-
testGroupByTags("MultipleSeriesMultipleResultsGroupByName", in, out, "sum", []string{"tag1", "name", "missingTag"}, nil, t)
166+
testGroupByTags("MultipleSeriesMissingTag", in, out, "sum", []string{"tag1", "name", "missingTag"}, nil, t)
167167
}
168168

169169
func TestGroupByTagsAllAggregators(t *testing.T) {
@@ -257,62 +257,114 @@ func testGroupByTags(name string, in []models.Series, out []models.Series, agg s
257257
}
258258
}
259259

260-
func BenchmarkGroupByTags10k_1NoNulls(b *testing.B) {
261-
benchmarkGroupByTags(b, 1, test.RandFloats10k, test.RandFloats10k)
260+
// Benchmarks:
261+
262+
// input series: 1, 10, 100, 1k, 10k, 100k
263+
// output series: 1, same as input, then if applicable: 10, 100, 1k, 10k
264+
265+
// 1 input series
266+
func BenchmarkGroupByTags1in1out(b *testing.B) {
267+
benchmarkGroupByTags(b, 1, 1)
268+
}
269+
270+
// 10 input Series
271+
func BenchmarkGroupByTags10in1out(b *testing.B) {
272+
benchmarkGroupByTags(b, 10, 1)
273+
}
274+
275+
func BenchmarkGroupByTags10in10out(b *testing.B) {
276+
benchmarkGroupByTags(b, 10, 10)
277+
}
278+
279+
// 100 input series
280+
func BenchmarkGroupByTags100in1out(b *testing.B) {
281+
benchmarkGroupByTags(b, 100, 1)
282+
}
283+
284+
func BenchmarkGroupByTags100in10out(b *testing.B) {
285+
benchmarkGroupByTags(b, 100, 10)
286+
}
287+
288+
func BenchmarkGroupByTags100in100out(b *testing.B) {
289+
benchmarkGroupByTags(b, 100, 100)
290+
}
291+
292+
// 1k input series
293+
func BenchmarkGroupByTags1000in1out(b *testing.B) {
294+
benchmarkGroupByTags(b, 1000, 1)
295+
}
296+
297+
func BenchmarkGroupByTags1000in10out(b *testing.B) {
298+
benchmarkGroupByTags(b, 1000, 10)
299+
}
300+
301+
func BenchmarkGroupByTags1000in100out(b *testing.B) {
302+
benchmarkGroupByTags(b, 1000, 100)
303+
}
304+
305+
func BenchmarkGroupByTags1000in1000out(b *testing.B) {
306+
benchmarkGroupByTags(b, 1000, 1000)
262307
}
263-
func BenchmarkGroupByTags10k_10NoNulls(b *testing.B) {
264-
benchmarkGroupByTags(b, 10, test.RandFloats10k, test.RandFloats10k)
308+
309+
// 10k input series
310+
func BenchmarkGroupByTags10000in1out(b *testing.B) {
311+
benchmarkGroupByTags(b, 10000, 1)
265312
}
266-
func BenchmarkGroupByTags10k_100NoNulls(b *testing.B) {
267-
benchmarkGroupByTags(b, 100, test.RandFloats10k, test.RandFloats10k)
313+
314+
func BenchmarkGroupByTags10000in10out(b *testing.B) {
315+
benchmarkGroupByTags(b, 10000, 10)
268316
}
269-
func BenchmarkGroupByTags10k_1000NoNulls(b *testing.B) {
270-
benchmarkGroupByTags(b, 1000, test.RandFloats10k, test.RandFloats10k)
317+
318+
func BenchmarkGroupByTags10000in100out(b *testing.B) {
319+
benchmarkGroupByTags(b, 10000, 100)
271320
}
272321

273-
func BenchmarkGroupByTags10k_1SomeSeriesHalfNulls(b *testing.B) {
274-
benchmarkGroupByTags(b, 1, test.RandFloats10k, test.RandFloatsWithNulls10k)
322+
func BenchmarkGroupByTags10000in1000out(b *testing.B) {
323+
benchmarkGroupByTags(b, 10000, 1000)
275324
}
276-
func BenchmarkGroupByTags10k_10SomeSeriesHalfNulls(b *testing.B) {
277-
benchmarkGroupByTags(b, 10, test.RandFloats10k, test.RandFloatsWithNulls10k)
325+
326+
func BenchmarkGroupByTags10000in10000out(b *testing.B) {
327+
benchmarkGroupByTags(b, 10000, 10000)
278328
}
279-
func BenchmarkGroupByTags10k_100SomeSeriesHalfNulls(b *testing.B) {
280-
benchmarkGroupByTags(b, 100, test.RandFloats10k, test.RandFloatsWithNulls10k)
329+
330+
// 100k input series
331+
func BenchmarkGroupByTags100000in1out(b *testing.B) {
332+
benchmarkGroupByTags(b, 100000, 1)
281333
}
282-
func BenchmarkGroupByTags10k_1000SomeSeriesHalfNulls(b *testing.B) {
283-
benchmarkGroupByTags(b, 1000, test.RandFloats10k, test.RandFloatsWithNulls10k)
334+
335+
func BenchmarkGroupByTags100000in10out(b *testing.B) {
336+
benchmarkGroupByTags(b, 100000, 10)
284337
}
285338

286-
func BenchmarkGroupByTags10k_1AllSeriesHalfNulls(b *testing.B) {
287-
benchmarkGroupByTags(b, 1, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
339+
func BenchmarkGroupByTags100000in100out(b *testing.B) {
340+
benchmarkGroupByTags(b, 100000, 100)
288341
}
289-
func BenchmarkGroupByTags10k_10AllSeriesHalfNulls(b *testing.B) {
290-
benchmarkGroupByTags(b, 10, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
342+
343+
func BenchmarkGroupByTags100000in1000out(b *testing.B) {
344+
benchmarkGroupByTags(b, 100000, 1000)
291345
}
292-
func BenchmarkGroupByTags10k_100AllSeriesHalfNulls(b *testing.B) {
293-
benchmarkGroupByTags(b, 100, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
346+
347+
func BenchmarkGroupByTags100000in10000out(b *testing.B) {
348+
benchmarkGroupByTags(b, 100000, 10000)
294349
}
295-
func BenchmarkGroupByTags10k_1000AllSeriesHalfNulls(b *testing.B) {
296-
benchmarkGroupByTags(b, 1000, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
350+
351+
func BenchmarkGroupByTags100000in100000out(b *testing.B) {
352+
benchmarkGroupByTags(b, 100000, 100000)
297353
}
298354

299-
func benchmarkGroupByTags(b *testing.B, numSeries int, fn0, fn1 func() []schema.Point) {
355+
func benchmarkGroupByTags(b *testing.B, numInputSeries, numOutputSeries int) {
300356
var input []models.Series
301357
tagValues := []string{"tag1", "tag2", "tag3", "tag4"}
302-
for i := 0; i < numSeries; i++ {
303-
tags := make(map[string]string, len(tagValues))
304-
305-
for t, tag := range tagValues {
306-
tags[tag] = strconv.Itoa(t)
307-
}
358+
for i := 0; i < numInputSeries; i++ {
308359
series := models.Series{
309360
Target: strconv.Itoa(i),
310361
}
311-
if i%1 == 0 {
312-
series.Datapoints = fn0()
313-
} else {
314-
series.Datapoints = fn1()
362+
363+
for _, tag := range tagValues {
364+
series.Target += ";" + tag + "=" + strconv.Itoa(i%numOutputSeries)
315365
}
366+
367+
series.Datapoints = test.RandFloats100()
316368
input = append(input, series)
317369
}
318370
b.ResetTimer()
@@ -327,6 +379,16 @@ func benchmarkGroupByTags(b *testing.B, numSeries int, fn0, fn1 func() []schema.
327379
if err != nil {
328380
b.Fatalf("%s", err)
329381
}
382+
383+
if len(results) != numOutputSeries {
384+
b.Fatalf("Expected %d groups, got %d", numOutputSeries, len(results))
385+
}
386+
387+
if true {
388+
for _, serie := range results {
389+
pointSlicePool.Put(serie.Datapoints[:0])
390+
}
391+
}
330392
}
331-
b.SetBytes(int64(numSeries * len(results[0].Datapoints) * 12))
393+
b.SetBytes(int64(numInputSeries * len(results[0].Datapoints) * 12))
332394
}

test/points.go

+2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
var randFloats = make(map[int][]schema.Point)
1414
var randFloatsWithNulls = make(map[int][]schema.Point)
1515

16+
func RandFloats100() []schema.Point { return RandFloats(100) }
1617
func RandFloats10k() []schema.Point { return RandFloats(10000) }
1718
func RandFloats1M() []schema.Point { return RandFloats(1000000) }
1819

@@ -30,6 +31,7 @@ func RandFloats(size int) []schema.Point {
3031
return out
3132
}
3233

34+
func RandFloatsWithNulls100() []schema.Point { return RandFloatsWithNulls(100) }
3335
func RandFloatsWithNulls10k() []schema.Point { return RandFloatsWithNulls(10000) }
3436
func RandFloatsWithNulls1M() []schema.Point { return RandFloatsWithNulls(1000000) }
3537

0 commit comments

Comments
 (0)