Merge pull request #1165 from bloomberg/GBT_perf

Dieterbe · web-flow · commit 85def6044a93 · 2018-12-27T16:29:45.000+01:00
groupByTags Performance improvements + fix setting consolidator per group
diff --git a/expr/func_alias.go b/expr/func_alias.go
@@ -32,6 +32,7 @@ func (s *FuncAlias) Exec(cache map[Req][]models.Series) ([]models.Series, error)
 	for i := range series {
 		series[i].Target = s.alias
 		series[i].QueryPatt = s.alias
+		series[i].Tags["name"] = s.alias
 	}
 	return series, nil
 }
diff --git a/expr/func_alias_test.go b/expr/func_alias_test.go
@@ -89,6 +89,9 @@ func testAlias(name string, in []models.Series, out []models.Series, t *testing.
 		if o.Target != g.Target {
 			t.Fatalf("case %q: expected target %q, got %q", name, o.Target, g.Target)
 		}
+		if o.Target != g.Tags["name"] {
+			t.Fatalf("case %q: expected target to match name tag but target = %q, tag =  %q", name, o.Target, g.Tags["name"])
+		}
 		if len(o.Datapoints) != len(g.Datapoints) {
 			t.Fatalf("case %q: len output expected %d, got %d", name, len(o.Datapoints), len(g.Datapoints))
 		}
diff --git a/expr/func_aliasbynode.go b/expr/func_aliasbynode.go
@@ -33,6 +33,7 @@ func (s *FuncAliasByNode) Exec(cache map[Req][]models.Series) ([]models.Series,
 		n := aggKey(serie, s.nodes)
 		series[i].Target = n
 		series[i].QueryPatt = n
+		series[i].Tags["name"] = n
 	}
 	return series, nil
 }
diff --git a/expr/func_aliassub.go b/expr/func_aliassub.go
@@ -48,6 +48,7 @@ func (s *FuncAliasSub) Exec(cache map[Req][]models.Series) ([]models.Series, err
 		name := s.search.ReplaceAllString(metric, replace)
 		series[i].Target = name
 		series[i].QueryPatt = name
+		series[i].Tags["name"] = name
 	}
 	return series, err
 }
diff --git a/expr/func_aliassub_test.go b/expr/func_aliassub_test.go
@@ -64,6 +64,9 @@ func TestAliasSub(t *testing.T) {
 			if o != g.Target {
 				t.Fatalf("case %d: expected target %q, got %q", i, o, g.Target)
 			}
+			if o != g.Tags["name"] {
+				t.Fatalf("case %d: expected name tag %q, got %q", i, o, g.Tags["name"])
+			}
 		}
 	}
 }
diff --git a/expr/func_groupbytags.go b/expr/func_groupbytags.go
@@ -4,7 +4,6 @@ import (
 	"bytes"
 	"errors"
 	"sort"
-	"strings"
 
 	"github.com/grafana/metrictank/api/models"
 	"github.com/raintank/schema"
@@ -38,6 +37,10 @@ func (s *FuncGroupByTags) Exec(cache map[Req][]models.Series) ([]models.Series,
 		return nil, err
 	}
 
+	if len(series) == 0 {
+		return series, nil
+	}
+
 	if len(s.tags) == 0 {
 		return nil, errors.New("No tags specified")
 	}
@@ -59,7 +62,7 @@ func (s *FuncGroupByTags) Exec(cache map[Req][]models.Series) ([]models.Series,
 	if !useName {
 		// if all series have the same name, name becomes one of our tags
 		for _, serie := range series {
-			thisName := strings.Split(serie.Target, ";")[0]
+			thisName := serie.Tags["name"]
 			if nameReplace == "" {
 				nameReplace = thisName
 			} else if nameReplace != thisName {
@@ -75,12 +78,10 @@ func (s *FuncGroupByTags) Exec(cache map[Req][]models.Series) ([]models.Series,
 	// First pass - group our series together by key
 	var buffer bytes.Buffer
 	for _, serie := range series {
-		name := strings.SplitN(serie.Target, ";", 2)[0]
-
 		buffer.Reset()
 
 		if useName {
-			buffer.WriteString(name)
+			buffer.WriteString(serie.Tags["name"])
 		} else {
 			buffer.WriteString(nameReplace)
 		}
@@ -106,7 +107,8 @@ func (s *FuncGroupByTags) Exec(cache map[Req][]models.Series) ([]models.Series,
 
 	// Now, for each key perform the requested aggregation
 	for name, groupSeries := range groups {
-		cons, queryCons := summarizeCons(series)
+		cons, queryCons := summarizeCons(groupSeries)
+
 		newSeries := models.Series{
 			Target:       name,
 			QueryPatt:    name,
@@ -117,6 +119,7 @@ func (s *FuncGroupByTags) Exec(cache map[Req][]models.Series) ([]models.Series,
 		newSeries.SetTags()
 
 		newSeries.Datapoints = pointSlicePool.Get().([]schema.Point)
+
 		aggFunc(groupSeries, &newSeries.Datapoints)
 		cache[Req{}] = append(cache[Req{}], newSeries)
 
diff --git a/expr/func_groupbytags_test.go b/expr/func_groupbytags_test.go
@@ -133,7 +133,7 @@ func TestGroupByTagsSingleGroupByName(t *testing.T) {
 		getModel("name1", sumabc),
 	}
 
-	testGroupByTags("MultipleSeriesMultipleResultsMultipleNamesMoreTags", in, out, "sum", []string{"name"}, nil, t)
+	testGroupByTags("SingleGroupByName", in, out, "sum", []string{"name"}, nil, t)
 }
 
 func TestGroupByTagsMultipleGroupByName(t *testing.T) {
@@ -148,7 +148,7 @@ func TestGroupByTagsMultipleGroupByName(t *testing.T) {
 		getModel("name2", sumcd),
 	}
 
-	testGroupByTags("MultipleSeriesMultipleResultsMultipleNamesMoreTags", in, out, "sum", []string{"name"}, nil, t)
+	testGroupByTags("MultipleGroupByName", in, out, "sum", []string{"name"}, nil, t)
 }
 
 func TestGroupByTagsMultipleSeriesMissingTag(t *testing.T) {
@@ -163,7 +163,7 @@ func TestGroupByTagsMultipleSeriesMissingTag(t *testing.T) {
 		getModel("name2;missingTag=;tag1=val1_1", sumcd),
 	}
 
-	testGroupByTags("MultipleSeriesMultipleResultsGroupByName", in, out, "sum", []string{"tag1", "name", "missingTag"}, nil, t)
+	testGroupByTags("MultipleSeriesMissingTag", in, out, "sum", []string{"tag1", "name", "missingTag"}, nil, t)
 }
 
 func TestGroupByTagsAllAggregators(t *testing.T) {
@@ -257,62 +257,114 @@ func testGroupByTags(name string, in []models.Series, out []models.Series, agg s
 	}
 }
 
-func BenchmarkGroupByTags10k_1NoNulls(b *testing.B) {
-	benchmarkGroupByTags(b, 1, test.RandFloats10k, test.RandFloats10k)
+// Benchmarks:
+
+// input series: 1, 10, 100, 1k, 10k, 100k
+// output series: 1, same as input, then if applicable: 10, 100, 1k, 10k
+
+// 1 input series
+func BenchmarkGroupByTags1in1out(b *testing.B) {
+	benchmarkGroupByTags(b, 1, 1)
+}
+
+// 10 input series
+func BenchmarkGroupByTags10in1out(b *testing.B) {
+	benchmarkGroupByTags(b, 10, 1)
+}
+
+func BenchmarkGroupByTags10in10out(b *testing.B) {
+	benchmarkGroupByTags(b, 10, 10)
+}
+
+// 100 input series
+func BenchmarkGroupByTags100in1out(b *testing.B) {
+	benchmarkGroupByTags(b, 100, 1)
+}
+
+func BenchmarkGroupByTags100in10out(b *testing.B) {
+	benchmarkGroupByTags(b, 100, 10)
+}
+
+func BenchmarkGroupByTags100in100out(b *testing.B) {
+	benchmarkGroupByTags(b, 100, 100)
+}
+
+// 1k input series
+func BenchmarkGroupByTags1000in1out(b *testing.B) {
+	benchmarkGroupByTags(b, 1000, 1)
+}
+
+func BenchmarkGroupByTags1000in10out(b *testing.B) {
+	benchmarkGroupByTags(b, 1000, 10)
+}
+
+func BenchmarkGroupByTags1000in100out(b *testing.B) {
+	benchmarkGroupByTags(b, 1000, 100)
+}
+
+func BenchmarkGroupByTags1000in1000out(b *testing.B) {
+	benchmarkGroupByTags(b, 1000, 1000)
 }
-func BenchmarkGroupByTags10k_10NoNulls(b *testing.B) {
-	benchmarkGroupByTags(b, 10, test.RandFloats10k, test.RandFloats10k)
+
+// 10k input series
+func BenchmarkGroupByTags10000in1out(b *testing.B) {
+	benchmarkGroupByTags(b, 10000, 1)
 }
-func BenchmarkGroupByTags10k_100NoNulls(b *testing.B) {
-	benchmarkGroupByTags(b, 100, test.RandFloats10k, test.RandFloats10k)
+
+func BenchmarkGroupByTags10000in10out(b *testing.B) {
+	benchmarkGroupByTags(b, 10000, 10)
 }
-func BenchmarkGroupByTags10k_1000NoNulls(b *testing.B) {
-	benchmarkGroupByTags(b, 1000, test.RandFloats10k, test.RandFloats10k)
+
+func BenchmarkGroupByTags10000in100out(b *testing.B) {
+	benchmarkGroupByTags(b, 10000, 100)
 }
 
-func BenchmarkGroupByTags10k_1SomeSeriesHalfNulls(b *testing.B) {
-	benchmarkGroupByTags(b, 1, test.RandFloats10k, test.RandFloatsWithNulls10k)
+func BenchmarkGroupByTags10000in1000out(b *testing.B) {
+	benchmarkGroupByTags(b, 10000, 1000)
 }
-func BenchmarkGroupByTags10k_10SomeSeriesHalfNulls(b *testing.B) {
-	benchmarkGroupByTags(b, 10, test.RandFloats10k, test.RandFloatsWithNulls10k)
+
+func BenchmarkGroupByTags10000in10000out(b *testing.B) {
+	benchmarkGroupByTags(b, 10000, 10000)
 }
-func BenchmarkGroupByTags10k_100SomeSeriesHalfNulls(b *testing.B) {
-	benchmarkGroupByTags(b, 100, test.RandFloats10k, test.RandFloatsWithNulls10k)
+
+// 100k input series
+func BenchmarkGroupByTags100000in1out(b *testing.B) {
+	benchmarkGroupByTags(b, 100000, 1)
 }
-func BenchmarkGroupByTags10k_1000SomeSeriesHalfNulls(b *testing.B) {
-	benchmarkGroupByTags(b, 1000, test.RandFloats10k, test.RandFloatsWithNulls10k)
+
+func BenchmarkGroupByTags100000in10out(b *testing.B) {
+	benchmarkGroupByTags(b, 100000, 10)
 }
 
-func BenchmarkGroupByTags10k_1AllSeriesHalfNulls(b *testing.B) {
-	benchmarkGroupByTags(b, 1, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
+func BenchmarkGroupByTags100000in100out(b *testing.B) {
+	benchmarkGroupByTags(b, 100000, 100)
 }
-func BenchmarkGroupByTags10k_10AllSeriesHalfNulls(b *testing.B) {
-	benchmarkGroupByTags(b, 10, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
+
+func BenchmarkGroupByTags100000in1000out(b *testing.B) {
+	benchmarkGroupByTags(b, 100000, 1000)
 }
-func BenchmarkGroupByTags10k_100AllSeriesHalfNulls(b *testing.B) {
-	benchmarkGroupByTags(b, 100, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
+
+func BenchmarkGroupByTags100000in10000out(b *testing.B) {
+	benchmarkGroupByTags(b, 100000, 10000)
 }
-func BenchmarkGroupByTags10k_1000AllSeriesHalfNulls(b *testing.B) {
-	benchmarkGroupByTags(b, 1000, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
+
+func BenchmarkGroupByTags100000in100000out(b *testing.B) {
+	benchmarkGroupByTags(b, 100000, 100000)
 }
 
-func benchmarkGroupByTags(b *testing.B, numSeries int, fn0, fn1 func() []schema.Point) {
+func benchmarkGroupByTags(b *testing.B, numInputSeries, numOutputSeries int) {
 	var input []models.Series
 	tagValues := []string{"tag1", "tag2", "tag3", "tag4"}
-	for i := 0; i < numSeries; i++ {
-		tags := make(map[string]string, len(tagValues))
-
-		for t, tag := range tagValues {
-			tags[tag] = strconv.Itoa(t)
-		}
+	for i := 0; i < numInputSeries; i++ {
 		series := models.Series{
 			Target: strconv.Itoa(i),
 		}
-		if i%1 == 0 {
-			series.Datapoints = fn0()
-		} else {
-			series.Datapoints = fn1()
+
+		for _, tag := range tagValues {
+			series.Target += ";" + tag + "=" + strconv.Itoa(i%numOutputSeries)
 		}
+
+		series.Datapoints = test.RandFloats100()
 		input = append(input, series)
 	}
 	b.ResetTimer()
@@ -327,6 +379,16 @@ func benchmarkGroupByTags(b *testing.B, numSeries int, fn0, fn1 func() []schema.
 		if err != nil {
 			b.Fatalf("%s", err)
 		}
+
+		if len(results) != numOutputSeries {
+			b.Fatalf("Expected %d groups, got %d", numOutputSeries, len(results))
+		}
+
+		if true {
+			for _, serie := range results {
+				pointSlicePool.Put(serie.Datapoints[:0])
+			}
+		}
 	}
-	b.SetBytes(int64(numSeries * len(results[0].Datapoints) * 12))
+	b.SetBytes(int64(numInputSeries * len(results[0].Datapoints) * 12))
 }
diff --git a/test/points.go b/test/points.go
@@ -13,6 +13,7 @@ import (
 var randFloats = make(map[int][]schema.Point)
 var randFloatsWithNulls = make(map[int][]schema.Point)
 
+func RandFloats100() []schema.Point { return RandFloats(100) }
 func RandFloats10k() []schema.Point { return RandFloats(10000) }
 func RandFloats1M() []schema.Point  { return RandFloats(1000000) }
 
@@ -30,6 +31,7 @@ func RandFloats(size int) []schema.Point {
 	return out
 }
 
+func RandFloatsWithNulls100() []schema.Point { return RandFloatsWithNulls(100) }
 func RandFloatsWithNulls10k() []schema.Point { return RandFloatsWithNulls(10000) }
 func RandFloatsWithNulls1M() []schema.Point  { return RandFloatsWithNulls(1000000) }
 

Original file line number	Diff line number	Diff line change
`@@ -32,6 +32,7 @@ func (s *FuncAlias) Exec(cache map[Req][]models.Series) ([]models.Series, error)`
`32`	`32`	`for i := range series {`
`33`	`33`	`series[i].Target = s.alias`
`34`	`34`	`series[i].QueryPatt = s.alias`
	`35`	`+ series[i].Tags["name"] = s.alias`
`35`	`36`	`}`
`36`	`37`	`return series, nil`
`37`	`38`	`}`
Original file line number	Diff line number	Diff line change
`@@ -89,6 +89,9 @@ func testAlias(name string, in []models.Series, out []models.Series, t *testing.`
`89`	`89`	`if o.Target != g.Target {`
`90`	`90`	`t.Fatalf("case %q: expected target %q, got %q", name, o.Target, g.Target)`
`91`	`91`	`}`
	`92`	`+ if o.Target != g.Tags["name"] {`
	`93`	`+ t.Fatalf("case %q: expected target to match name tag but target = %q, tag = %q", name, o.Target, g.Tags["name"])`
	`94`	`+ }`
`92`	`95`	`if len(o.Datapoints) != len(g.Datapoints) {`
`93`	`96`	`t.Fatalf("case %q: len output expected %d, got %d", name, len(o.Datapoints), len(g.Datapoints))`
`94`	`97`	`}`
Original file line number	Diff line number	Diff line change
`@@ -33,6 +33,7 @@ func (s *FuncAliasByNode) Exec(cache map[Req][]models.Series) ([]models.Series,`
`33`	`33`	`n := aggKey(serie, s.nodes)`
`34`	`34`	`series[i].Target = n`
`35`	`35`	`series[i].QueryPatt = n`
	`36`	`+ series[i].Tags["name"] = n`
`36`	`37`	`}`
`37`	`38`	`return series, nil`
`38`	`39`	`}`
Original file line number	Diff line number	Diff line change
`@@ -48,6 +48,7 @@ func (s *FuncAliasSub) Exec(cache map[Req][]models.Series) ([]models.Series, err`
`48`	`48`	`name := s.search.ReplaceAllString(metric, replace)`
`49`	`49`	`series[i].Target = name`
`50`	`50`	`series[i].QueryPatt = name`
	`51`	`+ series[i].Tags["name"] = name`
`51`	`52`	`}`
`52`	`53`	`return series, err`
`53`	`54`	`}`
Original file line number	Diff line number	Diff line change
`@@ -64,6 +64,9 @@ func TestAliasSub(t *testing.T) {`
`64`	`64`	`if o != g.Target {`
`65`	`65`	`t.Fatalf("case %d: expected target %q, got %q", i, o, g.Target)`
`66`	`66`	`}`
	`67`	`+ if o != g.Tags["name"] {`
	`68`	`+ t.Fatalf("case %d: expected name tag %q, got %q", i, o, g.Tags["name"])`
	`69`	`+ }`
`67`	`70`	`}`
`68`	`71`	`}`
`69`	`72`	`}`