Skip to content
This repository was archived by the owner on Aug 23, 2023. It is now read-only.

Commit c3ae72d

Browse files
authored
Merge pull request #1745 from bloomberg/unique
Add unique processing function
2 parents 31e90cc + ad1028f commit c3ae72d

File tree

4 files changed

+207
-1
lines changed

4 files changed

+207
-1
lines changed

docs/graphite.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ See also:
169169
| timeSlice | | No |
170170
| timeStack | | No |
171171
| transformNull(seriesList, default=0) seriesList | | Stable |
172-
| unique | | No |
172+
| unique | | Stable |
173173
| useSeriesAbove | | No |
174174
| verticalLine | | No |
175175
| weightedAverage | | No |

expr/func_unique.go

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package expr
2+
3+
import (
4+
"github.com/grafana/metrictank/api/models"
5+
)
6+
7+
// FuncUnique implements the Graphite "unique" processing function:
// it filters its inputs down to the first series seen per target name.
type FuncUnique struct {
	// in holds the input series expressions to consume and deduplicate.
	in []GraphiteFunc
}
10+
11+
func NewUnique() GraphiteFunc {
12+
return &FuncUnique{}
13+
}
14+
15+
func (s *FuncUnique) Signature() ([]Arg, []Arg) {
16+
return []Arg{
17+
ArgSeriesLists{val: &s.in}}, []Arg{ArgSeriesList{}}
18+
}
19+
20+
// Context returns the given context unchanged: unique does not alter the
// request settings (e.g. time range) of its inputs.
func (s *FuncUnique) Context(context Context) Context {
	return context
}
23+
24+
func (s *FuncUnique) Exec(dataMap DataMap) ([]models.Series, error) {
25+
series, _, err := consumeFuncs(dataMap, s.in)
26+
if err != nil {
27+
return nil, err
28+
}
29+
seenNames := make(map[string]bool)
30+
var uniqueSeries []models.Series
31+
for _, serie := range series {
32+
if _, ok := seenNames[serie.Target]; !ok {
33+
seenNames[serie.Target] = true
34+
uniqueSeries = append(uniqueSeries, serie)
35+
}
36+
}
37+
return uniqueSeries, nil
38+
}

expr/func_unique_test.go

+167
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
package expr
2+
3+
import (
4+
"strconv"
5+
"testing"
6+
7+
"github.com/grafana/metrictank/api/models"
8+
"github.com/grafana/metrictank/schema"
9+
"github.com/grafana/metrictank/test"
10+
)
11+
12+
func getNewUnique(in [][]models.Series) *FuncUnique {
13+
f := NewUnique()
14+
s := f.(*FuncUnique)
15+
for i := range in {
16+
s.in = append(s.in, NewMock(in[i]))
17+
}
18+
return s
19+
}
20+
21+
// TestUnique feeds three series lists (containing duplicate targets both
// within and across lists) through unique and expects only the first
// occurrence of each target to survive, in input order.
func TestUnique(t *testing.T) {
	f := getNewUnique([][]models.Series{
		{
			{
				Interval:   10,
				QueryPatt:  "foo.a",
				Target:     "foo.a",
				Datapoints: getCopy(a),
			},
			{
				Interval:   100,
				QueryPatt:  "foo.b",
				Target:     "foo.b",
				Datapoints: getCopy(b),
			},
		},
		{
			{
				Interval:   10,
				QueryPatt:  "bar.b",
				Target:     "bar.b",
				Datapoints: getCopy(b),
			},
		},
		{
			// duplicate of foo.a from the first list — must be dropped
			{
				Interval:   10,
				QueryPatt:  "foo.a",
				Target:     "foo.a",
				Datapoints: getCopy(a),
			},
			// duplicate target bar.b (different QueryPatt) — must be dropped
			{
				Interval:   10,
				QueryPatt:  "bar.*",
				Target:     "bar.b",
				Datapoints: getCopy(b),
			},
			{
				Interval:   100,
				QueryPatt:  "bar.*",
				Target:     "bar.d",
				Datapoints: getCopy(d),
			},
			// another duplicate of foo.a within the same list — dropped too
			{
				Interval:   10,
				QueryPatt:  "foo.a",
				Target:     "foo.a",
				Datapoints: getCopy(a),
			},
		},
	},
	)

	// Expected: first occurrence of each target, in input order.
	out := []models.Series{
		{
			Interval:   10,
			QueryPatt:  "foo.a",
			Target:     "foo.a",
			Datapoints: getCopy(a),
		},
		{
			Interval:   100,
			QueryPatt:  "foo.b",
			Target:     "foo.b",
			Datapoints: getCopy(b),
		},
		{
			Interval:   10,
			QueryPatt:  "bar.b",
			Target:     "bar.b",
			Datapoints: getCopy(b),
		},
		{
			Interval:   100,
			QueryPatt:  "bar.*",
			Target:     "bar.d",
			Datapoints: getCopy(d),
		},
	}

	got, err := f.Exec(make(map[Req][]models.Series))
	if err := equalOutput(out, got, nil, err); err != nil {
		t.Fatal(err)
	}
}
106+
107+
// Benchmarks for unique over 10k-point series, varying the series count
// (1/10/100/1000) and the fraction of series containing null datapoints.
// fn0 generates datapoints for even-indexed series, fn1 for odd-indexed.
func BenchmarkUnique10k_1NoNulls(b *testing.B) {
	benchmarkUnique(b, 1, test.RandFloats10k, test.RandFloats10k)
}
func BenchmarkUnique10k_10NoNulls(b *testing.B) {
	benchmarkUnique(b, 10, test.RandFloats10k, test.RandFloats10k)
}
func BenchmarkUnique10k_100NoNulls(b *testing.B) {
	benchmarkUnique(b, 100, test.RandFloats10k, test.RandFloats10k)
}
func BenchmarkUnique10k_1000NoNulls(b *testing.B) {
	benchmarkUnique(b, 1000, test.RandFloats10k, test.RandFloats10k)
}
func BenchmarkUnique10k_1SomeSeriesHalfNulls(b *testing.B) {
	benchmarkUnique(b, 1, test.RandFloats10k, test.RandFloatsWithNulls10k)
}
func BenchmarkUnique10k_10SomeSeriesHalfNulls(b *testing.B) {
	benchmarkUnique(b, 10, test.RandFloats10k, test.RandFloatsWithNulls10k)
}
func BenchmarkUnique10k_100SomeSeriesHalfNulls(b *testing.B) {
	benchmarkUnique(b, 100, test.RandFloats10k, test.RandFloatsWithNulls10k)
}
func BenchmarkUnique10k_1000SomeSeriesHalfNulls(b *testing.B) {
	benchmarkUnique(b, 1000, test.RandFloats10k, test.RandFloatsWithNulls10k)
}
func BenchmarkUnique10k_1AllSeriesHalfNulls(b *testing.B) {
	benchmarkUnique(b, 1, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
}
func BenchmarkUnique10k_10AllSeriesHalfNulls(b *testing.B) {
	benchmarkUnique(b, 10, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
}
func BenchmarkUnique10k_100AllSeriesHalfNulls(b *testing.B) {
	benchmarkUnique(b, 100, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
}
func BenchmarkUnique10k_1000AllSeriesHalfNulls(b *testing.B) {
	benchmarkUnique(b, 1000, test.RandFloatsWithNulls10k, test.RandFloatsWithNulls10k)
}
143+
144+
func benchmarkUnique(b *testing.B, numSeries int, fn0, fn1 func() []schema.Point) {
145+
var input []models.Series
146+
for i := 0; i < numSeries; i++ {
147+
series := models.Series{
148+
QueryPatt: strconv.Itoa(i),
149+
}
150+
if i%2 == 0 {
151+
series.Datapoints = fn0()
152+
} else {
153+
series.Datapoints = fn1()
154+
}
155+
input = append(input, series)
156+
}
157+
b.ResetTimer()
158+
for i := 0; i < b.N; i++ {
159+
f := NewUnique()
160+
f.(*FuncUnique).in = append(f.(*FuncUnique).in, NewMock(input))
161+
got, err := f.Exec(make(map[Req][]models.Series))
162+
if err != nil {
163+
b.Fatalf("%s", err)
164+
}
165+
results = got
166+
}
167+
}

expr/funcs.go

+1
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ func init() {
119119
"sumSeries": {NewAggregateConstructor("sum", crossSeriesSum), true},
120120
"summarize": {NewSummarize, true},
121121
"transformNull": {NewTransformNull, true},
122+
"unique": {NewUnique, true},
122123
}
123124
}
124125

0 commit comments

Comments
 (0)