Skip to content
This repository was archived by the owner on Aug 23, 2023. It is now read-only.

Commit 9beff46

Browse files
authored
Merge pull request #1920 from grafana/improve_performance_of_metatag_doesnt_exist_expr
Improve performance of metatag doesn't exist expr
2 parents ad962a3 + 23abfa4 commit 9beff46

File tree

2 files changed

+65
-16
lines changed

2 files changed

+65
-16
lines changed

idx/memory/meta_tags_query_test.go

+40
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,46 @@ func BenchmarkFilter100kByMetaTagWithIndexSize1mAnd50kMetaRecordsWithMultipleExp
698698
benchmarkFindByMetaTag(b, metricCnt, metaRecordCnt, expectedResCount, expectedTagsPerDef, expectedMetaTagsPerDef, queryGen, tagGen, metaRecordGen)
699699
}
700700

701+
func BenchmarkFilterByAbsenceOfMetaTag(b *testing.B) {
702+
metaRecordCnt := 50000
703+
metricCnt := 1000000
704+
expectedResCount := 50000
705+
expectedTagsPerDef := 5
706+
expectedMetaTagsPerDef := 1
707+
708+
metaRecordGen := func(metaRecordId int) struct {
709+
expressions []string
710+
metaTags []string
711+
} {
712+
res := struct {
713+
expressions []string
714+
metaTags []string
715+
}{}
716+
if metaRecordId < metaRecordCnt/2 {
717+
res.metaTags = []string{"stage=prod"}
718+
} else {
719+
// half of all meta records assign the meta tag filterBy=thisTag,
720+
// in the query we will filter for the absence of this tag
721+
res.metaTags = []string{"filterBy=thisTag"}
722+
}
723+
res.expressions = []string{fmt.Sprintf("host=hostname%d", metaRecordId)}
724+
725+
return res
726+
}
727+
728+
tagGen := func(id int) []string {
729+
metaRecordId := id % metaRecordCnt
730+
// each host value will be assigned to 1M/50k = 20 metrics
731+
return []string{fmt.Sprintf("host=hostname%d", metaRecordId), fmt.Sprintf("other=property%d", metaRecordId), fmt.Sprintf("cluster=cluster%d", id%10)}
732+
}
733+
734+
queryGen := func(id uint32) []string {
735+
return []string{fmt.Sprintf("cluster=cluster%d", id%10), "filterBy="}
736+
}
737+
738+
benchmarkFindByMetaTag(b, metricCnt, metaRecordCnt, expectedResCount, expectedTagsPerDef, expectedMetaTagsPerDef, queryGen, tagGen, metaRecordGen)
739+
}
740+
701741
func getMetaRecordsForMetaTagQueryBenchmark(b *testing.B, metaRecordCount int, metaRecordGen func(metaRecordId int) struct {
702742
expressions []string
703743
metaTags []string

idx/memory/tag_query_id_filter.go

+25-16
Original file line numberDiff line numberDiff line change
@@ -92,14 +92,13 @@ func newIdFilter(expressions tagquery.Expressions, ctx *TagQueryContext) *idFilt
9292
records = append(records, record)
9393
}
9494

95-
// if we don't use an inverted set of meta records, then we check if
96-
// all meta records involved in a meta tag filter use the "=" operator.
97-
// if this is the case then it is cheaper to build a set of acceptable tags
95+
// if a query only involves meta tags of which all underlying expressions
96+
// use the "=" operator, then it is cheaper to build a set of acceptable tags
9897
// based on the meta record expressions and just check whether they are present
9998
// in a metric that gets filtered, compared to doing a full tag index lookup
10099
// to check whether a metric has one of the necessary meta tags associated
101100
// with it.
102-
onlyEqualOperators, singleExprPerRecord := viableOptimizations(invertSetOfMetaRecords, records)
101+
onlyEqualOperators, singleExprPerRecord := viableOptimizations(records)
103102

104103
if onlyEqualOperators {
105104
// there are two different ways how we optimize for the case where all expressions
@@ -108,9 +107,9 @@ func newIdFilter(expressions tagquery.Expressions, ctx *TagQueryContext) *idFilt
108107
// otherwise we use the second way which is a bit more expensive but it also works
109108
// if some of the involved meta records have multiple expressions.
110109
if singleExprPerRecord {
111-
res.filters[i].testByMetaTags = metaRecordFilterBySetOfValidValues(records)
110+
res.filters[i].testByMetaTags = metaRecordFilterBySetOfValidValues(records, invertSetOfMetaRecords)
112111
} else {
113-
res.filters[i].testByMetaTags = metaRecordFilterBySetOfValidValueSets(records)
112+
res.filters[i].testByMetaTags = metaRecordFilterBySetOfValidValueSets(records, invertSetOfMetaRecords)
114113
}
115114
} else {
116115
metaRecordFilters := make([]tagquery.MetricDefinitionFilter, 0, len(records))
@@ -132,16 +131,12 @@ func newIdFilter(expressions tagquery.Expressions, ctx *TagQueryContext) *idFilt
132131
// viableOptimizations looks at a set of meta tag records and decides whether two possible
133132
// optimizations can be applied when filtering by these records. it returns two bools to
134133
// indicate which optimizations are or are not viable.
135-
// if invertSetOfMetaRecords is true then none of these optimizations can be used.
136134
//
137135
// * the first bool refers to the optimization for sets of records which all only have
138136
// expressions using the equal operator, but there may be more than one per record.
139137
// * the second bool refers to the narrower optimization for sets of records which all
140138
// have only one expression and this expression is using the equal operator.
141-
func viableOptimizations(invertSetOfMetaRecords bool, records []tagquery.MetaTagRecord) (bool, bool) {
142-
if invertSetOfMetaRecords {
143-
return false, false
144-
}
139+
func viableOptimizations(records []tagquery.MetaTagRecord) (bool, bool) {
145140
singleExprPerRecord := true
146141
for recordIdx := range records {
147142
for exprIdx := range records[recordIdx].Expressions {
@@ -161,7 +156,9 @@ func viableOptimizations(invertSetOfMetaRecords bool, records []tagquery.MetaTag
161156
// which only involves meta records of which each only has exactly one expression and that
162157
// expression is using the "=" operator. this is quite a narrow scenario, but since it is
163158
// a very common use case it makes sense to optimize for it.
164-
func metaRecordFilterBySetOfValidValues(records []tagquery.MetaTagRecord) tagquery.MetricDefinitionFilter {
159+
// If invertFilter is true the returned filter yields tagquery.Fail instead of tagquery.Pass
160+
// when a metric matches one of the records, so metrics which do have the meta tag get rejected.
161+
func metaRecordFilterBySetOfValidValues(records []tagquery.MetaTagRecord, invertFilter bool) tagquery.MetricDefinitionFilter {
165162
// we first build a set of valid tags and names.
166163
// since we know that each of the involved meta records uses exactly one expression
167164
// which is using the "=" operator we know that if a given metric's name matches a
@@ -183,14 +180,19 @@ func metaRecordFilterBySetOfValidValues(records []tagquery.MetaTagRecord) tagque
183180
}
184181
}
185182

183+
resultOnHit := tagquery.Pass
184+
if invertFilter {
185+
resultOnHit = tagquery.Fail
186+
}
187+
186188
return func(_ schema.MKey, name string, tags []string) tagquery.FilterDecision {
187189
for i := range tags {
188190
if _, ok := validValues[tags[i]]; ok {
189-
return tagquery.Pass
191+
return resultOnHit
190192
}
191193
}
192194
if _, ok := validNames[name]; ok {
193-
return tagquery.Pass
195+
return resultOnHit
194196
}
195197
return tagquery.None
196198
}
@@ -199,7 +201,9 @@ func metaRecordFilterBySetOfValidValues(records []tagquery.MetaTagRecord) tagque
199201
// metaRecordFilterBySetOfValidValueSets creates a filter function to filter by a meta tag
200202
// which only involves meta records of which all expressions are only using the "=" operator,
201203
// it is ok if one meta record uses multiple such expressions.
202-
func metaRecordFilterBySetOfValidValueSets(records []tagquery.MetaTagRecord) tagquery.MetricDefinitionFilter {
204+
// If invertFilter is true the returned filter yields tagquery.Fail instead of tagquery.Pass
205+
// when a metric matches one of the value sets, so metrics which do have the meta tag get rejected.
206+
func metaRecordFilterBySetOfValidValueSets(records []tagquery.MetaTagRecord, invertFilter bool) tagquery.MetricDefinitionFilter {
203207
// we first build a set of tag and name value combinations of which each is sufficient
204208
// to pass the generated filter when a metric contains all values of one of these
205209
// combinations
@@ -221,6 +225,11 @@ func metaRecordFilterBySetOfValidValueSets(records []tagquery.MetaTagRecord) tag
221225
sort.Strings(validValueSets[i].tags)
222226
}
223227

228+
resultOnHit := tagquery.Pass
229+
if invertFilter {
230+
resultOnHit = tagquery.Fail
231+
}
232+
224233
return func(_ schema.MKey, name string, tags []string) tagquery.FilterDecision {
225234
// iterate over the acceptable value combinations and check if one matches this metric
226235
for _, validValueSet := range validValueSets {
@@ -231,7 +240,7 @@ func metaRecordFilterBySetOfValidValueSets(records []tagquery.MetaTagRecord) tag
231240
}
232241

233242
if sliceContainsElements(validValueSet.tags, tags) {
234-
return tagquery.Pass
243+
return resultOnHit
235244
}
236245
}
237246

0 commit comments

Comments
 (0)