From 7c5fb6e3f7c5b2b9c1929d415e00c115e6e777f6 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Fri, 28 Jun 2019 19:03:22 +0000 Subject: [PATCH 01/40] make tag query expression an interface with one type per operator this allows us to flexibly chain expressions and evaluate them based on the tags that have been associated to metrics from multiple indexes --- expr/tagquery/expression.go | 370 +++++++++++++++++------- expr/tagquery/expression_common.go | 29 ++ expr/tagquery/expression_equal.go | 60 ++++ expr/tagquery/expression_has_tag.go | 47 +++ expr/tagquery/expression_match.go | 105 +++++++ expr/tagquery/expression_match_tag.go | 82 ++++++ expr/tagquery/expression_not_equal.go | 60 ++++ expr/tagquery/expression_not_has_tag.go | 50 ++++ expr/tagquery/expression_not_match.go | 109 +++++++ expr/tagquery/expression_prefix.go | 56 ++++ expr/tagquery/expression_prefix_tag.go | 46 +++ expr/tagquery/query.go | 102 +++---- 12 files changed, 959 insertions(+), 157 deletions(-) create mode 100644 expr/tagquery/expression_common.go create mode 100644 expr/tagquery/expression_equal.go create mode 100644 expr/tagquery/expression_has_tag.go create mode 100644 expr/tagquery/expression_match.go create mode 100644 expr/tagquery/expression_match_tag.go create mode 100644 expr/tagquery/expression_not_equal.go create mode 100644 expr/tagquery/expression_not_has_tag.go create mode 100644 expr/tagquery/expression_not_match.go create mode 100644 expr/tagquery/expression_prefix.go create mode 100644 expr/tagquery/expression_prefix_tag.go diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index b3afe2d296..68da22cb0f 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -7,6 +7,10 @@ import ( "strings" ) +const invalidExpressionError = "Invalid expression: %s" + +var matchCacheSize int + type Expressions []Expression func ParseExpressions(expressions []string) (Expressions, error) { @@ -21,19 +25,58 @@ func ParseExpressions(expressions 
[]string) (Expressions, error) { return res, nil } -// Sort sorts all the expressions first by key, then by value, then by operator -func (e Expressions) Sort() { +// SortByFilterOrder sorts all the expressions first by operator +// roughly in cost-increasing order when they are used as filters, +// then by key, then by value +func (e Expressions) SortByFilterOrder() { + costByOperator := map[ExpressionOperator]int{ + MATCH_NONE: 0, + EQUAL: 1, + HAS_TAG: 2, + PREFIX: 3, + PREFIX_TAG: 4, + NOT_EQUAL: 5, + NOT_HAS_TAG: 6, + MATCH: 7, + MATCH_TAG: 8, + NOT_MATCH: 9, + MATCH_ALL: 10, + } + sort.Slice(e, func(i, j int) bool { - if e[i].Key == e[j].Key { - if e[i].Value == e[j].Value { - return e[i].Operator < e[j].Operator + if e[i].GetOperator() == e[j].GetOperator() { + if e[i].GetKey() == e[j].GetKey() { + return e[i].GetValue() < e[j].GetValue() } - return e[i].Value < e[j].Value + return e[i].GetKey() < e[j].GetKey() } - return e[i].Key < e[j].Key + return costByOperator[e[i].GetOperator()] < costByOperator[e[j].GetOperator()] }) } +// findInitialExpression returns the id of the expression which is the +// most suitable to start the query execution with. 
the chosen expression +// should be as cheap as possible and it must require a non-empty value +func (e Expressions) findInitialExpression() int { + // order of preference to start with the viable operators + for _, op := range []ExpressionOperator{ + EQUAL, + HAS_TAG, + PREFIX, + PREFIX_TAG, + MATCH, + MATCH_TAG, + NOT_MATCH, + } { + for i := range e { + if e[i].GetOperator() == op && e[i].RequiresNonEmptyValue() { + return i + } + } + } + return -1 +} + func (e Expressions) Strings() []string { builder := strings.Builder{} res := make([]string, len(e)) @@ -45,134 +88,263 @@ func (e Expressions) Strings() []string { return res } -type Expression struct { - Tag - Operator ExpressionOperator - RequiresNonEmptyValue bool - UsesRegex bool - Regex *regexp.Regexp +type Expression interface { + // GetMetricDefinitionFilter returns a MetricDefinitionFilter. It takes a metric definition, looks + // at its tags and returns a decision regarding this query expression applied to its tags. + GetMetricDefinitionFilter() MetricDefinitionFilter + + // GetDefaultDecision defines what decision should be made if the filter has not come to a conclusive + // decision based on a single index. When looking at more than one tag index in order of decreasing + // priority to decide whether a metric should be part of the final result set, some operators and metric + // combinations can come to a conclusive decision without looking at all indexes and some others can't. + // if an expression has evaluated a metric against all indexes and has not come to a conclusive + // decision, then the default decision gets applied. 
+ // + // Example + // metric1 has tags ["name=a.b.c", "some=value"] in the metric tag index, we evaluate the expression + // "anothertag!=value": + // 1) expression looks at the metric tag index and it sees that metric1 does not have a tag "anothertag" + // with the value "value", but at this point it doesn't know if another index that will be looked + // at later does, so it returns the decision "none". + // 2) expression now looks at index2 and sees again that metric1 does not have the tag and value + // it is looking for, so it returns "none" again. + // 3) the expression execution sees that there are no more indexes left, so it applies the default + // decision for the operator != which is "pass", meaning the expression "anothertag!=value" has + // not filtered the metric metric1 out of the result set. + // + // metric2 has tags ["name=a.b.c", "anothertag=value"] according to the metric tag index and it has + // no meta tags, we still evaluate the same expression: + // 1) expression looks at metric tag index and sees metric2 has tag "anothertag" with value "value". + // it directly comes to a conclusive decision that this metric needs to be filtered out of the + // result set and returns the filter decision "fail". + // + // metric3 has tags ["name=aaa", "abc=cba"] according to the metric tag index and there is a meta + // record assigning the tag "anothertag=value" to metrics matching that query expression "abc=cba". + // 1) expression looks at metric3 and sees it does not have the tag & value it's looking for, so + // it returns the filter decision "none" because it cannot know for sure whether another index + // will assign "anothertag=value" to metric3. + // 2) expression looks at the meta tag index and it sees that there are meta records matching the + // tag "anothertag" and the value "value", so it retrieves the corresponding filter functions + // of these meta records and passes metric3's tag set into them. 
+ // 3) the filter function of the meta record for the query set "abc=cba" returns true, indicating + // that its meta tag gets applied to metric3. + // 4) based on that the tag expression comes to the decision that metric3 should not be part of + // the final result set, so it returns "fail". + GetDefaultDecision() FilterDecision + + // GetKey returns the tag to whose values this expression gets applied if it operates on the value + // (OperatesOnTag returns "false") + // example: + // in the expression "tag1=value" GetKey() would return "tag1" and OperatesOnTag() returns "false" + GetKey() string + + // GetValue returns the value part of the expression + // example: + // in the expression "abc!=cba" this would return "cba" + GetValue() string + + // GetOperator returns the operator of this expression + GetOperator() ExpressionOperator + + // ValuePasses takes a string and applies this expression's criteria to it, + // then it returns whether the string has matched + // In case of expressions that get applied to tags, a key of the first level map of the metric tag index + // or meta tag index can get passed into this function, otherwise a value from the second level under the key + // returned by GetKey() + ValuePasses(string) bool + + // HasRe indicates whether the evaluation of this expression involves regular expressions + HasRe() bool + + RequiresNonEmptyValue() bool + OperatesOnTag() bool + StringIntoBuilder(builder *strings.Builder) } -// ParseQueryExpression takes a tag query expression as a string and validates it -func ParseExpression(expression string) (Expression, error) { - var operatorStartPos, operatorEndPos, equalPos int - var res Expression +// ParseExpression returns an expression that's been generated from the given +// string, in case of an error the error gets returned as the second value +func ParseExpression(expr string) (Expression, error) { + var pos int + prefix, regex, not := false, false, false + resCommon := expressionCommon{} - 
equalPos = strings.Index(expression, "=") - if equalPos < 0 { - return res, fmt.Errorf("Missing equal sign: %s", expression) + // scan up to operator to get key +FIND_OPERATOR: + for ; pos < len(expr); pos++ { + switch expr[pos] { + case '=': + break FIND_OPERATOR + case '!': + not = true + break FIND_OPERATOR + case '^': + prefix = true + break FIND_OPERATOR + case ';': + return nil, fmt.Errorf(invalidExpressionError, expr) + } } - if equalPos == 0 { - return res, fmt.Errorf("Empty tag key: %s", expression) + // key must not be empty + if pos == 0 { + return nil, fmt.Errorf(invalidExpressionError, expr) } - res.RequiresNonEmptyValue = true - if expression[equalPos-1] == '!' { - operatorStartPos = equalPos - 1 - res.RequiresNonEmptyValue = false - res.Operator = NOT_EQUAL - } else if expression[equalPos-1] == '^' { - operatorStartPos = equalPos - 1 - res.Operator = PREFIX - } else { - operatorStartPos = equalPos - res.Operator = EQUAL + resCommon.key = expr[:pos] + err := validateQueryExpressionTagKey(resCommon.key) + if err != nil { + return nil, fmt.Errorf("Error when validating key \"%s\" of expression \"%s\": %s", resCommon.key, expr, err) } - res.Key = expression[:operatorStartPos] - err := validateQueryExpressionTagKey(res.Key) - if err != nil { - return res, fmt.Errorf("Error when validating key \"%s\" of expression \"%s\": %s", res.Key, expression, err) + // shift over the !/^ characters + if not || prefix { + pos++ } - res.UsesRegex = false - if len(expression)-1 == equalPos { - operatorEndPos = equalPos - } else if expression[equalPos+1] == '~' { - operatorEndPos = equalPos + 1 - res.UsesRegex = true + if len(expr) <= pos || expr[pos] != '=' { + return nil, fmt.Errorf(invalidExpressionError, expr) + } + pos++ - switch res.Operator { - case EQUAL: - res.Operator = MATCH - case NOT_EQUAL: - res.Operator = NOT_MATCH - case PREFIX: - return res, fmt.Errorf("The string \"^=~\" is not a valid operator in expression %s", expression) + if len(expr) > pos && 
expr[pos] == '~' { + // ^=~ is not a valid operator + if prefix { + return nil, fmt.Errorf(invalidExpressionError, expr) } - } else { - operatorEndPos = equalPos + regex = true + pos++ } - res.Value = expression[operatorEndPos+1:] - - if res.UsesRegex { - if len(res.Value) > 0 && res.Value[0] != '^' { - // always anchor all regular expressions at the beginning if they do not start with ^ - res.Value = "^(?:" + res.Value + ")" + valuePos := pos + for ; pos < len(expr); pos++ { + // disallow ; in value + if expr[pos] == 59 { + return nil, fmt.Errorf(invalidExpressionError, expr) } + } + resCommon.value = expr[valuePos:] + var operator ExpressionOperator - res.Regex, err = regexp.Compile(res.Value) - if err != nil { - return res, fmt.Errorf("Invalid regular expression given as value %s in expression %s: %s", res.Value, expression, err) + if not { + if len(resCommon.value) == 0 { + operator = HAS_TAG + } else if regex { + operator = NOT_MATCH + } else { + operator = NOT_EQUAL } + } else { + if prefix { + if len(resCommon.value) == 0 { + operator = HAS_TAG + } else { + operator = PREFIX + } + } else if len(resCommon.value) == 0 { + operator = NOT_HAS_TAG + } else if regex { + operator = MATCH + } else { + operator = EQUAL + } + } - if res.Regex.Match(nil) { - // if value matches empty string, then requiresNonEmptyValue gets negated - res.RequiresNonEmptyValue = !res.RequiresNonEmptyValue + // special key to match on tag instead of a value + if resCommon.key == "__tag" { + // currently ! (not) queries on tags are not supported + // and unlike normal queries a value must be set + if not || len(resCommon.value) == 0 { + return nil, fmt.Errorf(invalidExpressionError, expr) } - } else { - if len(res.Value) == 0 { - // if value is empty, then requiresNonEmptyValue gets negated - // f.e. 
- // tag1!= means there must be a tag "tag1", instead of there must not be - // tag1= means there must not be a "tag1", instead of there must be - res.RequiresNonEmptyValue = !res.RequiresNonEmptyValue + + if operator == PREFIX { + operator = PREFIX_TAG + } else if operator == MATCH { + operator = MATCH_TAG } } - if res.Key == "__tag" { - if len(res.Value) == 0 { - return res, errInvalidQuery + if operator == MATCH || operator == NOT_MATCH || operator == MATCH_TAG { + if len(resCommon.value) > 0 && resCommon.value[0] != '^' { + resCommon.value = "^(?:" + resCommon.value + ")" } - if res.Operator == PREFIX { - res.Operator = PREFIX_TAG - } else if res.Operator == MATCH { - res.Operator = MATCH_TAG - } else { - return res, errInvalidQuery + valueRe, err := regexp.Compile(resCommon.value) + if err != nil { + return nil, err + } + switch operator { + case MATCH: + return &expressionMatch{expressionCommon: resCommon, valueRe: valueRe}, nil + case NOT_MATCH: + return &expressionNotMatch{expressionCommon: resCommon, valueRe: valueRe}, nil + case MATCH_TAG: + return &expressionMatchTag{expressionCommon: resCommon, valueRe: valueRe}, nil + } + } else { + switch operator { + case EQUAL: + return &expressionEqual{expressionCommon: resCommon}, nil + case NOT_EQUAL: + return &expressionNotEqual{expressionCommon: resCommon}, nil + case PREFIX: + return &expressionPrefix{expressionCommon: resCommon}, nil + case MATCH_TAG: + return &expressionMatchTag{expressionCommon: resCommon}, nil + case HAS_TAG: + return &expressionHasTag{expressionCommon: resCommon}, nil + case NOT_HAS_TAG: + return &expressionNotHasTag{expressionCommon: resCommon}, nil + case PREFIX_TAG: + return &expressionPrefixTag{expressionCommon: resCommon}, nil } } - return res, nil + return nil, fmt.Errorf("ParseExpression: Invalid operator in expression %s", expr) } -func (e *Expression) IsEqualTo(other Expression) bool { - return e.Key == other.Key && e.Operator == other.Operator && e.Value == other.Value +func 
ExpressionsAreEqual(expr1, expr2 Expression) bool { + return expr1.GetKey() == expr2.GetKey() && expr1.GetOperator() == expr2.GetOperator() && expr1.GetValue() == expr2.GetValue() } -func (e *Expression) StringIntoBuilder(builder *strings.Builder) { - if e.Operator == MATCH_TAG || e.Operator == PREFIX_TAG { - builder.WriteString("__tag") - } else { - builder.WriteString(e.Key) +// MetricDefinitionFilter takes a metric name together with its tags and returns a FilterDecision +type MetricDefinitionFilter func(name string, tags []string) FilterDecision + +type MetricDefinitionFilters []MetricDefinitionFilter + +func (m MetricDefinitionFilters) Filter(name string, tags []string) FilterDecision { + for i := range m { + decision := m[i](name, tags) + if decision == Fail { + return Fail + } else if decision == Pass { + return Pass + } } - e.Operator.StringIntoBuilder(builder) - builder.WriteString(e.Value) + + return None } +type FilterDecision uint8 + +const ( + None FilterDecision = iota // no decision has been made, because the decision might change depending on what other indexes defines + Fail // it has been decided by the filter that this metric does not end up in the result set + Pass // the filter has passed +) + type ExpressionOperator uint16 const ( - EQUAL ExpressionOperator = iota // = - NOT_EQUAL // != - MATCH // =~ regular expression - MATCH_TAG // __tag=~ relies on special key __tag. non-standard, required for `/metrics/tags` requests with "filter" - NOT_MATCH // !=~ - PREFIX // ^= exact prefix, not regex. non-standard, required for auto complete of tag values - PREFIX_TAG // __tag^= exact prefix with tag. non-standard, required for auto complete of tag keys + EQUAL ExpressionOperator = iota // = + NOT_EQUAL // != + MATCH // =~ regular expression + MATCH_TAG // __tag=~ relies on special key __tag. non-standard, required for `/metrics/tags` requests with "filter" + NOT_MATCH // !=~ + PREFIX // ^= exact prefix, not regex. 
non-standard, required for auto complete of tag values + PREFIX_TAG // __tag^= exact prefix with tag. non-standard, required for auto complete of tag keys + HAS_TAG // !="" specified tag must be present + NOT_HAS_TAG // ="" specified tag must not be present ) func (o ExpressionOperator) StringIntoBuilder(builder *strings.Builder) { @@ -191,5 +363,9 @@ func (o ExpressionOperator) StringIntoBuilder(builder *strings.Builder) { builder.WriteString("^=") case PREFIX_TAG: builder.WriteString("^=") + case HAS_TAG: + builder.WriteString("!=") + case NOT_HAS_TAG: + builder.WriteString("=") } } diff --git a/expr/tagquery/expression_common.go b/expr/tagquery/expression_common.go new file mode 100644 index 0000000000..13c674741a --- /dev/null +++ b/expr/tagquery/expression_common.go @@ -0,0 +1,29 @@ +package tagquery + +type expressionCommon struct { + key string + value string +} + +func (e *expressionCommon) GetKey() string { + return e.key +} + +func (e *expressionCommon) GetValue() string { + return e.value +} + +func (e *expressionCommon) RequiresNonEmptyValue() bool { + // by default assume true, unless a concrete type overrides this method + return true +} + +func (e *expressionCommon) OperatesOnTag() bool { + // by default assume false, unless a concrete type overrides this method + return false +} + +func (e *expressionCommon) HasRe() bool { + // by default assume false, unless a concrete type overrides this method + return false +} diff --git a/expr/tagquery/expression_equal.go b/expr/tagquery/expression_equal.go new file mode 100644 index 0000000000..f918413dd2 --- /dev/null +++ b/expr/tagquery/expression_equal.go @@ -0,0 +1,60 @@ +package tagquery + +import ( + "strings" +) + +type expressionEqual struct { + expressionCommon +} + +func (e *expressionEqual) GetOperator() ExpressionOperator { + return EQUAL +} + +func (e *expressionEqual) ValuePasses(value string) bool { + return value == e.value +} + +func (e *expressionEqual) GetDefaultDecision() FilterDecision { + 
return Fail +} + +func (e *expressionEqual) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString(e.key) + builder.WriteString("=") + builder.WriteString(e.value) +} + +func (e *expressionEqual) GetMetricDefinitionFilter() MetricDefinitionFilter { + if e.key == "name" { + if e.value == "" { + // every metric has a name, the value will never be empty + return func(_ string, _ []string) FilterDecision { return Fail } + } + return func(name string, _ []string) FilterDecision { + if name == e.value { + return Pass + } + return Fail + } + } + + prefix := e.key + "=" + matchString := prefix + e.value + return func(name string, tags []string) FilterDecision { + for _, tag := range tags { + if tag == matchString { + return Pass + } + + // the tag is set, but it has a different value, + // no need to keep looking at other indexes + if strings.HasPrefix(tag, prefix) { + return Fail + } + } + + return None + } +} diff --git a/expr/tagquery/expression_has_tag.go b/expr/tagquery/expression_has_tag.go new file mode 100644 index 0000000000..9361d672ce --- /dev/null +++ b/expr/tagquery/expression_has_tag.go @@ -0,0 +1,47 @@ +package tagquery + +import ( + "strings" +) + +type expressionHasTag struct { + expressionCommon +} + +func (e *expressionHasTag) GetOperator() ExpressionOperator { + return HAS_TAG +} + +func (e *expressionHasTag) ValuePasses(value string) bool { + return value == e.key +} + +func (e *expressionHasTag) GetDefaultDecision() FilterDecision { + return Fail +} + +func (e *expressionHasTag) OperatesOnTag() bool { + return true +} + +func (e *expressionHasTag) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString(e.key) + builder.WriteString("!=") +} + +func (e *expressionHasTag) GetMetricDefinitionFilter() MetricDefinitionFilter { + if e.key == "name" { + return func(_ string, _ []string) FilterDecision { return Pass } + } + + matchPrefix := e.GetKey() + "=" + return func(_ string, tags []string) FilterDecision { + for _, tag := range 
tags { + if strings.HasPrefix(tag, matchPrefix) { + return Pass + } + } + + return None + } +} diff --git a/expr/tagquery/expression_match.go b/expr/tagquery/expression_match.go new file mode 100644 index 0000000000..b2f2a76dc7 --- /dev/null +++ b/expr/tagquery/expression_match.go @@ -0,0 +1,105 @@ +package tagquery + +import ( + "regexp" + "strings" + "sync" + "sync/atomic" +) + +type expressionMatch struct { + expressionCommon + valueRe *regexp.Regexp +} + +func (e *expressionMatch) GetOperator() ExpressionOperator { + return MATCH +} + +func (e *expressionMatch) HasRe() bool { + return true +} + +func (e *expressionMatch) ValuePasses(value string) bool { + return e.valueRe.MatchString(value) +} + +func (e *expressionMatch) GetDefaultDecision() FilterDecision { + // if the pattern matches "" (f.e. "tag=~.*) then a metric which + // does not have the tag "tag" at all should also be part of the + // result set + // docs: https://graphite.readthedocs.io/en/latest/tags.html + // > Any tag spec that matches an empty value is considered to + // > match series that don’t have that tag + if e.matchesEmpty { + return Pass + } + return Fail +} + +func (e *expressionMatch) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString(e.key) + builder.WriteString("=~") + builder.WriteString(e.value) +} + +func (e *expressionMatch) GetMetricDefinitionFilter() MetricDefinitionFilter { + if e.key == "name" { + if e.value == "" { + // silly query, always fails + return func(_ string, _ []string) FilterDecision { return Fail } + } + return func(name string, _ []string) FilterDecision { + if e.valueRe.MatchString(name) { + return Pass + } else { + return Fail + } + } + } + + var matchCache, missCache sync.Map + var currentMatchCacheSize, currentMissCacheSize int32 + prefix := e.key + "=" + + return func(_ string, tags []string) FilterDecision { + for _, tag := range tags { + if !strings.HasPrefix(tag, prefix) { + continue + } + + // if value is empty, every metric which has 
this tag fails + if e.value == "" { + return Fail + } + + value := tag[len(prefix):] + + // reduce regex matching by looking up cached non-matches + if _, ok := missCache.Load(value); ok { + return Fail + } + + // reduce regex matching by looking up cached matches + if _, ok := matchCache.Load(value); ok { + return Pass + } + + if e.valueRe.MatchString(value) { + if atomic.LoadInt32(&currentMatchCacheSize) < int32(matchCacheSize) { + matchCache.Store(value, struct{}{}) + atomic.AddInt32(&currentMatchCacheSize, 1) + } + return Pass + } else { + if atomic.LoadInt32(&currentMissCacheSize) < int32(matchCacheSize) { + missCache.Store(value, struct{}{}) + atomic.AddInt32(&currentMissCacheSize, 1) + } + return Fail + } + } + + return None + } +} diff --git a/expr/tagquery/expression_match_tag.go b/expr/tagquery/expression_match_tag.go new file mode 100644 index 0000000000..c92c0a6696 --- /dev/null +++ b/expr/tagquery/expression_match_tag.go @@ -0,0 +1,82 @@ +package tagquery + +import ( + "regexp" + "strings" + "sync" + "sync/atomic" +) + +type expressionMatchTag struct { + expressionCommon + valueRe *regexp.Regexp +} + +func (e *expressionMatchTag) GetOperator() ExpressionOperator { + return MATCH_TAG +} + +func (e *expressionMatchTag) HasRe() bool { + return true +} + +func (e *expressionMatchTag) ValuePasses(tag string) bool { + return e.valueRe.MatchString(tag) +} + +func (e *expressionMatchTag) GetDefaultDecision() FilterDecision { + return Fail +} + +func (e *expressionMatchTag) OperatesOnTag() bool { + return true +} + +func (e *expressionMatchTag) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString("__tag=~") + builder.WriteString(e.value) +} + +func (e *expressionMatchTag) GetMetricDefinitionFilter() MetricDefinitionFilter { + if e.valueRe.Match([]byte("name")) { + // every metric has a tag name, so we can always return Pass + return func(_ string, _ []string) FilterDecision { return Pass } + } + + var matchCache, missCache sync.Map + var currentMatchCacheSize, 
currentMissCacheSize int32 + + return func(_ string, tags []string) FilterDecision { + for _, tag := range tags { + values := strings.SplitN(tag, "=", 2) + if len(values) < 2 { + continue + } + value := values[0] + + if _, ok := missCache.Load(value); ok { + continue + } + + if _, ok := matchCache.Load(value); ok { + return Pass + } + + if e.valueRe.Match([]byte(value)) { + if atomic.LoadInt32(&currentMatchCacheSize) < int32(matchCacheSize) { + matchCache.Store(value, struct{}{}) + atomic.AddInt32(&currentMatchCacheSize, 1) + } + return Pass + } else { + if atomic.LoadInt32(&currentMissCacheSize) < int32(matchCacheSize) { + missCache.Store(value, struct{}{}) + atomic.AddInt32(&currentMissCacheSize, 1) + } + continue + } + } + + return None + } +} diff --git a/expr/tagquery/expression_not_equal.go b/expr/tagquery/expression_not_equal.go new file mode 100644 index 0000000000..1dabf27d27 --- /dev/null +++ b/expr/tagquery/expression_not_equal.go @@ -0,0 +1,60 @@ +package tagquery + +import ( + "strings" +) + +type expressionNotEqual struct { + expressionCommon +} + +func (e *expressionNotEqual) GetOperator() ExpressionOperator { + return NOT_EQUAL +} + +func (e *expressionNotEqual) RequiresNonEmptyValue() bool { + return false +} + +func (e *expressionNotEqual) ValuePasses(value string) bool { + return value != e.value +} + +func (e *expressionNotEqual) GetDefaultDecision() FilterDecision { + return Pass +} + +func (e *expressionNotEqual) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString(e.key) + builder.WriteString("!=") + builder.WriteString(e.value) +} + +func (e *expressionNotEqual) GetMetricDefinitionFilter() MetricDefinitionFilter { + if e.key == "name" { + if e.value == "" { + return func(_ string, _ []string) FilterDecision { return Pass } + } + return func(name string, _ []string) FilterDecision { + if name == e.value { + return Fail + } + return Pass + } + } + + prefix := e.key + "=" + matchString := prefix + e.value + return func(_ string, tags []string) 
FilterDecision { + for _, tag := range tags { + if strings.HasPrefix(tag, prefix) { + if tag == matchString { + return Fail + } else { + return Pass + } + } + } + return None + } +} diff --git a/expr/tagquery/expression_not_has_tag.go b/expr/tagquery/expression_not_has_tag.go new file mode 100644 index 0000000000..2b4126a404 --- /dev/null +++ b/expr/tagquery/expression_not_has_tag.go @@ -0,0 +1,50 @@ +package tagquery + +import ( + "strings" +) + +type expressionNotHasTag struct { + expressionCommon +} + +func (e *expressionNotHasTag) GetOperator() ExpressionOperator { + return NOT_HAS_TAG +} + +func (e *expressionNotHasTag) RequiresNonEmptyValue() bool { + return false +} + +func (e *expressionNotHasTag) ValuePasses(value string) bool { + return value == e.key +} + +func (e *expressionNotHasTag) GetDefaultDecision() FilterDecision { + return Pass +} + +func (e *expressionNotHasTag) OperatesOnTag() bool { + return true +} + +func (e *expressionNotHasTag) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString(e.key) + builder.WriteString("=") +} + +func (e *expressionNotHasTag) GetMetricDefinitionFilter() MetricDefinitionFilter { + if e.key == "name" { + return func(_ string, _ []string) FilterDecision { return Fail } + } + + matchPrefix := e.key + "=" + return func(_ string, tags []string) FilterDecision { + for _, tag := range tags { + if strings.HasPrefix(tag, matchPrefix) { + return Fail + } + } + return None + } +} diff --git a/expr/tagquery/expression_not_match.go b/expr/tagquery/expression_not_match.go new file mode 100644 index 0000000000..754e80f75b --- /dev/null +++ b/expr/tagquery/expression_not_match.go @@ -0,0 +1,109 @@ +package tagquery + +import ( + "regexp" + "strings" + "sync" + "sync/atomic" +) + +type expressionNotMatch struct { + expressionCommon + valueRe *regexp.Regexp +} + +func (e *expressionNotMatch) GetOperator() ExpressionOperator { + return NOT_MATCH +} + +func (e *expressionNotMatch) RequiresNonEmptyValue() bool { + return 
false +} + +func (e *expressionNotMatch) HasRe() bool { + return true +} + +func (e *expressionNotMatch) ValuePasses(value string) bool { + return !e.valueRe.MatchString(value) +} + +func (e *expressionNotMatch) GetDefaultDecision() FilterDecision { + // if the pattern matches "" (f.e. "tag!=~.*") then a metric which + // does not have the tag "tag" at all should not be part of the + // result set + // docs: https://graphite.readthedocs.io/en/latest/tags.html + // > Any tag spec that matches an empty value is considered to + // > match series that don’t have that tag + if e.matchesEmpty { + return Fail + } + return Pass +} + +func (e *expressionNotMatch) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString(e.key) + builder.WriteString("!=~") + builder.WriteString(e.value) +} + +func (e *expressionNotMatch) GetMetricDefinitionFilter() MetricDefinitionFilter { + if e.key == "name" { + if e.value == "" { + // every metric has a name + return func(_ string, _ []string) FilterDecision { return Pass } + } + + return func(name string, _ []string) FilterDecision { + if e.valueRe.MatchString(name) { + return Fail + } + return Pass + } + } + + var matchCache, missCache sync.Map + var currentMatchCacheSize, currentMissCacheSize int32 + prefix := e.key + "=" + + return func(_ string, tags []string) FilterDecision { + for _, tag := range tags { + if !strings.HasPrefix(tag, prefix) { + continue + } + + // if value is empty, every metric which has this tag passes + if e.value == "" { + return Pass + } + + value := tag[len(prefix):] + + // reduce regex matching by looking up cached non-matches + if _, ok := missCache.Load(value); ok { + return Pass + } + + // reduce regex matching by looking up cached matches + if _, ok := matchCache.Load(value); ok { + return Fail + } + + if e.valueRe.MatchString(value) { + if atomic.LoadInt32(&currentMatchCacheSize) < int32(matchCacheSize) { + matchCache.Store(value, struct{}{}) + atomic.AddInt32(&currentMatchCacheSize, 1) + } + return Fail + 
} else { + if atomic.LoadInt32(&currentMissCacheSize) < int32(matchCacheSize) { + missCache.Store(value, struct{}{}) + atomic.AddInt32(&currentMissCacheSize, 1) + } + return Pass + } + } + + return None + } +} diff --git a/expr/tagquery/expression_prefix.go b/expr/tagquery/expression_prefix.go new file mode 100644 index 0000000000..82a2080619 --- /dev/null +++ b/expr/tagquery/expression_prefix.go @@ -0,0 +1,56 @@ +package tagquery + +import ( + "strings" +) + +type expressionPrefix struct { + expressionCommon +} + +func (e *expressionPrefix) GetOperator() ExpressionOperator { + return PREFIX +} + +func (e *expressionPrefix) ValuePasses(value string) bool { + return strings.HasPrefix(value, e.value) +} + +func (e *expressionPrefix) GetDefaultDecision() FilterDecision { + return Fail +} + +func (e *expressionPrefix) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString(e.key) + builder.WriteString("^=") + builder.WriteString(e.value) +} + +func (e *expressionPrefix) GetMetricDefinitionFilter() MetricDefinitionFilter { + prefix := e.key + "=" + matchString := prefix + e.value + + if e.key == "name" { + return func(name string, _ []string) FilterDecision { + if strings.HasPrefix(name, e.value) { + return Pass + } + + return Fail + } + } + + return func(_ string, tags []string) FilterDecision { + for _, tag := range tags { + if strings.HasPrefix(tag, matchString) { + return Pass + } + + if strings.HasPrefix(tag, prefix) { + return Fail + } + } + + return None + } +} diff --git a/expr/tagquery/expression_prefix_tag.go b/expr/tagquery/expression_prefix_tag.go new file mode 100644 index 0000000000..35fc8c5b30 --- /dev/null +++ b/expr/tagquery/expression_prefix_tag.go @@ -0,0 +1,46 @@ +package tagquery + +import ( + "strings" +) + +type expressionPrefixTag struct { + expressionCommon +} + +func (e *expressionPrefixTag) GetOperator() ExpressionOperator { + return PREFIX_TAG +} + +func (e *expressionPrefixTag) ValuePasses(tag string) bool { + return strings.HasPrefix(tag, 
e.value) +} + +func (e *expressionPrefixTag) OperatesOnTag() bool { + return true +} + +func (e *expressionPrefixTag) GetDefaultDecision() FilterDecision { + return Fail +} + +func (e *expressionPrefixTag) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString("__tag^=") + builder.WriteString(e.value) +} + +func (e *expressionPrefixTag) GetMetricDefinitionFilter() MetricDefinitionFilter { + if strings.HasPrefix("name", e.value) { + // every metric has a name + return func(_ string, _ []string) FilterDecision { return Pass } + } + + return func(_ string, tags []string) FilterDecision { + for _, tag := range tags { + if strings.HasPrefix(tag, e.value) { + return Pass + } + } + return None + } +} diff --git a/expr/tagquery/query.go b/expr/tagquery/query.go index 516144c42c..a68e7345eb 100644 --- a/expr/tagquery/query.go +++ b/expr/tagquery/query.go @@ -12,16 +12,15 @@ type Query struct { // clause that operates on LastUpdate field From int64 - // clauses that operate on values. from expressions like tagvalue - Expressions map[ExpressionOperator]Expressions + // slice of expressions sorted by the estimated cost of their operators + Expressions Expressions + + // the index in the Expressions slice at which we start evaluating the query + StartWith int // clause that operate on tags (keys) // we only need to support 1 condition for now: a prefix or match - TagClause ExpressionOperator // to know the clause type. 
either PREFIX_TAG or MATCH_TAG (or 0 if unset) - TagMatch Expression // only used for /metrics/tags with regex in filter param - TagPrefix string // only used for auto complete of tags to match exact prefix - - StartWith ExpressionOperator // choses the first clause to generate the initial result set (one of EQUAL PREFIX MATCH MATCH_TAG PREFIX_TAG) + TagClause Expression } func NewQueryFromStrings(expressionStrs []string, from int64) (Query, error) { @@ -40,69 +39,52 @@ func NewQuery(expressions Expressions, from int64) (Query, error) { return q, errInvalidQuery } - expressions.Sort() - q.Expressions = make(map[ExpressionOperator]Expressions) - for i, e := range expressions { + expressions.SortByFilterOrder() + for i := 0; i < len(expressions); i++ { // skip duplicate expression - if i > 0 && e.IsEqualTo(expressions[i-1]) { + if i > 0 && ExpressionsAreEqual(expressions[i], expressions[i-1]) { + expressions = append(expressions[:i], expressions[i+1:]...) + i-- continue } - // special case of empty value - if len(e.Value) == 0 { - if e.Operator == EQUAL || e.Operator == MATCH { - q.Expressions[NOT_MATCH] = append(q.Expressions[NOT_MATCH], e) - } else { - q.Expressions[MATCH] = append(q.Expressions[MATCH], e) - } - } else { - switch e.Operator { - case EQUAL: - q.Expressions[EQUAL] = append(q.Expressions[EQUAL], e) - case NOT_EQUAL: - q.Expressions[NOT_EQUAL] = append(q.Expressions[NOT_EQUAL], e) - case MATCH: - q.Expressions[MATCH] = append(q.Expressions[MATCH], e) - case NOT_MATCH: - q.Expressions[NOT_MATCH] = append(q.Expressions[NOT_MATCH], e) - case PREFIX: - q.Expressions[PREFIX] = append(q.Expressions[PREFIX], e) - case MATCH_TAG: - // we only allow one expression operating on tags - if q.TagClause != 0 { - return q, errInvalidQuery - } - - q.TagMatch = e - q.TagClause = MATCH_TAG - case PREFIX_TAG: - // we only allow one expression operating on tags - if q.TagClause != 0 { - return q, errInvalidQuery - } - - q.TagPrefix = e.Value - q.TagClause = PREFIX_TAG + 
op := expressions[i].GetOperator() + switch op { + case MATCH_TAG: + fallthrough + case PREFIX_TAG: + // we only allow one query by tag + if q.TagClause != nil { + return q, errInvalidQuery } + + q.TagClause = expressions[i] + expressions = append(expressions[:i], expressions[i+1:]...) + i-- } } - // the cheapest operator to minimize the result set should have precedence - if len(q.Expressions[EQUAL]) > 0 { - q.StartWith = EQUAL - } else if len(q.Expressions[PREFIX]) > 0 { - q.StartWith = PREFIX - } else if len(q.Expressions[MATCH]) > 0 { - q.StartWith = MATCH - } else if q.TagClause == PREFIX_TAG { - // starting with a tag based expression can be very expensive because they - // have the potential to result in a huge initial result set - q.StartWith = PREFIX_TAG - } else if q.TagClause == MATCH_TAG { - q.StartWith = MATCH_TAG - } else { + q.Expressions = expressions + q.StartWith = q.Expressions.findInitialExpression() + if q.TagClause == nil && q.StartWith < 0 { return q, errInvalidQuery } return q, nil } + +func (q *Query) GetMetricDefinitionFilters() (MetricDefinitionFilters, []FilterDecision) { + var filters MetricDefinitionFilters + var defaultDecisions []FilterDecision + for i := range q.Expressions { + // the one we start with does not need to be added to the filters, + // because we use it to build the initial result set + if i == q.StartWith { + continue + } + filters = append(filters, q.Expressions[i].GetMetricDefinitionFilter()) + defaultDecisions = append(defaultDecisions, q.Expressions[i].GetDefaultDecision()) + } + + return filters, defaultDecisions +} From 46b28636b5fa134d1a7834c9ab3fe54bbee03ae2 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Sat, 29 Jun 2019 01:39:34 +0000 Subject: [PATCH 02/40] use the new expression types when running queries --- api/cluster.go | 13 +- api/graphite.go | 20 +- expr/tagquery/meta_tag_record.go | 4 +- idx/memory/meta_tags.go | 4 +- idx/memory/tag_query.go | 576 +++++-------------------------- 5 files changed, 
107 insertions(+), 510 deletions(-) diff --git a/api/cluster.go b/api/cluster.go index eb4521a48d..e63187b1b0 100644 --- a/api/cluster.go +++ b/api/cluster.go @@ -7,6 +7,7 @@ import ( "net/http" "regexp" "strconv" + "strings" "sync" "time" @@ -264,12 +265,16 @@ func (s *Server) indexTagDelSeries(ctx *middleware.Context, request models.Index } expressions := make(tagquery.Expressions, len(tags)) + builder := strings.Builder{} for i := range tags { - expressions[i] = tagquery.Expression{ - Tag: tags[i], - Operator: tagquery.EQUAL, - RequiresNonEmptyValue: true, + tags[i].StringIntoBuilder(&builder) + var err error + expressions[i], err = tagquery.ParseExpression(builder.String()) + if err != nil { + response.Write(ctx, response.WrapErrorForTagDB(err)) + return } + builder.Reset() } query, err := tagquery.NewQuery(expressions, 0) diff --git a/api/graphite.go b/api/graphite.go index 6ec8df4f2b..6403177a3f 100644 --- a/api/graphite.go +++ b/api/graphite.go @@ -819,7 +819,7 @@ func getTagQueryExpressions(expressions string) (tagquery.Expressions, error) { return nil, err } - requiresNonEmptyValue = requiresNonEmptyValue || expression.RequiresNonEmptyValue + requiresNonEmptyValue = requiresNonEmptyValue || expression.RequiresNonEmptyValue() results = append(results, expression) continue @@ -1222,13 +1222,17 @@ func (s *Server) graphiteTagDelSeries(ctx *middleware.Context, request models.Gr return } - expressions := make(tagquery.Expressions, 0, len(tags)) - for _, tag := range tags { - expressions = append(expressions, tagquery.Expression{ - Tag: tag, - Operator: tagquery.EQUAL, - RequiresNonEmptyValue: true, - }) + expressions := make(tagquery.Expressions, len(tags)) + builder := strings.Builder{} + for i := range tags { + tags[i].StringIntoBuilder(&builder) + var err error + expressions[i], err = tagquery.ParseExpression(builder.String()) + if err != nil { + response.Write(ctx, response.WrapErrorForTagDB(err)) + return + } + builder.Reset() } query, err := 
tagquery.NewQuery(expressions, 0) diff --git a/expr/tagquery/meta_tag_record.go b/expr/tagquery/meta_tag_record.go index a80f4b81f5..7ff36d3511 100644 --- a/expr/tagquery/meta_tag_record.go +++ b/expr/tagquery/meta_tag_record.go @@ -33,13 +33,13 @@ func ParseMetaTagRecord(metaTags []string, queries []string) (MetaTagRecord, err // MatchesQueries compares another tag record's queries to this // one's queries. Returns true if they are equal, otherwise false. // It is assumed that all the queries are already sorted -func (m *MetaTagRecord) MatchesQueries(other MetaTagRecord) bool { +func (m *MetaTagRecord) MatchesQueries(other *MetaTagRecord) bool { if len(m.Queries) != len(other.Queries) { return false } for i, query := range m.Queries { - if !query.IsEqualTo(other.Queries[i]) { + if !ExpressionsAreEqual(query, other.Queries[i]) { return false } } diff --git a/idx/memory/meta_tags.go b/idx/memory/meta_tags.go index 100c62351d..1464d0fca6 100644 --- a/idx/memory/meta_tags.go +++ b/idx/memory/meta_tags.go @@ -45,7 +45,7 @@ func (m metaTagRecords) upsert(record tagquery.MetaTagRecord) (recordId, *tagque // the exact same queries as the one we're upserting for i := uint32(0); i < collisionAvoidanceWindow; i++ { if existingRecord, ok := m[id+recordId(i)]; ok { - if record.MatchesQueries(existingRecord) { + if record.MatchesQueries(&existingRecord) { oldRecord = &existingRecord oldId = id + recordId(i) delete(m, oldId) @@ -75,7 +75,7 @@ func (m metaTagRecords) upsert(record tagquery.MetaTagRecord) (recordId, *tagque // hashMetaTagRecord generates a hash of all the queries in the record func (m *metaTagRecords) hashMetaTagRecord(record tagquery.MetaTagRecord) recordId { - record.Queries.Sort() + record.Queries.SortByFilterOrder() builder := strings.Builder{} for _, query := range record.Queries { query.StringIntoBuilder(&builder) diff --git a/idx/memory/tag_query.go b/idx/memory/tag_query.go index afe6395592..73db6cbd80 100644 --- a/idx/memory/tag_query.go +++ 
b/idx/memory/tag_query.go @@ -2,8 +2,6 @@ package memory import ( "math" - "regexp" - "sort" "strings" "sync" "sync/atomic" @@ -15,265 +13,103 @@ import ( log "github.com/sirupsen/logrus" ) -// the supported operators are documented together with the graphite -// reference implementation: -// http://graphite.readthedocs.io/en/latest/tags.html -// -// some of the following operators are non-standard and are only used -// internally to implement certain functionalities requiring them - -// a key / value combo used to represent a tag expression like "key=value" -// the cost is an estimate how expensive this query is compared to others -// with the same operator -type kv struct { - tagquery.Tag - cost uint // cost of evaluating expression, compared to other kv objects -} - -// kv expressions that rely on regular expressions will get converted to kvRe in -// NewTagQueryContext() to accommodate the additional requirements of regex queries -type kvRe struct { - kv - Regex *regexp.Regexp - matchCache *sync.Map // needs to be reference so kvRe can be copied, caches regex matches - matchCacheSize int32 // sync.Map does not have a way to get the length - missCache *sync.Map // needs to be reference so kvRe can be copied, caches regex misses - missCacheSize int32 // sync.Map does not have a way to get the length -} - -type KvByCost []kv - -func (a KvByCost) Len() int { return len(a) } -func (a KvByCost) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a KvByCost) Less(i, j int) bool { return a[i].cost < a[j].cost } - -type KvReByCost []kvRe - -func (a KvReByCost) Len() int { return len(a) } -func (a KvReByCost) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a KvReByCost) Less(i, j int) bool { return a[i].cost < a[j].cost } - // TagQueryContext runs a set of pattern or string matches on tag keys and values against // the index. 
It is executed via: // Run() which returns a set of matching MetricIDs // RunGetTags() which returns a list of tags of the matching metrics type TagQueryContext struct { - // clause that operates on LastUpdate field - from int64 - - // clauses that operate on values. from expressions like tagvalue - equal []kv // EQUAL - match []kvRe // MATCH - notEqual []kv // NOT_EQUAL - notMatch []kvRe // NOT_MATCH - prefix []kv // PREFIX - - index TagIndex // the tag index, hierarchy of tags & values, set by Run()/RunGetTags() - byId map[schema.MKey]*idx.Archive // the metric index by ID, set by Run()/RunGetTags() - tagClause tagquery.ExpressionOperator // to know the clause type. either PREFIX_TAG or MATCH_TAG (or 0 if unset) - tagMatch kvRe // only used for /metrics/tags with regex in filter param - tagPrefix string // only used for auto complete of tags to match exact prefix - startWith tagquery.ExpressionOperator // choses the first clause to generate the initial result set (one of EQUAL PREFIX MATCH MATCH_TAG PREFIX_TAG) - wg *sync.WaitGroup + wg sync.WaitGroup + + query tagquery.Query + filters tagquery.MetricDefinitionFilters + defaultDecisions []tagquery.FilterDecision + + index TagIndex // the tag index, hierarchy of tags & values, set by Run()/RunGetTags() + byId map[schema.MKey]*idx.Archive // the metric index by ID, set by Run()/RunGetTags() } // NewTagQueryContext takes a tag query and wraps it into all the -// context structs necessary to execute the query on the index +// context structs necessary to execute the query on the indexes func NewTagQueryContext(query tagquery.Query) TagQueryContext { - kvsFromExpressions := func(expressions []tagquery.Expression) []kv { - res := make([]kv, len(expressions)) - for i := range expressions { - res[i] = kv{Tag: expressions[i].Tag} - } - return res - } - - kvReFromExpression := func(expression tagquery.Expression) kvRe { - return kvRe{ - kv: kv{Tag: expression.Tag}, - Regex: expression.Regex, - matchCache: &sync.Map{}, - 
missCache: &sync.Map{}, - } + ctx := TagQueryContext{ + query: query, } + ctx.filters, ctx.defaultDecisions = query.GetMetricDefinitionFilters() - kvResFromExpressions := func(expressions []tagquery.Expression) []kvRe { - res := make([]kvRe, len(expressions)) - for i := range expressions { - res[i] = kvReFromExpression(expressions[i]) - } - return res - } - - return TagQueryContext{ - wg: &sync.WaitGroup{}, - equal: kvsFromExpressions(query.Expressions[tagquery.EQUAL]), - match: kvResFromExpressions(query.Expressions[tagquery.MATCH]), - notEqual: kvsFromExpressions(query.Expressions[tagquery.NOT_EQUAL]), - notMatch: kvResFromExpressions(query.Expressions[tagquery.NOT_MATCH]), - prefix: kvsFromExpressions(query.Expressions[tagquery.PREFIX]), - tagClause: query.TagClause, - tagPrefix: query.TagPrefix, - tagMatch: kvReFromExpression(query.TagMatch), - startWith: query.StartWith, - from: query.From, - } + return ctx } -// getInitialByEqual generates the initial resultset by executing the given equal expression -func (q *TagQueryContext) getInitialByEqual(expr kv, idCh chan schema.MKey, stopCh chan struct{}) { - defer q.wg.Done() +// getInitialIds asynchronously collects all ID's of the initial result set. It returns: +// a channel through which the IDs of the initial result set will be sent +// a stop channel, which when closed, will cause it to abort the background worker. 
+func (q *TagQueryContext) getInitialIds() (chan schema.MKey, chan struct{}) { + idCh := make(chan schema.MKey, 1000) + stopCh := make(chan struct{}) + initial := q.query.Expressions[q.query.StartWith] -KEYS: - for k := range q.index[expr.Key][expr.Value] { - select { - case <-stopCh: - break KEYS - case idCh <- k: - } + if initial.OperatesOnTag() { + q.getInitialByTag(initial, idCh, stopCh) + } else { + q.getInitialByTagValue(initial, idCh, stopCh) } - close(idCh) + return idCh, stopCh } -// getInitialByPrefix generates the initial resultset by executing the given prefix match expression -func (q *TagQueryContext) getInitialByPrefix(expr kv, idCh chan schema.MKey, stopCh chan struct{}) { - defer q.wg.Done() +// getInitialByTagValue generates an initial ID set which is later filtered down +// it only handles those expressions which involve matching a tag value: +// f.e. key=value but not key!= +func (q *TagQueryContext) getInitialByTagValue(expr tagquery.Expression, idCh chan schema.MKey, stopCh chan struct{}) { + q.wg.Add(1) + go func() { + defer close(idCh) + defer q.wg.Done() -VALUES: - for v, ids := range q.index[expr.Key] { - if !strings.HasPrefix(v, expr.Value) { - continue - } + key := expr.GetKey() - for id := range ids { - select { - case <-stopCh: - break VALUES - case idCh <- id: + OUTER: + for value, ids := range q.index[key] { + if !expr.ValuePasses(value) { + continue } - } - } - - close(idCh) -} - -// getInitialByMatch generates the initial resultset by executing the given match expression -func (q *TagQueryContext) getInitialByMatch(expr kvRe, idCh chan schema.MKey, stopCh chan struct{}) { - defer q.wg.Done() - // shortcut if Regex == nil. - // this will simply match any value, like ^.+. since we know that every value - // in the index must not be empty, we can skip the matching. 
- if expr.Regex == nil { - VALUES1: - for _, ids := range q.index[expr.Key] { for id := range ids { select { case <-stopCh: - break VALUES1 + break OUTER case idCh <- id: } } } - close(idCh) - return - } - -VALUES2: - for v, ids := range q.index[expr.Key] { - if !expr.Regex.MatchString(v) { - continue - } - - for id := range ids { - select { - case <-stopCh: - break VALUES2 - case idCh <- id: - } - } - } - - close(idCh) + }() } -// getInitialByTagPrefix generates the initial resultset by creating a list of -// metric IDs of which at least one tag starts with the defined prefix -func (q *TagQueryContext) getInitialByTagPrefix(idCh chan schema.MKey, stopCh chan struct{}) { - defer q.wg.Done() - -TAGS: - for tag, values := range q.index { - if !strings.HasPrefix(tag, q.tagPrefix) { - continue - } +// getInitialByTag generates an initial ID set which is later filtered down +// it only handles those expressions which do not involve matching a tag value: +// f.e. key!= but not key=value +func (q *TagQueryContext) getInitialByTag(expr tagquery.Expression, idCh chan schema.MKey, stopCh chan struct{}) { + q.wg.Add(1) + go func() { + defer close(idCh) + defer q.wg.Done() - for _, ids := range values { - for id := range ids { - select { - case <-stopCh: - break TAGS - case idCh <- id: - } + OUTER: + for tag := range q.index { + if !expr.ValuePasses(tag) { + continue } - } - } - - close(idCh) -} -// getInitialByTagMatch generates the initial resultset by creating a list of -// metric IDs of which at least one tag matches the defined regex -func (q *TagQueryContext) getInitialByTagMatch(idCh chan schema.MKey, stopCh chan struct{}) { - defer q.wg.Done() - -TAGS: - for tag, values := range q.index { - if q.tagMatch.Regex.MatchString(tag) { - for _, ids := range values { + for _, ids := range q.index[tag] { for id := range ids { select { case <-stopCh: - break TAGS + break OUTER case idCh <- id: } } } } - } - - close(idCh) -} - -// getInitialIds asynchronously collects all ID's of 
the initial result set. It returns: -// a channel through which the IDs of the initial result set will be sent -// a stop channel, which when closed, will cause it to abort the background worker. -func (q *TagQueryContext) getInitialIds() (chan schema.MKey, chan struct{}) { - idCh := make(chan schema.MKey, 1000) - stopCh := make(chan struct{}) - q.wg.Add(1) - - switch q.startWith { - case tagquery.EQUAL: - query := q.equal[0] - q.equal = q.equal[1:] - go q.getInitialByEqual(query, idCh, stopCh) - case tagquery.PREFIX: - query := q.prefix[0] - q.prefix = q.prefix[1:] - go q.getInitialByPrefix(query, idCh, stopCh) - case tagquery.MATCH: - query := q.match[0] - q.match = q.match[1:] - go q.getInitialByMatch(query, idCh, stopCh) - case tagquery.PREFIX_TAG: - go q.getInitialByTagPrefix(idCh, stopCh) - case tagquery.MATCH_TAG: - go q.getInitialByTagMatch(idCh, stopCh) - } - - return idCh, stopCh + }() } // testByAllExpressions takes and id and a MetricDefinition and runs it through @@ -285,221 +121,28 @@ func (q *TagQueryContext) testByAllExpressions(id schema.MKey, def *idx.Archive, return false } - if len(q.equal) > 0 && !q.testByEqual(id, q.equal, false) { - return false - } - - if len(q.notEqual) > 0 && !q.testByEqual(id, q.notEqual, true) { - return false - } - - if q.tagClause == tagquery.PREFIX_TAG && !omitTagFilters && q.startWith != tagquery.PREFIX_TAG { - if !q.testByTagPrefix(def) { - return false - } - } - - if !q.testByPrefix(def, q.prefix) { - return false - } - - if q.tagClause == tagquery.MATCH_TAG && !omitTagFilters && q.startWith != tagquery.MATCH_TAG { - if !q.testByTagMatch(def) { - return false - } - } - - if len(q.match) > 0 && !q.testByMatch(def, q.match, false) { - return false - } - - if len(q.notMatch) > 0 && !q.testByMatch(def, q.notMatch, true) { - return false - } - - return true -} + for i := range q.filters { + decision := q.filters[i](schema.SanitizeNameAsTagValue(def.Name), def.Tags) -// testByMatch filters a given metric by matching a 
regular expression against -// the values of specific associated tags -func (q *TagQueryContext) testByMatch(def *idx.Archive, exprs []kvRe, not bool) bool { -EXPRS: - for _, e := range exprs { - if e.Key == "name" { - if e.Regex == nil || e.Regex.MatchString(def.NameSanitizedAsTagValue()) { - if not { - return false - } - continue EXPRS - } else { - if !not { - return false - } - continue EXPRS - } + if decision == tagquery.None { + decision = q.defaultDecisions[i] } - prefix := e.Key + "=" - for _, tag := range def.Tags { - if !strings.HasPrefix(tag, prefix) { - continue - } - - value := tag[len(e.Key)+1:] - - // reduce regex matching by looking up cached non-matches - if _, ok := e.missCache.Load(value); ok { - continue - } - - // reduce regex matching by looking up cached matches - if _, ok := e.matchCache.Load(value); ok { - if not { - return false - } - continue EXPRS - } - - // Regex == nil means that this expression can be short cut - // by not evaluating it - if e.Regex == nil || e.Regex.MatchString(value) { - if atomic.LoadInt32(&e.matchCacheSize) < int32(matchCacheSize) { - e.matchCache.Store(value, struct{}{}) - atomic.AddInt32(&e.matchCacheSize, 1) - } - if not { - return false - } - continue EXPRS - } else { - if atomic.LoadInt32(&e.missCacheSize) < int32(matchCacheSize) { - e.missCache.Store(value, struct{}{}) - atomic.AddInt32(&e.missCacheSize, 1) - } - } - } - if !not { - return false - } - } - return true -} - -// testByTagMatch filters a given metric by matching a regular expression against -// the associated tags -func (q *TagQueryContext) testByTagMatch(def *idx.Archive) bool { - // special case for tag "name" - if _, ok := q.tagMatch.missCache.Load("name"); !ok { - if _, ok := q.tagMatch.matchCache.Load("name"); ok || q.tagMatch.Regex.MatchString("name") { - if !ok { - if atomic.LoadInt32(&q.tagMatch.matchCacheSize) < int32(matchCacheSize) { - q.tagMatch.matchCache.Store("name", struct{}{}) - atomic.AddInt32(&q.tagMatch.matchCacheSize, 1) - } 
- } - return true - } - if atomic.LoadInt32(&q.tagMatch.missCacheSize) < int32(matchCacheSize) { - q.tagMatch.missCache.Store("name", struct{}{}) - atomic.AddInt32(&q.tagMatch.missCacheSize, 1) - } - } - - for _, tag := range def.Tags { - equal := strings.Index(tag, "=") - if equal < 0 { - corruptIndex.Inc() - log.Errorf("memory-idx: ID %q has tag %q in index without '=' sign", def.Id, tag) - continue - } - key := tag[:equal] - - if _, ok := q.tagMatch.missCache.Load(key); ok { + if decision == tagquery.Pass { continue } - if _, ok := q.tagMatch.matchCache.Load(key); ok || q.tagMatch.Regex.MatchString(key) { - if !ok { - if atomic.LoadInt32(&q.tagMatch.matchCacheSize) < int32(matchCacheSize) { - q.tagMatch.matchCache.Store(key, struct{}{}) - atomic.AddInt32(&q.tagMatch.matchCacheSize, 1) - } - } - return true - } - if atomic.LoadInt32(&q.tagMatch.missCacheSize) < int32(matchCacheSize) { - q.tagMatch.missCache.Store(key, struct{}{}) - atomic.AddInt32(&q.tagMatch.missCacheSize, 1) + if decision == tagquery.Fail { + return false } - continue } - return false + return true } // testByFrom filters a given metric by its LastUpdate time func (q *TagQueryContext) testByFrom(def *idx.Archive) bool { - return q.from <= atomic.LoadInt64(&def.LastUpdate) -} - -// testByPrefix filters a given metric by matching prefixes against the values -// of a specific tag -func (q *TagQueryContext) testByPrefix(def *idx.Archive, exprs []kv) bool { -EXPRS: - for _, e := range exprs { - if e.Key == "name" && strings.HasPrefix(def.NameSanitizedAsTagValue(), e.Value) { - continue EXPRS - } - - prefix := e.Key + "=" + e.Value - for _, tag := range def.Tags { - if !strings.HasPrefix(tag, prefix) { - continue - } - continue EXPRS - } - return false - } - return true -} - -// testByTagPrefix filters a given metric by matching prefixes against its tags -func (q *TagQueryContext) testByTagPrefix(def *idx.Archive) bool { - if strings.HasPrefix("name", q.tagPrefix) { - return true - } - - for _, tag 
:= range def.Tags { - if strings.HasPrefix(tag, q.tagPrefix) { - return true - } - } - - return false -} - -// testByEqual filters a given metric by the defined "=" expressions -func (q *TagQueryContext) testByEqual(id schema.MKey, exprs []kv, not bool) bool { - for _, e := range exprs { - indexIds := q.index[e.Key][e.Value] - - // shortcut if key=value combo does not exist at all - if len(indexIds) == 0 { - return not - } - - if _, ok := indexIds[id]; ok { - if not { - return false - } - } else { - if !not { - return false - } - } - } - - return true + return q.query.From <= atomic.LoadInt64(&def.LastUpdate) } // filterIdsFromChan takes a channel of metric ids and runs them through the @@ -528,40 +171,11 @@ func (q *TagQueryContext) filterIdsFromChan(idCh, resCh chan schema.MKey) { q.wg.Done() } -// sortByCost tries to estimate the cost of different expressions and sort them -// in increasing order -// this is to reduce the result set cheaply and only apply expensive tests to an -// already reduced set of results -func (q *TagQueryContext) sortByCost() { - for i, kv := range q.equal { - q.equal[i].cost = uint(len(q.index[kv.Key][kv.Value])) - } - - // for prefix and match clauses we can't determine the actual cost - // without actually evaluating them, so we estimate based on - // cardinality of the key - for i, kv := range q.prefix { - q.prefix[i].cost = uint(len(q.index[kv.Key])) - } - - for i, kvRe := range q.match { - q.match[i].cost = uint(len(q.index[kvRe.Key])) - } - - sort.Sort(KvByCost(q.equal)) - sort.Sort(KvByCost(q.notEqual)) - sort.Sort(KvByCost(q.prefix)) - sort.Sort(KvReByCost(q.match)) - sort.Sort(KvReByCost(q.notMatch)) -} - // Run executes the tag query on the given index and returns a list of ids func (q *TagQueryContext) Run(index TagIndex, byId map[schema.MKey]*idx.Archive) IdSet { q.index = index q.byId = byId - q.sortByCost() - idCh, _ := q.getInitialIds() resCh := make(chan schema.MKey) @@ -594,23 +208,16 @@ func (q *TagQueryContext) 
Run(index TagIndex, byId map[schema.MKey]*idx.Archive) // we know that there can't be more tags discovered and added to the result set func (q *TagQueryContext) getMaxTagCount() int { defer q.wg.Done() - var maxTagCount int - if q.tagClause == tagquery.PREFIX_TAG && len(q.tagPrefix) > 0 { - for tag := range q.index { - if !strings.HasPrefix(tag, q.tagPrefix) { - continue - } + if q.query.TagClause == nil { + return len(q.index) + } + + var maxTagCount int + for tag := range q.index { + if q.query.TagClause.ValuePasses(tag) { maxTagCount++ } - } else if q.tagClause == tagquery.MATCH_TAG { - for tag := range q.index { - if q.tagMatch.Regex.MatchString(tag) { - maxTagCount++ - } - } - } else { - maxTagCount = len(q.index) } return maxTagCount @@ -655,18 +262,10 @@ IDS: continue } - if q.tagClause == tagquery.PREFIX_TAG { - if !strings.HasPrefix(key, q.tagPrefix) { - continue - } - } else if q.tagClause == tagquery.MATCH_TAG { - if _, ok := q.tagMatch.missCache.Load(key); ok || !q.tagMatch.Regex.MatchString(tag) { - if !ok { - q.tagMatch.missCache.Store(key, struct{}{}) - } - continue - } + if q.query.TagClause != nil && !q.query.TagClause.ValuePasses(key) { + continue } + metricTags[key] = struct{}{} } @@ -713,26 +312,16 @@ IDS: // tag "name". if it does, then we can omit some filtering because we know // that every metric has a name func (q *TagQueryContext) tagFilterMatchesName() bool { - matchName := false - - if q.tagClause == tagquery.PREFIX_TAG || q.startWith == tagquery.PREFIX_TAG { - if strings.HasPrefix("name", q.tagPrefix) { - matchName = true - } - } else if q.tagClause == tagquery.MATCH_TAG || q.startWith == tagquery.MATCH_TAG { - if q.tagMatch.Regex.MatchString("name") { - matchName = true - } - } else { - // some tag queries might have no prefix specified yet, in this case - // we do not need to filter by the name - // f.e. 
we know that every metric has a name, and we know that the - // prefix "" matches the string "name", so we know that every metric - // will pass the tag prefix test. hence we can omit the entire test. - matchName = true + // some tag queries might have no prefix specified yet, in this case + // we do not need to filter by the name + // f.e. we know that every metric has a name, and we know that the + // prefix "" matches the string "name", so we know that every metric + // will pass the tag prefix test. hence we can omit the entire test. + if q.query.TagClause == nil { + return true } - return matchName + return q.query.TagClause.ValuePasses("name") } // RunGetTags executes the tag query and returns all the tags of the @@ -751,7 +340,6 @@ func (q *TagQueryContext) RunGetTags(index TagIndex, byId map[schema.MKey]*idx.A q.wg.Add(1) go atomic.StoreInt32(&maxTagCount, int32(q.getMaxTagCount())) - q.sortByCost() idCh, stopCh := q.getInitialIds() tagCh := make(chan string) From 36c2cfa0cb2d840416ded3037c7f4941b4b8013e Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Sat, 29 Jun 2019 16:03:25 -0400 Subject: [PATCH 03/40] fix expression tests --- expr/tagquery/expression_test.go | 301 ++++++++++--------------------- 1 file changed, 96 insertions(+), 205 deletions(-) diff --git a/expr/tagquery/expression_test.go b/expr/tagquery/expression_test.go index cc7c01de74..5d2aaafbac 100644 --- a/expr/tagquery/expression_test.go +++ b/expr/tagquery/expression_test.go @@ -1,8 +1,8 @@ package tagquery import ( + "fmt" "reflect" - "regexp" "strings" "testing" ) @@ -27,19 +27,19 @@ func TestExpressionParsing(t *testing.T) { expression: "key!=", key: "key", value: "", - operator: NOT_EQUAL, + operator: HAS_TAG, err: false, }, { expression: "key=", key: "key", value: "", - operator: EQUAL, + operator: NOT_HAS_TAG, err: false, }, { expression: "key=~", key: "key", value: "", - operator: MATCH, + operator: NOT_HAS_TAG, err: false, }, { expression: "key=~v_alue", @@ -92,230 +92,121 @@ 
func TestExpressionParsing(t *testing.T) { }, } - for _, tc := range testCases { - expression, err := ParseExpression(tc.expression) - if (err != nil) != tc.err || (err == nil && (expression.Key != tc.key || expression.Value != tc.value || expression.Operator != tc.operator)) { - t.Fatalf("Expected the values %s, %s, %d, %t, but got %s, %s, %d, %q", tc.key, tc.value, tc.operator, tc.err, expression.Key, expression.Value, expression.Operator, err) - } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d \"%s\"", i, tc.expression), func(t *testing.T) { + expression, err := ParseExpression(tc.expression) + if (err != nil) != tc.err || (err == nil && (expression.GetKey() != tc.key || expression.GetValue() != tc.value || expression.GetOperator() != tc.operator)) { + t.Fatalf("Expected the values %s, %s, %d, %t, but got %s, %s, %d, %q", tc.key, tc.value, tc.operator, tc.err, expression.GetKey(), expression.GetValue(), expression.GetOperator(), err) + } + }) } } func TestExpressions_Sort(t *testing.T) { - tests := []struct { + tests := make([]struct { name string - e Expressions + have Expressions want Expressions - }{ - { - name: "simple sort", - e: Expressions{ - { - Tag: Tag{Key: "a", Value: "a"}, - Operator: NOT_EQUAL, - }, { - Tag: Tag{Key: "b", Value: "a"}, - Operator: EQUAL, - }, { - Tag: Tag{Key: "a", Value: "b"}, - Operator: EQUAL, - }, { - Tag: Tag{Key: "a", Value: "a"}, - Operator: EQUAL, - }, - }, - want: Expressions{ - { - Tag: Tag{Key: "a", Value: "a"}, - Operator: EQUAL, - }, { - Tag: Tag{Key: "a", Value: "a"}, - Operator: NOT_EQUAL, - }, { - Tag: Tag{Key: "a", Value: "b"}, - Operator: EQUAL, - }, { - Tag: Tag{Key: "b", Value: "a"}, - Operator: EQUAL, - }, - }, - }, - } + }, 1) + + tests[0].name = "simple sort" + tests[0].have, _ = ParseExpressions([]string{"a!=a", "b=a", "a=b", "a=a"}) + tests[0].want, _ = ParseExpressions([]string{"a=a", "a=b", "b=a", "a!=a"}) + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - tt.e.Sort() - if 
!reflect.DeepEqual(tt.e, tt.want) { - t.Fatalf("Expected expressions to be sorted:\nExpected:\n%+v\nGot:\n%+v\n", tt.want, tt.e) + tt.have.SortByFilterOrder() + if !reflect.DeepEqual(tt.have, tt.want) { + t.Fatalf("Expected expressions to be sorted:\nExpected:\n%+v\nGot:\n%+v\n", tt.want, tt.have) } }) } } -func TestExpressions_Strings(t *testing.T) { - tests := []struct { - name string - e Expressions - want []string - }{ - { - name: "simple expressions", - e: Expressions{ - { - Tag: Tag{Key: "a", Value: "b"}, - Operator: EQUAL, - }, { - Tag: Tag{Key: "ccc", Value: "$@#@"}, - Operator: NOT_EQUAL, - }, { - Tag: Tag{Key: "~", Value: "!"}, - Operator: MATCH, - }, { - Tag: Tag{Key: "d", Value: "e"}, - Operator: MATCH_TAG, - }, { - Tag: Tag{Key: "f", Value: "g"}, - Operator: NOT_MATCH, - }, { - Tag: Tag{Key: "h", Value: "i"}, - Operator: PREFIX, - }, { - Tag: Tag{Key: "j", Value: "q"}, - Operator: PREFIX_TAG, - }, - }, - want: []string{"a=b", "ccc!=$@#@", "~=~!", "__tag=~e", "f!=~g", "h^=i", "__tag^=q"}, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := tt.e.Strings(); !reflect.DeepEqual(got, tt.want) { - t.Errorf("Expressions.Strings() = %v, want %v", got, tt.want) +func TestExpressionsParsingAndBackToString(t *testing.T) { + tests := make([]struct { + got string + expect string + }, 8) + + tests[0].got = "a=b" + tests[0].expect = "a=b" + tests[1].got = "ccc!=$@#@" + tests[1].expect = "ccc!=$@#@" + tests[2].got = "~=~!" 
+ tests[2].expect = "~=~^(?:!)" + tests[3].got = "d=~e" + tests[3].expect = "d=~^(?:e)" + tests[4].got = "f!=~g" + tests[4].expect = "f!=~^(?:g)" + tests[5].got = "h^=i" + tests[5].expect = "h^=i" + tests[6].got = "__tag^=q" + tests[6].expect = "__tag^=q" + tests[7].got = "__tag=~abc" + tests[7].expect = "__tag=~^(?:abc)" + + builder := strings.Builder{} + for i, tc := range tests { + t.Run(fmt.Sprintf("TC %d (%s)", i, tc.got), func(t *testing.T) { + e, err := ParseExpression(tc.got) + if err != nil { + t.Fatalf("Error when parsing expression: %s", err) + } + + e.StringIntoBuilder(&builder) + res := builder.String() + builder.Reset() + + if res != tc.expect { + t.Fatalf("Expected expression \"%s\", but got \"%s\"", tc.expect, res) } }) } } func TestExpression_IsEqualTo(t *testing.T) { - type fields struct { - Tag Tag - Operator ExpressionOperator - RequiresNonEmptyValue bool - UsesRegex bool - Regex *regexp.Regexp - } - type args struct { - other Expression - } - tests := []struct { - name string - fields fields - args args - want bool - }{ - { - name: "Equal expressions with different internal settings", - fields: fields{ - Tag: Tag{Key: "a", Value: "b"}, - Operator: EQUAL, - RequiresNonEmptyValue: false, - UsesRegex: false, - }, - args: args{ - other: Expression{ - Tag: Tag{Key: "a", Value: "b"}, - Operator: EQUAL, - RequiresNonEmptyValue: true, - UsesRegex: true, - }, - }, - want: true, - }, { - name: "Different key", - fields: fields{ - Tag: Tag{Key: "a", Value: "b"}, - Operator: EQUAL, - }, - args: args{ - other: Expression{ - Tag: Tag{Key: "b", Value: "b"}, - Operator: EQUAL, - }, - }, - want: false, - }, { - name: "Different value", - fields: fields{ - Tag: Tag{Key: "a", Value: "a"}, - Operator: EQUAL, - }, - args: args{ - other: Expression{ - Tag: Tag{Key: "a", Value: "b"}, - Operator: EQUAL, - }, - }, - want: false, - }, { - name: "Different operator", - fields: fields{ - Tag: Tag{Key: "a", Value: "b"}, - Operator: EQUAL, - }, - args: args{ - other: 
Expression{ - Tag: Tag{Key: "a", Value: "b"}, - Operator: NOT_EQUAL, - }, - }, - want: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - e := &Expression{ - Tag: tt.fields.Tag, - Operator: tt.fields.Operator, - RequiresNonEmptyValue: tt.fields.RequiresNonEmptyValue, - UsesRegex: tt.fields.UsesRegex, - Regex: tt.fields.Regex, + tests := make([]struct { + expression string + notEqual []string + }, 3) + + tests[0].expression = "a=b" + tests[0].notEqual = []string{"a!=b", "a=a", "b=b"} + tests[1].expression = "a!=b" + tests[1].notEqual = []string{"a=b", "a!=a", "b!=b"} + tests[2].expression = "a=~b" + tests[2].notEqual = []string{"a=b", "a!=~b", "a=~b", "b=~b"} + tests[2].expression = "__tag=a" + tests[2].notEqual = []string{"tag=a", "__tag^=a", "__tag=~a", "__tag=b"} + tests[2].expression = "a=" + tests[2].notEqual = []string{"a=b", "b=", "a=~b", "b=~"} + + for i, tc := range tests { + t.Run(fmt.Sprintf("TC %d \"%s\"", i, tc.expression), func(t *testing.T) { + e1, err := ParseExpression(tc.expression) + if err != nil { + t.Fatalf("Unexpected parsing error of \"%s\": %s", tc.expression, err) } - if got := e.IsEqualTo(tt.args.other); got != tt.want { - t.Errorf("Expression.IsEqualTo() = %v, want %v", got, tt.want) + e2, err := ParseExpression(tc.expression) + if err != nil { + t.Fatalf("Unexpected parsing error of \"%s\": %s", tc.expression, err) + } + if !ExpressionsAreEqual(e1, e2) { + t.Fatalf("Expected two instantiations of expressions to be equal, but they were not: \"%s\"", tc.expression) } - }) - } -} -func TestParseExpressionAndBackToString(t *testing.T) { - expressions := []string{ - "a=b", - "a=b", - "a!=b", - "a!=b", - "a=~b", - "a=~^(?:b)", - "a!=~b", - "a!=~^(?:b)", - "a^=b", - "a^=b", - "__tag=~abc", - "__tag=~^(?:abc)", - "__tag^=cba", - "__tag^=cba", - } + for j := range tc.notEqual { + other, err := ParseExpression(tc.notEqual[j]) + if err != nil { + t.Fatalf("Unexpected parsing error of \"%s\": %s", tc.notEqual[j], 
err) + } - builder := strings.Builder{} - for i := 0; i < len(expressions); i += 2 { - parsed, err := ParseExpression(expressions[i]) - if err != nil { - t.Fatalf("TC %d: Unexpected error: %s", i, err) - } - parsed.StringIntoBuilder(&builder) - toString := builder.String() - expected := expressions[i+1] - if toString != expected { - t.Fatalf("TC %d: After parsing and converting back to string, expressions has changed unexpectedly: \"%s\" / \"%s\"", i, toString, expected) - } - builder.Reset() + if ExpressionsAreEqual(e1, other) || ExpressionsAreEqual(e2, other) { + t.Fatalf("Expressions are supposed to not be equal, but they were: \"%s\"/\"%s\"", tc.expression, tc.notEqual[j]) + } + } + }) } } From 05845d68d2268a45b585a457727fb1d2bfa00c3b Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Sat, 29 Jun 2019 16:40:37 -0400 Subject: [PATCH 04/40] simplify query struct leave all expressions in he .Expressions slice and only store offsets as .startWith and .tagClause property. Then add a bunch of helper methods to retrieve the according expressions from the query. 
--- expr/tagquery/query.go | 51 ++++++++++++++++++++++++++++++----------- idx/memory/tag_query.go | 29 ++++++++++++++--------- 2 files changed, 56 insertions(+), 24 deletions(-) diff --git a/expr/tagquery/query.go b/expr/tagquery/query.go index a68e7345eb..6e4d6394c2 100644 --- a/expr/tagquery/query.go +++ b/expr/tagquery/query.go @@ -16,11 +16,12 @@ type Query struct { Expressions Expressions // the index in the Expressions slice at which we start evaluating the query - StartWith int + startWith int - // clause that operate on tags (keys) - // we only need to support 1 condition for now: a prefix or match - TagClause Expression + // the index of clause that operate on tags (keys) + // we only support 0 or 1 tag expression per query + // tag expressions are __tag^= and __tag=~ + tagClause int } func NewQueryFromStrings(expressionStrs []string, from int64) (Query, error) { @@ -33,12 +34,13 @@ func NewQueryFromStrings(expressionStrs []string, from int64) (Query, error) { } func NewQuery(expressions Expressions, from int64) (Query, error) { - q := Query{From: from} + q := Query{From: from, tagClause: -1} if len(expressions) == 0 { return q, errInvalidQuery } + foundExpressionRequiringNonEmptyValue := false expressions.SortByFilterOrder() for i := 0; i < len(expressions); i++ { // skip duplicate expression @@ -48,25 +50,29 @@ func NewQuery(expressions Expressions, from int64) (Query, error) { continue } + foundExpressionRequiringNonEmptyValue = foundExpressionRequiringNonEmptyValue || expressions[i].RequiresNonEmptyValue() + op := expressions[i].GetOperator() switch op { case MATCH_TAG: fallthrough case PREFIX_TAG: - // we only allow one query by tag - if q.TagClause != nil { + // we only allow one expression operating on the tag per query + if q.tagClause >= 0 { return q, errInvalidQuery } - q.TagClause = expressions[i] - expressions = append(expressions[:i], expressions[i+1:]...) 
- i-- + q.tagClause = i } } + if !foundExpressionRequiringNonEmptyValue { + return q, errInvalidQuery + } + q.Expressions = expressions - q.StartWith = q.Expressions.findInitialExpression() - if q.TagClause == nil && q.StartWith < 0 { + q.startWith = q.Expressions.findInitialExpression() + if q.startWith < 0 { return q, errInvalidQuery } @@ -79,7 +85,7 @@ func (q *Query) GetMetricDefinitionFilters() (MetricDefinitionFilters, []FilterD for i := range q.Expressions { // the one we start with does not need to be added to the filters, // because we use it to build the initial result set - if i == q.StartWith { + if i == q.startWith { continue } filters = append(filters, q.Expressions[i].GetMetricDefinitionFilter()) @@ -88,3 +94,22 @@ func (q *Query) GetMetricDefinitionFilters() (MetricDefinitionFilters, []FilterD return filters, defaultDecisions } + +// GetInitialExpression returns the expression which should be used to generate the initial +// result set, to later filter it down with the remaining expressions. +// We assume Query has been instantiated via NewQuery(), in which case it is guaranteed that +// that .startWith has been set correctly or otherwise an error would have been returned +func (q *Query) GetInitialExpression() Expression { + return q.Expressions[q.startWith] +} + +// GetTagClause returns the expression which operates on tags, if one is present. +// This assumes that Query has been instantiated via NewQuery(), which either sets +// .tagClause to a valid value or returns an error. +// There can only be one tagClause per Query. 
+func (q *Query) GetTagClause() Expression { + if q.tagClause < 0 { + return nil + } + return q.Expressions[q.tagClause] +} diff --git a/idx/memory/tag_query.go b/idx/memory/tag_query.go index 73db6cbd80..b93508c219 100644 --- a/idx/memory/tag_query.go +++ b/idx/memory/tag_query.go @@ -45,12 +45,11 @@ func NewTagQueryContext(query tagquery.Query) TagQueryContext { func (q *TagQueryContext) getInitialIds() (chan schema.MKey, chan struct{}) { idCh := make(chan schema.MKey, 1000) stopCh := make(chan struct{}) - initial := q.query.Expressions[q.query.StartWith] - if initial.OperatesOnTag() { - q.getInitialByTag(initial, idCh, stopCh) + if q.query.GetInitialExpression().OperatesOnTag() { + q.getInitialByTag(idCh, stopCh) } else { - q.getInitialByTagValue(initial, idCh, stopCh) + q.getInitialByTagValue(idCh, stopCh) } return idCh, stopCh @@ -59,7 +58,9 @@ func (q *TagQueryContext) getInitialIds() (chan schema.MKey, chan struct{}) { // getInitialByTagValue generates an initial ID set which is later filtered down // it only handles those expressions which involve matching a tag value: // f.e. key=value but not key!= -func (q *TagQueryContext) getInitialByTagValue(expr tagquery.Expression, idCh chan schema.MKey, stopCh chan struct{}) { +func (q *TagQueryContext) getInitialByTagValue(idCh chan schema.MKey, stopCh chan struct{}) { + expr := q.query.GetInitialExpression() + q.wg.Add(1) go func() { defer close(idCh) @@ -87,7 +88,9 @@ func (q *TagQueryContext) getInitialByTagValue(expr tagquery.Expression, idCh ch // getInitialByTag generates an initial ID set which is later filtered down // it only handles those expressions which do not involve matching a tag value: // f.e. 
key!= but not key=value -func (q *TagQueryContext) getInitialByTag(expr tagquery.Expression, idCh chan schema.MKey, stopCh chan struct{}) { +func (q *TagQueryContext) getInitialByTag(idCh chan schema.MKey, stopCh chan struct{}) { + expr := q.query.GetInitialExpression() + q.wg.Add(1) go func() { defer close(idCh) @@ -209,13 +212,14 @@ func (q *TagQueryContext) Run(index TagIndex, byId map[schema.MKey]*idx.Archive) func (q *TagQueryContext) getMaxTagCount() int { defer q.wg.Done() - if q.query.TagClause == nil { + tagClause := q.query.GetTagClause() + if tagClause == nil { return len(q.index) } var maxTagCount int for tag := range q.index { - if q.query.TagClause.ValuePasses(tag) { + if tagClause.ValuePasses(tag) { maxTagCount++ } } @@ -231,6 +235,7 @@ func (q *TagQueryContext) filterTagsFromChan(idCh chan schema.MKey, tagCh chan s // used to prevent that this worker thread will push the same result into // the chan twice resultsCache := make(map[string]struct{}) + tagClause := q.query.GetTagClause() IDS: for id := range idCh { @@ -262,7 +267,7 @@ IDS: continue } - if q.query.TagClause != nil && !q.query.TagClause.ValuePasses(key) { + if tagClause != nil && !tagClause.ValuePasses(key) { continue } @@ -312,16 +317,18 @@ IDS: // tag "name". if it does, then we can omit some filtering because we know // that every metric has a name func (q *TagQueryContext) tagFilterMatchesName() bool { + tagClause := q.query.GetTagClause() + // some tag queries might have no prefix specified yet, in this case // we do not need to filter by the name // f.e. we know that every metric has a name, and we know that the // prefix "" matches the string "name", so we know that every metric // will pass the tag prefix test. hence we can omit the entire test. 
- if q.query.TagClause == nil { + if tagClause == nil { return true } - return q.query.TagClause.ValuePasses("name") + return tagClause.ValuePasses("name") } // RunGetTags executes the tag query and returns all the tags of the From 4d70df9a9642b8836d9d4ad34eec1edca4224d91 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Thu, 4 Jul 2019 23:45:47 +0000 Subject: [PATCH 05/40] fix test --- expr/tagquery/meta_tag_record_test.go | 19 ++- expr/tagquery/query_test.go | 130 ++++++++++--------- idx/memory/meta_tags_test.go | 179 ++++++-------------------- idx/memory/tag_query_test.go | 29 +---- 4 files changed, 121 insertions(+), 236 deletions(-) diff --git a/expr/tagquery/meta_tag_record_test.go b/expr/tagquery/meta_tag_record_test.go index ec2567f342..6ffa5e39ae 100644 --- a/expr/tagquery/meta_tag_record_test.go +++ b/expr/tagquery/meta_tag_record_test.go @@ -22,18 +22,17 @@ func TestParseMetaTagRecord(t *testing.T) { }, }, Queries: Expressions{ - { - Tag: Tag{ - Key: "e", - Value: "f", + &expressionNotEqual{ + expressionCommon{ + key: "e", + value: "f", }, - Operator: NOT_EQUAL, - }, { - Tag: Tag{ - Key: "g", - Value: "h", + }, + &expressionPrefix{ + expressionCommon{ + key: "g", + value: "h", }, - Operator: PREFIX, }, }, }) diff --git a/expr/tagquery/query_test.go b/expr/tagquery/query_test.go index c81df42e37..be8650a1ef 100644 --- a/expr/tagquery/query_test.go +++ b/expr/tagquery/query_test.go @@ -7,7 +7,7 @@ import ( func TestQueryByTagFilterByTagPrefixWithEmptyString(t *testing.T) { _, err := NewQueryFromStrings([]string{"__tag^="}, 0) - if err != errInvalidQuery { + if err == nil { t.Fatalf("Expected an error, but didn't get it") } } @@ -38,55 +38,55 @@ func TestNewQueryFromStrings(t *testing.T) { }, want: Query{ From: 321, - Expressions: map[ExpressionOperator]Expressions{ - EQUAL: { - { - Tag: Tag{Key: "a", Value: "b"}, - Operator: EQUAL, - RequiresNonEmptyValue: true, - }, { - Tag: Tag{Key: "x", Value: "z"}, - Operator: EQUAL, - RequiresNonEmptyValue: true, + 
Expressions: Expressions{ + &expressionEqual{ + expressionCommon{ + key: "a", + value: "b", }, }, - NOT_EQUAL: { - { - Tag: Tag{Key: "c", Value: "d"}, - Operator: NOT_EQUAL, + &expressionEqual{ + expressionCommon{ + key: "x", + value: "z", }, }, - MATCH: { - { - Tag: Tag{Key: "e", Value: "^(?:f)"}, - Operator: MATCH, - RequiresNonEmptyValue: true, - UsesRegex: true, + &expressionPrefix{ + expressionCommon{ + key: "i", + value: "j", }, }, - NOT_MATCH: { - { - Tag: Tag{Key: "g", Value: "^(?:h)"}, - Operator: NOT_MATCH, - UsesRegex: true, + &expressionNotEqual{ + expressionCommon{ + key: "c", + value: "d", }, }, - PREFIX: { - { - Tag: Tag{Key: "i", Value: "j"}, - Operator: PREFIX, - RequiresNonEmptyValue: true, + &expressionMatch{ + expressionCommon: expressionCommon{ + key: "e", + value: "^(?:f)", }, + valueRe: nil, + }, + &expressionMatchTag{ + expressionCommon: expressionCommon{ + key: "__tag", + value: "^(?:k)", + }, + valueRe: nil, + }, + &expressionNotMatch{ + expressionCommon: expressionCommon{ + key: "g", + value: "^(?:h)", + }, + valueRe: nil, }, }, - TagMatch: Expression{ - Tag: Tag{Key: "__tag", Value: "^(?:k)"}, - Operator: MATCH_TAG, - RequiresNonEmptyValue: true, - UsesRegex: true, - }, - TagClause: MATCH_TAG, - StartWith: EQUAL, + tagClause: 5, + startWith: 0, }, }, { name: "test tag prefix with empty value", @@ -118,17 +118,17 @@ func TestNewQueryFromStrings(t *testing.T) { expressionStrs: []string{"abc=~cba"}, }, want: Query{ - Expressions: map[ExpressionOperator]Expressions{ - MATCH: { - { - Tag: Tag{Key: "abc", Value: "^(?:cba)"}, - Operator: MATCH, - RequiresNonEmptyValue: true, - UsesRegex: true, + Expressions: Expressions{ + &expressionMatch{ + expressionCommon: expressionCommon{ + key: "abc", + value: "^(?:cba)", }, + valueRe: nil, }, }, - StartWith: MATCH, + startWith: 0, + tagClause: -1, }, }, { name: "deduplicate duplicate expressions", @@ -136,20 +136,22 @@ func TestNewQueryFromStrings(t *testing.T) { expressionStrs: []string{"a=a", "b=b", 
"a=a"}, }, want: Query{ - Expressions: map[ExpressionOperator]Expressions{ - EQUAL: { - { - Tag: Tag{Key: "a", Value: "a"}, - Operator: EQUAL, - RequiresNonEmptyValue: true, - }, { - Tag: Tag{Key: "b", Value: "b"}, - Operator: EQUAL, - RequiresNonEmptyValue: true, + Expressions: Expressions{ + &expressionEqual{ + expressionCommon{ + key: "a", + value: "a", + }, + }, + &expressionEqual{ + expressionCommon{ + key: "b", + value: "b", }, }, }, - StartWith: EQUAL, + startWith: 0, + tagClause: -1, }, }, } @@ -167,10 +169,14 @@ func TestNewQueryFromStrings(t *testing.T) { } // don't compare the compiled regex objects - got.TagMatch.Regex = nil - for operator := range got.Expressions { - for i := range got.Expressions[operator] { - got.Expressions[operator][i].Regex = nil + for i := range got.Expressions { + switch got.Expressions[i].(type) { + case *expressionMatch: + got.Expressions[i].(*expressionMatch).valueRe = nil + case *expressionNotMatch: + got.Expressions[i].(*expressionNotMatch).valueRe = nil + case *expressionMatchTag: + got.Expressions[i].(*expressionMatchTag).valueRe = nil } } diff --git a/idx/memory/meta_tags_test.go b/idx/memory/meta_tags_test.go index 0446c48af5..e5a9e7ad33 100644 --- a/idx/memory/meta_tags_test.go +++ b/idx/memory/meta_tags_test.go @@ -2,7 +2,6 @@ package memory import ( "hash" - "reflect" "testing" "github.com/grafana/metrictank/expr/tagquery" @@ -37,42 +36,8 @@ func TestInsertSimpleMetaTagRecord(t *testing.T) { t.Fatalf("We expected the record to be found at the index of its hash, but it wasn't") } - if len(record.MetaTags) != 2 { - t.Fatalf("The newly created record was expected to have 2 tags, but it had %d", len(record.MetaTags)) - } - if len(record.Queries) != 2 { - t.Fatalf("The newly created record was expected to have 2 queries, but it had %d", len(record.Queries)) - } - - var seenMetaTag1, seenMetaTag2 bool - for _, metaTag := range record.MetaTags { - if reflect.DeepEqual(metaTag, tagquery.Tag{Key: "metaTag1", Value: "abc"}) { 
- seenMetaTag1 = true - } - if reflect.DeepEqual(metaTag, tagquery.Tag{Key: "anotherTag", Value: "theValue"}) { - seenMetaTag2 = true - } - } - - if !seenMetaTag1 || !seenMetaTag2 { - t.Fatalf("We expected both meta tags to be present in the record, but not both were: %t / %t", seenMetaTag1, seenMetaTag2) - } - - var seenQuery1, seenQuery2 bool - for _, query := range record.Queries { - // ignore the compiled regex structs, as they can't reliably be compared - query.Regex = nil - - if reflect.DeepEqual(query, tagquery.Expression{Tag: tagquery.Tag{Key: "metricTag", Value: "a"}, Operator: tagquery.NOT_EQUAL, RequiresNonEmptyValue: false, UsesRegex: false}) { - seenQuery1 = true - } - if reflect.DeepEqual(query, tagquery.Expression{Tag: tagquery.Tag{Key: "match", Value: "^(?:this)"}, Operator: tagquery.MATCH, RequiresNonEmptyValue: true, UsesRegex: true}) { - seenQuery2 = true - } - } - - if !seenQuery1 || !seenQuery2 { - t.Fatalf("We expected both queries to be present in the record, but not both were: %t / %t", seenQuery1, seenQuery2) + if !metaTagRecordsAreEqual(&recordToInsert, record) { + t.Fatalf("Inserted meta tag record has unexpectedly been modified") } } @@ -102,22 +67,18 @@ func TestUpdateExistingMetaTagRecord(t *testing.T) { t.Fatalf("Expected 2 meta tag records, but there were %d", len(metaTagRecords)) } - // the order of the records may have changed due to sorting by id - var record1, record2 tagquery.MetaTagRecord var found1, found2 bool var recordIdToUpdate recordId - for id, record := range metaTagRecords { - switch record.Queries[0].Value { - case "^(?:a)": - record1 = metaTagRecords[id] + for i, record := range metaTagRecords { + if metaTagRecordsAreEqual(&record, &recordToInsert1) { found1 = true - recordIdToUpdate = id - case "^(?:c)": - record2 = metaTagRecords[id] + recordIdToUpdate = i + } else if metaTagRecordsAreEqual(&record, &recordToInsert2) { found2 = true } } - if !found1 || !found2 { + + if !(found1 && found2) { t.Fatalf("Expected both 
meta tag records to be found, but at least one wasn't: %t / %t", found1, found2) } @@ -134,107 +95,27 @@ func TestUpdateExistingMetaTagRecord(t *testing.T) { if oldId != id { t.Fatalf("Expected the new id after updating to be %d (same as the old id), but it was %d", oldId, id) } - if oldRecord == nil { + if oldRecord == nil || !metaTagRecordsAreEqual(oldRecord, &recordToInsert1) { t.Fatalf("Expected the old record to not be nil, but it was") } - if len(metaTagRecords) != 2 { t.Fatalf("Expected that there to be 2 meta tag records, but there were %d", len(metaTagRecords)) } // the order of the records may have changed again due to sorting by id found1, found2 = false, false - for id, record := range metaTagRecords { - if len(record.Queries) != 2 { - t.Fatalf("Expected every record to have 2 queries, but one had not: %+v", record) - } - switch record.Queries[0].Value { - case "^(?:a)": - record1 = metaTagRecords[id] + for _, record := range metaTagRecords { + if metaTagRecordsAreEqual(&record, &recordToUpdate) { found1 = true - case "^(?:c)": - record2 = metaTagRecords[id] + } + if metaTagRecordsAreEqual(&record, &recordToInsert2) { found2 = true } } - if !found1 || !found2 { + if !(found1 && found2) { t.Fatalf("Expected both meta tag records to be found, but not both were: %t / %t", found1, found2) } - - expectedRecord1 := tagquery.MetaTagRecord{ - MetaTags: []tagquery.Tag{ - { - Key: "metaTag1", - Value: "value2", - }, - }, - Queries: tagquery.Expressions{ - tagquery.Expression{ - Tag: tagquery.Tag{ - Key: "tag1", - Value: "^(?:a)", - }, - Operator: tagquery.MATCH, - RequiresNonEmptyValue: true, - UsesRegex: true, - }, - tagquery.Expression{ - Tag: tagquery.Tag{ - Key: "tag2", - Value: "^(?:b)", - }, - Operator: tagquery.MATCH, - RequiresNonEmptyValue: true, - UsesRegex: true, - }, - }, - } - - // ignore the compiled regex structs, as they can't reliably be compared - for i := range record1.Queries { - record1.Queries[i].Regex = nil - } - if 
!reflect.DeepEqual(record1, expectedRecord1) { - t.Fatalf("Record1 did not look as expected:\nExpected\n%+v\nGot:\n%+v", expectedRecord1, record1) - } - - expectedRecord2 := tagquery.MetaTagRecord{ - MetaTags: []tagquery.Tag{ - { - Key: "metaTag1", - Value: "value1", - }, - }, - Queries: tagquery.Expressions{ - tagquery.Expression{ - Tag: tagquery.Tag{ - Key: "tag1", - Value: "^(?:c)", - }, - Operator: tagquery.MATCH, - RequiresNonEmptyValue: true, - UsesRegex: true, - }, - tagquery.Expression{ - Tag: tagquery.Tag{ - Key: "tag2", - Value: "^(?:d)", - }, - Operator: tagquery.MATCH, - RequiresNonEmptyValue: true, - UsesRegex: true, - }, - }, - } - - // ignore the compiled regex structs, as they can't reliably be compared - for i := range record2.Queries { - record2.Queries[i].Regex = nil - } - if !reflect.DeepEqual(record2, expectedRecord2) { - t.Fatalf("Record1 did not look as expected:\nExpected\n%+v\nGot:\n%+v", expectedRecord2, record2) - } } // we mock the hashing algorithm implementation because we want to be able to @@ -316,7 +197,7 @@ func TestHashCollisionsOnInsert(t *testing.T) { t.Fatalf("Expected 3 meta tag records to be present, but there were %d", len(metaTagRecords)) } - // updating the third record with the same hash and equal queries + // updating the third record with the same hash and equal queries, but different meta tags record, _ = tagquery.ParseMetaTagRecord([]string{"metaTag3=value4"}, []string{"metricTag3=value3"}) id, returnedRecord, oldId, oldRecord, err = metaTagRecords.upsert(record) if err != nil { @@ -327,7 +208,7 @@ func TestHashCollisionsOnInsert(t *testing.T) { } // check if the returned new record looks as expected - if !reflect.DeepEqual(returnedRecord, &record) { + if !metaTagRecordsAreEqual(returnedRecord, &record) { t.Fatalf("New record looked different than expected:\nExpected:\n%+v\nGot:\n%+v\n", &record, returnedRecord) } if oldId != 3 { @@ -336,7 +217,7 @@ func TestHashCollisionsOnInsert(t *testing.T) { // check if the 
returned old record looks as expected record, _ = tagquery.ParseMetaTagRecord([]string{"metaTag3=value3"}, []string{"metricTag3=value3"}) - if !reflect.DeepEqual(oldRecord, &record) { + if !metaTagRecordsAreEqual(oldRecord, &record) { t.Fatalf("Old record looked different than expected:\nExpected:\n%+v\nGot:\n%+v\n", &record, oldRecord) } if len(metaTagRecords) != 3 { @@ -358,6 +239,7 @@ func TestDeletingMetaRecord(t *testing.T) { } // then we delete one record again + // upserting a meta tag record with one that has no meta tags results in deletion record.MetaTags = nil id, returnedRecord, oldId, _, err := metaTagRecords.upsert(record) if err != nil { @@ -366,7 +248,7 @@ func TestDeletingMetaRecord(t *testing.T) { if len(returnedRecord.MetaTags) != 0 { t.Fatalf("Expected returned meta tag record to have 0 meta tags, but it had %d", len(returnedRecord.MetaTags)) } - if !reflect.DeepEqual(returnedRecord.Queries, record.Queries) { + if !metaTagRecordsAreEqual(returnedRecord, &record) { t.Fatalf("Queries of returned record don't match what we expected:\nExpected:\n%+v\nGot:\n%+v\n", record.Queries, returnedRecord.Queries) } if oldId != idOfRecord2 { @@ -380,3 +262,26 @@ func TestDeletingMetaRecord(t *testing.T) { t.Fatalf("Expected returned record id to not be present, but it was") } } + +func metaTagRecordsAreEqual(record1, record2 *tagquery.MetaTagRecord) bool { + if len(record1.MetaTags) != len(record2.MetaTags) { + return false + } + + foundTags := make([]bool, len(record1.MetaTags)) + for i, tag := range record1.MetaTags { + for _, otherTag := range record2.MetaTags { + if tag == otherTag { + foundTags[i] = true + } + } + } + + for i := range foundTags { + if !foundTags[i] { + return false + } + } + + return record1.MatchesQueries(record2) +} diff --git a/idx/memory/tag_query_test.go b/idx/memory/tag_query_test.go index c66022d63d..6f4efae1e7 100644 --- a/idx/memory/tag_query_test.go +++ b/idx/memory/tag_query_test.go @@ -171,7 +171,7 @@ func 
TestQueryByTagWithEqualEmpty(t *testing.T) { func TestQueryByTagWithUnequalEmpty(t *testing.T) { ids := getTestIDs() - q, _ := tagquery.NewQueryFromStrings([]string{"key1=value1", "key3!=", "key3!=~"}, 0) + q, _ := tagquery.NewQueryFromStrings([]string{"key1=value1", "key3!="}, 0) expect := make(IdSet) expect[ids[1]] = struct{}{} expect[ids[3]] = struct{}{} @@ -243,9 +243,6 @@ func TestQueryByTagFilterByTagPrefixSpecialCaseName(t *testing.T) { func TestQueryByTagFilterByTagMatchWithExpressionAndNameException(t *testing.T) { ids := getTestIDs() q, _ := tagquery.NewQueryFromStrings([]string{"__tag=~na", "key2=value2"}, 0) - if q.StartWith != tagquery.EQUAL { - t.Fatalf("Expected query to start with equal expression") - } expect := make(IdSet) expect[ids[0]] = struct{}{} expect[ids[5]] = struct{}{} @@ -255,30 +252,17 @@ func TestQueryByTagFilterByTagMatchWithExpressionAndNameException(t *testing.T) func TestQueryByTagFilterByTagMatchWithExpression(t *testing.T) { ids := getTestIDs() q, _ := tagquery.NewQueryFromStrings([]string{"__tag=~a{1}", "key2=value2"}, 0) - if q.StartWith != tagquery.EQUAL { - t.Fatalf("Expected query to start with equal expression") - } - expect := make(IdSet) expect[ids[5]] = struct{}{} queryAndCompareResults(t, NewTagQueryContext(q), expect) q, _ = tagquery.NewQueryFromStrings([]string{"__tag=~a{2}", "key2=value2"}, 0) - if q.StartWith != tagquery.EQUAL { - t.Fatalf("Expected query to start with equal expression") - } queryAndCompareResults(t, NewTagQueryContext(q), expect) q, _ = tagquery.NewQueryFromStrings([]string{"__tag=~a{3}", "key2=value2"}, 0) - if q.StartWith != tagquery.EQUAL { - t.Fatalf("Expected query to start with equal expression") - } queryAndCompareResults(t, NewTagQueryContext(q), expect) q, _ = tagquery.NewQueryFromStrings([]string{"__tag=~a{4}", "key2=value2"}, 0) - if q.StartWith != tagquery.EQUAL { - t.Fatalf("Expected query to start with equal expression") - } delete(expect, ids[5]) queryAndCompareResults(t, 
NewTagQueryContext(q), expect) } @@ -286,24 +270,15 @@ func TestQueryByTagFilterByTagMatchWithExpression(t *testing.T) { func TestQueryByTagFilterByTagPrefixWithExpression(t *testing.T) { ids := getTestIDs() q, _ := tagquery.NewQueryFromStrings([]string{"__tag^=aa", "key2=value2"}, 0) - if q.StartWith != tagquery.EQUAL { - t.Fatalf("Expected query to start with equal expression") - } expect := make(IdSet) expect[ids[5]] = struct{}{} queryAndCompareResults(t, NewTagQueryContext(q), expect) q, _ = tagquery.NewQueryFromStrings([]string{"__tag^=aaa", "key2=value2"}, 0) - if q.StartWith != tagquery.EQUAL { - t.Fatalf("Expected query to start with equal expression") - } queryAndCompareResults(t, NewTagQueryContext(q), expect) q, _ = tagquery.NewQueryFromStrings([]string{"__tag^=aaaa", "key2=value2"}, 0) - if q.StartWith != tagquery.EQUAL { - t.Fatalf("Expected query to start with equal expression") - } delete(expect, ids[5]) delete(expect, ids[6]) queryAndCompareResults(t, NewTagQueryContext(q), expect) @@ -448,7 +423,7 @@ func testGetByTag(t *testing.T) { expectation: []string{}, }, { expressions: []string{"key1=~value[0-9]", "key2=~", "key3!=value3"}, - expectation: []string{fullName(mds[11])}, + expectation: []string{fullName(mds[1]), fullName(mds[11]), fullName(mds[18])}, }, { expressions: []string{"key2=", "key1=value1"}, expectation: []string{fullName(mds[11]), fullName(mds[3])}, From 8e403eff9021a51156c6baa7c47eefef214c15ea Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Fri, 5 Jul 2019 16:12:19 +0000 Subject: [PATCH 06/40] add unit test for the indexes meta-tag-record interface --- idx/memory/memory_test.go | 111 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/idx/memory/memory_test.go b/idx/memory/memory_test.go index d0c4942d1b..6624a87112 100644 --- a/idx/memory/memory_test.go +++ b/idx/memory/memory_test.go @@ -1014,6 +1014,117 @@ func testSingleNodeMetric(t *testing.T) { ix.AddOrUpdate(mkey, data, getPartition(data)) 
} +func TestUpsertingMetaRecordsIntoIndex(t *testing.T) { + ix := NewUnpartitionedMemoryIdx() + + record1, err := tagquery.ParseMetaTagRecord([]string{"a=b", "c=d"}, []string{"name=~a.+", "__tag^=a"}) + if err != nil { + t.Fatalf("Unexpected error when parsing meta tag record: %q", err) + } + record2, err := tagquery.ParseMetaTagRecord([]string{"e=f", "g=h"}, []string{"other=queries", "some!=value"}) + if err != nil { + t.Fatalf("Unexpected error when parsing meta tag record: %q", err) + } + record3, err := tagquery.ParseMetaTagRecord([]string{"i=j", "k=l"}, []string{"other=queries", "some!=value"}) + if err != nil { + t.Fatalf("Unexpected error when parsing meta tag record: %q", err) + } + + createdRecord, created, err := ix.MetaTagRecordUpsert(1, record1) + if err != nil { + t.Fatalf("Unexpected error when upserting meta tag record: %q", err) + } + if !created { + t.Fatalf("Expected record to have been created, but it has not") + } + if !metaTagRecordsAreEqual(&createdRecord, &record1) { + t.Fatalf("Expected returned record to look same as added record, but it does not:\nExpected:\n%+v\nGot:\n%+v\n", record1, createdRecord) + } + + createdRecord, created, err = ix.MetaTagRecordUpsert(1, record2) + if err != nil { + t.Fatalf("Unexpected error when upserting meta tag record: %q", err) + } + if !created { + t.Fatalf("Expected record to have been created, but it has not") + } + if !metaTagRecordsAreEqual(&createdRecord, &record2) { + t.Fatalf("Expected returned record to look same as added record, but it does not:\nExpected:\n%+v\nGot:\n%+v\n", record2, createdRecord) + } + + metaTagRecords := ix.MetaTagRecordList(1) + if len(metaTagRecords) != 2 { + t.Fatalf("Expected MetaTagRecordList to return 2 records for org 1, but it has:\n%+v\n", metaTagRecords) + } + + var found1, found2 bool + for _, mtr := range metaTagRecords { + if metaTagRecordsAreEqual(&mtr, &record1) { + found1 = true + } + if metaTagRecordsAreEqual(&mtr, &record2) { + found2 = true + } + } + if 
!(found1 && found2) { + t.Fatalf("Expected MetaTagRecordList to return both records, but it has not: %t/%t", found1, found2) + } + + metaTagRecords = ix.MetaTagRecordList(2) + if len(metaTagRecords) != 0 { + t.Fatalf("Expected MetaTagRecordList to return no records for org 2, but it has:\n%+v\n", metaTagRecords) + } + + if len(ix.metaTags[1]["a"]["b"]) != 1 { + t.Fatalf("Expected that there is 1 record associated with tag a=b, but there were %d", len(ix.metaTags[1]["a"]["b"])) + } + if len(ix.metaTags[1]["c"]["d"]) != 1 { + t.Fatalf("Expected that there is 1 record associated with tag c=d, but there were %d", len(ix.metaTags[1]["c"]["d"])) + } + if len(ix.metaTags[1]["e"]["f"]) != 1 { + t.Fatalf("Expected that there is 1 record associated with tag e=f, but there were %d", len(ix.metaTags[1]["e"]["f"])) + } + if len(ix.metaTags[1]["g"]["h"]) != 1 { + t.Fatalf("Expected that there is 1 record associated with tag g=h, but there were %d", len(ix.metaTags[1]["g"]["h"])) + } + + // record3 has the same queries as record2, so it should completely replace it + createdRecord, created, err = ix.MetaTagRecordUpsert(1, record3) + if err != nil { + t.Fatalf("Unexpected error when upserting meta tag record: %q", err) + } + if created { + t.Fatalf("Expected record to not have been created, but it has") + } + if !metaTagRecordsAreEqual(&createdRecord, &record3) { + t.Fatalf("Expected returned record to look same as added record, but it does not:\nExpected:\n%+v\nGot:\n%+v\n", record3, createdRecord) + } + + metaTagRecords = ix.MetaTagRecordList(1) + if len(metaTagRecords) != 2 { + t.Fatalf("Expected MetaTagRecordList to return 2 records for org 1, but it has:\n%+v\n", metaTagRecords) + } + + if len(ix.metaTags[1]["a"]["b"]) != 1 { + t.Fatalf("Expected that there is 1 record associated with tag a=b, but there were %d", len(ix.metaTags[1]["a"]["b"])) + } + if len(ix.metaTags[1]["c"]["d"]) != 1 { + t.Fatalf("Expected that there is 1 record associated with tag c=d, but there were %d", 
len(ix.metaTags[1]["c"]["d"])) + } + if len(ix.metaTags[1]["e"]["f"]) != 0 { + t.Fatalf("Expected that there is 0 record associated with tag e=f, but there were %d", len(ix.metaTags[1]["e"]["f"])) + } + if len(ix.metaTags[1]["g"]["h"]) != 0 { + t.Fatalf("Expected that there is 0 record associated with tag g=h, but there were %d", len(ix.metaTags[1]["g"]["h"])) + } + if len(ix.metaTags[1]["i"]["j"]) != 1 { + t.Fatalf("Expected that there is 1 record associated with tag i=j, but there were %d", len(ix.metaTags[1]["i"]["j"])) + } + if len(ix.metaTags[1]["k"]["l"]) != 1 { + t.Fatalf("Expected that there is 1 record associated with tag k=l, but there were %d", len(ix.metaTags[1]["k"]["l"])) + } +} + func TestMetricNameStartingWithTilde(t *testing.T) { withAndWithoutPartitonedIndex(testMetricNameStartingWithTilde)(t) } From 521ec1d2b0f2643b8d7417529e779d83e7d74cbe Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Fri, 5 Jul 2019 17:17:10 +0000 Subject: [PATCH 07/40] rename meta tag record member queries to expressions because otherwise the terminology is confusing, as these are not full queries but only single expressions --- api/cluster.go | 2 +- api/graphite.go | 4 ++-- expr/tagquery/meta_tag_record.go | 14 +++++++------- expr/tagquery/meta_tag_record_test.go | 2 +- idx/memory/meta_tags.go | 4 ++-- idx/memory/meta_tags_test.go | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/api/cluster.go b/api/cluster.go index e63187b1b0..140a01362c 100644 --- a/api/cluster.go +++ b/api/cluster.go @@ -603,7 +603,7 @@ func (s *Server) indexMetaTagRecordUpsert(ctx *middleware.Context, req models.In response.Write(ctx, response.NewMsgp(200, &models.MetaTagRecordUpsertResult{ MetaTags: result.MetaTags.Strings(), - Queries: result.Queries.Strings(), + Queries: result.Expressions.Strings(), Created: created, })) } diff --git a/api/graphite.go b/api/graphite.go index 6403177a3f..7ec2272146 100644 --- a/api/graphite.go +++ b/api/graphite.go @@ -1350,7 +1350,7 @@ func 
(s *Server) metaTagRecordUpsert(ctx *middleware.Context, upsertRequest mode if !upsertRequest.Propagate { response.Write(ctx, response.NewJson(200, models.MetaTagRecordUpsertResult{ MetaTags: localResult.MetaTags.Strings(), - Queries: localResult.Queries.Strings(), + Queries: localResult.Expressions.Strings(), Created: created, }, "")) return @@ -1362,7 +1362,7 @@ func (s *Server) metaTagRecordUpsert(ctx *middleware.Context, upsertRequest mode res := models.MetaTagRecordUpsertResultByNode{ Local: models.MetaTagRecordUpsertResult{ MetaTags: localResult.MetaTags.Strings(), - Queries: localResult.Queries.Strings(), + Queries: localResult.Expressions.Strings(), Created: created, }, } diff --git a/expr/tagquery/meta_tag_record.go b/expr/tagquery/meta_tag_record.go index 7ff36d3511..6a83def237 100644 --- a/expr/tagquery/meta_tag_record.go +++ b/expr/tagquery/meta_tag_record.go @@ -5,8 +5,8 @@ import ( ) type MetaTagRecord struct { - MetaTags Tags - Queries Expressions + MetaTags Tags + Expressions Expressions } func ParseMetaTagRecord(metaTags []string, queries []string) (MetaTagRecord, error) { @@ -18,12 +18,12 @@ func ParseMetaTagRecord(metaTags []string, queries []string) (MetaTagRecord, err return res, err } - res.Queries, err = ParseExpressions(queries) + res.Expressions, err = ParseExpressions(queries) if err != nil { return res, err } - if len(res.Queries) == 0 { + if len(res.Expressions) == 0 { return res, fmt.Errorf("Meta Tag Record must have at least one query") } @@ -34,12 +34,12 @@ func ParseMetaTagRecord(metaTags []string, queries []string) (MetaTagRecord, err // one's queries. Returns true if they are equal, otherwise false. 
// It is assumed that all the queries are already sorted func (m *MetaTagRecord) MatchesQueries(other *MetaTagRecord) bool { - if len(m.Queries) != len(other.Queries) { + if len(m.Expressions) != len(other.Expressions) { return false } - for i, query := range m.Queries { - if !ExpressionsAreEqual(query, other.Queries[i]) { + for i, query := range m.Expressions { + if !ExpressionsAreEqual(query, other.Expressions[i]) { return false } } diff --git a/expr/tagquery/meta_tag_record_test.go b/expr/tagquery/meta_tag_record_test.go index 6ffa5e39ae..77d566560a 100644 --- a/expr/tagquery/meta_tag_record_test.go +++ b/expr/tagquery/meta_tag_record_test.go @@ -21,7 +21,7 @@ func TestParseMetaTagRecord(t *testing.T) { Value: "d", }, }, - Queries: Expressions{ + Expressions: Expressions{ &expressionNotEqual{ expressionCommon{ key: "e", diff --git a/idx/memory/meta_tags.go b/idx/memory/meta_tags.go index 1464d0fca6..205ba85588 100644 --- a/idx/memory/meta_tags.go +++ b/idx/memory/meta_tags.go @@ -75,9 +75,9 @@ func (m metaTagRecords) upsert(record tagquery.MetaTagRecord) (recordId, *tagque // hashMetaTagRecord generates a hash of all the queries in the record func (m *metaTagRecords) hashMetaTagRecord(record tagquery.MetaTagRecord) recordId { - record.Queries.SortByFilterOrder() + record.Expressions.SortByFilterOrder() builder := strings.Builder{} - for _, query := range record.Queries { + for _, query := range record.Expressions { query.StringIntoBuilder(&builder) // trailing ";" doesn't matter, this is only hash input diff --git a/idx/memory/meta_tags_test.go b/idx/memory/meta_tags_test.go index e5a9e7ad33..cf70048a07 100644 --- a/idx/memory/meta_tags_test.go +++ b/idx/memory/meta_tags_test.go @@ -249,7 +249,7 @@ func TestDeletingMetaRecord(t *testing.T) { t.Fatalf("Expected returned meta tag record to have 0 meta tags, but it had %d", len(returnedRecord.MetaTags)) } if !metaTagRecordsAreEqual(returnedRecord, &record) { - t.Fatalf("Queries of returned record don't match what 
we expected:\nExpected:\n%+v\nGot:\n%+v\n", record.Queries, returnedRecord.Queries) + t.Fatalf("Queries of returned record don't match what we expected:\nExpected:\n%+v\nGot:\n%+v\n", record.Expressions, returnedRecord.Expressions) } if oldId != idOfRecord2 { t.Fatalf("Expected the oldId to be the id of record2 (%d), but it was %d", idOfRecord2, oldId) From bcbb7572f6ad243d3dfb9d34d3ee8a7b47b481c6 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Fri, 5 Jul 2019 21:21:00 +0000 Subject: [PATCH 08/40] add special expression to match all/none certain expressions are guaranteed to match all series or none series. so instead of actually evaluating the according criterias we might as well shortcut it and create two special expression types to just match all/none metrics without evaluating anything. --- api/graphite_test.go | 5 + expr/tagquery/expression.go | 120 ++++++++++++++++----- expr/tagquery/expression_common.go | 10 ++ expr/tagquery/expression_has_tag.go | 2 +- expr/tagquery/expression_match.go | 8 +- expr/tagquery/expression_match_all.go | 53 ++++++++++ expr/tagquery/expression_match_none.go | 52 +++++++++ expr/tagquery/expression_match_tag.go | 8 +- expr/tagquery/expression_not_match.go | 6 +- expr/tagquery/expression_prefix.go | 6 ++ expr/tagquery/expression_prefix_tag.go | 6 ++ expr/tagquery/expression_test.go | 141 ++++++++++++++++++++----- expr/tagquery/query_test.go | 73 ++++++++++--- 13 files changed, 409 insertions(+), 81 deletions(-) create mode 100644 expr/tagquery/expression_match_all.go create mode 100644 expr/tagquery/expression_match_none.go diff --git a/api/graphite_test.go b/api/graphite_test.go index 09dc6ecd5d..268be62fa7 100644 --- a/api/graphite_test.go +++ b/api/graphite_test.go @@ -57,6 +57,11 @@ func TestExpressionParsing(t *testing.T) { expectError: false, expectExpressions: []string{"a!=~^(?:.*)"}, }, + { + inputValue: "'a=~.*' , '__tag^=a'", + expectError: false, + expectExpressions: []string{"a=~^(?:.*)", "__tag^=a"}, + }, { 
inputValue: "'a=~.+'", expectError: false, diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index 68da22cb0f..a9403c989e 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -60,13 +60,14 @@ func (e Expressions) SortByFilterOrder() { func (e Expressions) findInitialExpression() int { // order of preference to start with the viable operators for _, op := range []ExpressionOperator{ + MATCH_NONE, EQUAL, HAS_TAG, PREFIX, PREFIX_TAG, MATCH, MATCH_TAG, - NOT_MATCH, + MATCH_ALL, } { for i := range e { if e[i].GetOperator() == op && e[i].RequiresNonEmptyValue() { @@ -223,80 +224,135 @@ FIND_OPERATOR: } } resCommon.value = expr[valuePos:] - var operator ExpressionOperator + var originalOperator, effectiveOperator ExpressionOperator + // decide what operator this expression uses, based on the operator + // itself, but ignoring other factors like f.e. an empty value if not { - if len(resCommon.value) == 0 { - operator = HAS_TAG - } else if regex { - operator = NOT_MATCH + if regex { + originalOperator = NOT_MATCH } else { - operator = NOT_EQUAL + originalOperator = NOT_EQUAL } } else { if prefix { - if len(resCommon.value) == 0 { - operator = HAS_TAG - } else { - operator = PREFIX - } - } else if len(resCommon.value) == 0 { - operator = NOT_HAS_TAG + originalOperator = PREFIX } else if regex { - operator = MATCH + originalOperator = MATCH } else { - operator = EQUAL + originalOperator = EQUAL } } + effectiveOperator = originalOperator + // special key to match on tag instead of a value + // update the operator decision accordingly if resCommon.key == "__tag" { // currently ! 
(not) queries on tags are not supported // and unlike normal queries a value must be set - if not || len(resCommon.value) == 0 { + if not { return nil, fmt.Errorf(invalidExpressionError, expr) } - if operator == PREFIX { - operator = PREFIX_TAG - } else if operator == MATCH { - operator = MATCH_TAG + switch effectiveOperator { + case PREFIX: + if len(resCommon.value) == 0 { + effectiveOperator = MATCH_ALL + } else { + effectiveOperator = PREFIX_TAG + } + case MATCH: + if len(resCommon.value) == 0 { + effectiveOperator = MATCH_ALL + } else { + effectiveOperator = MATCH_TAG + } + case EQUAL: + if len(resCommon.value) == 0 { + return nil, fmt.Errorf(invalidExpressionError, expr) + } + + // "__tag=abc", should internatlly be translated into "abc!=" + resCommon.key = resCommon.value + resCommon.value = "" + effectiveOperator = HAS_TAG + } + } + + // check for special case of an empty value and + // update chosen operator accordingly + if len(resCommon.value) == 0 { + switch effectiveOperator { + case EQUAL: + effectiveOperator = NOT_HAS_TAG + case NOT_EQUAL: + effectiveOperator = HAS_TAG + case MATCH: + effectiveOperator = MATCH_ALL + case NOT_MATCH: + effectiveOperator = MATCH_NONE + case PREFIX: + effectiveOperator = MATCH_ALL } } - if operator == MATCH || operator == NOT_MATCH || operator == MATCH_TAG { + if effectiveOperator == MATCH || effectiveOperator == MATCH_TAG || effectiveOperator == NOT_MATCH { if len(resCommon.value) > 0 && resCommon.value[0] != '^' { resCommon.value = "^(?:" + resCommon.value + ")" } + // no need to run regular expressions that match any string + // so we update the operator to MATCH_ALL/NONE + if resCommon.value == "^(?:.*)" || resCommon.value == "^.*" || resCommon.value == "^(.*)" { + switch effectiveOperator { + case MATCH: + return &expressionMatchAll{expressionCommon: resCommon, originalOperator: originalOperator}, nil + case MATCH_TAG: + return &expressionMatchAll{expressionCommon: resCommon, originalOperator: originalOperator}, nil 
+ case NOT_MATCH: + return &expressionMatchNone{expressionCommon: resCommon, originalOperator: originalOperator}, nil + } + } + valueRe, err := regexp.Compile(resCommon.value) if err != nil { return nil, err } - switch operator { + + // check for special case when regular expression matches + // empty value and update operator accordingly + matchesEmpty := valueRe.MatchString("") + + switch effectiveOperator { case MATCH: - return &expressionMatch{expressionCommon: resCommon, valueRe: valueRe}, nil + return &expressionMatch{expressionCommonRe: expressionCommonRe{expressionCommon: resCommon, valueRe: valueRe, matchesEmpty: matchesEmpty}}, nil case NOT_MATCH: - return &expressionNotMatch{expressionCommon: resCommon, valueRe: valueRe}, nil + return &expressionNotMatch{expressionCommonRe: expressionCommonRe{expressionCommon: resCommon, valueRe: valueRe, matchesEmpty: matchesEmpty}}, nil case MATCH_TAG: - return &expressionMatchTag{expressionCommon: resCommon, valueRe: valueRe}, nil + if matchesEmpty { + return nil, fmt.Errorf(invalidExpressionError, expr) + } + return &expressionMatchTag{expressionCommonRe: expressionCommonRe{expressionCommon: resCommon, valueRe: valueRe, matchesEmpty: matchesEmpty}}, nil } } else { - switch operator { + switch effectiveOperator { case EQUAL: return &expressionEqual{expressionCommon: resCommon}, nil case NOT_EQUAL: return &expressionNotEqual{expressionCommon: resCommon}, nil case PREFIX: return &expressionPrefix{expressionCommon: resCommon}, nil - case MATCH_TAG: - return &expressionMatchTag{expressionCommon: resCommon}, nil case HAS_TAG: return &expressionHasTag{expressionCommon: resCommon}, nil case NOT_HAS_TAG: return &expressionNotHasTag{expressionCommon: resCommon}, nil case PREFIX_TAG: return &expressionPrefixTag{expressionCommon: resCommon}, nil + case MATCH_ALL: + return &expressionMatchAll{expressionCommon: resCommon, originalOperator: originalOperator}, nil + case MATCH_NONE: + return &expressionMatchNone{expressionCommon: 
resCommon, originalOperator: originalOperator}, nil } } @@ -345,6 +401,8 @@ const ( PREFIX_TAG // __tag^= exact prefix with tag. non-standard, required for auto complete of tag keys HAS_TAG // !="" specified tag must be present NOT_HAS_TAG // ="" specified tag must not be present + MATCH_ALL // special case of expression that matches every metric (f.e. key=.*) + MATCH_NONE // special case of expression that matches no metric (f.e. key!=.*) ) func (o ExpressionOperator) StringIntoBuilder(builder *strings.Builder) { @@ -367,5 +425,9 @@ func (o ExpressionOperator) StringIntoBuilder(builder *strings.Builder) { builder.WriteString("!=") case NOT_HAS_TAG: builder.WriteString("=") + case MATCH_ALL: + builder.WriteString("=") + case MATCH_NONE: + builder.WriteString("!=") } } diff --git a/expr/tagquery/expression_common.go b/expr/tagquery/expression_common.go index 13c674741a..2a877aec66 100644 --- a/expr/tagquery/expression_common.go +++ b/expr/tagquery/expression_common.go @@ -1,5 +1,7 @@ package tagquery +import "regexp" + type expressionCommon struct { key string value string @@ -27,3 +29,11 @@ func (e *expressionCommon) HasRe() bool { // by default assume false, unless a concrete type overrides this method return false } + +// expressionCommonRe is an extended version of expressionCommon with additional +// properties for operators that use regular expressions +type expressionCommonRe struct { + expressionCommon + valueRe *regexp.Regexp + matchesEmpty bool +} diff --git a/expr/tagquery/expression_has_tag.go b/expr/tagquery/expression_has_tag.go index 9361d672ce..90dbf906b3 100644 --- a/expr/tagquery/expression_has_tag.go +++ b/expr/tagquery/expression_has_tag.go @@ -34,7 +34,7 @@ func (e *expressionHasTag) GetMetricDefinitionFilter() MetricDefinitionFilter { return func(_ string, _ []string) FilterDecision { return Pass } } - matchPrefix := e.GetKey() + "=" + matchPrefix := e.key + "=" return func(_ string, tags []string) FilterDecision { for _, tag := range tags { if 
strings.HasPrefix(tag, matchPrefix) { diff --git a/expr/tagquery/expression_match.go b/expr/tagquery/expression_match.go index b2f2a76dc7..eb791fb57c 100644 --- a/expr/tagquery/expression_match.go +++ b/expr/tagquery/expression_match.go @@ -1,15 +1,13 @@ package tagquery import ( - "regexp" "strings" "sync" "sync/atomic" ) type expressionMatch struct { - expressionCommon - valueRe *regexp.Regexp + expressionCommonRe } func (e *expressionMatch) GetOperator() ExpressionOperator { @@ -43,6 +41,10 @@ func (e *expressionMatch) StringIntoBuilder(builder *strings.Builder) { builder.WriteString(e.value) } +func (e *expressionMatch) RequiresNonEmptyValue() bool { + return !e.matchesEmpty +} + func (e *expressionMatch) GetMetricDefinitionFilter() MetricDefinitionFilter { if e.key == "name" { if e.value == "" { diff --git a/expr/tagquery/expression_match_all.go b/expr/tagquery/expression_match_all.go new file mode 100644 index 0000000000..99aef85dbe --- /dev/null +++ b/expr/tagquery/expression_match_all.go @@ -0,0 +1,53 @@ +package tagquery + +import ( + "strings" +) + +type expressionMatchAll struct { + // we keep key, operator, value just to be able to convert the expression back into a string + expressionCommon + originalOperator ExpressionOperator +} + +func (e *expressionMatchAll) GetKey() string { + return e.key +} + +func (e *expressionMatchAll) GetValue() string { + return e.value +} + +func (e *expressionMatchAll) RequiresNonEmptyValue() bool { + return false +} + +func (e *expressionMatchAll) OperatesOnTag() bool { + return false +} + +func (e *expressionMatchAll) HasRe() bool { + return false +} + +func (e *expressionMatchAll) GetOperator() ExpressionOperator { + return MATCH_ALL +} + +func (e *expressionMatchAll) ValuePasses(value string) bool { + return true +} + +func (e *expressionMatchAll) GetDefaultDecision() FilterDecision { + return Pass +} + +func (e *expressionMatchAll) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString(e.key) + 
e.originalOperator.StringIntoBuilder(builder) + builder.WriteString(e.value) +} + +func (e *expressionMatchAll) GetMetricDefinitionFilter() MetricDefinitionFilter { + return func(_ string, _ []string) FilterDecision { return Pass } +} diff --git a/expr/tagquery/expression_match_none.go b/expr/tagquery/expression_match_none.go new file mode 100644 index 0000000000..6bc0bf881a --- /dev/null +++ b/expr/tagquery/expression_match_none.go @@ -0,0 +1,52 @@ +package tagquery + +import ( + "strings" +) + +type expressionMatchNone struct { + // we keep key, operator, value just to be able to convert the expression back into a string + expressionCommon + originalOperator ExpressionOperator +} + +func (e *expressionMatchNone) GetKey() string { + return e.key +} + +func (e *expressionMatchNone) GetValue() string { + return e.value +} + +func (e *expressionMatchNone) RequiresNonEmptyValue() bool { + return true +} + +func (e *expressionMatchNone) OperatesOnTag() bool { + return false +} + +func (e *expressionMatchNone) HasRe() bool { + return false +} +func (e *expressionMatchNone) GetOperator() ExpressionOperator { + return MATCH_NONE +} + +func (e *expressionMatchNone) ValuePasses(value string) bool { + return false +} + +func (e *expressionMatchNone) GetDefaultDecision() FilterDecision { + return Fail +} + +func (e *expressionMatchNone) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString(e.key) + e.originalOperator.StringIntoBuilder(builder) + builder.WriteString(e.value) +} + +func (e *expressionMatchNone) GetMetricDefinitionFilter() MetricDefinitionFilter { + return func(_ string, _ []string) FilterDecision { return Fail } +} diff --git a/expr/tagquery/expression_match_tag.go b/expr/tagquery/expression_match_tag.go index c92c0a6696..8291fdabf1 100644 --- a/expr/tagquery/expression_match_tag.go +++ b/expr/tagquery/expression_match_tag.go @@ -1,15 +1,13 @@ package tagquery import ( - "regexp" "strings" "sync" "sync/atomic" ) type expressionMatchTag struct { - 
expressionCommon - valueRe *regexp.Regexp + expressionCommonRe } func (e *expressionMatchTag) GetOperator() ExpressionOperator { @@ -32,6 +30,10 @@ func (e *expressionMatchTag) OperatesOnTag() bool { return true } +func (e *expressionMatchTag) RequiresNonEmptyValue() bool { + return !e.matchesEmpty +} + func (e *expressionMatchTag) StringIntoBuilder(builder *strings.Builder) { builder.WriteString("__tag=~") builder.WriteString(e.value) diff --git a/expr/tagquery/expression_not_match.go b/expr/tagquery/expression_not_match.go index 754e80f75b..f6a7d9e9b3 100644 --- a/expr/tagquery/expression_not_match.go +++ b/expr/tagquery/expression_not_match.go @@ -1,15 +1,13 @@ package tagquery import ( - "regexp" "strings" "sync" "sync/atomic" ) type expressionNotMatch struct { - expressionCommon - valueRe *regexp.Regexp + expressionCommonRe } func (e *expressionNotMatch) GetOperator() ExpressionOperator { @@ -17,7 +15,7 @@ func (e *expressionNotMatch) GetOperator() ExpressionOperator { } func (e *expressionNotMatch) RequiresNonEmptyValue() bool { - return false + return e.matchesEmpty } func (e *expressionNotMatch) HasRe() bool { diff --git a/expr/tagquery/expression_prefix.go b/expr/tagquery/expression_prefix.go index 82a2080619..1d64821de3 100644 --- a/expr/tagquery/expression_prefix.go +++ b/expr/tagquery/expression_prefix.go @@ -16,6 +16,12 @@ func (e *expressionPrefix) ValuePasses(value string) bool { return strings.HasPrefix(value, e.value) } +func (e *expressionPrefix) RequiresNonEmptyValue() bool { + // we know it requires an non-empty value, because the expression + // "__tag^=" would get parsed into the type expressionMatchAll + return true +} + func (e *expressionPrefix) GetDefaultDecision() FilterDecision { return Fail } diff --git a/expr/tagquery/expression_prefix_tag.go b/expr/tagquery/expression_prefix_tag.go index 35fc8c5b30..a3e17825b0 100644 --- a/expr/tagquery/expression_prefix_tag.go +++ b/expr/tagquery/expression_prefix_tag.go @@ -12,6 +12,12 @@ func (e 
*expressionPrefixTag) GetOperator() ExpressionOperator { return PREFIX_TAG } +func (e *expressionPrefixTag) RequiresNonEmptyValue() bool { + // we know it requires an non-empty value, because the expression + // "__tag^=" would get parsed into the type expressionMatchAll + return true +} + func (e *expressionPrefixTag) ValuePasses(tag string) bool { return strings.HasPrefix(tag, e.value) } diff --git a/expr/tagquery/expression_test.go b/expr/tagquery/expression_test.go index 5d2aaafbac..231e44eb9f 100644 --- a/expr/tagquery/expression_test.go +++ b/expr/tagquery/expression_test.go @@ -18,41 +18,126 @@ func TestExpressionParsing(t *testing.T) { testCases := []testCase{ { - expression: "key=value", - key: "key", + expression: "a=value", + key: "a", value: "value", operator: EQUAL, - err: false, }, { - expression: "key!=", - key: "key", + expression: "abc=", + key: "abc", + value: "", + operator: NOT_HAS_TAG, + }, { + expression: "__tag=", + err: true, + }, { + expression: "ccc!=value", + key: "ccc", + value: "value", + operator: NOT_EQUAL, + }, { + expression: "__tag=abc", + key: "abc", value: "", operator: HAS_TAG, - err: false, }, { - expression: "key=", - key: "key", + expression: "a!=", + key: "a", value: "", - operator: NOT_HAS_TAG, - err: false, + operator: HAS_TAG, }, { - expression: "key=~", - key: "key", + expression: "__tag!=", + err: true, + }, { + expression: "tag1=~^abc.*", + key: "tag1", + value: "^abc.*", + operator: MATCH, + }, { + expression: "abc=~", + key: "abc", value: "", - operator: NOT_HAS_TAG, - err: false, + operator: MATCH_ALL, + }, { + expression: "abc=~.*", + key: "abc", + value: "^(?:.*)", + operator: MATCH_ALL, + }, { + expression: "abc=~.+", + key: "abc", + value: "^(?:.+)", + operator: MATCH, + }, { + expression: "tag123=~.*value.*", + key: "tag123", + value: "^(?:.*value.*)", + operator: MATCH, + }, { + expression: "__tag=~.*value.*", + key: "__tag", + value: "^(?:.*value.*)", + operator: MATCH_TAG, + }, { + expression: "__tag=~", + 
key: "__tag", + value: "", + operator: MATCH_ALL, + }, { + expression: "__tag=~.*", + key: "__tag", + value: "^(?:.*)", + operator: MATCH_ALL, + }, { + expression: "__tag=~.+", + key: "__tag", + value: "^(?:.+)", + operator: MATCH_TAG, }, { - expression: "key=~v_alue", + expression: "abc!=~.*", + key: "abc", + value: "^(?:.*)", + operator: MATCH_NONE, + }, { + expression: "key!=~v_alue", key: "key", value: "^(?:v_alue)", - operator: MATCH, - err: false, + operator: NOT_MATCH, }, { - expression: "k!=~v", + expression: "k!=~", key: "k", - value: "^(?:v)", + value: "", + operator: MATCH_NONE, + }, { + expression: "k!=~.*", + key: "k", + value: "^(?:.*)", + operator: MATCH_NONE, + }, { + expression: "sometag!=~.*abc.*", + key: "sometag", + value: "^(?:.*abc.*)", operator: NOT_MATCH, - err: false, + }, { + expression: "tag1^=", + key: "tag1", + value: "", + operator: MATCH_ALL, + }, { + expression: "tag1^=abc", + key: "tag1", + value: "abc", + operator: PREFIX, + }, { + expression: "__tag^=a", + key: "__tag", + value: "a", + operator: PREFIX_TAG, + }, { + expression: "__tag^=", + key: "__tag", + value: "", + operator: MATCH_ALL, }, { expression: "key!!=value", err: true, @@ -61,28 +146,27 @@ func TestExpressionParsing(t *testing.T) { key: "key", value: "=value", operator: EQUAL, - err: false, }, { expression: "key=~=value", key: "key", value: "^(?:=value)", operator: MATCH, - err: false, }, { expression: "__tag=~key", key: "__tag", value: "^(?:key)", operator: MATCH_TAG, - err: false, }, { expression: "__tag^=some.key", key: "__tag", value: "some.key", operator: PREFIX_TAG, - err: false, }, { expression: "key=~(abc", err: true, + }, { + expression: "=key=abc", + err: true, }, { expression: "__tag!=some.key", err: true, @@ -95,7 +179,14 @@ func TestExpressionParsing(t *testing.T) { for i, tc := range testCases { t.Run(fmt.Sprintf("TC %d \"%s\"", i, tc.expression), func(t *testing.T) { expression, err := ParseExpression(tc.expression) - if (err != nil) != tc.err || (err 
== nil && (expression.GetKey() != tc.key || expression.GetValue() != tc.value || expression.GetOperator() != tc.operator)) { + if (err != nil) != tc.err { + if tc.err { + t.Fatalf("Expected error, but did not get one") + } else { + t.Fatalf("Did not expect error, but got one: %q", err) + } + } + if err == nil && (expression.GetKey() != tc.key || expression.GetValue() != tc.value || expression.GetOperator() != tc.operator) { t.Fatalf("Expected the values %s, %s, %d, %t, but got %s, %s, %d, %q", tc.key, tc.value, tc.operator, tc.err, expression.GetKey(), expression.GetValue(), expression.GetOperator(), err) } }) diff --git a/expr/tagquery/query_test.go b/expr/tagquery/query_test.go index be8650a1ef..1b5a07f27a 100644 --- a/expr/tagquery/query_test.go +++ b/expr/tagquery/query_test.go @@ -64,25 +64,31 @@ func TestNewQueryFromStrings(t *testing.T) { }, }, &expressionMatch{ - expressionCommon: expressionCommon{ - key: "e", - value: "^(?:f)", + expressionCommonRe{ + expressionCommon: expressionCommon{ + key: "e", + value: "^(?:f)", + }, + valueRe: nil, }, - valueRe: nil, }, &expressionMatchTag{ - expressionCommon: expressionCommon{ - key: "__tag", - value: "^(?:k)", + expressionCommonRe{ + expressionCommon: expressionCommon{ + key: "__tag", + value: "^(?:k)", + }, + valueRe: nil, }, - valueRe: nil, }, &expressionNotMatch{ - expressionCommon: expressionCommon{ - key: "g", - value: "^(?:h)", + expressionCommonRe{ + expressionCommon: expressionCommon{ + key: "g", + value: "^(?:h)", + }, + valueRe: nil, }, - valueRe: nil, }, }, tagClause: 5, @@ -106,6 +112,39 @@ func TestNewQueryFromStrings(t *testing.T) { expressionStrs: []string{"key=", "abc!=cba"}, }, wantErr: true, + }, { + name: "missing an expression that requires non empty value because pattern matches empty value", + args: args{ + expressionStrs: []string{"key=", "abc=~.*"}, + }, + wantErr: true, + }, { + name: "no error with + instead of * because pattern does not match empty value", + args: args{ + expressionStrs: 
[]string{"abc=~.+"}, + }, + want: Query{ + From: 0, + Expressions: Expressions{ + &expressionMatch{ + expressionCommonRe{ + expressionCommon: expressionCommon{ + key: "abc", + value: "^(?:.+)", + }, + valueRe: nil, + }, + }, + }, + tagClause: -1, + startWith: 0, + }, + }, { + name: "missing an expression that requires non empty value because prefix matches empty value", + args: args{ + expressionStrs: []string{"key=", "__tag^="}, + }, + wantErr: true, }, { name: "two different tag queries", args: args{ @@ -120,11 +159,13 @@ func TestNewQueryFromStrings(t *testing.T) { want: Query{ Expressions: Expressions{ &expressionMatch{ - expressionCommon: expressionCommon{ - key: "abc", - value: "^(?:cba)", + expressionCommonRe{ + expressionCommon: expressionCommon{ + key: "abc", + value: "^(?:cba)", + }, + valueRe: nil, }, - valueRe: nil, }, }, startWith: 0, From 6e9830a3869117c1125d682b2971f306db1c39e1 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Mon, 15 Jul 2019 14:33:22 -0400 Subject: [PATCH 09/40] add test to test various combinations of tag query expressions --- idx/memory/tag_query_test.go | 143 +++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) diff --git a/idx/memory/tag_query_test.go b/idx/memory/tag_query_test.go index 6f4efae1e7..8c674c0afe 100644 --- a/idx/memory/tag_query_test.go +++ b/idx/memory/tag_query_test.go @@ -2,6 +2,7 @@ package memory import ( "fmt" + "math" "reflect" "sort" "strings" @@ -89,7 +90,9 @@ func queryAndCompareResults(t *testing.T, q TagQueryContext, expectedData IdSet) res := q.Run(tagIdx, byId) if !reflect.DeepEqual(expectedData, res) { + q.Run(tagIdx, byId) t.Fatalf("Returned data does not match expected data:\nExpected: %s\nGot: %s", expectedData, res) + } } @@ -343,6 +346,146 @@ func TestTagExpressionQueryByTagWithFrom(t *testing.T) { } } +func TestTagExpressionQueriesInAllCombinations(t *testing.T) { + type expressionWithOperator struct { + expression string + operator tagquery.ExpressionOperator + expectedIds 
[]int // based on the data generated in getTestIndex + } + expressionStrings := []expressionWithOperator{ + { + expression: "key1=value1", + operator: tagquery.EQUAL, + expectedIds: []int{0, 1, 2, 3}, + }, { + expression: "key3!=value3", + operator: tagquery.NOT_EQUAL, + expectedIds: []int{0, 2, 5, 6, 7, 8}, + }, { + expression: "key2=~v", + operator: tagquery.MATCH, + expectedIds: []int{0, 4, 5}, + }, { + expression: "key3!=~.*1$", + operator: tagquery.NOT_MATCH, + expectedIds: []int{0, 1, 2, 3, 4, 5, 7}, + }, { + expression: "key3^=v", + operator: tagquery.PREFIX, + expectedIds: []int{1, 3, 4, 5, 6, 7}, + }, { + expression: "__tag^=a", + operator: tagquery.PREFIX_TAG, + expectedIds: []int{3, 5, 6}, + }, { + expression: "__tag=abc", + operator: tagquery.HAS_TAG, + expectedIds: []int{3, 5}, + }, { + expression: "key2=", + operator: tagquery.NOT_HAS_TAG, + expectedIds: []int{1, 2, 3, 6, 7}, + }, { + expression: "a=~.*", + operator: tagquery.MATCH_ALL, + expectedIds: []int{0, 1, 2, 3, 4, 5, 6, 7}, + }, { + expression: "a!=~.*", + operator: tagquery.MATCH_NONE, + expectedIds: []int{}, + }, + } + + // parse all expression strings into expression objects + var err error + allExpressions := make(tagquery.Expressions, len(expressionStrings)) + for i, expr := range expressionStrings { + allExpressions[i], err = tagquery.ParseExpression(expr.expression) + if err != nil { + t.Fatalf("Unexpected error when parsing expression %q: %q", expr, err) + } + if allExpressions[i].GetOperator() != expr.operator { + t.Fatalf("Expression was %q supposed to result in operator %d, but got %d", expr.expression, expr.operator, allExpressions[i].GetOperator()) + } + } + + // find all the possible combinations of query expressions + all_subsets := make([][]int, 0, int(math.Pow(2, float64(len(expressionStrings))))-1) + var find_subsets func([]int, []int) + find_subsets = func(so_far, rest []int) { + if len(rest) == 0 { + if len(so_far) > 0 { + all_subsets = append(all_subsets, so_far) + } + } 
else { + find_subsets(append(so_far, rest[0]), rest[1:]) + find_subsets(so_far, rest[1:]) + } + } + all_ids := make([]int, len(expressionStrings)) + for i := range all_ids { + all_ids[i] = i + } + find_subsets([]int{}, all_ids) + + ids := getTestIDs() +TEST_CASES: + for tc, expressionIds := range all_subsets { + expressions := make(tagquery.Expressions, len(expressionIds)) + var expectedResults []int // intersection of expected results of each expression + + includingExpressionRequiringNonEmptyValue := false + // build the slice of expressions we want to query for and find the + // expected results for the current combination of expressions + for i, expressionId := range expressionIds { + expressions[i] = allExpressions[expressionId] + includingExpressionRequiringNonEmptyValue = includingExpressionRequiringNonEmptyValue || expressions[i].RequiresNonEmptyValue() + + if i == 0 { + expectedResults = make([]int, len(expressionStrings[expressionId].expectedIds)) + copy(expectedResults, expressionStrings[expressionId].expectedIds) + } else { + EXPECTED_RESULTS: + for j := 0; j < len(expectedResults); j++ { + for _, id := range expressionStrings[expressionId].expectedIds { + if expectedResults[j] == id { + continue EXPECTED_RESULTS + } + } + expectedResults = append(expectedResults[:j], expectedResults[j+1:]...) 
+ j-- + } + } + } + + // this combination of expressions would result in an invalid query + // because there is no expression requiring a non-empty value + if !includingExpressionRequiringNonEmptyValue { + continue TEST_CASES + } + + expectedIds := make(IdSet, len(expectedResults)) + for j := range expectedResults { + expectedIds[ids[expectedResults[j]]] = struct{}{} + } + + builder := strings.Builder{} + builder.WriteString(fmt.Sprintf("TC %d: ", tc)) + for _, expr := range expressions { + expr.StringIntoBuilder(&builder) + builder.WriteString(";") + } + t.Run(builder.String(), func(t *testing.T) { + query, err := tagquery.NewQuery(expressions, 0) + if err != nil { + t.Fatalf("Unexpected error when getting query from expressions %q: %q", expressions, err) + } + + queryAndCompareResults(t, NewTagQueryContext(query), expectedIds) + }) + } +} + func TestGetByTag(t *testing.T) { withAndWithoutPartitonedIndex(testGetByTag)(t) } From 22b16619c06e6780fd5ec84a88651221a6938e2b Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Mon, 15 Jul 2019 17:44:30 -0400 Subject: [PATCH 10/40] consistent method ordering and comments in expressions --- expr/tagquery/expression.go | 31 +++++++++++-------- expr/tagquery/expression_common.go | 12 ++++---- expr/tagquery/expression_equal.go | 20 ++++++------- expr/tagquery/expression_has_tag.go | 22 +++++++------- expr/tagquery/expression_match.go | 32 ++++++++++---------- expr/tagquery/expression_match_all.go | 24 +++++++-------- expr/tagquery/expression_match_none.go | 25 +++++++--------- expr/tagquery/expression_match_tag.go | 22 +++++++------- expr/tagquery/expression_not_equal.go | 20 ++++++------- expr/tagquery/expression_not_has_tag.go | 26 ++++++++-------- expr/tagquery/expression_not_match.go | 40 ++++++++++++------------- expr/tagquery/expression_prefix.go | 24 +++++++-------- expr/tagquery/expression_prefix_tag.go | 26 ++++++++-------- 13 files changed, 162 insertions(+), 162 deletions(-) diff --git a/expr/tagquery/expression.go 
b/expr/tagquery/expression.go index a9403c989e..ea84aa4605 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -90,10 +90,6 @@ func (e Expressions) Strings() []string { } type Expression interface { - // GetMetricDefinitionFilter returns a MetricDefinitionFilter. It takes a metric definition, looks - // at its tags and returns a decision regarding this query expression applied to its tags. - GetMetricDefinitionFilter() MetricDefinitionFilter - // GetDefaultDecision defines what decision should be made if the filter has not come to a conclusive // decision based on a single index. When looking at more than one tag index in order of decreasing // priority to decide whether a metric should be part of the final result set, some operators and metric @@ -147,18 +143,29 @@ type Expression interface { // GetOperator returns the operator of this expression GetOperator() ExpressionOperator - // FilterValues takes a map that's indexed by strings and applies this expression's criteria to - // each of the strings, then it returns the strings that have matched - // In case of expressions that get applied to tags, the first level map of the metric tag index - // or meta tag index can get passed into this function, otherwise the second level under the key - // returned by GetKey() - ValuePasses(string) bool - // HasRe indicates whether the evaluation of this expression involves regular expressions HasRe() bool - RequiresNonEmptyValue() bool + // OperatesOnTag returns true if this expression operators on the tag keys, + // or false if it operates on the values OperatesOnTag() bool + + // RequiresNonEmptyValue returns boolean indicating whether this expression requires a non-empty + // value. 
Every query must have at least one expression requiring a non-empty value, otherwise + // the query is considered invalid + RequiresNonEmptyValue() bool + + // ValuePasses takes a string which should either be a tag key or value depending on the return + // value of OperatesOnTag(), then it returns a bool to indicate whether the given value satisfies + // this expression + ValuePasses(string) bool + + // GetMetricDefinitionFilter returns a MetricDefinitionFilter + // The MetricDefinitionFilter takes a metric definition, looks at its tags and returns a decision + // regarding this query expression applied to its tags + GetMetricDefinitionFilter() MetricDefinitionFilter + + // StringIntoBuilder takes a builder and writes a string representation of this expression into it StringIntoBuilder(builder *strings.Builder) } diff --git a/expr/tagquery/expression_common.go b/expr/tagquery/expression_common.go index 2a877aec66..76be9e7183 100644 --- a/expr/tagquery/expression_common.go +++ b/expr/tagquery/expression_common.go @@ -15,9 +15,9 @@ func (e *expressionCommon) GetValue() string { return e.value } -func (e *expressionCommon) RequiresNonEmptyValue() bool { - // by default assume true, unless a concrete type overrides this method - return true +func (e *expressionCommon) HasRe() bool { + // by default assume false, unless a concrete type overrides this method + return false } func (e *expressionCommon) OperatesOnTag() bool { @@ -25,9 +25,9 @@ func (e *expressionCommon) OperatesOnTag() bool { return false } -func (e *expressionCommon) HasRe() bool { - // by default assume false, unless a concrete type overrides this method - return false +func (e *expressionCommon) RequiresNonEmptyValue() bool { + // by default assume true, unless a concrete type overrides this method + return true } // expressionCommonRe is an extended version of expressionCommon with additional diff --git a/expr/tagquery/expression_equal.go b/expr/tagquery/expression_equal.go index 
f918413dd2..35d398e706 100644 --- a/expr/tagquery/expression_equal.go +++ b/expr/tagquery/expression_equal.go @@ -8,6 +8,10 @@ type expressionEqual struct { expressionCommon } +func (e *expressionEqual) GetDefaultDecision() FilterDecision { + return Fail +} + func (e *expressionEqual) GetOperator() ExpressionOperator { return EQUAL } @@ -16,16 +20,6 @@ func (e *expressionEqual) ValuePasses(value string) bool { return value == e.value } -func (e *expressionEqual) GetDefaultDecision() FilterDecision { - return Fail -} - -func (e *expressionEqual) StringIntoBuilder(builder *strings.Builder) { - builder.WriteString(e.key) - builder.WriteString("=") - builder.WriteString(e.value) -} - func (e *expressionEqual) GetMetricDefinitionFilter() MetricDefinitionFilter { if e.key == "name" { if e.value == "" { @@ -58,3 +52,9 @@ func (e *expressionEqual) GetMetricDefinitionFilter() MetricDefinitionFilter { return None } } + +func (e *expressionEqual) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString(e.key) + builder.WriteString("=") + builder.WriteString(e.value) +} diff --git a/expr/tagquery/expression_has_tag.go b/expr/tagquery/expression_has_tag.go index 90dbf906b3..14f6f6f14a 100644 --- a/expr/tagquery/expression_has_tag.go +++ b/expr/tagquery/expression_has_tag.go @@ -8,25 +8,20 @@ type expressionHasTag struct { expressionCommon } -func (e *expressionHasTag) GetOperator() ExpressionOperator { - return HAS_TAG -} - -func (e *expressionHasTag) ValuePasses(value string) bool { - return value == e.key -} - func (e *expressionHasTag) GetDefaultDecision() FilterDecision { return Fail } +func (e *expressionHasTag) GetOperator() ExpressionOperator { + return HAS_TAG +} + func (e *expressionHasTag) OperatesOnTag() bool { return true } -func (e *expressionHasTag) StringIntoBuilder(builder *strings.Builder) { - builder.WriteString(e.key) - builder.WriteString("!=") +func (e *expressionHasTag) ValuePasses(value string) bool { + return value == e.key } func (e 
*expressionHasTag) GetMetricDefinitionFilter() MetricDefinitionFilter { @@ -45,3 +40,8 @@ func (e *expressionHasTag) GetMetricDefinitionFilter() MetricDefinitionFilter { return None } } + +func (e *expressionHasTag) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString(e.key) + builder.WriteString("!=") +} diff --git a/expr/tagquery/expression_match.go b/expr/tagquery/expression_match.go index eb791fb57c..f49534beb7 100644 --- a/expr/tagquery/expression_match.go +++ b/expr/tagquery/expression_match.go @@ -10,18 +10,6 @@ type expressionMatch struct { expressionCommonRe } -func (e *expressionMatch) GetOperator() ExpressionOperator { - return MATCH -} - -func (e *expressionMatch) HasRe() bool { - return true -} - -func (e *expressionMatch) ValuePasses(value string) bool { - return e.valueRe.MatchString(value) -} - func (e *expressionMatch) GetDefaultDecision() FilterDecision { // if the pattern matches "" (f.e. "tag=~.*) then a metric which // does not have the tag "tag" at all should also be part of the @@ -35,16 +23,22 @@ func (e *expressionMatch) GetDefaultDecision() FilterDecision { return Fail } -func (e *expressionMatch) StringIntoBuilder(builder *strings.Builder) { - builder.WriteString(e.key) - builder.WriteString("=~") - builder.WriteString(e.value) +func (e *expressionMatch) GetOperator() ExpressionOperator { + return MATCH +} + +func (e *expressionMatch) HasRe() bool { + return true } func (e *expressionMatch) RequiresNonEmptyValue() bool { return !e.matchesEmpty } +func (e *expressionMatch) ValuePasses(value string) bool { + return e.valueRe.MatchString(value) +} + func (e *expressionMatch) GetMetricDefinitionFilter() MetricDefinitionFilter { if e.key == "name" { if e.value == "" { @@ -105,3 +99,9 @@ func (e *expressionMatch) GetMetricDefinitionFilter() MetricDefinitionFilter { return None } } + +func (e *expressionMatch) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString(e.key) + builder.WriteString("=~") + 
builder.WriteString(e.value) +} diff --git a/expr/tagquery/expression_match_all.go b/expr/tagquery/expression_match_all.go index 99aef85dbe..4b1c9cd289 100644 --- a/expr/tagquery/expression_match_all.go +++ b/expr/tagquery/expression_match_all.go @@ -10,6 +10,10 @@ type expressionMatchAll struct { originalOperator ExpressionOperator } +func (e *expressionMatchAll) GetDefaultDecision() FilterDecision { + return Pass +} + func (e *expressionMatchAll) GetKey() string { return e.key } @@ -18,28 +22,24 @@ func (e *expressionMatchAll) GetValue() string { return e.value } -func (e *expressionMatchAll) RequiresNonEmptyValue() bool { - return false -} - -func (e *expressionMatchAll) OperatesOnTag() bool { - return false +func (e *expressionMatchAll) GetOperator() ExpressionOperator { + return MATCH_ALL } func (e *expressionMatchAll) HasRe() bool { return false } -func (e *expressionMatchAll) GetOperator() ExpressionOperator { - return MATCH_ALL +func (e *expressionMatchAll) RequiresNonEmptyValue() bool { + return false } func (e *expressionMatchAll) ValuePasses(value string) bool { return true } -func (e *expressionMatchAll) GetDefaultDecision() FilterDecision { - return Pass +func (e *expressionMatchAll) GetMetricDefinitionFilter() MetricDefinitionFilter { + return func(_ string, _ []string) FilterDecision { return Pass } } func (e *expressionMatchAll) StringIntoBuilder(builder *strings.Builder) { @@ -47,7 +47,3 @@ func (e *expressionMatchAll) StringIntoBuilder(builder *strings.Builder) { e.originalOperator.StringIntoBuilder(builder) builder.WriteString(e.value) } - -func (e *expressionMatchAll) GetMetricDefinitionFilter() MetricDefinitionFilter { - return func(_ string, _ []string) FilterDecision { return Pass } -} diff --git a/expr/tagquery/expression_match_none.go b/expr/tagquery/expression_match_none.go index 6bc0bf881a..2335496e5e 100644 --- a/expr/tagquery/expression_match_none.go +++ b/expr/tagquery/expression_match_none.go @@ -10,6 +10,10 @@ type 
expressionMatchNone struct { originalOperator ExpressionOperator } +func (e *expressionMatchNone) GetDefaultDecision() FilterDecision { + return Fail +} + func (e *expressionMatchNone) GetKey() string { return e.key } @@ -18,27 +22,24 @@ func (e *expressionMatchNone) GetValue() string { return e.value } -func (e *expressionMatchNone) RequiresNonEmptyValue() bool { - return true -} - -func (e *expressionMatchNone) OperatesOnTag() bool { - return false +func (e *expressionMatchNone) GetOperator() ExpressionOperator { + return MATCH_NONE } func (e *expressionMatchNone) HasRe() bool { return false } -func (e *expressionMatchNone) GetOperator() ExpressionOperator { - return MATCH_NONE + +func (e *expressionMatchNone) RequiresNonEmptyValue() bool { + return true } func (e *expressionMatchNone) ValuePasses(value string) bool { return false } -func (e *expressionMatchNone) GetDefaultDecision() FilterDecision { - return Fail +func (e *expressionMatchNone) GetMetricDefinitionFilter() MetricDefinitionFilter { + return func(_ string, _ []string) FilterDecision { return Fail } } func (e *expressionMatchNone) StringIntoBuilder(builder *strings.Builder) { @@ -46,7 +47,3 @@ func (e *expressionMatchNone) StringIntoBuilder(builder *strings.Builder) { e.originalOperator.StringIntoBuilder(builder) builder.WriteString(e.value) } - -func (e *expressionMatchNone) GetMetricDefinitionFilter() MetricDefinitionFilter { - return func(_ string, _ []string) FilterDecision { return Fail } -} diff --git a/expr/tagquery/expression_match_tag.go b/expr/tagquery/expression_match_tag.go index 8291fdabf1..b7c85ac705 100644 --- a/expr/tagquery/expression_match_tag.go +++ b/expr/tagquery/expression_match_tag.go @@ -10,6 +10,10 @@ type expressionMatchTag struct { expressionCommonRe } +func (e *expressionMatchTag) GetDefaultDecision() FilterDecision { + return Fail +} + func (e *expressionMatchTag) GetOperator() ExpressionOperator { return MATCH_TAG } @@ -18,14 +22,6 @@ func (e *expressionMatchTag) HasRe() 
bool { return true } -func (e *expressionMatchTag) ValuePasses(tag string) bool { - return e.valueRe.MatchString(tag) -} - -func (e *expressionMatchTag) GetDefaultDecision() FilterDecision { - return Fail -} - func (e *expressionMatchTag) OperatesOnTag() bool { return true } @@ -34,9 +30,8 @@ func (e *expressionMatchTag) RequiresNonEmptyValue() bool { return !e.matchesEmpty } -func (e *expressionMatchTag) StringIntoBuilder(builder *strings.Builder) { - builder.WriteString("__tag=~") - builder.WriteString(e.value) +func (e *expressionMatchTag) ValuePasses(tag string) bool { + return e.valueRe.MatchString(tag) } func (e *expressionMatchTag) GetMetricDefinitionFilter() MetricDefinitionFilter { @@ -82,3 +77,8 @@ func (e *expressionMatchTag) GetMetricDefinitionFilter() MetricDefinitionFilter return None } } + +func (e *expressionMatchTag) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString("__tag=~") + builder.WriteString(e.value) +} diff --git a/expr/tagquery/expression_not_equal.go b/expr/tagquery/expression_not_equal.go index 1dabf27d27..99fa3c8b05 100644 --- a/expr/tagquery/expression_not_equal.go +++ b/expr/tagquery/expression_not_equal.go @@ -8,6 +8,10 @@ type expressionNotEqual struct { expressionCommon } +func (e *expressionNotEqual) GetDefaultDecision() FilterDecision { + return Pass +} + func (e *expressionNotEqual) GetOperator() ExpressionOperator { return NOT_EQUAL } @@ -20,16 +24,6 @@ func (e *expressionNotEqual) ValuePasses(value string) bool { return value != e.value } -func (e *expressionNotEqual) GetDefaultDecision() FilterDecision { - return Pass -} - -func (e *expressionNotEqual) StringIntoBuilder(builder *strings.Builder) { - builder.WriteString(e.key) - builder.WriteString("!=") - builder.WriteString(e.value) -} - func (e *expressionNotEqual) GetMetricDefinitionFilter() MetricDefinitionFilter { if e.key == "name" { if e.value == "" { @@ -58,3 +52,9 @@ func (e *expressionNotEqual) GetMetricDefinitionFilter() MetricDefinitionFilter 
return None } } + +func (e *expressionNotEqual) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString(e.key) + builder.WriteString("!=") + builder.WriteString(e.value) +} diff --git a/expr/tagquery/expression_not_has_tag.go b/expr/tagquery/expression_not_has_tag.go index 2b4126a404..2525b079f7 100644 --- a/expr/tagquery/expression_not_has_tag.go +++ b/expr/tagquery/expression_not_has_tag.go @@ -8,10 +8,18 @@ type expressionNotHasTag struct { expressionCommon } +func (e *expressionNotHasTag) GetDefaultDecision() FilterDecision { + return Pass +} + func (e *expressionNotHasTag) GetOperator() ExpressionOperator { return NOT_HAS_TAG } +func (e *expressionNotHasTag) OperatesOnTag() bool { + return true +} + func (e *expressionNotHasTag) RequiresNonEmptyValue() bool { return false } @@ -20,19 +28,6 @@ func (e *expressionNotHasTag) ValuePasses(value string) bool { return value == e.key } -func (e *expressionNotHasTag) GetDefaultDecision() FilterDecision { - return Pass -} - -func (e *expressionNotHasTag) OperatesOnTag() bool { - return true -} - -func (e *expressionNotHasTag) StringIntoBuilder(builder *strings.Builder) { - builder.WriteString(e.key) - builder.WriteString("=") -} - func (e *expressionNotHasTag) GetMetricDefinitionFilter() MetricDefinitionFilter { if e.key == "name" { return func(_ string, _ []string) FilterDecision { return Fail } @@ -48,3 +43,8 @@ func (e *expressionNotHasTag) GetMetricDefinitionFilter() MetricDefinitionFilter return None } } + +func (e *expressionNotHasTag) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString(e.key) + builder.WriteString("=") +} diff --git a/expr/tagquery/expression_not_match.go b/expr/tagquery/expression_not_match.go index f6a7d9e9b3..d17e9628fa 100644 --- a/expr/tagquery/expression_not_match.go +++ b/expr/tagquery/expression_not_match.go @@ -10,22 +10,6 @@ type expressionNotMatch struct { expressionCommonRe } -func (e *expressionNotMatch) GetOperator() ExpressionOperator { - return 
NOT_MATCH -} - -func (e *expressionNotMatch) RequiresNonEmptyValue() bool { - return e.matchesEmpty -} - -func (e *expressionNotMatch) HasRe() bool { - return true -} - -func (e *expressionNotMatch) ValuePasses(value string) bool { - return !e.valueRe.MatchString(value) -} - func (e *expressionNotMatch) GetDefaultDecision() FilterDecision { // if the pattern matches "" (f.e. "tag!=~.*) then a metric which // does not have the tag "tag" at all should not be part of the @@ -39,10 +23,20 @@ func (e *expressionNotMatch) GetDefaultDecision() FilterDecision { return Pass } -func (e *expressionNotMatch) StringIntoBuilder(builder *strings.Builder) { - builder.WriteString(e.key) - builder.WriteString("!=~") - builder.WriteString(e.value) +func (e *expressionNotMatch) GetOperator() ExpressionOperator { + return NOT_MATCH +} + +func (e *expressionNotMatch) HasRe() bool { + return true +} + +func (e *expressionNotMatch) RequiresNonEmptyValue() bool { + return e.matchesEmpty +} + +func (e *expressionNotMatch) ValuePasses(value string) bool { + return !e.valueRe.MatchString(value) } func (e *expressionNotMatch) GetMetricDefinitionFilter() MetricDefinitionFilter { @@ -105,3 +99,9 @@ func (e *expressionNotMatch) GetMetricDefinitionFilter() MetricDefinitionFilter return None } } + +func (e *expressionNotMatch) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString(e.key) + builder.WriteString("!=~") + builder.WriteString(e.value) +} diff --git a/expr/tagquery/expression_prefix.go b/expr/tagquery/expression_prefix.go index 1d64821de3..c5be3085f1 100644 --- a/expr/tagquery/expression_prefix.go +++ b/expr/tagquery/expression_prefix.go @@ -8,12 +8,12 @@ type expressionPrefix struct { expressionCommon } -func (e *expressionPrefix) GetOperator() ExpressionOperator { - return PREFIX +func (e *expressionPrefix) GetDefaultDecision() FilterDecision { + return Fail } -func (e *expressionPrefix) ValuePasses(value string) bool { - return strings.HasPrefix(value, e.value) +func (e 
*expressionPrefix) GetOperator() ExpressionOperator { + return PREFIX } func (e *expressionPrefix) RequiresNonEmptyValue() bool { @@ -22,14 +22,8 @@ func (e *expressionPrefix) RequiresNonEmptyValue() bool { return true } -func (e *expressionPrefix) GetDefaultDecision() FilterDecision { - return Fail -} - -func (e *expressionPrefix) StringIntoBuilder(builder *strings.Builder) { - builder.WriteString(e.key) - builder.WriteString("^=") - builder.WriteString(e.value) +func (e *expressionPrefix) ValuePasses(value string) bool { + return strings.HasPrefix(value, e.value) } func (e *expressionPrefix) GetMetricDefinitionFilter() MetricDefinitionFilter { @@ -60,3 +54,9 @@ func (e *expressionPrefix) GetMetricDefinitionFilter() MetricDefinitionFilter { return None } } + +func (e *expressionPrefix) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString(e.key) + builder.WriteString("^=") + builder.WriteString(e.value) +} diff --git a/expr/tagquery/expression_prefix_tag.go b/expr/tagquery/expression_prefix_tag.go index a3e17825b0..419d4d3e09 100644 --- a/expr/tagquery/expression_prefix_tag.go +++ b/expr/tagquery/expression_prefix_tag.go @@ -8,10 +8,18 @@ type expressionPrefixTag struct { expressionCommon } +func (e *expressionPrefixTag) GetDefaultDecision() FilterDecision { + return Fail +} + func (e *expressionPrefixTag) GetOperator() ExpressionOperator { return PREFIX_TAG } +func (e *expressionPrefixTag) OperatesOnTag() bool { + return true +} + func (e *expressionPrefixTag) RequiresNonEmptyValue() bool { // we know it requires an non-empty value, because the expression // "__tag^=" would get parsed into the type expressionMatchAll @@ -22,19 +30,6 @@ func (e *expressionPrefixTag) ValuePasses(tag string) bool { return strings.HasPrefix(tag, e.value) } -func (e *expressionPrefixTag) OperatesOnTag() bool { - return true -} - -func (e *expressionPrefixTag) GetDefaultDecision() FilterDecision { - return Fail -} - -func (e *expressionPrefixTag) StringIntoBuilder(builder 
*strings.Builder) { - builder.WriteString("__tag^=") - builder.WriteString(e.value) -} - func (e *expressionPrefixTag) GetMetricDefinitionFilter() MetricDefinitionFilter { if strings.HasPrefix("name", e.value) { // every metric has a name @@ -50,3 +45,8 @@ func (e *expressionPrefixTag) GetMetricDefinitionFilter() MetricDefinitionFilter return None } } + +func (e *expressionPrefixTag) StringIntoBuilder(builder *strings.Builder) { + builder.WriteString("__tag^=") + builder.WriteString(e.value) +} From 429ff37675d2d65e5921758adb32e1ce1ef9f01a Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Mon, 15 Jul 2019 20:04:14 -0400 Subject: [PATCH 11/40] performance improvements --- expr/tagquery/expression.go | 2 +- expr/tagquery/expression_match.go | 9 +++------ expr/tagquery/expression_not_equal.go | 4 +++- expr/tagquery/expression_not_match.go | 9 +++------ expr/tagquery/expression_prefix.go | 4 +++- idx/memory/memory.go | 13 ++++++++----- 6 files changed, 21 insertions(+), 20 deletions(-) diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index ea84aa4605..26293b4a8b 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -9,7 +9,7 @@ import ( const invalidExpressionError = "Invalid expression: %s" -var matchCacheSize int +const matchCacheSize = 1000 type Expressions []Expression diff --git a/expr/tagquery/expression_match.go b/expr/tagquery/expression_match.go index f49534beb7..52ef01d945 100644 --- a/expr/tagquery/expression_match.go +++ b/expr/tagquery/expression_match.go @@ -4,6 +4,8 @@ import ( "strings" "sync" "sync/atomic" + + "github.com/raintank/schema" ) type expressionMatch struct { @@ -46,7 +48,7 @@ func (e *expressionMatch) GetMetricDefinitionFilter() MetricDefinitionFilter { return func(_ string, _ []string) FilterDecision { return Fail } } return func(name string, _ []string) FilterDecision { - if e.valueRe.MatchString(name) { + if e.valueRe.MatchString(schema.SanitizeNameAsTagValue(name)) { return Pass } else { 
return Fail @@ -64,11 +66,6 @@ func (e *expressionMatch) GetMetricDefinitionFilter() MetricDefinitionFilter { continue } - // if value is empty, every metric which has this tag fails - if e.value == "" { - return Fail - } - value := tag[len(prefix):] // reduce regex matching by looking up cached non-matches diff --git a/expr/tagquery/expression_not_equal.go b/expr/tagquery/expression_not_equal.go index 99fa3c8b05..7e9acb993d 100644 --- a/expr/tagquery/expression_not_equal.go +++ b/expr/tagquery/expression_not_equal.go @@ -2,6 +2,8 @@ package tagquery import ( "strings" + + "github.com/raintank/schema" ) type expressionNotEqual struct { @@ -30,7 +32,7 @@ func (e *expressionNotEqual) GetMetricDefinitionFilter() MetricDefinitionFilter return func(_ string, _ []string) FilterDecision { return Pass } } return func(name string, _ []string) FilterDecision { - if name == e.value { + if schema.SanitizeNameAsTagValue(name) == e.value { return Fail } return Pass diff --git a/expr/tagquery/expression_not_match.go b/expr/tagquery/expression_not_match.go index d17e9628fa..f16aed7e4c 100644 --- a/expr/tagquery/expression_not_match.go +++ b/expr/tagquery/expression_not_match.go @@ -4,6 +4,8 @@ import ( "strings" "sync" "sync/atomic" + + "github.com/raintank/schema" ) type expressionNotMatch struct { @@ -47,7 +49,7 @@ func (e *expressionNotMatch) GetMetricDefinitionFilter() MetricDefinitionFilter } return func(name string, _ []string) FilterDecision { - if e.valueRe.MatchString(name) { + if e.valueRe.MatchString(schema.SanitizeNameAsTagValue(name)) { return Fail } return Pass @@ -64,11 +66,6 @@ func (e *expressionNotMatch) GetMetricDefinitionFilter() MetricDefinitionFilter continue } - // if value is empty, every metric which has this tag passes - if e.value == "" { - return Pass - } - value := tag[len(prefix):] // reduce regex matching by looking up cached non-matches diff --git a/expr/tagquery/expression_prefix.go b/expr/tagquery/expression_prefix.go index c5be3085f1..677790e576 
100644 --- a/expr/tagquery/expression_prefix.go +++ b/expr/tagquery/expression_prefix.go @@ -2,6 +2,8 @@ package tagquery import ( "strings" + + "github.com/raintank/schema" ) type expressionPrefix struct { @@ -32,7 +34,7 @@ func (e *expressionPrefix) GetMetricDefinitionFilter() MetricDefinitionFilter { if e.key == "name" { return func(name string, _ []string) FilterDecision { - if strings.HasPrefix(name, e.value) { + if strings.HasPrefix(schema.SanitizeNameAsTagValue(name), e.value) { return Pass } diff --git a/idx/memory/memory.go b/idx/memory/memory.go index 81e577f164..12eb4e96e6 100755 --- a/idx/memory/memory.go +++ b/idx/memory/memory.go @@ -1089,9 +1089,10 @@ func (m *UnpartitionedMemoryIdx) FindByTag(orgId uint32, query tagquery.Query) [ continue } - if existing, ok := byPath[def.NameWithTags()]; !ok { - byPath[def.NameWithTags()] = &idx.Node{ - Path: def.NameWithTags(), + nameWithTags := def.NameWithTags() + if existing, ok := byPath[nameWithTags]; !ok { + byPath[nameWithTags] = &idx.Node{ + Path: nameWithTags, Leaf: true, HasChildren: false, Defs: []idx.Archive{CloneArchive(def)}, @@ -1101,10 +1102,12 @@ func (m *UnpartitionedMemoryIdx) FindByTag(orgId uint32, query tagquery.Query) [ } } - results := make([]idx.Node, 0, len(byPath)) + results := make([]idx.Node, len(byPath)) + i := 0 for _, v := range byPath { - results = append(results, *v) + results[i] = *v + i++ } return results From 4e852bc3d5c2432ae23c1afbf0738d0977caf509 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Mon, 15 Jul 2019 21:25:29 -0400 Subject: [PATCH 12/40] move matchCache config to tagquery pkg --- cmd/metrictank/metrictank.go | 5 +++++ expr/tagquery/expression.go | 2 -- expr/tagquery/tagquery.go | 17 +++++++++++++++++ idx/memory/memory.go | 2 -- idx/memory/memory_find_test.go | 2 +- 5 files changed, 23 insertions(+), 5 deletions(-) create mode 100644 expr/tagquery/tagquery.go diff --git a/cmd/metrictank/metrictank.go b/cmd/metrictank/metrictank.go index a85a52744b..cd153809c7 
100644 --- a/cmd/metrictank/metrictank.go +++ b/cmd/metrictank/metrictank.go @@ -15,6 +15,8 @@ import ( "syscall" "time" + "github.com/grafana/metrictank/expr/tagquery" + "github.com/Dieterbe/profiletrigger/heap" "github.com/Shopify/sarama" "github.com/grafana/globalconf" @@ -105,6 +107,9 @@ func main() { // input handlers input.ConfigSetup() + // tagquery handling + tagquery.ConfigSetup() + // load config for metric ingestors inCarbon.ConfigSetup() inKafkaMdm.ConfigSetup() diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index 26293b4a8b..795644bba9 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -9,8 +9,6 @@ import ( const invalidExpressionError = "Invalid expression: %s" -const matchCacheSize = 1000 - type Expressions []Expression func ParseExpressions(expressions []string) (Expressions, error) { diff --git a/expr/tagquery/tagquery.go b/expr/tagquery/tagquery.go new file mode 100644 index 0000000000..78b113488d --- /dev/null +++ b/expr/tagquery/tagquery.go @@ -0,0 +1,17 @@ +package tagquery + +import ( + "flag" + + "github.com/grafana/globalconf" +) + +var ( + matchCacheSize int +) + +func ConfigSetup() { + tagQuery := flag.NewFlagSet("tag-query", flag.ExitOnError) + tagQuery.IntVar(&matchCacheSize, "match-cache-size", 1000, "size of regular expression cache in tag query evaluation") + globalconf.Register("tag-query", tagQuery, flag.ExitOnError) +} diff --git a/idx/memory/memory.go b/idx/memory/memory.go index 12eb4e96e6..c61fa108f4 100755 --- a/idx/memory/memory.go +++ b/idx/memory/memory.go @@ -49,7 +49,6 @@ var ( statMetricsActive = stats.NewGauge32("idx.metrics_active") Enabled bool - matchCacheSize int maxPruneLockTime = time.Millisecond * 100 maxPruneLockTimeStr string TagSupport bool @@ -73,7 +72,6 @@ func ConfigSetup() { memoryIdx.BoolVar(&TagSupport, "tag-support", false, "enables/disables querying based on tags") memoryIdx.BoolVar(&Partitioned, "partitioned", false, "use separate indexes per 
partition. experimental feature") memoryIdx.IntVar(&TagQueryWorkers, "tag-query-workers", 50, "number of workers to spin up to evaluate tag queries") - memoryIdx.IntVar(&matchCacheSize, "match-cache-size", 1000, "size of regular expression cache in tag query evaluation") memoryIdx.IntVar(&findCacheSize, "find-cache-size", 1000, "number of find expressions to cache (per org). 0 disables cache") memoryIdx.IntVar(&findCacheInvalidateQueueSize, "find-cache-invalidate-queue-size", 200, "size of queue for invalidating findCache entries") memoryIdx.IntVar(&findCacheInvalidateMaxSize, "find-cache-invalidate-max-size", 100, "max amount of invalidations to queue up in one batch") diff --git a/idx/memory/memory_find_test.go b/idx/memory/memory_find_test.go index f4422ee95e..e25d8ff356 100644 --- a/idx/memory/memory_find_test.go +++ b/idx/memory/memory_find_test.go @@ -157,7 +157,7 @@ func TestMain(m *testing.M) { defer func(t bool) { TagSupport = t }(TagSupport) TagSupport = true TagQueryWorkers = 5 - matchCacheSize = 1000 + tagquery.ConfigSetup() // set matchCacheSize to default // we dont need info logs in the test output log.SetLevel(log.ErrorLevel) os.Exit(m.Run()) From 0cc49e614d7cc5b66d8bf041d59471e3fc5bcaff Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Mon, 15 Jul 2019 21:40:04 -0400 Subject: [PATCH 13/40] update example configs and changelog --- CHANGELOG.md | 1 + docker/docker-chaos/metrictank.ini | 7 +++++-- docker/docker-cluster-query/metrictank.ini | 7 +++++-- docker/docker-cluster/metrictank.ini | 7 +++++-- docker/docker-dev-custom-cfg-kafka/metrictank.ini | 7 +++++-- docs/config.md | 10 ++++++++-- metrictank-sample.ini | 7 +++++-- scripts/config/metrictank-docker.ini | 7 +++++-- scripts/config/metrictank-package.ini | 7 +++++-- 9 files changed, 44 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e0228fe43..ba8368d110 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # master ## breaking changes +* version 
v0.12.0-182-ged2adc5b and later move the option `memory-idx.match-cache-size` to the new config section `tag-query.match-cache-size`. * version v0.12.0-96-g998933c3 introduces config options for the cassandra/scylladb index table names. The default settings and schemas match the previous behavior, but people who have customized the schema-idx template files should know that we now no longer only expand the keyspace (and assume a hardcoded table name). diff --git a/docker/docker-chaos/metrictank.ini b/docker/docker-chaos/metrictank.ini index 1f56325b4a..d3a6a37761 100644 --- a/docker/docker-chaos/metrictank.ini +++ b/docker/docker-chaos/metrictank.ini @@ -429,6 +429,11 @@ schema-file = /etc/metrictank/schema-idx-cassandra.toml # instruct the driver to not attempt to get host info from the system.peers table disable-initial-host-lookup = false +### tag query evaluation +[tag-query] +# size of regular expression cache in tag query evaluation +match-cache-size = 1000 + ### in-memory only [memory-idx] enabled = false @@ -436,8 +441,6 @@ enabled = false tag-support = false # number of workers to spin up to evaluate tag queries tag-query-workers = 50 -# size of regular expression cache in tag query evaluation -match-cache-size = 1000 # path to index-rules.conf file rules-file = /etc/metrictank/index-rules.conf # maximum duration each second a prune job can lock the index. 
diff --git a/docker/docker-cluster-query/metrictank.ini b/docker/docker-cluster-query/metrictank.ini index 9020555b30..fd3c35d761 100644 --- a/docker/docker-cluster-query/metrictank.ini +++ b/docker/docker-cluster-query/metrictank.ini @@ -429,6 +429,11 @@ schema-file = /etc/metrictank/schema-idx-cassandra.toml # instruct the driver to not attempt to get host info from the system.peers table disable-initial-host-lookup = false +### tag query evaluation +[tag-query] +# size of regular expression cache in tag query evaluation +match-cache-size = 1000 + ### in-memory only [memory-idx] enabled = false @@ -436,8 +441,6 @@ enabled = false tag-support = false # number of workers to spin up to evaluate tag queries tag-query-workers = 50 -# size of regular expression cache in tag query evaluation -match-cache-size = 1000 # path to index-rules.conf file rules-file = /etc/metrictank/index-rules.conf # maximum duration each second a prune job can lock the index. diff --git a/docker/docker-cluster/metrictank.ini b/docker/docker-cluster/metrictank.ini index 78b761d3cf..ec3776db54 100644 --- a/docker/docker-cluster/metrictank.ini +++ b/docker/docker-cluster/metrictank.ini @@ -429,6 +429,11 @@ schema-file = /etc/metrictank/schema-idx-cassandra.toml # instruct the driver to not attempt to get host info from the system.peers table disable-initial-host-lookup = false +### tag query evaluation +[tag-query] +# size of regular expression cache in tag query evaluation +match-cache-size = 1000 + ### in-memory only [memory-idx] enabled = false @@ -436,8 +441,6 @@ enabled = false tag-support = false # number of workers to spin up to evaluate tag queries tag-query-workers = 50 -# size of regular expression cache in tag query evaluation -match-cache-size = 1000 # path to index-rules.conf file rules-file = /etc/metrictank/index-rules.conf # maximum duration each second a prune job can lock the index. 
diff --git a/docker/docker-dev-custom-cfg-kafka/metrictank.ini b/docker/docker-dev-custom-cfg-kafka/metrictank.ini index 39a67e7e2b..072b281c37 100644 --- a/docker/docker-dev-custom-cfg-kafka/metrictank.ini +++ b/docker/docker-dev-custom-cfg-kafka/metrictank.ini @@ -429,6 +429,11 @@ schema-file = /etc/metrictank/schema-idx-cassandra.toml # instruct the driver to not attempt to get host info from the system.peers table disable-initial-host-lookup = false +### tag query evaluation +[tag-query] +# size of regular expression cache in tag query evaluation +match-cache-size = 1000 + ### in-memory only [memory-idx] enabled = false @@ -436,8 +441,6 @@ enabled = false tag-support = false # number of workers to spin up to evaluate tag queries tag-query-workers = 50 -# size of regular expression cache in tag query evaluation -match-cache-size = 1000 # path to index-rules.conf file rules-file = /etc/metrictank/index-rules.conf # maximum duration each second a prune job can lock the index. diff --git a/docs/config.md b/docs/config.md index a02ccd6f87..a8f053f747 100644 --- a/docs/config.md +++ b/docs/config.md @@ -501,6 +501,14 @@ schema-file = /etc/metrictank/schema-idx-cassandra.toml disable-initial-host-lookup = false ``` +### tag query evaluation + +``` +[tag-query] +# size of regular expression cache in tag query evaluation +match-cache-size = 1000 +``` + ### in-memory only ``` @@ -510,8 +518,6 @@ enabled = false tag-support = false # number of workers to spin up to evaluate tag queries tag-query-workers = 50 -# size of regular expression cache in tag query evaluation -match-cache-size = 1000 # path to index-rules.conf file rules-file = /etc/metrictank/index-rules.conf # maximum duration each second a prune job can lock the index. 
diff --git a/metrictank-sample.ini b/metrictank-sample.ini index cca51bac75..79566e2a10 100644 --- a/metrictank-sample.ini +++ b/metrictank-sample.ini @@ -432,6 +432,11 @@ schema-file = /etc/metrictank/schema-idx-cassandra.toml # instruct the driver to not attempt to get host info from the system.peers table disable-initial-host-lookup = false +### tag query evaluation +[tag-query] +# size of regular expression cache in tag query evaluation +match-cache-size = 1000 + ### in-memory only [memory-idx] enabled = false @@ -439,8 +444,6 @@ enabled = false tag-support = false # number of workers to spin up to evaluate tag queries tag-query-workers = 50 -# size of regular expression cache in tag query evaluation -match-cache-size = 1000 # path to index-rules.conf file rules-file = /etc/metrictank/index-rules.conf # maximum duration each second a prune job can lock the index. diff --git a/scripts/config/metrictank-docker.ini b/scripts/config/metrictank-docker.ini index b6fe721da8..862b7c4685 100644 --- a/scripts/config/metrictank-docker.ini +++ b/scripts/config/metrictank-docker.ini @@ -429,6 +429,11 @@ schema-file = /etc/metrictank/schema-idx-cassandra.toml # instruct the driver to not attempt to get host info from the system.peers table disable-initial-host-lookup = false +### tag query evaluation +[tag-query] +# size of regular expression cache in tag query evaluation +match-cache-size = 1000 + ### in-memory only [memory-idx] enabled = false @@ -436,8 +441,6 @@ enabled = false tag-support = false # number of workers to spin up to evaluate tag queries tag-query-workers = 50 -# size of regular expression cache in tag query evaluation -match-cache-size = 1000 # path to index-rules.conf file rules-file = /etc/metrictank/index-rules.conf # maximum duration each second a prune job can lock the index. 
diff --git a/scripts/config/metrictank-package.ini b/scripts/config/metrictank-package.ini index ce2398761e..84d8d8a058 100644 --- a/scripts/config/metrictank-package.ini +++ b/scripts/config/metrictank-package.ini @@ -429,6 +429,11 @@ schema-file = /etc/metrictank/schema-idx-cassandra.toml # instruct the driver to not attempt to get host info from the system.peers table disable-initial-host-lookup = false +### tag query evaluation +[tag-query] +# size of regular expression cache in tag query evaluation +match-cache-size = 1000 + ### in-memory only [memory-idx] enabled = false @@ -436,8 +441,6 @@ enabled = false tag-support = false # number of workers to spin up to evaluate tag queries tag-query-workers = 50 -# size of regular expression cache in tag query evaluation -match-cache-size = 1000 # path to index-rules.conf file rules-file = /etc/metrictank/index-rules.conf # maximum duration each second a prune job can lock the index. From eb1e25a76b5f10653cbec22ee3603216ca6aa4ac Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Tue, 16 Jul 2019 10:40:15 -0400 Subject: [PATCH 14/40] cleanup remove dead code and add comments --- expr/tagquery/expression.go | 15 --------------- expr/tagquery/query.go | 8 ++++++-- idx/memory/tag_query.go | 2 +- 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index 795644bba9..f3cfd15a65 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -371,21 +371,6 @@ func ExpressionsAreEqual(expr1, expr2 Expression) bool { // MetricDefinitionFilter takes a metric name together with its tags and returns a FilterDecision type MetricDefinitionFilter func(name string, tags []string) FilterDecision -type MetricDefinitionFilters []MetricDefinitionFilter - -func (m MetricDefinitionFilters) Filter(name string, tags []string) FilterDecision { - for i := range m { - decision := m[i](name, tags) - if decision == Fail { - return Fail - } else if decision == Pass { 
- return Pass - } - } - - return None -} - type FilterDecision uint8 const ( diff --git a/expr/tagquery/query.go b/expr/tagquery/query.go index 6e4d6394c2..17eb31fd9c 100644 --- a/expr/tagquery/query.go +++ b/expr/tagquery/query.go @@ -79,8 +79,12 @@ func NewQuery(expressions Expressions, from int64) (Query, error) { return q, nil } -func (q *Query) GetMetricDefinitionFilters() (MetricDefinitionFilters, []FilterDecision) { - var filters MetricDefinitionFilters +// GetMetricDefinitionFilters returns all the metric definition filters associated with this +// query, together with their according default decision +// The returned filters get generated from the query expressions, excluding the one which has +// been dedicated to be the initial expression (marked via the .startWith index) +func (q *Query) GetMetricDefinitionFilters() ([]MetricDefinitionFilter, []FilterDecision) { + var filters []MetricDefinitionFilter var defaultDecisions []FilterDecision for i := range q.Expressions { // the one we start with does not need to be added to the filters, diff --git a/idx/memory/tag_query.go b/idx/memory/tag_query.go index b93508c219..d5b20456be 100644 --- a/idx/memory/tag_query.go +++ b/idx/memory/tag_query.go @@ -21,7 +21,7 @@ type TagQueryContext struct { wg sync.WaitGroup query tagquery.Query - filters tagquery.MetricDefinitionFilters + filters []tagquery.MetricDefinitionFilter defaultDecisions []tagquery.FilterDecision index TagIndex // the tag index, hierarchy of tags & values, set by Run()/RunGetTags() From 9c6cc1bc47abc3d3e076710b7d866412e4e1f177 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Wed, 17 Jul 2019 14:55:21 -0400 Subject: [PATCH 15/40] make meta tag support optional this leads to a better performance if the meta tag support is turned off, because then certain checks can be omitted. 
--- expr/tagquery/expression_equal.go | 12 ++++++++++++ expr/tagquery/expression_has_tag.go | 7 ++++++- expr/tagquery/expression_match.go | 7 ++++++- expr/tagquery/expression_match_tag.go | 7 ++++++- expr/tagquery/expression_not_equal.go | 11 +++++++++++ expr/tagquery/expression_not_has_tag.go | 8 +++++++- expr/tagquery/expression_not_match.go | 7 ++++++- expr/tagquery/expression_prefix.go | 9 +++++++-- expr/tagquery/expression_prefix_tag.go | 9 +++++++-- expr/tagquery/tagquery.go | 2 ++ 10 files changed, 70 insertions(+), 9 deletions(-) diff --git a/expr/tagquery/expression_equal.go b/expr/tagquery/expression_equal.go index 35d398e706..fddb6dbd45 100644 --- a/expr/tagquery/expression_equal.go +++ b/expr/tagquery/expression_equal.go @@ -36,6 +36,18 @@ func (e *expressionEqual) GetMetricDefinitionFilter() MetricDefinitionFilter { prefix := e.key + "=" matchString := prefix + e.value + if !metaTagSupport { + return func(name string, tags []string) FilterDecision { + for _, tag := range tags { + if tag == matchString { + return Pass + } + } + + return Fail + } + } + return func(name string, tags []string) FilterDecision { for _, tag := range tags { if tag == matchString { diff --git a/expr/tagquery/expression_has_tag.go b/expr/tagquery/expression_has_tag.go index 14f6f6f14a..8f4e12b171 100644 --- a/expr/tagquery/expression_has_tag.go +++ b/expr/tagquery/expression_has_tag.go @@ -29,6 +29,11 @@ func (e *expressionHasTag) GetMetricDefinitionFilter() MetricDefinitionFilter { return func(_ string, _ []string) FilterDecision { return Pass } } + resultIfTagIsAbsent := None + if !metaTagSupport { + resultIfTagIsAbsent = Fail + } + matchPrefix := e.key + "=" return func(_ string, tags []string) FilterDecision { for _, tag := range tags { @@ -37,7 +42,7 @@ func (e *expressionHasTag) GetMetricDefinitionFilter() MetricDefinitionFilter { } } - return None + return resultIfTagIsAbsent } } diff --git a/expr/tagquery/expression_match.go b/expr/tagquery/expression_match.go index 
52ef01d945..3f5e4da26b 100644 --- a/expr/tagquery/expression_match.go +++ b/expr/tagquery/expression_match.go @@ -56,6 +56,11 @@ func (e *expressionMatch) GetMetricDefinitionFilter() MetricDefinitionFilter { } } + resultIfTagIsAbsent := None + if !metaTagSupport { + resultIfTagIsAbsent = Fail + } + var matchCache, missCache sync.Map var currentMatchCacheSize, currentMissCacheSize int32 prefix := e.key + "=" @@ -93,7 +98,7 @@ func (e *expressionMatch) GetMetricDefinitionFilter() MetricDefinitionFilter { } } - return None + return resultIfTagIsAbsent } } diff --git a/expr/tagquery/expression_match_tag.go b/expr/tagquery/expression_match_tag.go index b7c85ac705..960645340a 100644 --- a/expr/tagquery/expression_match_tag.go +++ b/expr/tagquery/expression_match_tag.go @@ -40,6 +40,11 @@ func (e *expressionMatchTag) GetMetricDefinitionFilter() MetricDefinitionFilter return func(_ string, _ []string) FilterDecision { return Pass } } + resultIfTagIsAbsent := None + if !metaTagSupport { + resultIfTagIsAbsent = Fail + } + var matchCache, missCache sync.Map var currentMatchCacheSize, currentMissCacheSize int32 @@ -74,7 +79,7 @@ func (e *expressionMatchTag) GetMetricDefinitionFilter() MetricDefinitionFilter } } - return None + return resultIfTagIsAbsent } } diff --git a/expr/tagquery/expression_not_equal.go b/expr/tagquery/expression_not_equal.go index 7e9acb993d..ec3a6d9771 100644 --- a/expr/tagquery/expression_not_equal.go +++ b/expr/tagquery/expression_not_equal.go @@ -41,6 +41,17 @@ func (e *expressionNotEqual) GetMetricDefinitionFilter() MetricDefinitionFilter prefix := e.key + "=" matchString := prefix + e.value + if !metaTagSupport { + return func(name string, tags []string) FilterDecision { + for _, tag := range tags { + if tag == matchString { + return Fail + } + } + return Pass + } + } + return func(_ string, tags []string) FilterDecision { for _, tag := range tags { if strings.HasPrefix(tag, prefix) { diff --git a/expr/tagquery/expression_not_has_tag.go 
b/expr/tagquery/expression_not_has_tag.go index 2525b079f7..a867622e5e 100644 --- a/expr/tagquery/expression_not_has_tag.go +++ b/expr/tagquery/expression_not_has_tag.go @@ -33,6 +33,11 @@ func (e *expressionNotHasTag) GetMetricDefinitionFilter() MetricDefinitionFilter return func(_ string, _ []string) FilterDecision { return Fail } } + resultIfTagIsAbsent := None + if !metaTagSupport { + resultIfTagIsAbsent = Pass + } + matchPrefix := e.key + "=" return func(_ string, tags []string) FilterDecision { for _, tag := range tags { @@ -40,7 +45,8 @@ func (e *expressionNotHasTag) GetMetricDefinitionFilter() MetricDefinitionFilter return Fail } } - return None + + return resultIfTagIsAbsent } } diff --git a/expr/tagquery/expression_not_match.go b/expr/tagquery/expression_not_match.go index f16aed7e4c..c2c212eb4c 100644 --- a/expr/tagquery/expression_not_match.go +++ b/expr/tagquery/expression_not_match.go @@ -56,6 +56,11 @@ func (e *expressionNotMatch) GetMetricDefinitionFilter() MetricDefinitionFilter } } + resultIfTagIsAbsent := None + if !metaTagSupport { + resultIfTagIsAbsent = Pass + } + var matchCache, missCache sync.Map var currentMatchCacheSize, currentMissCacheSize int32 prefix := e.key + "=" @@ -93,7 +98,7 @@ func (e *expressionNotMatch) GetMetricDefinitionFilter() MetricDefinitionFilter } } - return None + return resultIfTagIsAbsent } } diff --git a/expr/tagquery/expression_prefix.go b/expr/tagquery/expression_prefix.go index 677790e576..f45dd704a2 100644 --- a/expr/tagquery/expression_prefix.go +++ b/expr/tagquery/expression_prefix.go @@ -42,7 +42,12 @@ func (e *expressionPrefix) GetMetricDefinitionFilter() MetricDefinitionFilter { } } - return func(_ string, tags []string) FilterDecision { + resultIfTagIsAbsent := None + if !metaTagSupport { + resultIfTagIsAbsent = Fail + } + + return func(name string, tags []string) FilterDecision { for _, tag := range tags { if strings.HasPrefix(tag, matchString) { return Pass @@ -53,7 +58,7 @@ func (e *expressionPrefix) 
GetMetricDefinitionFilter() MetricDefinitionFilter { } } - return None + return resultIfTagIsAbsent } } diff --git a/expr/tagquery/expression_prefix_tag.go b/expr/tagquery/expression_prefix_tag.go index 419d4d3e09..cc79dbbf9e 100644 --- a/expr/tagquery/expression_prefix_tag.go +++ b/expr/tagquery/expression_prefix_tag.go @@ -36,13 +36,18 @@ func (e *expressionPrefixTag) GetMetricDefinitionFilter() MetricDefinitionFilter return func(_ string, _ []string) FilterDecision { return Pass } } - return func(_ string, tags []string) FilterDecision { + resultIfTagIsAbsent := None + if !metaTagSupport { + resultIfTagIsAbsent = Fail + } + + return func(name string, tags []string) FilterDecision { for _, tag := range tags { if strings.HasPrefix(tag, e.value) { return Pass } } - return None + return resultIfTagIsAbsent } } diff --git a/expr/tagquery/tagquery.go b/expr/tagquery/tagquery.go index 78b113488d..feb1c7c355 100644 --- a/expr/tagquery/tagquery.go +++ b/expr/tagquery/tagquery.go @@ -8,10 +8,12 @@ import ( var ( matchCacheSize int + metaTagSupport bool ) func ConfigSetup() { tagQuery := flag.NewFlagSet("tag-query", flag.ExitOnError) tagQuery.IntVar(&matchCacheSize, "match-cache-size", 1000, "size of regular expression cache in tag query evaluation") + tagQuery.BoolVar(&metaTagSupport, "meta-tag-support", false, "enables/disables querying based on meta tags which get defined via meta tag rules") globalconf.Register("tag-query", tagQuery, flag.ExitOnError) } From c1a30ab0c22ec7108d4838f7f449af764d45bfee Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Wed, 17 Jul 2019 11:53:10 -0400 Subject: [PATCH 16/40] optimize performance by using map lookup looking up a key from a map is faster than iterating over the values and comparing them to what we are looking for. to take advantage of the faster map lookup this now passes an index lookup method into the metric definition filters. 
--- expr/tagquery/expression.go | 6 +++-- expr/tagquery/expression_equal.go | 30 +++++++++++------------ expr/tagquery/expression_has_tag.go | 8 ++++--- expr/tagquery/expression_match.go | 11 ++++----- expr/tagquery/expression_match_all.go | 6 +++-- expr/tagquery/expression_match_none.go | 6 +++-- expr/tagquery/expression_match_tag.go | 8 ++++--- expr/tagquery/expression_not_equal.go | 32 ++++++++++++------------- expr/tagquery/expression_not_has_tag.go | 8 ++++--- expr/tagquery/expression_not_match.go | 11 ++++----- expr/tagquery/expression_prefix.go | 6 ++--- expr/tagquery/expression_prefix_tag.go | 8 ++++--- expr/tagquery/query.go | 8 +++++-- idx/memory/memory.go | 12 +++++++++- idx/memory/tag_query.go | 7 ++++-- idx/memory/tag_query_test.go | 16 +++++++++++++ 16 files changed, 113 insertions(+), 70 deletions(-) diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index f3cfd15a65..a53902663b 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -5,6 +5,8 @@ import ( "regexp" "sort" "strings" + + "github.com/raintank/schema" ) const invalidExpressionError = "Invalid expression: %s" @@ -161,7 +163,7 @@ type Expression interface { // GetMetricDefinitionFilter returns a MetricDefinitionFilter // The MetricDefinitionFilter takes a metric definition, looks at its tags and returns a decision // regarding this query expression applied to its tags - GetMetricDefinitionFilter() MetricDefinitionFilter + GetMetricDefinitionFilter(lookup IdTagLookup) MetricDefinitionFilter // StringIntoBuilder takes a builder and writes a string representation of this expression into it StringIntoBuilder(builder *strings.Builder) @@ -369,7 +371,7 @@ func ExpressionsAreEqual(expr1, expr2 Expression) bool { } // MetricDefinitionFilter takes a metric name together with its tags and returns a FilterDecision -type MetricDefinitionFilter func(name string, tags []string) FilterDecision +type MetricDefinitionFilter func(id schema.MKey, name string, tags 
[]string) FilterDecision type FilterDecision uint8 diff --git a/expr/tagquery/expression_equal.go b/expr/tagquery/expression_equal.go index fddb6dbd45..6b695b6b5c 100644 --- a/expr/tagquery/expression_equal.go +++ b/expr/tagquery/expression_equal.go @@ -2,6 +2,8 @@ package tagquery import ( "strings" + + "github.com/raintank/schema" ) type expressionEqual struct { @@ -20,13 +22,13 @@ func (e *expressionEqual) ValuePasses(value string) bool { return value == e.value } -func (e *expressionEqual) GetMetricDefinitionFilter() MetricDefinitionFilter { +func (e *expressionEqual) GetMetricDefinitionFilter(lookup IdTagLookup) MetricDefinitionFilter { if e.key == "name" { if e.value == "" { // every metric has a name, the value will never be empty - return func(_ string, _ []string) FilterDecision { return Fail } + return func(id schema.MKey, name string, tags []string) FilterDecision { return Fail } } - return func(name string, _ []string) FilterDecision { + return func(id schema.MKey, name string, tags []string) FilterDecision { if name == e.value { return Pass } @@ -34,26 +36,22 @@ func (e *expressionEqual) GetMetricDefinitionFilter() MetricDefinitionFilter { } } - prefix := e.key + "=" - matchString := prefix + e.value if !metaTagSupport { - return func(name string, tags []string) FilterDecision { - for _, tag := range tags { - if tag == matchString { - return Pass - } + return func(id schema.MKey, name string, tags []string) FilterDecision { + if lookup(id, e.key, e.value) { + return Pass } - return Fail } } - return func(name string, tags []string) FilterDecision { - for _, tag := range tags { - if tag == matchString { - return Pass - } + prefix := e.key + "=" + return func(id schema.MKey, name string, tags []string) FilterDecision { + if lookup(id, e.key, e.value) { + return Pass + } + for _, tag := range tags { // the tag is set, but it has a different value, // no need to keep looking at other indexes if strings.HasPrefix(tag, prefix) { diff --git 
a/expr/tagquery/expression_has_tag.go b/expr/tagquery/expression_has_tag.go index 8f4e12b171..59846f9172 100644 --- a/expr/tagquery/expression_has_tag.go +++ b/expr/tagquery/expression_has_tag.go @@ -2,6 +2,8 @@ package tagquery import ( "strings" + + "github.com/raintank/schema" ) type expressionHasTag struct { @@ -24,9 +26,9 @@ func (e *expressionHasTag) ValuePasses(value string) bool { return value == e.key } -func (e *expressionHasTag) GetMetricDefinitionFilter() MetricDefinitionFilter { +func (e *expressionHasTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter { if e.key == "name" { - return func(_ string, _ []string) FilterDecision { return Pass } + return func(id schema.MKey, name string, tags []string) FilterDecision { return Pass } } resultIfTagIsAbsent := None @@ -35,7 +37,7 @@ func (e *expressionHasTag) GetMetricDefinitionFilter() MetricDefinitionFilter { } matchPrefix := e.key + "=" - return func(_ string, tags []string) FilterDecision { + return func(id schema.MKey, name string, tags []string) FilterDecision { for _, tag := range tags { if strings.HasPrefix(tag, matchPrefix) { return Pass diff --git a/expr/tagquery/expression_match.go b/expr/tagquery/expression_match.go index 3f5e4da26b..881df25dcc 100644 --- a/expr/tagquery/expression_match.go +++ b/expr/tagquery/expression_match.go @@ -41,13 +41,13 @@ func (e *expressionMatch) ValuePasses(value string) bool { return e.valueRe.MatchString(value) } -func (e *expressionMatch) GetMetricDefinitionFilter() MetricDefinitionFilter { +func (e *expressionMatch) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter { if e.key == "name" { if e.value == "" { // silly query, always fails - return func(_ string, _ []string) FilterDecision { return Fail } + return func(id schema.MKey, name string, tags []string) FilterDecision { return Fail } } - return func(name string, _ []string) FilterDecision { + return func(id schema.MKey, name string, tags []string) FilterDecision { if 
e.valueRe.MatchString(schema.SanitizeNameAsTagValue(name)) { return Pass } else { @@ -61,11 +61,10 @@ func (e *expressionMatch) GetMetricDefinitionFilter() MetricDefinitionFilter { resultIfTagIsAbsent = Fail } + prefix := e.key + "=" var matchCache, missCache sync.Map var currentMatchCacheSize, currentMissCacheSize int32 - prefix := e.key + "=" - - return func(_ string, tags []string) FilterDecision { + return func(id schema.MKey, name string, tags []string) FilterDecision { for _, tag := range tags { if !strings.HasPrefix(tag, prefix) { continue diff --git a/expr/tagquery/expression_match_all.go b/expr/tagquery/expression_match_all.go index 4b1c9cd289..9fcc41cd70 100644 --- a/expr/tagquery/expression_match_all.go +++ b/expr/tagquery/expression_match_all.go @@ -2,6 +2,8 @@ package tagquery import ( "strings" + + "github.com/raintank/schema" ) type expressionMatchAll struct { @@ -38,8 +40,8 @@ func (e *expressionMatchAll) ValuePasses(value string) bool { return true } -func (e *expressionMatchAll) GetMetricDefinitionFilter() MetricDefinitionFilter { - return func(_ string, _ []string) FilterDecision { return Pass } +func (e *expressionMatchAll) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter { + return func(id schema.MKey, name string, tags []string) FilterDecision { return Pass } } func (e *expressionMatchAll) StringIntoBuilder(builder *strings.Builder) { diff --git a/expr/tagquery/expression_match_none.go b/expr/tagquery/expression_match_none.go index 2335496e5e..b8c6ec95c1 100644 --- a/expr/tagquery/expression_match_none.go +++ b/expr/tagquery/expression_match_none.go @@ -2,6 +2,8 @@ package tagquery import ( "strings" + + "github.com/raintank/schema" ) type expressionMatchNone struct { @@ -38,8 +40,8 @@ func (e *expressionMatchNone) ValuePasses(value string) bool { return false } -func (e *expressionMatchNone) GetMetricDefinitionFilter() MetricDefinitionFilter { - return func(_ string, _ []string) FilterDecision { return Fail } +func (e 
*expressionMatchNone) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter { + return func(id schema.MKey, name string, tags []string) FilterDecision { return Fail } } func (e *expressionMatchNone) StringIntoBuilder(builder *strings.Builder) { diff --git a/expr/tagquery/expression_match_tag.go b/expr/tagquery/expression_match_tag.go index 960645340a..4b3e37ef87 100644 --- a/expr/tagquery/expression_match_tag.go +++ b/expr/tagquery/expression_match_tag.go @@ -4,6 +4,8 @@ import ( "strings" "sync" "sync/atomic" + + "github.com/raintank/schema" ) type expressionMatchTag struct { @@ -34,10 +36,10 @@ func (e *expressionMatchTag) ValuePasses(tag string) bool { return e.valueRe.MatchString(tag) } -func (e *expressionMatchTag) GetMetricDefinitionFilter() MetricDefinitionFilter { +func (e *expressionMatchTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter { if e.valueRe.Match([]byte("name")) { // every metric has a tag name, so we can always return Pass - return func(_ string, _ []string) FilterDecision { return Pass } + return func(id schema.MKey, name string, tags []string) FilterDecision { return Pass } } resultIfTagIsAbsent := None @@ -48,7 +50,7 @@ func (e *expressionMatchTag) GetMetricDefinitionFilter() MetricDefinitionFilter var matchCache, missCache sync.Map var currentMatchCacheSize, currentMissCacheSize int32 - return func(_ string, tags []string) FilterDecision { + return func(id schema.MKey, name string, tags []string) FilterDecision { for _, tag := range tags { values := strings.SplitN(tag, "=", 2) if len(values) < 2 { diff --git a/expr/tagquery/expression_not_equal.go b/expr/tagquery/expression_not_equal.go index ec3a6d9771..6493c168de 100644 --- a/expr/tagquery/expression_not_equal.go +++ b/expr/tagquery/expression_not_equal.go @@ -26,12 +26,12 @@ func (e *expressionNotEqual) ValuePasses(value string) bool { return value != e.value } -func (e *expressionNotEqual) GetMetricDefinitionFilter() MetricDefinitionFilter { +func (e 
*expressionNotEqual) GetMetricDefinitionFilter(lookup IdTagLookup) MetricDefinitionFilter { if e.key == "name" { if e.value == "" { - return func(_ string, _ []string) FilterDecision { return Pass } + return func(id schema.MKey, name string, tags []string) FilterDecision { return Pass } } - return func(name string, _ []string) FilterDecision { + return func(id schema.MKey, name string, tags []string) FilterDecision { if schema.SanitizeNameAsTagValue(name) == e.value { return Fail } @@ -39,29 +39,29 @@ func (e *expressionNotEqual) GetMetricDefinitionFilter() MetricDefinitionFilter } } - prefix := e.key + "=" - matchString := prefix + e.value if !metaTagSupport { - return func(name string, tags []string) FilterDecision { - for _, tag := range tags { - if tag == matchString { - return Fail - } + return func(id schema.MKey, name string, tags []string) FilterDecision { + if lookup(id, e.key, e.value) { + return Fail } return Pass } } - return func(_ string, tags []string) FilterDecision { + prefix := e.key + "=" + return func(id schema.MKey, name string, tags []string) FilterDecision { + if lookup(id, e.key, e.value) { + return Fail + } + for _, tag := range tags { + // the tag is set, but it has a different value, + // no need to keep looking at other indexes if strings.HasPrefix(tag, prefix) { - if tag == matchString { - return Fail - } else { - return Pass - } + return Pass } } + return None } } diff --git a/expr/tagquery/expression_not_has_tag.go b/expr/tagquery/expression_not_has_tag.go index a867622e5e..b7a5edd961 100644 --- a/expr/tagquery/expression_not_has_tag.go +++ b/expr/tagquery/expression_not_has_tag.go @@ -2,6 +2,8 @@ package tagquery import ( "strings" + + "github.com/raintank/schema" ) type expressionNotHasTag struct { @@ -28,9 +30,9 @@ func (e *expressionNotHasTag) ValuePasses(value string) bool { return value == e.key } -func (e *expressionNotHasTag) GetMetricDefinitionFilter() MetricDefinitionFilter { +func (e *expressionNotHasTag) 
GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter { if e.key == "name" { - return func(_ string, _ []string) FilterDecision { return Fail } + return func(id schema.MKey, name string, tags []string) FilterDecision { return Fail } } resultIfTagIsAbsent := None @@ -39,7 +41,7 @@ func (e *expressionNotHasTag) GetMetricDefinitionFilter() MetricDefinitionFilter } matchPrefix := e.key + "=" - return func(_ string, tags []string) FilterDecision { + return func(id schema.MKey, name string, tags []string) FilterDecision { for _, tag := range tags { if strings.HasPrefix(tag, matchPrefix) { return Fail diff --git a/expr/tagquery/expression_not_match.go b/expr/tagquery/expression_not_match.go index c2c212eb4c..b5b9eca23a 100644 --- a/expr/tagquery/expression_not_match.go +++ b/expr/tagquery/expression_not_match.go @@ -41,14 +41,14 @@ func (e *expressionNotMatch) ValuePasses(value string) bool { return !e.valueRe.MatchString(value) } -func (e *expressionNotMatch) GetMetricDefinitionFilter() MetricDefinitionFilter { +func (e *expressionNotMatch) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter { if e.key == "name" { if e.value == "" { // every metric has a name - return func(_ string, _ []string) FilterDecision { return Pass } + return func(id schema.MKey, name string, tags []string) FilterDecision { return Pass } } - return func(name string, _ []string) FilterDecision { + return func(id schema.MKey, name string, tags []string) FilterDecision { if e.valueRe.MatchString(schema.SanitizeNameAsTagValue(name)) { return Fail } @@ -61,11 +61,10 @@ func (e *expressionNotMatch) GetMetricDefinitionFilter() MetricDefinitionFilter resultIfTagIsAbsent = Pass } + prefix := e.key + "=" var matchCache, missCache sync.Map var currentMatchCacheSize, currentMissCacheSize int32 - prefix := e.key + "=" - - return func(_ string, tags []string) FilterDecision { + return func(id schema.MKey, name string, tags []string) FilterDecision { for _, tag := range tags { if 
!strings.HasPrefix(tag, prefix) { continue diff --git a/expr/tagquery/expression_prefix.go b/expr/tagquery/expression_prefix.go index f45dd704a2..dc252b5ffe 100644 --- a/expr/tagquery/expression_prefix.go +++ b/expr/tagquery/expression_prefix.go @@ -28,12 +28,12 @@ func (e *expressionPrefix) ValuePasses(value string) bool { return strings.HasPrefix(value, e.value) } -func (e *expressionPrefix) GetMetricDefinitionFilter() MetricDefinitionFilter { +func (e *expressionPrefix) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter { prefix := e.key + "=" matchString := prefix + e.value if e.key == "name" { - return func(name string, _ []string) FilterDecision { + return func(id schema.MKey, name string, tags []string) FilterDecision { if strings.HasPrefix(schema.SanitizeNameAsTagValue(name), e.value) { return Pass } @@ -47,7 +47,7 @@ func (e *expressionPrefix) GetMetricDefinitionFilter() MetricDefinitionFilter { resultIfTagIsAbsent = Fail } - return func(name string, tags []string) FilterDecision { + return func(_ schema.MKey, _ string, tags []string) FilterDecision { for _, tag := range tags { if strings.HasPrefix(tag, matchString) { return Pass diff --git a/expr/tagquery/expression_prefix_tag.go b/expr/tagquery/expression_prefix_tag.go index cc79dbbf9e..b4e2adb835 100644 --- a/expr/tagquery/expression_prefix_tag.go +++ b/expr/tagquery/expression_prefix_tag.go @@ -2,6 +2,8 @@ package tagquery import ( "strings" + + "github.com/raintank/schema" ) type expressionPrefixTag struct { @@ -30,10 +32,10 @@ func (e *expressionPrefixTag) ValuePasses(tag string) bool { return strings.HasPrefix(tag, e.value) } -func (e *expressionPrefixTag) GetMetricDefinitionFilter() MetricDefinitionFilter { +func (e *expressionPrefixTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter { if strings.HasPrefix("name", e.value) { // every metric has a name - return func(_ string, _ []string) FilterDecision { return Pass } + return func(id schema.MKey, name string, tags 
[]string) FilterDecision { return Pass } } resultIfTagIsAbsent := None @@ -41,7 +43,7 @@ func (e *expressionPrefixTag) GetMetricDefinitionFilter() MetricDefinitionFilter resultIfTagIsAbsent = Fail } - return func(name string, tags []string) FilterDecision { + return func(_ schema.MKey, _ string, tags []string) FilterDecision { for _, tag := range tags { if strings.HasPrefix(tag, e.value) { return Pass diff --git a/expr/tagquery/query.go b/expr/tagquery/query.go index 17eb31fd9c..a7a2886ffe 100644 --- a/expr/tagquery/query.go +++ b/expr/tagquery/query.go @@ -2,6 +2,8 @@ package tagquery import ( "errors" + + "github.com/raintank/schema" ) var ( @@ -83,7 +85,7 @@ func NewQuery(expressions Expressions, from int64) (Query, error) { // query, together with their according default decision // The returned filters get generated from the query expressions, excluding the one which has // been dedicated to be the initial expression (marked via the .startWith index) -func (q *Query) GetMetricDefinitionFilters() ([]MetricDefinitionFilter, []FilterDecision) { +func (q *Query) GetMetricDefinitionFilters(lookup IdTagLookup) ([]MetricDefinitionFilter, []FilterDecision) { var filters []MetricDefinitionFilter var defaultDecisions []FilterDecision for i := range q.Expressions { @@ -92,13 +94,15 @@ func (q *Query) GetMetricDefinitionFilters() ([]MetricDefinitionFilter, []Filter if i == q.startWith { continue } - filters = append(filters, q.Expressions[i].GetMetricDefinitionFilter()) + filters = append(filters, q.Expressions[i].GetMetricDefinitionFilter(lookup)) defaultDecisions = append(defaultDecisions, q.Expressions[i].GetDefaultDecision()) } return filters, defaultDecisions } +type IdTagLookup func(id schema.MKey, tag, value string) bool + // GetInitialExpression returns the expression which should be used to generate the initial // result set, to later filter it down with the remaining expressions. 
// We assume Query has been instantiated via NewQuery(), in which case it is guaranteed that diff --git a/idx/memory/memory.go b/idx/memory/memory.go index c61fa108f4..d807c242c1 100755 --- a/idx/memory/memory.go +++ b/idx/memory/memory.go @@ -106,7 +106,6 @@ func ConfigProcess() { if findCacheInvalidateMaxSize >= findCacheInvalidateQueueSize { log.Fatal("find-cache-invalidate-max-size should be smaller than find-cache-invalidate-queue-size") } - } // interface implemented by both UnpartitionedMemoryIdx and PartitionedMemoryIdx @@ -177,6 +176,11 @@ func (t *TagIndex) delTagId(name, value string, id schema.MKey) { // nameWithTags is the name plus all tags in the <name>;<tag>=<value>... format. type defByTagSet map[uint32]map[string]map[*schema.MetricDefinition]struct{} +func (t TagIndex) idHasTag(id schema.MKey, tag, value string) bool { + _, ok := t[tag][value][id] + return ok +} + func (defs defByTagSet) add(def *schema.MetricDefinition) { var orgDefs map[string]map[*schema.MetricDefinition]struct{} var ok bool @@ -880,6 +884,8 @@ func (m *UnpartitionedMemoryIdx) FindTagsWithQuery(orgId uint32, prefix string, return nil } + queryCtx.prepareFilters(tags.idHasTag) + // probably allocating more than necessary, still better than growing res := make([]string, 0, len(tags)) @@ -950,6 +956,8 @@ func (m *UnpartitionedMemoryIdx) FindTagValuesWithQuery(orgId uint32, tag, prefi return nil } + queryCtx.prepareFilters(tags.idHasTag) + ids := queryCtx.Run(tags, m.defById) valueMap := make(map[string]struct{}) tagPrefix := tag + "=" + prefix @@ -1117,6 +1125,8 @@ func (m *UnpartitionedMemoryIdx) idsByTagQuery(orgId uint32, query TagQueryConte return nil } + query.prepareFilters(tags.idHasTag) + return query.Run(tags, m.defById) } diff --git a/idx/memory/tag_query.go b/idx/memory/tag_query.go index d5b20456be..2ed4d2e382 100644 --- a/idx/memory/tag_query.go +++ b/idx/memory/tag_query.go @@ -34,11 +34,14 @@ func NewTagQueryContext(query tagquery.Query) TagQueryContext { ctx := TagQueryContext{ 
query: query, } - ctx.filters, ctx.defaultDecisions = query.GetMetricDefinitionFilters() return ctx } +func (q *TagQueryContext) prepareFilters(lookup tagquery.IdTagLookup) { + q.filters, q.defaultDecisions = q.query.GetMetricDefinitionFilters(lookup) +} + // getInitialIds asynchronously collects all ID's of the initial result set. It returns: // a channel through which the IDs of the initial result set will be sent // a stop channel, which when closed, will cause it to abort the background worker. @@ -125,7 +128,7 @@ func (q *TagQueryContext) testByAllExpressions(id schema.MKey, def *idx.Archive, } for i := range q.filters { - decision := q.filters[i](schema.SanitizeNameAsTagValue(def.Name), def.Tags) + decision := q.filters[i](id, schema.SanitizeNameAsTagValue(def.Name), def.Tags) if decision == tagquery.None { decision = q.defaultDecisions[i] diff --git a/idx/memory/tag_query_test.go b/idx/memory/tag_query_test.go index 8c674c0afe..40afe4925a 100644 --- a/idx/memory/tag_query_test.go +++ b/idx/memory/tag_query_test.go @@ -77,6 +77,8 @@ func queryAndCompareTagResults(t *testing.T, q TagQueryContext, expectedData map t.Helper() tagIdx, byId := getTestIndex() + q.prepareFilters(tagIdx.idHasTag) + res := q.RunGetTags(tagIdx, byId) if !reflect.DeepEqual(expectedData, res) { t.Fatalf("Expected: %+v\nGot: %+v", expectedData, res) @@ -87,6 +89,8 @@ func queryAndCompareResults(t *testing.T, q TagQueryContext, expectedData IdSet) t.Helper() tagIdx, byId := getTestIndex() + q.prepareFilters(tagIdx.idHasTag) + res := q.Run(tagIdx, byId) if !reflect.DeepEqual(expectedData, res) { @@ -96,6 +100,18 @@ func queryAndCompareResults(t *testing.T, q TagQueryContext, expectedData IdSet) } } +func TestIdHasTag(t *testing.T) { + tagIdx, _ := getTestIndex() + + ids := getTestIDs() + if tagIdx.idHasTag(ids[1], "key4", "value4") { + t.Fatalf("Expected false, but got true") + } + if !tagIdx.idHasTag(ids[2], "key4", "value4") { + t.Fatalf("Expected true, but got false") + } +} + func 
TestQueryByTagSimpleEqual(t *testing.T) { ids := getTestIDs() q, _ := tagquery.NewQueryFromStrings([]string{"key1=value1", "key3=value3"}, 0) From 3fbdf4994de3843c21721e07bfd73c9b853d1696 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Wed, 17 Jul 2019 19:18:04 -0400 Subject: [PATCH 17/40] cleanup, only syntax --- expr/tagquery/expression_equal.go | 9 +++++---- expr/tagquery/expression_has_tag.go | 4 ++-- expr/tagquery/expression_match.go | 19 ++++++++++--------- expr/tagquery/expression_match_all.go | 2 +- expr/tagquery/expression_match_none.go | 2 +- expr/tagquery/expression_match_tag.go | 16 ++++++++-------- expr/tagquery/expression_not_equal.go | 9 +++++---- expr/tagquery/expression_not_has_tag.go | 4 ++-- expr/tagquery/expression_not_match.go | 18 +++++++++--------- expr/tagquery/expression_prefix.go | 2 +- expr/tagquery/expression_prefix_tag.go | 2 +- 11 files changed, 45 insertions(+), 42 deletions(-) diff --git a/expr/tagquery/expression_equal.go b/expr/tagquery/expression_equal.go index 6b695b6b5c..783b4cf917 100644 --- a/expr/tagquery/expression_equal.go +++ b/expr/tagquery/expression_equal.go @@ -26,9 +26,10 @@ func (e *expressionEqual) GetMetricDefinitionFilter(lookup IdTagLookup) MetricDe if e.key == "name" { if e.value == "" { // every metric has a name, the value will never be empty - return func(id schema.MKey, name string, tags []string) FilterDecision { return Fail } + return func(_ schema.MKey, _ string, _ []string) FilterDecision { return Fail } } - return func(id schema.MKey, name string, tags []string) FilterDecision { + + return func(_ schema.MKey, name string, _ []string) FilterDecision { if name == e.value { return Pass } @@ -37,7 +38,7 @@ func (e *expressionEqual) GetMetricDefinitionFilter(lookup IdTagLookup) MetricDe } if !metaTagSupport { - return func(id schema.MKey, name string, tags []string) FilterDecision { + return func(id schema.MKey, _ string, _ []string) FilterDecision { if lookup(id, e.key, e.value) { return Pass } @@ -46,7 
+47,7 @@ func (e *expressionEqual) GetMetricDefinitionFilter(lookup IdTagLookup) MetricDe } prefix := e.key + "=" - return func(id schema.MKey, name string, tags []string) FilterDecision { + return func(id schema.MKey, _ string, tags []string) FilterDecision { if lookup(id, e.key, e.value) { return Pass } diff --git a/expr/tagquery/expression_has_tag.go b/expr/tagquery/expression_has_tag.go index 59846f9172..6e6725d551 100644 --- a/expr/tagquery/expression_has_tag.go +++ b/expr/tagquery/expression_has_tag.go @@ -28,7 +28,7 @@ func (e *expressionHasTag) ValuePasses(value string) bool { func (e *expressionHasTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter { if e.key == "name" { - return func(id schema.MKey, name string, tags []string) FilterDecision { return Pass } + return func(_ schema.MKey, _ string, _ []string) FilterDecision { return Pass } } resultIfTagIsAbsent := None @@ -37,7 +37,7 @@ func (e *expressionHasTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefini } matchPrefix := e.key + "=" - return func(id schema.MKey, name string, tags []string) FilterDecision { + return func(_ schema.MKey, _ string, tags []string) FilterDecision { for _, tag := range tags { if strings.HasPrefix(tag, matchPrefix) { return Pass diff --git a/expr/tagquery/expression_match.go b/expr/tagquery/expression_match.go index 881df25dcc..84dbacfc99 100644 --- a/expr/tagquery/expression_match.go +++ b/expr/tagquery/expression_match.go @@ -45,9 +45,10 @@ func (e *expressionMatch) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinit if e.key == "name" { if e.value == "" { // silly query, always fails - return func(id schema.MKey, name string, tags []string) FilterDecision { return Fail } + return func(_ schema.MKey, _ string, _ []string) FilterDecision { return Fail } } - return func(id schema.MKey, name string, tags []string) FilterDecision { + + return func(_ schema.MKey, name string, _ []string) FilterDecision { if 
e.valueRe.MatchString(schema.SanitizeNameAsTagValue(name)) { return Pass } else { @@ -64,7 +65,7 @@ func (e *expressionMatch) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinit prefix := e.key + "=" var matchCache, missCache sync.Map var currentMatchCacheSize, currentMissCacheSize int32 - return func(id schema.MKey, name string, tags []string) FilterDecision { + return func(_ schema.MKey, _ string, tags []string) FilterDecision { for _, tag := range tags { if !strings.HasPrefix(tag, prefix) { continue @@ -88,13 +89,13 @@ func (e *expressionMatch) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinit atomic.AddInt32(&currentMatchCacheSize, 1) } return Pass - } else { - if atomic.LoadInt32(&currentMissCacheSize) < int32(matchCacheSize) { - missCache.Store(value, struct{}{}) - atomic.AddInt32(&currentMissCacheSize, 1) - } - return Fail } + + if atomic.LoadInt32(&currentMissCacheSize) < int32(matchCacheSize) { + missCache.Store(value, struct{}{}) + atomic.AddInt32(&currentMissCacheSize, 1) + } + return Fail } return resultIfTagIsAbsent diff --git a/expr/tagquery/expression_match_all.go b/expr/tagquery/expression_match_all.go index 9fcc41cd70..aaf7896f62 100644 --- a/expr/tagquery/expression_match_all.go +++ b/expr/tagquery/expression_match_all.go @@ -41,7 +41,7 @@ func (e *expressionMatchAll) ValuePasses(value string) bool { } func (e *expressionMatchAll) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter { - return func(id schema.MKey, name string, tags []string) FilterDecision { return Pass } + return func(_ schema.MKey, _ string, _ []string) FilterDecision { return Pass } } func (e *expressionMatchAll) StringIntoBuilder(builder *strings.Builder) { diff --git a/expr/tagquery/expression_match_none.go b/expr/tagquery/expression_match_none.go index b8c6ec95c1..6f12c68376 100644 --- a/expr/tagquery/expression_match_none.go +++ b/expr/tagquery/expression_match_none.go @@ -41,7 +41,7 @@ func (e *expressionMatchNone) ValuePasses(value string) bool { } func (e *expressionMatchNone) 
GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter { - return func(id schema.MKey, name string, tags []string) FilterDecision { return Fail } + return func(_ schema.MKey, _ string, _ []string) FilterDecision { return Fail } } func (e *expressionMatchNone) StringIntoBuilder(builder *strings.Builder) { diff --git a/expr/tagquery/expression_match_tag.go b/expr/tagquery/expression_match_tag.go index 4b3e37ef87..2004e8d742 100644 --- a/expr/tagquery/expression_match_tag.go +++ b/expr/tagquery/expression_match_tag.go @@ -39,7 +39,7 @@ func (e *expressionMatchTag) ValuePasses(tag string) bool { func (e *expressionMatchTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter { if e.valueRe.Match([]byte("name")) { // every metric has a tag name, so we can always return Pass - return func(id schema.MKey, name string, tags []string) FilterDecision { return Pass } + return func(_ schema.MKey, _ string, _ []string) FilterDecision { return Pass } } resultIfTagIsAbsent := None @@ -50,7 +50,7 @@ func (e *expressionMatchTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefi var matchCache, missCache sync.Map var currentMatchCacheSize, currentMissCacheSize int32 - return func(id schema.MKey, name string, tags []string) FilterDecision { + return func(_ schema.MKey, _ string, tags []string) FilterDecision { for _, tag := range tags { values := strings.SplitN(tag, "=", 2) if len(values) < 2 { @@ -72,13 +72,13 @@ func (e *expressionMatchTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefi atomic.AddInt32(&currentMatchCacheSize, 1) } return Pass - } else { - if atomic.LoadInt32(&currentMissCacheSize) < int32(matchCacheSize) { - missCache.Store(value, struct{}{}) - atomic.AddInt32(&currentMissCacheSize, 1) - } - continue } + + if atomic.LoadInt32(&currentMissCacheSize) < int32(matchCacheSize) { + missCache.Store(value, struct{}{}) + atomic.AddInt32(&currentMissCacheSize, 1) + } + continue } return resultIfTagIsAbsent diff --git a/expr/tagquery/expression_not_equal.go 
b/expr/tagquery/expression_not_equal.go index 6493c168de..6ae8eae5d2 100644 --- a/expr/tagquery/expression_not_equal.go +++ b/expr/tagquery/expression_not_equal.go @@ -29,9 +29,10 @@ func (e *expressionNotEqual) ValuePasses(value string) bool { func (e *expressionNotEqual) GetMetricDefinitionFilter(lookup IdTagLookup) MetricDefinitionFilter { if e.key == "name" { if e.value == "" { - return func(id schema.MKey, name string, tags []string) FilterDecision { return Pass } + return func(_ schema.MKey, _ string, _ []string) FilterDecision { return Pass } } - return func(id schema.MKey, name string, tags []string) FilterDecision { + + return func(_ schema.MKey, name string, _ []string) FilterDecision { if schema.SanitizeNameAsTagValue(name) == e.value { return Fail } @@ -40,7 +41,7 @@ func (e *expressionNotEqual) GetMetricDefinitionFilter(lookup IdTagLookup) Metri } if !metaTagSupport { - return func(id schema.MKey, name string, tags []string) FilterDecision { + return func(id schema.MKey, _ string, _ []string) FilterDecision { if lookup(id, e.key, e.value) { return Fail } @@ -49,7 +50,7 @@ func (e *expressionNotEqual) GetMetricDefinitionFilter(lookup IdTagLookup) Metri } prefix := e.key + "=" - return func(id schema.MKey, name string, tags []string) FilterDecision { + return func(id schema.MKey, _ string, tags []string) FilterDecision { if lookup(id, e.key, e.value) { return Fail } diff --git a/expr/tagquery/expression_not_has_tag.go b/expr/tagquery/expression_not_has_tag.go index b7a5edd961..8d1bda0505 100644 --- a/expr/tagquery/expression_not_has_tag.go +++ b/expr/tagquery/expression_not_has_tag.go @@ -32,7 +32,7 @@ func (e *expressionNotHasTag) ValuePasses(value string) bool { func (e *expressionNotHasTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter { if e.key == "name" { - return func(id schema.MKey, name string, tags []string) FilterDecision { return Fail } + return func(_ schema.MKey, _ string, _ []string) FilterDecision { return Fail } } 
resultIfTagIsAbsent := None @@ -41,7 +41,7 @@ func (e *expressionNotHasTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDef } matchPrefix := e.key + "=" - return func(id schema.MKey, name string, tags []string) FilterDecision { + return func(_ schema.MKey, _ string, tags []string) FilterDecision { for _, tag := range tags { if strings.HasPrefix(tag, matchPrefix) { return Fail diff --git a/expr/tagquery/expression_not_match.go b/expr/tagquery/expression_not_match.go index b5b9eca23a..2e3e228c9e 100644 --- a/expr/tagquery/expression_not_match.go +++ b/expr/tagquery/expression_not_match.go @@ -45,10 +45,10 @@ func (e *expressionNotMatch) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefi if e.key == "name" { if e.value == "" { // every metric has a name - return func(id schema.MKey, name string, tags []string) FilterDecision { return Pass } + return func(_ schema.MKey, _ string, _ []string) FilterDecision { return Pass } } - return func(id schema.MKey, name string, tags []string) FilterDecision { + return func(_ schema.MKey, name string, _ []string) FilterDecision { if e.valueRe.MatchString(schema.SanitizeNameAsTagValue(name)) { return Fail } @@ -64,7 +64,7 @@ func (e *expressionNotMatch) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefi prefix := e.key + "=" var matchCache, missCache sync.Map var currentMatchCacheSize, currentMissCacheSize int32 - return func(id schema.MKey, name string, tags []string) FilterDecision { + return func(_ schema.MKey, _ string, tags []string) FilterDecision { for _, tag := range tags { if !strings.HasPrefix(tag, prefix) { continue @@ -88,13 +88,13 @@ func (e *expressionNotMatch) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefi atomic.AddInt32(&currentMatchCacheSize, 1) } return Fail - } else { - if atomic.LoadInt32(&currentMissCacheSize) < int32(matchCacheSize) { - missCache.Store(value, struct{}{}) - atomic.AddInt32(&currentMissCacheSize, 1) - } - return Pass } + + if atomic.LoadInt32(&currentMissCacheSize) < int32(matchCacheSize) { + 
missCache.Store(value, struct{}{}) + atomic.AddInt32(&currentMissCacheSize, 1) + } + return Pass } return resultIfTagIsAbsent diff --git a/expr/tagquery/expression_prefix.go b/expr/tagquery/expression_prefix.go index dc252b5ffe..084a2eb506 100644 --- a/expr/tagquery/expression_prefix.go +++ b/expr/tagquery/expression_prefix.go @@ -33,7 +33,7 @@ func (e *expressionPrefix) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefini matchString := prefix + e.value if e.key == "name" { - return func(id schema.MKey, name string, tags []string) FilterDecision { + return func(_ schema.MKey, name string, _ []string) FilterDecision { if strings.HasPrefix(schema.SanitizeNameAsTagValue(name), e.value) { return Pass } diff --git a/expr/tagquery/expression_prefix_tag.go b/expr/tagquery/expression_prefix_tag.go index b4e2adb835..a71cc67f84 100644 --- a/expr/tagquery/expression_prefix_tag.go +++ b/expr/tagquery/expression_prefix_tag.go @@ -35,7 +35,7 @@ func (e *expressionPrefixTag) ValuePasses(tag string) bool { func (e *expressionPrefixTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter { if strings.HasPrefix("name", e.value) { // every metric has a name - return func(id schema.MKey, name string, tags []string) FilterDecision { return Pass } + return func(_ schema.MKey, _ string, _ []string) FilterDecision { return Pass } } resultIfTagIsAbsent := None From 4f6ed8fd5c1c5c89649cc464e8fbe30283af8454 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Thu, 18 Jul 2019 09:39:15 -0400 Subject: [PATCH 18/40] use cost estimation this allows us to minimize the number of ids that need to get filtered by the filters, while at the same time still taking the execution cost of each filter into account. f.e. 
if we have the choice between first calling a filter which uses regex or first calling a filter that doesn't use regex, we'd first want to call the one that's not using regex and let it reduce the size of the potential result set, that way the regex-using filter would later only get applied on a smaller set of potential results. --- expr/tagquery/expression.go | 58 ++----------------------- expr/tagquery/expression_common.go | 4 ++ expr/tagquery/expression_equal.go | 8 ++++ expr/tagquery/expression_has_tag.go | 4 ++ expr/tagquery/expression_match.go | 4 ++ expr/tagquery/expression_match_all.go | 4 ++ expr/tagquery/expression_match_none.go | 4 ++ expr/tagquery/expression_match_tag.go | 4 ++ expr/tagquery/expression_not_equal.go | 8 ++++ expr/tagquery/expression_not_has_tag.go | 4 ++ expr/tagquery/expression_not_match.go | 4 ++ expr/tagquery/expression_prefix.go | 4 ++ expr/tagquery/expression_prefix_tag.go | 4 ++ expr/tagquery/query.go | 36 --------------- idx/memory/memory.go | 6 --- idx/memory/meta_tags.go | 1 - idx/memory/tag_query.go | 54 +++++++++++++++++++---- idx/memory/tag_query_test.go | 4 -- 18 files changed, 106 insertions(+), 109 deletions(-) diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index a53902663b..31b77fdeaf 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -3,7 +3,6 @@ package tagquery import ( "fmt" "regexp" - "sort" "strings" "github.com/raintank/schema" @@ -25,59 +24,6 @@ func ParseExpressions(expressions []string) (Expressions, error) { return res, nil } -// SortByFilterOrder sorts all the expressions first by operator -// roughly in cost-increaseing order when they are used as filters, -// then by key, then by value -func (e Expressions) SortByFilterOrder() { - costByOperator := map[ExpressionOperator]int{ - MATCH_NONE: 0, - EQUAL: 1, - HAS_TAG: 2, - PREFIX: 3, - PREFIX_TAG: 4, - NOT_EQUAL: 5, - NOT_HAS_TAG: 6, - MATCH: 7, - MATCH_TAG: 8, - NOT_MATCH: 9, - MATCH_ALL: 10, - } - - 
sort.Slice(e, func(i, j int) bool { - if e[i].GetOperator() == e[j].GetOperator() { - if e[i].GetKey() == e[j].GetKey() { - return e[i].GetValue() < e[j].GetValue() - } - return e[i].GetKey() < e[j].GetKey() - } - return costByOperator[e[i].GetOperator()] < costByOperator[e[j].GetOperator()] - }) -} - -// findInitialExpression returns the id of the expression which is the -// most suitable to start the query execution with. the chosen expression -// should be as cheap as possible and it must require a non-empty value -func (e Expressions) findInitialExpression() int { - // order of preference to start with the viable operators - for _, op := range []ExpressionOperator{ - MATCH_NONE, - EQUAL, - HAS_TAG, - PREFIX, - PREFIX_TAG, - MATCH, - MATCH_TAG, - MATCH_ALL, - } { - for i := range e { - if e[i].GetOperator() == op && e[i].RequiresNonEmptyValue() { - return i - } - } - } - return -1 -} - func (e Expressions) Strings() []string { builder := strings.Builder{} res := make([]string, len(e)) @@ -143,6 +89,8 @@ type Expression interface { // GetOperator returns the operator of this expression GetOperator() ExpressionOperator + GetCostMultiplier() uint32 + // HasRe indicates whether the evaluation of this expression involves regular expressions HasRe() bool @@ -160,6 +108,8 @@ type Expression interface { // this expression ValuePasses(string) bool + ValueMatchesExactly() bool + // GetMetricDefinitionFilter returns a MetricDefinitionFilter // The MetricDefinitionFilter takes a metric definition, looks at its tags and returns a decision // regarding this query expression applied to its tags diff --git a/expr/tagquery/expression_common.go b/expr/tagquery/expression_common.go index 76be9e7183..7fa1ab9d28 100644 --- a/expr/tagquery/expression_common.go +++ b/expr/tagquery/expression_common.go @@ -30,6 +30,10 @@ func (e *expressionCommon) RequiresNonEmptyValue() bool { return true } +func (e *expressionCommon) ValueMatchesExactly() bool { + return false +} + // 
expressionCommonRe is an extended version of expressionCommon with additional // properties for operators that use regular expressions type expressionCommonRe struct { diff --git a/expr/tagquery/expression_equal.go b/expr/tagquery/expression_equal.go index 783b4cf917..716391aafd 100644 --- a/expr/tagquery/expression_equal.go +++ b/expr/tagquery/expression_equal.go @@ -18,10 +18,18 @@ func (e *expressionEqual) GetOperator() ExpressionOperator { return EQUAL } +func (e *expressionEqual) GetCostMultiplier() uint32 { + return 1 +} + func (e *expressionEqual) ValuePasses(value string) bool { return value == e.value } +func (e *expressionEqual) ValueMatchesExactly() bool { + return true +} + func (e *expressionEqual) GetMetricDefinitionFilter(lookup IdTagLookup) MetricDefinitionFilter { if e.key == "name" { if e.value == "" { diff --git a/expr/tagquery/expression_has_tag.go b/expr/tagquery/expression_has_tag.go index 6e6725d551..9a58c04547 100644 --- a/expr/tagquery/expression_has_tag.go +++ b/expr/tagquery/expression_has_tag.go @@ -18,6 +18,10 @@ func (e *expressionHasTag) GetOperator() ExpressionOperator { return HAS_TAG } +func (e *expressionHasTag) GetCostMultiplier() uint32 { + return 2 +} + func (e *expressionHasTag) OperatesOnTag() bool { return true } diff --git a/expr/tagquery/expression_match.go b/expr/tagquery/expression_match.go index 84dbacfc99..8582dc55a0 100644 --- a/expr/tagquery/expression_match.go +++ b/expr/tagquery/expression_match.go @@ -29,6 +29,10 @@ func (e *expressionMatch) GetOperator() ExpressionOperator { return MATCH } +func (e *expressionMatch) GetCostMultiplier() uint32 { + return 0 +} + func (e *expressionMatch) HasRe() bool { return true } diff --git a/expr/tagquery/expression_match_all.go b/expr/tagquery/expression_match_all.go index aaf7896f62..416e059d03 100644 --- a/expr/tagquery/expression_match_all.go +++ b/expr/tagquery/expression_match_all.go @@ -28,6 +28,10 @@ func (e *expressionMatchAll) GetOperator() ExpressionOperator { return 
MATCH_ALL } +func (e *expressionMatchAll) GetCostMultiplier() uint32 { + return 50 +} + func (e *expressionMatchAll) HasRe() bool { return false } diff --git a/expr/tagquery/expression_match_none.go b/expr/tagquery/expression_match_none.go index 6f12c68376..011411d5fa 100644 --- a/expr/tagquery/expression_match_none.go +++ b/expr/tagquery/expression_match_none.go @@ -28,6 +28,10 @@ func (e *expressionMatchNone) GetOperator() ExpressionOperator { return MATCH_NONE } +func (e *expressionMatchNone) GetCostMultiplier() uint32 { + return 0 +} + func (e *expressionMatchNone) HasRe() bool { return false } diff --git a/expr/tagquery/expression_match_tag.go b/expr/tagquery/expression_match_tag.go index 2004e8d742..dec3ce1e17 100644 --- a/expr/tagquery/expression_match_tag.go +++ b/expr/tagquery/expression_match_tag.go @@ -20,6 +20,10 @@ func (e *expressionMatchTag) GetOperator() ExpressionOperator { return MATCH_TAG } +func (e *expressionMatchTag) GetCostMultiplier() uint32 { + return 20 +} + func (e *expressionMatchTag) HasRe() bool { return true } diff --git a/expr/tagquery/expression_not_equal.go b/expr/tagquery/expression_not_equal.go index 6ae8eae5d2..1b13cd1cef 100644 --- a/expr/tagquery/expression_not_equal.go +++ b/expr/tagquery/expression_not_equal.go @@ -18,6 +18,10 @@ func (e *expressionNotEqual) GetOperator() ExpressionOperator { return NOT_EQUAL } +func (e *expressionNotEqual) GetCostMultiplier() uint32 { + return 1 +} + func (e *expressionNotEqual) RequiresNonEmptyValue() bool { return false } @@ -26,6 +30,10 @@ func (e *expressionNotEqual) ValuePasses(value string) bool { return value != e.value } +func (e *expressionNotEqual) ValueMatchesExactly() bool { + return true +} + func (e *expressionNotEqual) GetMetricDefinitionFilter(lookup IdTagLookup) MetricDefinitionFilter { if e.key == "name" { if e.value == "" { diff --git a/expr/tagquery/expression_not_has_tag.go b/expr/tagquery/expression_not_has_tag.go index 8d1bda0505..20ed365123 100644 --- 
a/expr/tagquery/expression_not_has_tag.go +++ b/expr/tagquery/expression_not_has_tag.go @@ -18,6 +18,10 @@ func (e *expressionNotHasTag) GetOperator() ExpressionOperator { return NOT_HAS_TAG } +func (e *expressionNotHasTag) GetCostMultiplier() uint32 { + return 2 +} + func (e *expressionNotHasTag) OperatesOnTag() bool { return true } diff --git a/expr/tagquery/expression_not_match.go b/expr/tagquery/expression_not_match.go index 2e3e228c9e..12aa61cb3d 100644 --- a/expr/tagquery/expression_not_match.go +++ b/expr/tagquery/expression_not_match.go @@ -29,6 +29,10 @@ func (e *expressionNotMatch) GetOperator() ExpressionOperator { return NOT_MATCH } +func (e *expressionNotMatch) GetCostMultiplier() uint32 { + return 10 +} + func (e *expressionNotMatch) HasRe() bool { return true } diff --git a/expr/tagquery/expression_prefix.go b/expr/tagquery/expression_prefix.go index 084a2eb506..5889c10f6c 100644 --- a/expr/tagquery/expression_prefix.go +++ b/expr/tagquery/expression_prefix.go @@ -18,6 +18,10 @@ func (e *expressionPrefix) GetOperator() ExpressionOperator { return PREFIX } +func (e *expressionPrefix) GetCostMultiplier() uint32 { + return 2 +} + func (e *expressionPrefix) RequiresNonEmptyValue() bool { // we know it requires an non-empty value, because the expression // "__tag^=" would get parsed into the type expressionMatchAll diff --git a/expr/tagquery/expression_prefix_tag.go b/expr/tagquery/expression_prefix_tag.go index a71cc67f84..5f3da621b6 100644 --- a/expr/tagquery/expression_prefix_tag.go +++ b/expr/tagquery/expression_prefix_tag.go @@ -18,6 +18,10 @@ func (e *expressionPrefixTag) GetOperator() ExpressionOperator { return PREFIX_TAG } +func (e *expressionPrefixTag) GetCostMultiplier() uint32 { + return 3 +} + func (e *expressionPrefixTag) OperatesOnTag() bool { return true } diff --git a/expr/tagquery/query.go b/expr/tagquery/query.go index a7a2886ffe..3fabefeb78 100644 --- a/expr/tagquery/query.go +++ b/expr/tagquery/query.go @@ -17,9 +17,6 @@ type Query 
struct { // slice of expressions sorted by the estimated cost of their operators Expressions Expressions - // the index in the Expressions slice at which we start evaluating the query - startWith int - // the index of clause that operate on tags (keys) // we only support 0 or 1 tag expression per query // tag expressions are __tag^= and __tag=~ @@ -43,7 +40,6 @@ func NewQuery(expressions Expressions, from int64) (Query, error) { } foundExpressionRequiringNonEmptyValue := false - expressions.SortByFilterOrder() for i := 0; i < len(expressions); i++ { // skip duplicate expression if i > 0 && ExpressionsAreEqual(expressions[i], expressions[i-1]) { @@ -73,44 +69,12 @@ func NewQuery(expressions Expressions, from int64) (Query, error) { } q.Expressions = expressions - q.startWith = q.Expressions.findInitialExpression() - if q.startWith < 0 { - return q, errInvalidQuery - } return q, nil } -// GetMetricDefinitionFilters returns all the metric definition filters associated with this -// query, together with their according default decision -// The returned filters get generated from the query expressions, excluding the one which has -// been dedicated to be the initial expression (marked via the .startWith index) -func (q *Query) GetMetricDefinitionFilters(lookup IdTagLookup) ([]MetricDefinitionFilter, []FilterDecision) { - var filters []MetricDefinitionFilter - var defaultDecisions []FilterDecision - for i := range q.Expressions { - // the one we start with does not need to be added to the filters, - // because we use it to build the initial result set - if i == q.startWith { - continue - } - filters = append(filters, q.Expressions[i].GetMetricDefinitionFilter(lookup)) - defaultDecisions = append(defaultDecisions, q.Expressions[i].GetDefaultDecision()) - } - - return filters, defaultDecisions -} - type IdTagLookup func(id schema.MKey, tag, value string) bool -// GetInitialExpression returns the expression which should be used to generate the initial -// result set, to 
later filter it down with the remaining expressions. -// We assume Query has been instantiated via NewQuery(), in which case it is guaranteed that -// that .startWith has been set correctly or otherwise an error would have been returned -func (q *Query) GetInitialExpression() Expression { - return q.Expressions[q.startWith] -} - // GetTagClause returns the expression which operates on tags, if one is present. // This assumes that Query has been instantiated via NewQuery(), which either sets // .tagClause to a valid value or returns an error. diff --git a/idx/memory/memory.go b/idx/memory/memory.go index d807c242c1..05e42b1b3e 100755 --- a/idx/memory/memory.go +++ b/idx/memory/memory.go @@ -884,8 +884,6 @@ func (m *UnpartitionedMemoryIdx) FindTagsWithQuery(orgId uint32, prefix string, return nil } - queryCtx.prepareFilters(tags.idHasTag) - // probably allocating more than necessary, still better than growing res := make([]string, 0, len(tags)) @@ -956,8 +954,6 @@ func (m *UnpartitionedMemoryIdx) FindTagValuesWithQuery(orgId uint32, tag, prefi return nil } - queryCtx.prepareFilters(tags.idHasTag) - ids := queryCtx.Run(tags, m.defById) valueMap := make(map[string]struct{}) tagPrefix := tag + "=" + prefix @@ -1125,8 +1121,6 @@ func (m *UnpartitionedMemoryIdx) idsByTagQuery(orgId uint32, query TagQueryConte return nil } - query.prepareFilters(tags.idHasTag) - return query.Run(tags, m.defById) } diff --git a/idx/memory/meta_tags.go b/idx/memory/meta_tags.go index 205ba85588..ea523badf6 100644 --- a/idx/memory/meta_tags.go +++ b/idx/memory/meta_tags.go @@ -75,7 +75,6 @@ func (m metaTagRecords) upsert(record tagquery.MetaTagRecord) (recordId, *tagque // hashMetaTagRecord generates a hash of all the queries in the record func (m *metaTagRecords) hashMetaTagRecord(record tagquery.MetaTagRecord) recordId { - record.Expressions.SortByFilterOrder() builder := strings.Builder{} for _, query := range record.Expressions { query.StringIntoBuilder(&builder) diff --git 
a/idx/memory/tag_query.go b/idx/memory/tag_query.go index 2ed4d2e382..a2a71f84ab 100644 --- a/idx/memory/tag_query.go +++ b/idx/memory/tag_query.go @@ -2,6 +2,7 @@ package memory import ( "math" + "sort" "strings" "sync" "sync/atomic" @@ -24,22 +25,57 @@ type TagQueryContext struct { filters []tagquery.MetricDefinitionFilter defaultDecisions []tagquery.FilterDecision - index TagIndex // the tag index, hierarchy of tags & values, set by Run()/RunGetTags() - byId map[schema.MKey]*idx.Archive // the metric index by ID, set by Run()/RunGetTags() + index TagIndex // the tag index, hierarchy of tags & values, set by Run()/RunGetTags() + byId map[schema.MKey]*idx.Archive // the metric index by ID, set by Run()/RunGetTags() + startWith int // the expression index to start with } // NewTagQueryContext takes a tag query and wraps it into all the // context structs necessary to execute the query on the indexes func NewTagQueryContext(query tagquery.Query) TagQueryContext { ctx := TagQueryContext{ - query: query, + query: query, + startWith: -1, } return ctx } -func (q *TagQueryContext) prepareFilters(lookup tagquery.IdTagLookup) { - q.filters, q.defaultDecisions = q.query.GetMetricDefinitionFilters(lookup) +func (q *TagQueryContext) prepareExpressions(idx TagIndex) { + type expressionCost struct { + cost uint32 + expressionIdx int + } + costs := make([]expressionCost, len(q.query.Expressions)) + + for i, expr := range q.query.Expressions { + costs[i].expressionIdx = i + if expr.ValueMatchesExactly() { + costs[i].cost = uint32(len(idx[expr.GetKey()][expr.GetValue()])) * expr.GetCostMultiplier() + } else { + if expr.OperatesOnTag() { + costs[i].cost = uint32(len(idx)) * expr.GetCostMultiplier() + } else { + costs[i].cost = uint32(len(idx[expr.GetKey()])) * expr.GetCostMultiplier() + } + } + } + + sort.Slice(costs, func(i, j int) bool { return costs[i].cost < costs[j].cost }) + + q.filters = make([]tagquery.MetricDefinitionFilter, len(q.query.Expressions)-1) + q.defaultDecisions 
= make([]tagquery.FilterDecision, len(q.query.Expressions)-1) + + i := 0 + for _, cost := range costs { + if q.startWith < 0 && q.query.Expressions[cost.expressionIdx].RequiresNonEmptyValue() { + q.startWith = cost.expressionIdx + } else { + q.filters[i] = q.query.Expressions[cost.expressionIdx].GetMetricDefinitionFilter(idx.idHasTag) + q.defaultDecisions[i] = q.query.Expressions[cost.expressionIdx].GetDefaultDecision() + i++ + } + } } // getInitialIds asynchronously collects all ID's of the initial result set. It returns: @@ -49,7 +85,7 @@ func (q *TagQueryContext) getInitialIds() (chan schema.MKey, chan struct{}) { idCh := make(chan schema.MKey, 1000) stopCh := make(chan struct{}) - if q.query.GetInitialExpression().OperatesOnTag() { + if q.query.Expressions[q.startWith].OperatesOnTag() { q.getInitialByTag(idCh, stopCh) } else { q.getInitialByTagValue(idCh, stopCh) @@ -62,7 +98,7 @@ func (q *TagQueryContext) getInitialIds() (chan schema.MKey, chan struct{}) { // it only handles those expressions which involve matching a tag value: // f.e. key=value but not key!= func (q *TagQueryContext) getInitialByTagValue(idCh chan schema.MKey, stopCh chan struct{}) { - expr := q.query.GetInitialExpression() + expr := q.query.Expressions[q.startWith] q.wg.Add(1) go func() { @@ -92,7 +128,7 @@ func (q *TagQueryContext) getInitialByTagValue(idCh chan schema.MKey, stopCh cha // it only handles those expressions which do not involve matching a tag value: // f.e. 
key!= but not key=value func (q *TagQueryContext) getInitialByTag(idCh chan schema.MKey, stopCh chan struct{}) { - expr := q.query.GetInitialExpression() + expr := q.query.Expressions[q.startWith] q.wg.Add(1) go func() { @@ -181,6 +217,7 @@ func (q *TagQueryContext) filterIdsFromChan(idCh, resCh chan schema.MKey) { func (q *TagQueryContext) Run(index TagIndex, byId map[schema.MKey]*idx.Archive) IdSet { q.index = index q.byId = byId + q.prepareExpressions(index) idCh, _ := q.getInitialIds() resCh := make(chan schema.MKey) @@ -339,6 +376,7 @@ func (q *TagQueryContext) tagFilterMatchesName() bool { func (q *TagQueryContext) RunGetTags(index TagIndex, byId map[schema.MKey]*idx.Archive) map[string]struct{} { q.index = index q.byId = byId + q.prepareExpressions(index) maxTagCount := int32(math.MaxInt32) diff --git a/idx/memory/tag_query_test.go b/idx/memory/tag_query_test.go index 40afe4925a..bfba5c83b4 100644 --- a/idx/memory/tag_query_test.go +++ b/idx/memory/tag_query_test.go @@ -77,8 +77,6 @@ func queryAndCompareTagResults(t *testing.T, q TagQueryContext, expectedData map t.Helper() tagIdx, byId := getTestIndex() - q.prepareFilters(tagIdx.idHasTag) - res := q.RunGetTags(tagIdx, byId) if !reflect.DeepEqual(expectedData, res) { t.Fatalf("Expected: %+v\nGot: %+v", expectedData, res) @@ -89,8 +87,6 @@ func queryAndCompareResults(t *testing.T, q TagQueryContext, expectedData IdSet) t.Helper() tagIdx, byId := getTestIndex() - q.prepareFilters(tagIdx.idHasTag) - res := q.Run(tagIdx, byId) if !reflect.DeepEqual(expectedData, res) { From 4a5892bb403894a376e74964fced81a4d9f0d133 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Thu, 18 Jul 2019 09:58:58 -0400 Subject: [PATCH 19/40] use direct key lookup if possible --- idx/memory/tag_query.go | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/idx/memory/tag_query.go b/idx/memory/tag_query.go index a2a71f84ab..60d617c2be 100644 --- a/idx/memory/tag_query.go +++ 
b/idx/memory/tag_query.go @@ -107,19 +107,31 @@ func (q *TagQueryContext) getInitialByTagValue(idCh chan schema.MKey, stopCh cha key := expr.GetKey() - OUTER: - for value, ids := range q.index[key] { - if !expr.ValuePasses(value) { - continue - } + if expr.ValueMatchesExactly() { + value := expr.GetValue() - for id := range ids { + for id := range q.index[key][value] { select { case <-stopCh: - break OUTER + break case idCh <- id: } } + } else { + OUTER: + for value, ids := range q.index[key] { + if !expr.ValuePasses(value) { + continue + } + + for id := range ids { + select { + case <-stopCh: + break OUTER + case idCh <- id: + } + } + } } }() } From dee5a34fd82529671e68527ec26fc8444f02b0ac Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Thu, 18 Jul 2019 10:26:22 -0400 Subject: [PATCH 20/40] fix tests --- expr/tagquery/expression.go | 13 +++++++++ expr/tagquery/expression_test.go | 22 --------------- expr/tagquery/query.go | 1 + expr/tagquery/query_test.go | 48 +++++++++++++++----------------- 4 files changed, 36 insertions(+), 48 deletions(-) diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index 31b77fdeaf..ab58e3dc3a 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -3,6 +3,7 @@ package tagquery import ( "fmt" "regexp" + "sort" "strings" "github.com/raintank/schema" @@ -35,6 +36,18 @@ func (e Expressions) Strings() []string { return res } +func (e Expressions) Sort() { + sort.Slice(e, func(i, j int) bool { + if e[i].GetKey() == e[j].GetKey() { + if e[i].GetOperator() == e[j].GetOperator() { + return e[i].GetValue() < e[j].GetValue() + } + return e[i].GetOperator() < e[j].GetOperator() + } + return e[i].GetKey() < e[j].GetKey() + }) +} + type Expression interface { // GetDefaultDecision defines what decision should be made if the filter has not come to a conclusive // decision based on a single index. 
When looking at more than one tag index in order of decreasing diff --git a/expr/tagquery/expression_test.go b/expr/tagquery/expression_test.go index 231e44eb9f..756a8b024b 100644 --- a/expr/tagquery/expression_test.go +++ b/expr/tagquery/expression_test.go @@ -2,7 +2,6 @@ package tagquery import ( "fmt" - "reflect" "strings" "testing" ) @@ -193,27 +192,6 @@ func TestExpressionParsing(t *testing.T) { } } -func TestExpressions_Sort(t *testing.T) { - tests := make([]struct { - name string - have Expressions - want Expressions - }, 1) - - tests[0].name = "simple sort" - tests[0].have, _ = ParseExpressions([]string{"a!=a", "b=a", "a=b", "a=a"}) - tests[0].want, _ = ParseExpressions([]string{"a=a", "a=b", "b=a", "a!=a"}) - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - tt.have.SortByFilterOrder() - if !reflect.DeepEqual(tt.have, tt.want) { - t.Fatalf("Expected expressions to be sorted:\nExpected:\n%+v\nGot:\n%+v\n", tt.want, tt.have) - } - }) - } -} - func TestExpressionsParsingAndBackToString(t *testing.T) { tests := make([]struct { got string diff --git a/expr/tagquery/query.go b/expr/tagquery/query.go index 3fabefeb78..44d8ba66a1 100644 --- a/expr/tagquery/query.go +++ b/expr/tagquery/query.go @@ -39,6 +39,7 @@ func NewQuery(expressions Expressions, from int64) (Query, error) { return q, errInvalidQuery } + expressions.Sort() foundExpressionRequiringNonEmptyValue := false for i := 0; i < len(expressions); i++ { // skip duplicate expression diff --git a/expr/tagquery/query_test.go b/expr/tagquery/query_test.go index 1b5a07f27a..e80ec4fa1d 100644 --- a/expr/tagquery/query_test.go +++ b/expr/tagquery/query_test.go @@ -39,22 +39,19 @@ func TestNewQueryFromStrings(t *testing.T) { want: Query{ From: 321, Expressions: Expressions{ - &expressionEqual{ - expressionCommon{ - key: "a", - value: "b", + &expressionMatchTag{ + expressionCommonRe{ + expressionCommon: expressionCommon{ + key: "__tag", + value: "^(?:k)", + }, + valueRe: nil, }, }, 
&expressionEqual{ expressionCommon{ - key: "x", - value: "z", - }, - }, - &expressionPrefix{ - expressionCommon{ - key: "i", - value: "j", + key: "a", + value: "b", }, }, &expressionNotEqual{ @@ -72,15 +69,6 @@ func TestNewQueryFromStrings(t *testing.T) { valueRe: nil, }, }, - &expressionMatchTag{ - expressionCommonRe{ - expressionCommon: expressionCommon{ - key: "__tag", - value: "^(?:k)", - }, - valueRe: nil, - }, - }, &expressionNotMatch{ expressionCommonRe{ expressionCommon: expressionCommon{ @@ -90,9 +78,20 @@ func TestNewQueryFromStrings(t *testing.T) { valueRe: nil, }, }, + &expressionPrefix{ + expressionCommon{ + key: "i", + value: "j", + }, + }, + &expressionEqual{ + expressionCommon{ + key: "x", + value: "z", + }, + }, }, - tagClause: 5, - startWith: 0, + tagClause: 0, }, }, { name: "test tag prefix with empty value", @@ -137,7 +136,6 @@ func TestNewQueryFromStrings(t *testing.T) { }, }, tagClause: -1, - startWith: 0, }, }, { name: "missing an expression that requires non empty value because prefix matches empty value", @@ -168,7 +166,6 @@ func TestNewQueryFromStrings(t *testing.T) { }, }, }, - startWith: 0, tagClause: -1, }, }, { @@ -191,7 +188,6 @@ func TestNewQueryFromStrings(t *testing.T) { }, }, }, - startWith: 0, tagClause: -1, }, }, From 480706f647d54bd7f6d35424487168d9f9590770 Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Mon, 29 Jul 2019 15:49:30 -0400 Subject: [PATCH 21/40] cleaner way to build error --- expr/tagquery/expression.go | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index ab58e3dc3a..7868dc988f 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -9,7 +9,11 @@ import ( "github.com/raintank/schema" ) -const invalidExpressionError = "Invalid expression: %s" +type InvalidExpressionError string + +func (i InvalidExpressionError) Error() string { + return fmt.Sprintf("Invalid expression: %q", i) +} type 
Expressions []Expression @@ -152,13 +156,13 @@ FIND_OPERATOR: prefix = true break FIND_OPERATOR case ';': - return nil, fmt.Errorf(invalidExpressionError, expr) + return nil, InvalidExpressionError(expr) } } // key must not be empty if pos == 0 { - return nil, fmt.Errorf(invalidExpressionError, expr) + return nil, InvalidExpressionError(expr) } resCommon.key = expr[:pos] @@ -173,14 +177,14 @@ FIND_OPERATOR: } if len(expr) <= pos || expr[pos] != '=' { - return nil, fmt.Errorf(invalidExpressionError, expr) + return nil, InvalidExpressionError(expr) } pos++ if len(expr) > pos && expr[pos] == '~' { // ^=~ is not a valid operator if prefix { - return nil, fmt.Errorf(invalidExpressionError, expr) + return nil, InvalidExpressionError(expr) } regex = true pos++ @@ -190,7 +194,7 @@ FIND_OPERATOR: for ; pos < len(expr); pos++ { // disallow ; in value if expr[pos] == 59 { - return nil, fmt.Errorf(invalidExpressionError, expr) + return nil, InvalidExpressionError(expr) } } resCommon.value = expr[valuePos:] @@ -222,7 +226,7 @@ FIND_OPERATOR: // currently ! 
(not) queries on tags are not supported // and unlike normal queries a value must be set if not { - return nil, fmt.Errorf(invalidExpressionError, expr) + return nil, InvalidExpressionError(expr) } switch effectiveOperator { @@ -240,7 +244,7 @@ FIND_OPERATOR: } case EQUAL: if len(resCommon.value) == 0 { - return nil, fmt.Errorf(invalidExpressionError, expr) + return nil, InvalidExpressionError(expr) } // "__tag=abc", should internatlly be translated into "abc!=" @@ -301,7 +305,7 @@ FIND_OPERATOR: return &expressionNotMatch{expressionCommonRe: expressionCommonRe{expressionCommon: resCommon, valueRe: valueRe, matchesEmpty: matchesEmpty}}, nil case MATCH_TAG: if matchesEmpty { - return nil, fmt.Errorf(invalidExpressionError, expr) + return nil, InvalidExpressionError(expr) } return &expressionMatchTag{expressionCommonRe: expressionCommonRe{expressionCommon: resCommon, valueRe: valueRe, matchesEmpty: matchesEmpty}}, nil } From d5da8f14c27dbfce62e17396608076904feb9d62 Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Mon, 29 Jul 2019 17:23:10 -0400 Subject: [PATCH 22/40] better comments --- expr/tagquery/expression.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index 7868dc988f..466dc006ea 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -98,7 +98,7 @@ type Expression interface { // in the expression "tag1=value" GetKey() would return "tag1" and OperatesOnTag() returns "false" GetKey() string - // GetValue the value part of the expression + // GetValue returns the value part of the expression // example: // in the expression "abc!=cba" this would return "cba" GetValue() string @@ -111,18 +111,16 @@ type Expression interface { // HasRe indicates whether the evaluation of this expression involves regular expressions HasRe() bool - // OperatesOnTag returns true if this expression operators on the tag keys, - // or false if it operates on the 
values + // OperatesOnTag returns whether this expression operators on the tag key + // (if not, it operates on the value) OperatesOnTag() bool - // RequiresNonEmptyValue returns boolean indicating whether this expression requires a non-empty - // value. Every query must have at least one expression requiring a non-empty value, otherwise - // the query is considered invalid + // RequiresNonEmptyValue returns whether this expression requires a non-empty value. + // Every valid query must have at least one expression requiring a non-empty value. RequiresNonEmptyValue() bool // ValuePasses takes a string which should either be a tag key or value depending on the return - // value of OperatesOnTag(), then it returns a bool to indicate whether the given value satisfies - // this expression + // value of OperatesOnTag(), then it returns whether whether the given value satisfies this expression ValuePasses(string) bool ValueMatchesExactly() bool From e3be86e0ea624a891129c8f2ffd1e20cf37a0d6a Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Mon, 29 Jul 2019 17:26:16 -0400 Subject: [PATCH 23/40] remove dead code 'HasRe' --- expr/tagquery/expression.go | 3 --- expr/tagquery/expression_common.go | 5 ----- expr/tagquery/expression_match.go | 4 ---- expr/tagquery/expression_match_all.go | 4 ---- expr/tagquery/expression_match_none.go | 4 ---- expr/tagquery/expression_match_tag.go | 4 ---- expr/tagquery/expression_not_match.go | 4 ---- 7 files changed, 28 deletions(-) diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index 466dc006ea..dcf0cfd422 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -108,9 +108,6 @@ type Expression interface { GetCostMultiplier() uint32 - // HasRe indicates whether the evaluation of this expression involves regular expressions - HasRe() bool - // OperatesOnTag returns whether this expression operators on the tag key // (if not, it operates on the value) OperatesOnTag() bool diff --git 
a/expr/tagquery/expression_common.go b/expr/tagquery/expression_common.go index 7fa1ab9d28..21ba34b29d 100644 --- a/expr/tagquery/expression_common.go +++ b/expr/tagquery/expression_common.go @@ -15,11 +15,6 @@ func (e *expressionCommon) GetValue() string { return e.value } -func (e *expressionCommon) HasRe() bool { - // by default assume false, unless a concrete type overrides this method - return false -} - func (e *expressionCommon) OperatesOnTag() bool { // by default assume false, unless a concrete type overrides this method return false diff --git a/expr/tagquery/expression_match.go b/expr/tagquery/expression_match.go index 8582dc55a0..888f1046ec 100644 --- a/expr/tagquery/expression_match.go +++ b/expr/tagquery/expression_match.go @@ -33,10 +33,6 @@ func (e *expressionMatch) GetCostMultiplier() uint32 { return 0 } -func (e *expressionMatch) HasRe() bool { - return true -} - func (e *expressionMatch) RequiresNonEmptyValue() bool { return !e.matchesEmpty } diff --git a/expr/tagquery/expression_match_all.go b/expr/tagquery/expression_match_all.go index 416e059d03..8f0c9a7597 100644 --- a/expr/tagquery/expression_match_all.go +++ b/expr/tagquery/expression_match_all.go @@ -32,10 +32,6 @@ func (e *expressionMatchAll) GetCostMultiplier() uint32 { return 50 } -func (e *expressionMatchAll) HasRe() bool { - return false -} - func (e *expressionMatchAll) RequiresNonEmptyValue() bool { return false } diff --git a/expr/tagquery/expression_match_none.go b/expr/tagquery/expression_match_none.go index 011411d5fa..9469133339 100644 --- a/expr/tagquery/expression_match_none.go +++ b/expr/tagquery/expression_match_none.go @@ -32,10 +32,6 @@ func (e *expressionMatchNone) GetCostMultiplier() uint32 { return 0 } -func (e *expressionMatchNone) HasRe() bool { - return false -} - func (e *expressionMatchNone) RequiresNonEmptyValue() bool { return true } diff --git a/expr/tagquery/expression_match_tag.go b/expr/tagquery/expression_match_tag.go index dec3ce1e17..e48d1a453b 100644 --- 
a/expr/tagquery/expression_match_tag.go +++ b/expr/tagquery/expression_match_tag.go @@ -24,10 +24,6 @@ func (e *expressionMatchTag) GetCostMultiplier() uint32 { return 20 } -func (e *expressionMatchTag) HasRe() bool { - return true -} - func (e *expressionMatchTag) OperatesOnTag() bool { return true } diff --git a/expr/tagquery/expression_not_match.go b/expr/tagquery/expression_not_match.go index 12aa61cb3d..9cf161f018 100644 --- a/expr/tagquery/expression_not_match.go +++ b/expr/tagquery/expression_not_match.go @@ -33,10 +33,6 @@ func (e *expressionNotMatch) GetCostMultiplier() uint32 { return 10 } -func (e *expressionNotMatch) HasRe() bool { - return true -} - func (e *expressionNotMatch) RequiresNonEmptyValue() bool { return e.matchesEmpty } From e748c64f09c6364367d5471f73a56d9968035c8d Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Mon, 29 Jul 2019 21:06:51 -0400 Subject: [PATCH 24/40] move tag query options into memory index --- CHANGELOG.md | 1 - cmd/metrictank/metrictank.go | 5 ----- expr/tagquery/expression_equal.go | 2 +- expr/tagquery/expression_has_tag.go | 2 +- expr/tagquery/expression_match.go | 6 +++--- expr/tagquery/expression_match_tag.go | 8 ++++---- expr/tagquery/expression_not_equal.go | 2 +- expr/tagquery/expression_not_has_tag.go | 2 +- expr/tagquery/expression_not_match.go | 6 +++--- expr/tagquery/expression_prefix.go | 2 +- expr/tagquery/expression_prefix_tag.go | 2 +- expr/tagquery/query.go | 2 ++ expr/tagquery/tagquery.go | 19 ------------------- idx/memory/memory.go | 2 ++ idx/memory/memory_find_test.go | 2 +- 15 files changed, 21 insertions(+), 42 deletions(-) delete mode 100644 expr/tagquery/tagquery.go diff --git a/CHANGELOG.md b/CHANGELOG.md index ba8368d110..7e0228fe43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,6 @@ # master ## breaking changes -* version v0.12.0-182-ged2adc5b and later move the option `memory-idx.match-cache-size` to the new config section `tag-query.match-cache-size`. 
* version v0.12.0-96-g998933c3 introduces config options for the cassandra/scylladb index table names. The default settings and schemas match the previous behavior, but people who have customized the schema-idx template files should know that we now no longer only expand the keyspace (and assume a hardcoded table name). diff --git a/cmd/metrictank/metrictank.go b/cmd/metrictank/metrictank.go index cd153809c7..a85a52744b 100644 --- a/cmd/metrictank/metrictank.go +++ b/cmd/metrictank/metrictank.go @@ -15,8 +15,6 @@ import ( "syscall" "time" - "github.com/grafana/metrictank/expr/tagquery" - "github.com/Dieterbe/profiletrigger/heap" "github.com/Shopify/sarama" "github.com/grafana/globalconf" @@ -107,9 +105,6 @@ func main() { // input handlers input.ConfigSetup() - // tagquery handling - tagquery.ConfigSetup() - // load config for metric ingestors inCarbon.ConfigSetup() inKafkaMdm.ConfigSetup() diff --git a/expr/tagquery/expression_equal.go b/expr/tagquery/expression_equal.go index 716391aafd..168c08a038 100644 --- a/expr/tagquery/expression_equal.go +++ b/expr/tagquery/expression_equal.go @@ -45,7 +45,7 @@ func (e *expressionEqual) GetMetricDefinitionFilter(lookup IdTagLookup) MetricDe } } - if !metaTagSupport { + if !MetaTagSupport { return func(id schema.MKey, _ string, _ []string) FilterDecision { if lookup(id, e.key, e.value) { return Pass diff --git a/expr/tagquery/expression_has_tag.go b/expr/tagquery/expression_has_tag.go index 9a58c04547..dee9967b87 100644 --- a/expr/tagquery/expression_has_tag.go +++ b/expr/tagquery/expression_has_tag.go @@ -36,7 +36,7 @@ func (e *expressionHasTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefini } resultIfTagIsAbsent := None - if !metaTagSupport { + if !MetaTagSupport { resultIfTagIsAbsent = Fail } diff --git a/expr/tagquery/expression_match.go b/expr/tagquery/expression_match.go index 888f1046ec..57bc1baa70 100644 --- a/expr/tagquery/expression_match.go +++ b/expr/tagquery/expression_match.go @@ -58,7 +58,7 @@ func (e 
*expressionMatch) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinit } resultIfTagIsAbsent := None - if !metaTagSupport { + if !MetaTagSupport { resultIfTagIsAbsent = Fail } @@ -84,14 +84,14 @@ func (e *expressionMatch) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinit } if e.valueRe.MatchString(value) { - if atomic.LoadInt32(&currentMatchCacheSize) < int32(matchCacheSize) { + if atomic.LoadInt32(&currentMatchCacheSize) < int32(MatchCacheSize) { matchCache.Store(value, struct{}{}) atomic.AddInt32(&currentMatchCacheSize, 1) } return Pass } - if atomic.LoadInt32(&currentMissCacheSize) < int32(matchCacheSize) { + if atomic.LoadInt32(&currentMissCacheSize) < int32(MatchCacheSize) { missCache.Store(value, struct{}{}) atomic.AddInt32(&currentMissCacheSize, 1) } diff --git a/expr/tagquery/expression_match_tag.go b/expr/tagquery/expression_match_tag.go index e48d1a453b..ed14816014 100644 --- a/expr/tagquery/expression_match_tag.go +++ b/expr/tagquery/expression_match_tag.go @@ -43,7 +43,7 @@ func (e *expressionMatchTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefi } resultIfTagIsAbsent := None - if !metaTagSupport { + if !MetaTagSupport { resultIfTagIsAbsent = Fail } @@ -66,15 +66,15 @@ func (e *expressionMatchTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefi return Pass } - if e.valueRe.Match([]byte(value)) { - if atomic.LoadInt32(&currentMatchCacheSize) < int32(matchCacheSize) { + if e.valueRe.MatchString(value) { + if atomic.LoadInt32(&currentMatchCacheSize) < int32(MatchCacheSize) { matchCache.Store(value, struct{}{}) atomic.AddInt32(&currentMatchCacheSize, 1) } return Pass } - if atomic.LoadInt32(&currentMissCacheSize) < int32(matchCacheSize) { + if atomic.LoadInt32(&currentMissCacheSize) < int32(MatchCacheSize) { missCache.Store(value, struct{}{}) atomic.AddInt32(&currentMissCacheSize, 1) } diff --git a/expr/tagquery/expression_not_equal.go b/expr/tagquery/expression_not_equal.go index 1b13cd1cef..42b87f4a27 100644 --- a/expr/tagquery/expression_not_equal.go +++ b/expr/tagquery/expression_not_equal.go @@ -48,7
+48,7 @@ func (e *expressionNotEqual) GetMetricDefinitionFilter(lookup IdTagLookup) Metri } } - if !metaTagSupport { + if !MetaTagSupport { return func(id schema.MKey, _ string, _ []string) FilterDecision { if lookup(id, e.key, e.value) { return Fail diff --git a/expr/tagquery/expression_not_has_tag.go b/expr/tagquery/expression_not_has_tag.go index 20ed365123..cf73caa46b 100644 --- a/expr/tagquery/expression_not_has_tag.go +++ b/expr/tagquery/expression_not_has_tag.go @@ -40,7 +40,7 @@ func (e *expressionNotHasTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDef } resultIfTagIsAbsent := None - if !metaTagSupport { + if !MetaTagSupport { resultIfTagIsAbsent = Pass } diff --git a/expr/tagquery/expression_not_match.go b/expr/tagquery/expression_not_match.go index 9cf161f018..01a9fec6a9 100644 --- a/expr/tagquery/expression_not_match.go +++ b/expr/tagquery/expression_not_match.go @@ -57,7 +57,7 @@ func (e *expressionNotMatch) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefi } resultIfTagIsAbsent := None - if !metaTagSupport { + if !MetaTagSupport { resultIfTagIsAbsent = Pass } @@ -83,14 +83,14 @@ func (e *expressionNotMatch) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefi } if e.valueRe.MatchString(value) { - if atomic.LoadInt32(&currentMatchCacheSize) < int32(matchCacheSize) { + if atomic.LoadInt32(&currentMatchCacheSize) < int32(MatchCacheSize) { matchCache.Store(value, struct{}{}) atomic.AddInt32(&currentMatchCacheSize, 1) } return Fail } - if atomic.LoadInt32(&currentMissCacheSize) < int32(matchCacheSize) { + if atomic.LoadInt32(&currentMissCacheSize) < int32(MatchCacheSize) { missCache.Store(value, struct{}{}) atomic.AddInt32(&currentMissCacheSize, 1) } diff --git a/expr/tagquery/expression_prefix.go b/expr/tagquery/expression_prefix.go index 5889c10f6c..1dfbf235a3 100644 --- a/expr/tagquery/expression_prefix.go +++ b/expr/tagquery/expression_prefix.go @@ -47,7 +47,7 @@ func (e *expressionPrefix) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefini } resultIfTagIsAbsent := None - if
!metaTagSupport { + if !MetaTagSupport { resultIfTagIsAbsent = Fail } diff --git a/expr/tagquery/expression_prefix_tag.go b/expr/tagquery/expression_prefix_tag.go index 5f3da621b6..7c54c677c5 100644 --- a/expr/tagquery/expression_prefix_tag.go +++ b/expr/tagquery/expression_prefix_tag.go @@ -43,7 +43,7 @@ func (e *expressionPrefixTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDef } resultIfTagIsAbsent := None - if !metaTagSupport { + if !MetaTagSupport { resultIfTagIsAbsent = Fail } diff --git a/expr/tagquery/query.go b/expr/tagquery/query.go index 44d8ba66a1..123b984c92 100644 --- a/expr/tagquery/query.go +++ b/expr/tagquery/query.go @@ -8,6 +8,8 @@ import ( var ( errInvalidQuery = errors.New("invalid query") + MatchCacheSize int + MetaTagSupport bool ) type Query struct { diff --git a/expr/tagquery/tagquery.go b/expr/tagquery/tagquery.go deleted file mode 100644 index feb1c7c355..0000000000 --- a/expr/tagquery/tagquery.go +++ /dev/null @@ -1,19 +0,0 @@ -package tagquery - -import ( - "flag" - - "github.com/grafana/globalconf" -) - -var ( - matchCacheSize int - metaTagSupport bool -) - -func ConfigSetup() { - tagQuery := flag.NewFlagSet("tag-query", flag.ExitOnError) - tagQuery.IntVar(&matchCacheSize, "match-cache-size", 1000, "size of regular expression cache in tag query evaluation") - tagQuery.BoolVar(&metaTagSupport, "meta-tag-support", false, "enables/disables querying based on meta tags which get defined via meta tag rules") - globalconf.Register("tag-query", tagQuery, flag.ExitOnError) -} diff --git a/idx/memory/memory.go b/idx/memory/memory.go index 05e42b1b3e..b8d935ebda 100755 --- a/idx/memory/memory.go +++ b/idx/memory/memory.go @@ -82,6 +82,8 @@ func ConfigSetup() { memoryIdx.DurationVar(&findCacheBackoffTime, "find-cache-backoff-time", time.Minute, "amount of time to disable the findCache when the invalidate queue fills up.") memoryIdx.StringVar(&indexRulesFile, "rules-file", "/etc/metrictank/index-rules.conf", "path to index-rules.conf file") 
memoryIdx.StringVar(&maxPruneLockTimeStr, "max-prune-lock-time", "100ms", "Maximum duration each second a prune job can lock the index.") + memoryIdx.IntVar(&tagquery.MatchCacheSize, "match-cache-size", 1000, "size of regular expression cache in tag query evaluation") + memoryIdx.BoolVar(&tagquery.MetaTagSupport, "meta-tag-support", false, "enables/disables querying based on meta tags which get defined via meta tag rules") globalconf.Register("memory-idx", memoryIdx, flag.ExitOnError) } diff --git a/idx/memory/memory_find_test.go b/idx/memory/memory_find_test.go index e25d8ff356..83878d02a3 100644 --- a/idx/memory/memory_find_test.go +++ b/idx/memory/memory_find_test.go @@ -157,7 +157,7 @@ func TestMain(m *testing.M) { defer func(t bool) { TagSupport = t }(TagSupport) TagSupport = true TagQueryWorkers = 5 - tagquery.ConfigSetup() // set matchCacheSize to default + tagquery.MatchCacheSize = 1000 // we dont need info logs in the test output log.SetLevel(log.ErrorLevel) os.Exit(m.Run()) From 667e2f61e701c1f8d38cbcd3e00143868b0ede8c Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Mon, 29 Jul 2019 21:27:20 -0400 Subject: [PATCH 25/40] add comparison methods to meta tag records and expression types also fix some confusing terminology in variable names --- expr/tagquery/expression.go | 8 ++--- expr/tagquery/expression_equal.go | 4 +++ expr/tagquery/expression_has_tag.go | 4 +++ expr/tagquery/expression_match.go | 4 +++ expr/tagquery/expression_match_all.go | 4 +++ expr/tagquery/expression_match_none.go | 4 +++ expr/tagquery/expression_match_tag.go | 4 +++ expr/tagquery/expression_not_equal.go | 4 +++ expr/tagquery/expression_not_has_tag.go | 4 +++ expr/tagquery/expression_not_match.go | 4 +++ expr/tagquery/expression_prefix.go | 4 +++ expr/tagquery/expression_prefix_tag.go | 4 +++ expr/tagquery/expression_test.go | 4 +-- expr/tagquery/meta_tag_record.go | 41 ++++++++++++++++++++----- expr/tagquery/query.go | 2 +- idx/memory/memory_test.go | 10 +++--- 
idx/memory/meta_tags.go | 2 +- idx/memory/meta_tags_test.go | 41 ++++++------------------- 18 files changed, 99 insertions(+), 53 deletions(-) diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index dcf0cfd422..6f0b851cfa 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -53,6 +53,10 @@ func (e Expressions) Sort() { } type Expression interface { + // Equals takes another expression and compares it against itself. Returns true if they are equal + // or false otherwise + Equals(Expression) bool + // GetDefaultDecision defines what decision should be made if the filter has not come to a conclusive // decision based on a single index. When looking at more than one tag index in order of decreasing // priority to decide whether a metric should be part of the final result set, some operators and metric @@ -328,10 +332,6 @@ FIND_OPERATOR: return nil, fmt.Errorf("ParseExpression: Invalid operator in expression %s", expr) } -func ExpressionsAreEqual(expr1, expr2 Expression) bool { - return expr1.GetKey() == expr2.GetKey() && expr1.GetOperator() == expr2.GetOperator() && expr1.GetValue() == expr2.GetValue() -} - // MetricDefinitionFilter takes a metric name together with its tags and returns a FilterDecision type MetricDefinitionFilter func(id schema.MKey, name string, tags []string) FilterDecision diff --git a/expr/tagquery/expression_equal.go b/expr/tagquery/expression_equal.go index 168c08a038..c74818a293 100644 --- a/expr/tagquery/expression_equal.go +++ b/expr/tagquery/expression_equal.go @@ -10,6 +10,10 @@ type expressionEqual struct { expressionCommon } +func (e *expressionEqual) Equals(other Expression) bool { + return e.key == other.GetKey() && e.GetOperator() == other.GetOperator() && e.value == other.GetValue() +} + func (e *expressionEqual) GetDefaultDecision() FilterDecision { return Fail } diff --git a/expr/tagquery/expression_has_tag.go b/expr/tagquery/expression_has_tag.go index dee9967b87..03dbac9a8a 100644 --- 
a/expr/tagquery/expression_has_tag.go +++ b/expr/tagquery/expression_has_tag.go @@ -10,6 +10,10 @@ type expressionHasTag struct { expressionCommon } +func (e *expressionHasTag) Equals(other Expression) bool { + return e.key == other.GetKey() && e.GetOperator() == other.GetOperator() && e.value == other.GetValue() +} + func (e *expressionHasTag) GetDefaultDecision() FilterDecision { return Fail } diff --git a/expr/tagquery/expression_match.go b/expr/tagquery/expression_match.go index 57bc1baa70..9acd97cb60 100644 --- a/expr/tagquery/expression_match.go +++ b/expr/tagquery/expression_match.go @@ -12,6 +12,10 @@ type expressionMatch struct { expressionCommonRe } +func (e *expressionMatch) Equals(other Expression) bool { + return e.key == other.GetKey() && e.GetOperator() == other.GetOperator() && e.value == other.GetValue() +} + func (e *expressionMatch) GetDefaultDecision() FilterDecision { // if the pattern matches "" (f.e. "tag=~.*) then a metric which // does not have the tag "tag" at all should also be part of the diff --git a/expr/tagquery/expression_match_all.go b/expr/tagquery/expression_match_all.go index 8f0c9a7597..9ef99286e2 100644 --- a/expr/tagquery/expression_match_all.go +++ b/expr/tagquery/expression_match_all.go @@ -12,6 +12,10 @@ type expressionMatchAll struct { originalOperator ExpressionOperator } +func (e *expressionMatchAll) Equals(other Expression) bool { + return e.key == other.GetKey() && e.GetOperator() == other.GetOperator() && e.value == other.GetValue() +} + func (e *expressionMatchAll) GetDefaultDecision() FilterDecision { return Pass } diff --git a/expr/tagquery/expression_match_none.go b/expr/tagquery/expression_match_none.go index 9469133339..dbb297ecae 100644 --- a/expr/tagquery/expression_match_none.go +++ b/expr/tagquery/expression_match_none.go @@ -12,6 +12,10 @@ type expressionMatchNone struct { originalOperator ExpressionOperator } +func (e *expressionMatchNone) Equals(other Expression) bool { + return e.key == other.GetKey() && 
e.GetOperator() == other.GetOperator() && e.value == other.GetValue() +} + func (e *expressionMatchNone) GetDefaultDecision() FilterDecision { return Fail } diff --git a/expr/tagquery/expression_match_tag.go b/expr/tagquery/expression_match_tag.go index ed14816014..a4ef0658cb 100644 --- a/expr/tagquery/expression_match_tag.go +++ b/expr/tagquery/expression_match_tag.go @@ -12,6 +12,10 @@ type expressionMatchTag struct { expressionCommonRe } +func (e *expressionMatchTag) Equals(other Expression) bool { + return e.key == other.GetKey() && e.GetOperator() == other.GetOperator() && e.value == other.GetValue() +} + func (e *expressionMatchTag) GetDefaultDecision() FilterDecision { return Fail } diff --git a/expr/tagquery/expression_not_equal.go b/expr/tagquery/expression_not_equal.go index 42b87f4a27..5e81347e13 100644 --- a/expr/tagquery/expression_not_equal.go +++ b/expr/tagquery/expression_not_equal.go @@ -10,6 +10,10 @@ type expressionNotEqual struct { expressionCommon } +func (e *expressionNotEqual) Equals(other Expression) bool { + return e.key == other.GetKey() && e.GetOperator() == other.GetOperator() && e.value == other.GetValue() +} + func (e *expressionNotEqual) GetDefaultDecision() FilterDecision { return Pass } diff --git a/expr/tagquery/expression_not_has_tag.go b/expr/tagquery/expression_not_has_tag.go index cf73caa46b..4bd81845ea 100644 --- a/expr/tagquery/expression_not_has_tag.go +++ b/expr/tagquery/expression_not_has_tag.go @@ -10,6 +10,10 @@ type expressionNotHasTag struct { expressionCommon } +func (e *expressionNotHasTag) Equals(other Expression) bool { + return e.key == other.GetKey() && e.GetOperator() == other.GetOperator() && e.value == other.GetValue() +} + func (e *expressionNotHasTag) GetDefaultDecision() FilterDecision { return Pass } diff --git a/expr/tagquery/expression_not_match.go b/expr/tagquery/expression_not_match.go index 01a9fec6a9..f18cac5a02 100644 --- a/expr/tagquery/expression_not_match.go +++ 
b/expr/tagquery/expression_not_match.go @@ -12,6 +12,10 @@ type expressionNotMatch struct { expressionCommonRe } +func (e *expressionNotMatch) Equals(other Expression) bool { + return e.key == other.GetKey() && e.GetOperator() == other.GetOperator() && e.value == other.GetValue() +} + func (e *expressionNotMatch) GetDefaultDecision() FilterDecision { // if the pattern matches "" (f.e. "tag!=~.*) then a metric which // does not have the tag "tag" at all should not be part of the diff --git a/expr/tagquery/expression_prefix.go b/expr/tagquery/expression_prefix.go index 1dfbf235a3..fbfa5abadb 100644 --- a/expr/tagquery/expression_prefix.go +++ b/expr/tagquery/expression_prefix.go @@ -10,6 +10,10 @@ type expressionPrefix struct { expressionCommon } +func (e *expressionPrefix) Equals(other Expression) bool { + return e.key == other.GetKey() && e.GetOperator() == other.GetOperator() && e.value == other.GetValue() +} + func (e *expressionPrefix) GetDefaultDecision() FilterDecision { return Fail } diff --git a/expr/tagquery/expression_prefix_tag.go b/expr/tagquery/expression_prefix_tag.go index 7c54c677c5..9724dda9a2 100644 --- a/expr/tagquery/expression_prefix_tag.go +++ b/expr/tagquery/expression_prefix_tag.go @@ -10,6 +10,10 @@ type expressionPrefixTag struct { expressionCommon } +func (e *expressionPrefixTag) Equals(other Expression) bool { + return e.key == other.GetKey() && e.GetOperator() == other.GetOperator() && e.value == other.GetValue() +} + func (e *expressionPrefixTag) GetDefaultDecision() FilterDecision { return Fail } diff --git a/expr/tagquery/expression_test.go b/expr/tagquery/expression_test.go index 756a8b024b..3a2666bcf6 100644 --- a/expr/tagquery/expression_test.go +++ b/expr/tagquery/expression_test.go @@ -261,7 +261,7 @@ func TestExpression_IsEqualTo(t *testing.T) { if err != nil { t.Fatalf("Unexpected parsing error of \"%s\": %s", tc.expression, err) } - if !ExpressionsAreEqual(e1, e2) { + if !e1.Equals(e2) { t.Fatalf("Expected two instantiations 
of expressions to be equal, but they were not: \"%s\"", tc.expression) } @@ -271,7 +271,7 @@ func TestExpression_IsEqualTo(t *testing.T) { t.Fatalf("Unexpected parsing error of \"%s\": %s", tc.notEqual[j], err) } - if ExpressionsAreEqual(e1, other) || ExpressionsAreEqual(e2, other) { + if e1.Equals(other) || e2.Equals(other) { t.Fatalf("Expressions are supposed to not be equal, but they were: \"%s\"/\"%s\"", tc.expression, tc.notEqual[j]) } } diff --git a/expr/tagquery/meta_tag_record.go b/expr/tagquery/meta_tag_record.go index 6a83def237..dd74c61f9f 100644 --- a/expr/tagquery/meta_tag_record.go +++ b/expr/tagquery/meta_tag_record.go @@ -9,7 +9,7 @@ type MetaTagRecord struct { Expressions Expressions } -func ParseMetaTagRecord(metaTags []string, queries []string) (MetaTagRecord, error) { +func ParseMetaTagRecord(metaTags []string, expressions []string) (MetaTagRecord, error) { res := MetaTagRecord{} var err error @@ -18,7 +18,7 @@ func ParseMetaTagRecord(metaTags []string, queries []string) (MetaTagRecord, err return res, err } - res.Expressions, err = ParseExpressions(queries) + res.Expressions, err = ParseExpressions(expressions) if err != nil { return res, err } @@ -30,16 +30,41 @@ func ParseMetaTagRecord(metaTags []string, queries []string) (MetaTagRecord, err return res, nil } -// MatchesQueries compares another tag record's queries to this -// one's queries. Returns true if they are equal, otherwise false. 
-// It is assumed that all the queries are already sorted -func (m *MetaTagRecord) MatchesQueries(other *MetaTagRecord) bool { +func (m *MetaTagRecord) Equals(other *MetaTagRecord) bool { + if len(m.MetaTags) != len(other.MetaTags) { + return false + } + + foundTags := make([]bool, len(m.MetaTags)) + for i, tag := range m.MetaTags { + for _, otherTag := range other.MetaTags { + if tag == otherTag { + foundTags[i] = true + } + } + } + + for i := range foundTags { + if !foundTags[i] { + return false + } + } + + return m.EqualExpressions(other) + +} + +// EqualExpressions compares another meta tag record's expressions to +// this one's expressions +// Returns true if they are equal, otherwise false +// It is assumed that all the expressions are already sorted +func (m *MetaTagRecord) EqualExpressions(other *MetaTagRecord) bool { if len(m.Expressions) != len(other.Expressions) { return false } - for i, query := range m.Expressions { - if !ExpressionsAreEqual(query, other.Expressions[i]) { + for i, expression := range m.Expressions { + if !expression.Equals(other.Expressions[i]) { return false } } diff --git a/expr/tagquery/query.go b/expr/tagquery/query.go index 123b984c92..f5d0b72098 100644 --- a/expr/tagquery/query.go +++ b/expr/tagquery/query.go @@ -45,7 +45,7 @@ func NewQuery(expressions Expressions, from int64) (Query, error) { foundExpressionRequiringNonEmptyValue := false for i := 0; i < len(expressions); i++ { // skip duplicate expression - if i > 0 && ExpressionsAreEqual(expressions[i], expressions[i-1]) { + if i > 0 && expressions[i].Equals(expressions[i-1]) { expressions = append(expressions[:i], expressions[i+1:]...) 
i-- continue diff --git a/idx/memory/memory_test.go b/idx/memory/memory_test.go index 6624a87112..dc17a53e4e 100644 --- a/idx/memory/memory_test.go +++ b/idx/memory/memory_test.go @@ -1037,7 +1037,7 @@ func TestUpsertingMetaRecordsIntoIndex(t *testing.T) { if !created { t.Fatalf("Expected record to have been created, but it has not") } - if !metaTagRecordsAreEqual(&createdRecord, &record1) { + if !createdRecord.Equals(&record1) { t.Fatalf("Expected returned record to look same as added record, but it does not:\nExpected:\n%+v\nGot:\n%+v\n", record1, createdRecord) } @@ -1048,7 +1048,7 @@ func TestUpsertingMetaRecordsIntoIndex(t *testing.T) { if !created { t.Fatalf("Expected record to have been created, but it has not") } - if !metaTagRecordsAreEqual(&createdRecord, &record2) { + if !createdRecord.Equals(&record2) { t.Fatalf("Expected returned record to look same as added record, but it does not:\nExpected:\n%+v\nGot:\n%+v\n", record2, createdRecord) } @@ -1059,10 +1059,10 @@ func TestUpsertingMetaRecordsIntoIndex(t *testing.T) { var found1, found2 bool for _, mtr := range metaTagRecords { - if metaTagRecordsAreEqual(&mtr, &record1) { + if mtr.Equals(&record1) { found1 = true } - if metaTagRecordsAreEqual(&mtr, &record2) { + if mtr.Equals(&record2) { found2 = true } } @@ -1096,7 +1096,7 @@ func TestUpsertingMetaRecordsIntoIndex(t *testing.T) { if created { t.Fatalf("Expected record to not have been created, but it has") } - if !metaTagRecordsAreEqual(&createdRecord, &record3) { + if !createdRecord.Equals(&record3) { t.Fatalf("Expected returned record to look same as added record, but it does not:\nExpected:\n%+v\nGot:\n%+v\n", record3, createdRecord) } diff --git a/idx/memory/meta_tags.go b/idx/memory/meta_tags.go index ea523badf6..05d39fc8dd 100644 --- a/idx/memory/meta_tags.go +++ b/idx/memory/meta_tags.go @@ -45,7 +45,7 @@ func (m metaTagRecords) upsert(record tagquery.MetaTagRecord) (recordId, *tagque // the exact same queries as the one we're upserting for i := 
uint32(0); i < collisionAvoidanceWindow; i++ { if existingRecord, ok := m[id+recordId(i)]; ok { - if record.MatchesQueries(&existingRecord) { + if record.EqualExpressions(&existingRecord) { oldRecord = &existingRecord oldId = id + recordId(i) delete(m, oldId) diff --git a/idx/memory/meta_tags_test.go b/idx/memory/meta_tags_test.go index cf70048a07..3c84a873ee 100644 --- a/idx/memory/meta_tags_test.go +++ b/idx/memory/meta_tags_test.go @@ -36,7 +36,7 @@ func TestInsertSimpleMetaTagRecord(t *testing.T) { t.Fatalf("We expected the record to be found at the index of its hash, but it wasn't") } - if !metaTagRecordsAreEqual(&recordToInsert, record) { + if !recordToInsert.Equals(record) { t.Fatalf("Inserted meta tag record has unexpectedly been modified") } } @@ -70,10 +70,10 @@ func TestUpdateExistingMetaTagRecord(t *testing.T) { var found1, found2 bool var recordIdToUpdate recordId for i, record := range metaTagRecords { - if metaTagRecordsAreEqual(&record, &recordToInsert1) { + if record.Equals(&recordToInsert1) { found1 = true recordIdToUpdate = i - } else if metaTagRecordsAreEqual(&record, &recordToInsert2) { + } else if record.Equals(&recordToInsert2) { found2 = true } } @@ -95,7 +95,7 @@ func TestUpdateExistingMetaTagRecord(t *testing.T) { if oldId != id { t.Fatalf("Expected the new id after updating to be %d (same as the old id), but it was %d", oldId, id) } - if oldRecord == nil || !metaTagRecordsAreEqual(oldRecord, &recordToInsert1) { + if oldRecord == nil || !oldRecord.Equals(&recordToInsert1) { t.Fatalf("Expected the old record to not be nil, but it was") } if len(metaTagRecords) != 2 { @@ -105,10 +105,10 @@ func TestUpdateExistingMetaTagRecord(t *testing.T) { // the order of the records may have changed again due to sorting by id found1, found2 = false, false for _, record := range metaTagRecords { - if metaTagRecordsAreEqual(&record, &recordToUpdate) { + if record.Equals(&recordToUpdate) { found1 = true } - if metaTagRecordsAreEqual(&record, 
&recordToInsert2) { + if record.Equals(&recordToInsert2) { found2 = true } } @@ -208,7 +208,7 @@ func TestHashCollisionsOnInsert(t *testing.T) { } // check if the returned new record looks as expected - if !metaTagRecordsAreEqual(returnedRecord, &record) { + if !returnedRecord.Equals(&record) { t.Fatalf("New record looked different than expected:\nExpected:\n%+v\nGot:\n%+v\n", &record, returnedRecord) } if oldId != 3 { @@ -217,7 +217,7 @@ func TestHashCollisionsOnInsert(t *testing.T) { // check if the returned old record looks as expected record, _ = tagquery.ParseMetaTagRecord([]string{"metaTag3=value3"}, []string{"metricTag3=value3"}) - if !metaTagRecordsAreEqual(oldRecord, &record) { + if !oldRecord.Equals(&record) { t.Fatalf("Old record looked different than expected:\nExpected:\n%+v\nGot:\n%+v\n", &record, oldRecord) } if len(metaTagRecords) != 3 { @@ -248,7 +248,7 @@ func TestDeletingMetaRecord(t *testing.T) { if len(returnedRecord.MetaTags) != 0 { t.Fatalf("Expected returned meta tag record to have 0 meta tags, but it had %d", len(returnedRecord.MetaTags)) } - if !metaTagRecordsAreEqual(returnedRecord, &record) { + if !returnedRecord.Equals(&record) { t.Fatalf("Queries of returned record don't match what we expected:\nExpected:\n%+v\nGot:\n%+v\n", record.Expressions, returnedRecord.Expressions) } if oldId != idOfRecord2 { @@ -262,26 +262,3 @@ func TestDeletingMetaRecord(t *testing.T) { t.Fatalf("Expected returned record id to not be present, but it was") } } - -func metaTagRecordsAreEqual(record1, record2 *tagquery.MetaTagRecord) bool { - if len(record1.MetaTags) != len(record2.MetaTags) { - return false - } - - foundTags := make([]bool, len(record1.MetaTags)) - for i, tag := range record1.MetaTags { - for _, otherTag := range record2.MetaTags { - if tag == otherTag { - foundTags[i] = true - } - } - } - - for i := range foundTags { - if !foundTags[i] { - return false - } - } - - return record1.MatchesQueries(record2) -} From 
f19b46dde606d680ce59b7a9a1946cfdabd6d8f1 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Mon, 29 Jul 2019 21:39:17 -0400 Subject: [PATCH 26/40] add HashExpression method to meta tag record type --- expr/tagquery/meta_tag_record.go | 16 ++++++++++++++++ expr/tagquery/query.go | 10 ++++++++++ idx/memory/meta_tags.go | 31 ++----------------------------- idx/memory/meta_tags_test.go | 8 ++++---- 4 files changed, 32 insertions(+), 33 deletions(-) diff --git a/expr/tagquery/meta_tag_record.go b/expr/tagquery/meta_tag_record.go index dd74c61f9f..514ea5ac5e 100644 --- a/expr/tagquery/meta_tag_record.go +++ b/expr/tagquery/meta_tag_record.go @@ -2,6 +2,7 @@ package tagquery import ( "fmt" + "strings" ) type MetaTagRecord struct { @@ -51,7 +52,22 @@ func (m *MetaTagRecord) Equals(other *MetaTagRecord) bool { } return m.EqualExpressions(other) +} + +// HashExpressions returns a hash of all expressions in this meta tag record +// It is assumed that the expressions are already sorted +func (m *MetaTagRecord) HashExpressions() uint32 { + builder := strings.Builder{} + for _, query := range m.Expressions { + query.StringIntoBuilder(&builder) + + // trailing ";" doesn't matter, this is only hash input + builder.WriteString(";") + } + h := QueryHash() + h.Write([]byte(builder.String())) + return h.Sum32() } // EqualExpressions compares another meta tag record's expressions to diff --git a/expr/tagquery/query.go b/expr/tagquery/query.go index f5d0b72098..1b151a175c 100644 --- a/expr/tagquery/query.go +++ b/expr/tagquery/query.go @@ -2,6 +2,8 @@ package tagquery import ( "errors" + "hash" + "hash/fnv" "github.com/raintank/schema" ) @@ -10,8 +12,16 @@ var ( errInvalidQuery = errors.New("invalid query") MatchCacheSize int MetaTagSupport bool + + // the function we use to get the hash for hashing the meta records + // it can be replaced for mocking in tests + QueryHash func() hash.Hash32 ) +func init() { + QueryHash = fnv.New32a +} + type Query struct { // clause that operates on 
LastUpdate field From int64 diff --git a/idx/memory/meta_tags.go b/idx/memory/meta_tags.go index 05d39fc8dd..676d78bc31 100644 --- a/idx/memory/meta_tags.go +++ b/idx/memory/meta_tags.go @@ -1,10 +1,6 @@ package memory import ( - "hash" - "hash/fnv" - "strings" - "github.com/grafana/metrictank/errors" "github.com/grafana/metrictank/expr/tagquery" ) @@ -13,19 +9,11 @@ import ( // slot that's free if two record hashes collide var collisionAvoidanceWindow = uint32(1024) -// the function we use to get the hash for hashing the meta records -// it can be replaced for mocking in tests -var queryHash func() hash.Hash32 - -func init() { - queryHash = fnv.New32a -} +type recordId uint32 // list of meta records keyed by a unique identifier used as ID type metaTagRecords map[recordId]tagquery.MetaTagRecord -type recordId uint32 - // upsert inserts or updates a meta tag record according to the given specifications // it uses the set of tag query expressions as the identity of the record, if a record with the // same identity is already present then its meta tags get updated to the specified ones. 
@@ -37,7 +25,7 @@ type recordId uint32 // 4) Pointer to the metaTagRecord that has been replaced if an update was performed, otherwise nil // 5) Error if an error occurred, otherwise it's nil func (m metaTagRecords) upsert(record tagquery.MetaTagRecord) (recordId, *tagquery.MetaTagRecord, recordId, *tagquery.MetaTagRecord, error) { - id := m.hashMetaTagRecord(record) + id := recordId(record.HashExpressions()) var oldRecord *tagquery.MetaTagRecord var oldId recordId @@ -73,21 +61,6 @@ func (m metaTagRecords) upsert(record tagquery.MetaTagRecord) (recordId, *tagque return 0, nil, 0, nil, errors.NewInternal("Could not find a free ID to insert record") } -// hashMetaTagRecord generates a hash of all the queries in the record -func (m *metaTagRecords) hashMetaTagRecord(record tagquery.MetaTagRecord) recordId { - builder := strings.Builder{} - for _, query := range record.Expressions { - query.StringIntoBuilder(&builder) - - // trailing ";" doesn't matter, this is only hash input - builder.WriteString(";") - } - - h := queryHash() - h.Write([]byte(builder.String())) - return recordId(h.Sum32()) -} - // index structure keyed by tag -> value -> list of meta record IDs type metaTagValue map[string][]recordId type metaTagIndex map[string]metaTagValue diff --git a/idx/memory/meta_tags_test.go b/idx/memory/meta_tags_test.go index 3c84a873ee..9f2a7bacfd 100644 --- a/idx/memory/meta_tags_test.go +++ b/idx/memory/meta_tags_test.go @@ -31,7 +31,7 @@ func TestInsertSimpleMetaTagRecord(t *testing.T) { t.Fatalf("metaTagRecords was expected to have 1 entry, but it had %d", len(metaTagRecords)) } - _, ok := metaTagRecords[metaTagRecords.hashMetaTagRecord(*record)] + _, ok := metaTagRecords[recordId(record.HashExpressions())] if !ok { t.Fatalf("We expected the record to be found at the index of its hash, but it wasn't") } @@ -155,10 +155,10 @@ func TestHashCollisionsOnInsert(t *testing.T) { defer func() { collisionAvoidanceWindow = originalCollisionAvoidanceWindow }() 
collisionAvoidanceWindow = 3 - originalHash := queryHash - defer func() { queryHash = originalHash }() + originalHash := tagquery.QueryHash + defer func() { tagquery.QueryHash = originalHash }() - queryHash = func() hash.Hash32 { + tagquery.QueryHash = func() hash.Hash32 { return &mockHash{ returnValues: []uint32{1}, // keep returning 1 } From 954172bbdd1e33c75dec50651ce7662417b168a1 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Mon, 29 Jul 2019 21:40:46 -0400 Subject: [PATCH 27/40] add comment --- expr/tagquery/meta_tag_record.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/expr/tagquery/meta_tag_record.go b/expr/tagquery/meta_tag_record.go index 514ea5ac5e..40a7c53dda 100644 --- a/expr/tagquery/meta_tag_record.go +++ b/expr/tagquery/meta_tag_record.go @@ -31,6 +31,9 @@ func ParseMetaTagRecord(metaTags []string, expressions []string) (MetaTagRecord, return res, nil } +// Equals takes another MetaTagRecord and compares all its properties to its +// own properties. It is assumed that the expressions of both meta tag records +// are already sorted. func (m *MetaTagRecord) Equals(other *MetaTagRecord) bool { if len(m.MetaTags) != len(other.MetaTags) { return false From 6a53c6f42660248f1303c1795eba2b7a1ae26224 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Mon, 29 Jul 2019 22:14:48 -0400 Subject: [PATCH 28/40] adding comments --- idx/memory/tag_query.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/idx/memory/tag_query.go b/idx/memory/tag_query.go index 60d617c2be..0cdabf5dc4 100644 --- a/idx/memory/tag_query.go +++ b/idx/memory/tag_query.go @@ -63,9 +63,19 @@ func (q *TagQueryContext) prepareExpressions(idx TagIndex) { sort.Slice(costs, func(i, j int) bool { return costs[i].cost < costs[j].cost }) + // the number of filters / default decisions is equal to the number of expressions - 1 + // because one of the expressions will be chosen to be the one that we start with. 
+ // we don't need the filter function, nor the default decision, of the expression which + // we start with. + // all the remaining expressions will be used as filter expressions, for which we need + // to obtain their filter functions and their default decisions. q.filters = make([]tagquery.MetricDefinitionFilter, len(q.query.Expressions)-1) q.defaultDecisions = make([]tagquery.FilterDecision, len(q.query.Expressions)-1) + // Every tag query has at least one expression which requires a non-empty value according to: + // https://graphite.readthedocs.io/en/latest/tags.html#querying + // This rule is enforced by tagquery.NewQuery, here we trust that the queries which get passed + // into the index have already been validated i := 0 for _, cost := range costs { if q.startWith < 0 && q.query.Expressions[cost.expressionIdx].RequiresNonEmptyValue() { From 59355009d92bab38537b4cf0b46034a3a7f4a3cd Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Mon, 29 Jul 2019 22:16:41 -0400 Subject: [PATCH 29/40] update comment --- expr/tagquery/expression.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index 6f0b851cfa..82448611df 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -96,10 +96,9 @@ type Expression interface { // final result set, so it returns "fail".
GetDefaultDecision() FilterDecision - // GetKey returns tag to who's values this expression get's applied if it operates on the value - // (OperatorsOnTag returns "false") + // GetKey returns the tag to whose values this expression gets applied // example: - // in the expression "tag1=value" GetKey() would return "tag1" and OperatesOnTag() returns "false" + // in the expression "tag1=value" GetKey() would return "tag1" GetKey() string // GetValue returns the value part of the expression From 13059a06861156ff749936112b4557802a533d00 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Tue, 30 Jul 2019 19:03:17 -0400 Subject: [PATCH 30/40] faster meta tag record comparison --- expr/tagquery/meta_tag_record.go | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/expr/tagquery/meta_tag_record.go b/expr/tagquery/meta_tag_record.go index 40a7c53dda..efa1565cc3 100644 --- a/expr/tagquery/meta_tag_record.go +++ b/expr/tagquery/meta_tag_record.go @@ -39,17 +39,8 @@ func (m *MetaTagRecord) Equals(other *MetaTagRecord) bool { return false } - foundTags := make([]bool, len(m.MetaTags)) - for i, tag := range m.MetaTags { - for _, otherTag := range other.MetaTags { - if tag == otherTag { - foundTags[i] = true - } - } - } - - for i := range foundTags { - if !foundTags[i] { + for i := range m.MetaTags { + if m.MetaTags[i] != other.MetaTags[i] { return false } } From b1df23be8294ce04063569815e3986635565f85f Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Tue, 30 Jul 2019 23:27:24 -0400 Subject: [PATCH 31/40] update docs to reflect config parameter changes --- docker/docker-chaos/metrictank.ini | 9 ++++----- docker/docker-cluster-query/metrictank.ini | 9 ++++----- docker/docker-cluster/metrictank.ini | 9 ++++----- docker/docker-dev-custom-cfg-kafka/metrictank.ini | 9 ++++----- docs/config.md | 12 ++++-------- metrictank-sample.ini | 9 ++++----- scripts/config/metrictank-docker.ini | 9 ++++----- scripts/config/metrictank-package.ini | 9 ++++----- 8
files changed, 32 insertions(+), 43 deletions(-) diff --git a/docker/docker-chaos/metrictank.ini b/docker/docker-chaos/metrictank.ini index d3a6a37761..7f945e7f3a 100644 --- a/docker/docker-chaos/metrictank.ini +++ b/docker/docker-chaos/metrictank.ini @@ -429,18 +429,17 @@ schema-file = /etc/metrictank/schema-idx-cassandra.toml # instruct the driver to not attempt to get host info from the system.peers table disable-initial-host-lookup = false -### tag query evaluation -[tag-query] -# size of regular expression cache in tag query evaluation -match-cache-size = 1000 - ### in-memory only [memory-idx] enabled = false # enables/disables querying based on tags tag-support = false +# enables/disables querying based on meta tags +meta-tag-support = false # number of workers to spin up to evaluate tag queries tag-query-workers = 50 +# size of regular expression cache in tag query evaluation +match-cache-size = 1000 # path to index-rules.conf file rules-file = /etc/metrictank/index-rules.conf # maximum duration each second a prune job can lock the index. 
diff --git a/docker/docker-cluster-query/metrictank.ini b/docker/docker-cluster-query/metrictank.ini index fd3c35d761..088c7cd8ce 100644 --- a/docker/docker-cluster-query/metrictank.ini +++ b/docker/docker-cluster-query/metrictank.ini @@ -429,18 +429,17 @@ schema-file = /etc/metrictank/schema-idx-cassandra.toml # instruct the driver to not attempt to get host info from the system.peers table disable-initial-host-lookup = false -### tag query evaluation -[tag-query] -# size of regular expression cache in tag query evaluation -match-cache-size = 1000 - ### in-memory only [memory-idx] enabled = false # enables/disables querying based on tags tag-support = false +# enables/disables querying based on meta tags +meta-tag-support = false # number of workers to spin up to evaluate tag queries tag-query-workers = 50 +# size of regular expression cache in tag query evaluation +match-cache-size = 1000 # path to index-rules.conf file rules-file = /etc/metrictank/index-rules.conf # maximum duration each second a prune job can lock the index. 
diff --git a/docker/docker-cluster/metrictank.ini b/docker/docker-cluster/metrictank.ini index ec3776db54..998c926b7e 100644 --- a/docker/docker-cluster/metrictank.ini +++ b/docker/docker-cluster/metrictank.ini @@ -429,18 +429,17 @@ schema-file = /etc/metrictank/schema-idx-cassandra.toml # instruct the driver to not attempt to get host info from the system.peers table disable-initial-host-lookup = false -### tag query evaluation -[tag-query] -# size of regular expression cache in tag query evaluation -match-cache-size = 1000 - ### in-memory only [memory-idx] enabled = false # enables/disables querying based on tags tag-support = false +# enables/disables querying based on meta tags +meta-tag-support = false # number of workers to spin up to evaluate tag queries tag-query-workers = 50 +# size of regular expression cache in tag query evaluation +match-cache-size = 1000 # path to index-rules.conf file rules-file = /etc/metrictank/index-rules.conf # maximum duration each second a prune job can lock the index.
diff --git a/docker/docker-dev-custom-cfg-kafka/metrictank.ini b/docker/docker-dev-custom-cfg-kafka/metrictank.ini index 072b281c37..e89630816a 100644 --- a/docker/docker-dev-custom-cfg-kafka/metrictank.ini +++ b/docker/docker-dev-custom-cfg-kafka/metrictank.ini @@ -429,18 +429,17 @@ schema-file = /etc/metrictank/schema-idx-cassandra.toml # instruct the driver to not attempt to get host info from the system.peers table disable-initial-host-lookup = false -### tag query evaluation -[tag-query] -# size of regular expression cache in tag query evaluation -match-cache-size = 1000 - ### in-memory only [memory-idx] enabled = false # enables/disables querying based on tags tag-support = false +# enables/disables querying based on meta tags +meta-tag-support = false # number of workers to spin up to evaluate tag queries tag-query-workers = 50 +# size of regular expression cache in tag query evaluation +match-cache-size = 1000 # path to index-rules.conf file rules-file = /etc/metrictank/index-rules.conf # maximum duration each second a prune job can lock the index. diff --git a/docs/config.md b/docs/config.md index a8f053f747..327afd2456 100644 --- a/docs/config.md +++ b/docs/config.md @@ -501,14 +501,6 @@ schema-file = /etc/metrictank/schema-idx-cassandra.toml disable-initial-host-lookup = false ``` -### tag query evaluation - -``` -[tag-query] -# size of regular expression cache in tag query evaluation -match-cache-size = 1000 -``` - ### in-memory only ``` @@ -516,8 +508,12 @@ match-cache-size = 1000 enabled = false # enables/disables querying based on tags tag-support = false +# enables/disables querying based on meta tags +meta-tag-support = false # number of workers to spin up to evaluate tag queries tag-query-workers = 50 +# size of regular expression cache in tag query evaluation +match-cache-size = 1000 # path to index-rules.conf file rules-file = /etc/metrictank/index-rules.conf # maximum duration each second a prune job can lock the index. 
diff --git a/metrictank-sample.ini b/metrictank-sample.ini index 79566e2a10..a0e1bbcb83 100644 --- a/metrictank-sample.ini +++ b/metrictank-sample.ini @@ -432,18 +432,17 @@ schema-file = /etc/metrictank/schema-idx-cassandra.toml # instruct the driver to not attempt to get host info from the system.peers table disable-initial-host-lookup = false -### tag query evaluation -[tag-query] -# size of regular expression cache in tag query evaluation -match-cache-size = 1000 - ### in-memory only [memory-idx] enabled = false # enables/disables querying based on tags tag-support = false +# enables/disables querying based on meta tags +meta-tag-support = false # number of workers to spin up to evaluate tag queries tag-query-workers = 50 +# size of regular expression cache in tag query evaluation +match-cache-size = 1000 # path to index-rules.conf file rules-file = /etc/metrictank/index-rules.conf # maximum duration each second a prune job can lock the index. diff --git a/scripts/config/metrictank-docker.ini b/scripts/config/metrictank-docker.ini index 862b7c4685..d3d315ef66 100644 --- a/scripts/config/metrictank-docker.ini +++ b/scripts/config/metrictank-docker.ini @@ -429,18 +429,17 @@ schema-file = /etc/metrictank/schema-idx-cassandra.toml # instruct the driver to not attempt to get host info from the system.peers table disable-initial-host-lookup = false -### tag query evaluation -[tag-query] -# size of regular expression cache in tag query evaluation -match-cache-size = 1000 - ### in-memory only [memory-idx] enabled = false # enables/disables querying based on tags tag-support = false +# enables/disables querying based on meta tags +meta-tag-support = false # number of workers to spin up to evaluate tag queries tag-query-workers = 50 +# size of regular expression cache in tag query evaluation +match-cache-size = 1000 # path to index-rules.conf file rules-file = /etc/metrictank/index-rules.conf # maximum duration each second a prune job can lock the index. 
diff --git a/scripts/config/metrictank-package.ini b/scripts/config/metrictank-package.ini index 84d8d8a058..2e76a77874 100644 --- a/scripts/config/metrictank-package.ini +++ b/scripts/config/metrictank-package.ini @@ -429,18 +429,17 @@ schema-file = /etc/metrictank/schema-idx-cassandra.toml # instruct the driver to not attempt to get host info from the system.peers table disable-initial-host-lookup = false -### tag query evaluation -[tag-query] -# size of regular expression cache in tag query evaluation -match-cache-size = 1000 - ### in-memory only [memory-idx] enabled = false # enables/disables querying based on tags tag-support = false +# enables/disables querying based on meta tags +meta-tag-support = false # number of workers to spin up to evaluate tag queries tag-query-workers = 50 +# size of regular expression cache in tag query evaluation +match-cache-size = 1000 # path to index-rules.conf file rules-file = /etc/metrictank/index-rules.conf # maximum duration each second a prune job can lock the index. From bafc023d894f0998a6cff50cf55d84fa7fe8a604 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Thu, 1 Aug 2019 19:43:22 -0400 Subject: [PATCH 32/40] more comments to explain type Expression --- expr/tagquery/expression.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index 82448611df..efccddaa3e 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -52,6 +52,12 @@ func (e Expressions) Sort() { }) } +// Expression represents one expression inside a query of one or many expressions. +// It provides all the necessary methods that are required to do a tag lookup from an index keyed by +// tags & values, such as the type memory.TagIndex or the type memory.metaTagIndex. +// It also comes with a method to generate a filter which decides whether a given MetricDefinition +// matches the requirements defined by this expression or not. 
This filter can be obtained from the +// method GetMetricDefinitionFilter(). type Expression interface { // Equals takes another expression and compares it against itself. Returns true if they are equal // or false otherwise @@ -112,7 +118,13 @@ type Expression interface { GetCostMultiplier() uint32 // OperatesOnTag returns whether this expression operators on the tag key - // (if not, it operates on the value) + // (if not, it operates on the value). + // Expressions such as expressionHasTag, expressionMatchTag, expressionPrefixTag would return true, + // because in order to make a decision regarding whether a metric should be part of the result set + // they need to look at a metric's tags, as opposed to looking at the values associated with some + // specified tag. + // If this returns true, then tags shall be passed into ValuePasses(), otherwise values associated with + // the tag returned by GetKey() shall be passed into ValuePasses(). OperatesOnTag() bool // RequiresNonEmptyValue returns whether this expression requires a non-empty value. 
@@ -120,7 +132,7 @@ type Expression interface { RequiresNonEmptyValue() bool // ValuePasses takes a string which should either be a tag key or value depending on the return - // value of OperatesOnTag(), then it returns whether whether the given value satisfies this expression + // value of OperatesOnTag(), then it returns whether the given value satisfies this expression ValuePasses(string) bool ValueMatchesExactly() bool From 830f42f016bcc44594b96737a758940e4db97dbf Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Sat, 3 Aug 2019 11:19:26 -0400 Subject: [PATCH 33/40] fix json response format bug --- api/graphite.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/api/graphite.go b/api/graphite.go index 7ec2272146..a61b910524 100644 --- a/api/graphite.go +++ b/api/graphite.go @@ -1070,7 +1070,7 @@ func (s *Server) graphiteTags(ctx *middleware.Context, request models.GraphiteTa default: } - var resp models.GraphiteTagsResp + resp := make(models.GraphiteTagsResp, 0) for _, tag := range tags { resp = append(resp, models.GraphiteTagResp{Tag: tag}) } @@ -1103,6 +1103,8 @@ func (s *Server) clusterTags(ctx context.Context, orgId uint32, filter string, f } } + // we want to make an empty list, because this results in the json response "[]" if it's empty + // if we initialize "tags" with "var tags []string" the json response (if empty) is "null" instead of "[]" tags := make([]string, 0, len(tagSet)) for t := range tagSet { tags = append(tags, t) From 590aaa5ab304a95f37a5a3b29f44df5cad988c0d Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Mon, 5 Aug 2019 21:47:45 -0400 Subject: [PATCH 34/40] fix bug when initial expression has type HAS_TAG this makes HAS_TAG correctly indicate that its value matches exactly, and it also uses the optimized lookup when doing the lookup of the initial ID set in getInitialByTag() --- expr/tagquery/expression_has_tag.go | 4 +++ expr/tagquery/expression_not_equal.go | 4 --- idx/memory/tag_query.go | 41 
+++++++++++++++++++-------- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/expr/tagquery/expression_has_tag.go b/expr/tagquery/expression_has_tag.go index 03dbac9a8a..7b7e12e23f 100644 --- a/expr/tagquery/expression_has_tag.go +++ b/expr/tagquery/expression_has_tag.go @@ -34,6 +34,10 @@ func (e *expressionHasTag) ValuePasses(value string) bool { return value == e.key } +func (e *expressionHasTag) ValueMatchesExactly() bool { + return true +} + func (e *expressionHasTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter { if e.key == "name" { return func(_ schema.MKey, _ string, _ []string) FilterDecision { return Pass } diff --git a/expr/tagquery/expression_not_equal.go b/expr/tagquery/expression_not_equal.go index 5e81347e13..50c66e2d4e 100644 --- a/expr/tagquery/expression_not_equal.go +++ b/expr/tagquery/expression_not_equal.go @@ -34,10 +34,6 @@ func (e *expressionNotEqual) ValuePasses(value string) bool { return value != e.value } -func (e *expressionNotEqual) ValueMatchesExactly() bool { - return true -} - func (e *expressionNotEqual) GetMetricDefinitionFilter(lookup IdTagLookup) MetricDefinitionFilter { if e.key == "name" { if e.value == "" { diff --git a/idx/memory/tag_query.go b/idx/memory/tag_query.go index 0cdabf5dc4..866571a440 100644 --- a/idx/memory/tag_query.go +++ b/idx/memory/tag_query.go @@ -50,11 +50,16 @@ func (q *TagQueryContext) prepareExpressions(idx TagIndex) { for i, expr := range q.query.Expressions { costs[i].expressionIdx = i - if expr.ValueMatchesExactly() { - costs[i].cost = uint32(len(idx[expr.GetKey()][expr.GetValue()])) * expr.GetCostMultiplier() - } else { - if expr.OperatesOnTag() { + + if expr.OperatesOnTag() { + if expr.ValueMatchesExactly() { + costs[i].cost = uint32(len(idx[expr.GetKey()])) * expr.GetCostMultiplier() + } else { costs[i].cost = uint32(len(idx)) * expr.GetCostMultiplier() + } + } else { + if expr.ValueMatchesExactly() { + costs[i].cost = 
uint32(len(idx[expr.GetKey()][expr.GetValue()])) * expr.GetCostMultiplier() } else { costs[i].cost = uint32(len(idx[expr.GetKey()])) * expr.GetCostMultiplier() } @@ -157,21 +162,33 @@ func (q *TagQueryContext) getInitialByTag(idCh chan schema.MKey, stopCh chan str defer close(idCh) defer q.wg.Done() - OUTER: - for tag := range q.index { - if !expr.ValuePasses(tag) { - continue - } - - for _, ids := range q.index[tag] { + if expr.ValueMatchesExactly() { + for _, ids := range q.index[expr.GetKey()] { for id := range ids { select { case <-stopCh: - break OUTER + break case idCh <- id: } } } + } else { + OUTER: + for tag := range q.index { + if !expr.ValuePasses(tag) { + continue + } + + for _, ids := range q.index[tag] { + for id := range ids { + select { + case <-stopCh: + break OUTER + case idCh <- id: + } + } + } + } } }() } From 9d516c8edec6a221e64bd14cdd7f5d867393b2f0 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Tue, 6 Aug 2019 14:16:02 -0400 Subject: [PATCH 35/40] better naming and additional comment this renames some methods to make it clearer what they do. it also adds another explanator comment. 
--- expr/tagquery/expression.go | 19 ++++++++++++++----- expr/tagquery/expression_common.go | 2 +- expr/tagquery/expression_equal.go | 4 ++-- expr/tagquery/expression_has_tag.go | 4 ++-- expr/tagquery/expression_match.go | 2 +- expr/tagquery/expression_match_all.go | 2 +- expr/tagquery/expression_match_none.go | 2 +- expr/tagquery/expression_match_tag.go | 2 +- expr/tagquery/expression_not_equal.go | 2 +- expr/tagquery/expression_not_has_tag.go | 2 +- expr/tagquery/expression_not_match.go | 2 +- expr/tagquery/expression_prefix.go | 2 +- expr/tagquery/expression_prefix_tag.go | 2 +- idx/memory/tag_query.go | 18 +++++++++--------- 14 files changed, 37 insertions(+), 28 deletions(-) diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index efccddaa3e..fc5fcb0bfc 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -131,11 +131,20 @@ type Expression interface { // Every valid query must have at least one expression requiring a non-empty value. RequiresNonEmptyValue() bool - // ValuePasses takes a string which should either be a tag key or value depending on the return - // value of OperatesOnTag(), then it returns whether the given value satisfies this expression - ValuePasses(string) bool - - ValueMatchesExactly() bool + // Matches takes a string which should either be a tag key or value depending on the return + // value of OperatesOnTag(), then it returns whether the given string satisfies this expression + Matches(string) bool + + // MatchesExactly returns a bool to indicate whether the key / value of this expression (depending + // on OperatesOnTag()) needs to be an exact match with the key / value of the metrics it evaluates + // F.e: + // in the case of the expression "tag1=value1" we're only looking for metrics where the value + // associated with tag key "tag1" is exactly "value1", so a simple string comparison is sufficient. 
+ // in other cases like "tag1=~val.*" or "tag^=val" this isn't the case, a simple string comparison + // is not sufficient to decide whether a metric should be part of the result set or not. + // since simple string comparisons are cheaper than other comparison methods, whenever possible we + // want to use string comparison. + MatchesExactly() bool // GetMetricDefinitionFilter returns a MetricDefinitionFilter // The MetricDefinitionFilter takes a metric definition, looks at its tags and returns a decision diff --git a/expr/tagquery/expression_common.go b/expr/tagquery/expression_common.go index 21ba34b29d..7e0c6c710c 100644 --- a/expr/tagquery/expression_common.go +++ b/expr/tagquery/expression_common.go @@ -25,7 +25,7 @@ func (e *expressionCommon) RequiresNonEmptyValue() bool { return true } -func (e *expressionCommon) ValueMatchesExactly() bool { +func (e *expressionCommon) MatchesExactly() bool { return false } diff --git a/expr/tagquery/expression_equal.go b/expr/tagquery/expression_equal.go index c74818a293..66cb49ea6d 100644 --- a/expr/tagquery/expression_equal.go +++ b/expr/tagquery/expression_equal.go @@ -26,11 +26,11 @@ func (e *expressionEqual) GetCostMultiplier() uint32 { return 1 } -func (e *expressionEqual) ValuePasses(value string) bool { +func (e *expressionEqual) Matches(value string) bool { return value == e.value } -func (e *expressionEqual) ValueMatchesExactly() bool { +func (e *expressionEqual) MatchesExactly() bool { return true } diff --git a/expr/tagquery/expression_has_tag.go b/expr/tagquery/expression_has_tag.go index 7b7e12e23f..acaac5d550 100644 --- a/expr/tagquery/expression_has_tag.go +++ b/expr/tagquery/expression_has_tag.go @@ -30,11 +30,11 @@ func (e *expressionHasTag) OperatesOnTag() bool { return true } -func (e *expressionHasTag) ValuePasses(value string) bool { +func (e *expressionHasTag) Matches(value string) bool { return value == e.key } -func (e *expressionHasTag) ValueMatchesExactly() bool { +func (e *expressionHasTag) 
MatchesExactly() bool { return true } diff --git a/expr/tagquery/expression_match.go b/expr/tagquery/expression_match.go index 9acd97cb60..37a61f2fc5 100644 --- a/expr/tagquery/expression_match.go +++ b/expr/tagquery/expression_match.go @@ -41,7 +41,7 @@ func (e *expressionMatch) RequiresNonEmptyValue() bool { return !e.matchesEmpty } -func (e *expressionMatch) ValuePasses(value string) bool { +func (e *expressionMatch) Matches(value string) bool { return e.valueRe.MatchString(value) } diff --git a/expr/tagquery/expression_match_all.go b/expr/tagquery/expression_match_all.go index 9ef99286e2..54a3390b02 100644 --- a/expr/tagquery/expression_match_all.go +++ b/expr/tagquery/expression_match_all.go @@ -40,7 +40,7 @@ func (e *expressionMatchAll) RequiresNonEmptyValue() bool { return false } -func (e *expressionMatchAll) ValuePasses(value string) bool { +func (e *expressionMatchAll) Matches(value string) bool { return true } diff --git a/expr/tagquery/expression_match_none.go b/expr/tagquery/expression_match_none.go index dbb297ecae..762c4a6208 100644 --- a/expr/tagquery/expression_match_none.go +++ b/expr/tagquery/expression_match_none.go @@ -40,7 +40,7 @@ func (e *expressionMatchNone) RequiresNonEmptyValue() bool { return true } -func (e *expressionMatchNone) ValuePasses(value string) bool { +func (e *expressionMatchNone) Matches(value string) bool { return false } diff --git a/expr/tagquery/expression_match_tag.go b/expr/tagquery/expression_match_tag.go index a4ef0658cb..7d73653d82 100644 --- a/expr/tagquery/expression_match_tag.go +++ b/expr/tagquery/expression_match_tag.go @@ -36,7 +36,7 @@ func (e *expressionMatchTag) RequiresNonEmptyValue() bool { return !e.matchesEmpty } -func (e *expressionMatchTag) ValuePasses(tag string) bool { +func (e *expressionMatchTag) Matches(tag string) bool { return e.valueRe.MatchString(tag) } diff --git a/expr/tagquery/expression_not_equal.go b/expr/tagquery/expression_not_equal.go index 50c66e2d4e..351e74b605 100644 --- 
a/expr/tagquery/expression_not_equal.go +++ b/expr/tagquery/expression_not_equal.go @@ -30,7 +30,7 @@ func (e *expressionNotEqual) RequiresNonEmptyValue() bool { return false } -func (e *expressionNotEqual) ValuePasses(value string) bool { +func (e *expressionNotEqual) Matches(value string) bool { return value != e.value } diff --git a/expr/tagquery/expression_not_has_tag.go b/expr/tagquery/expression_not_has_tag.go index 4bd81845ea..a1d542c7bf 100644 --- a/expr/tagquery/expression_not_has_tag.go +++ b/expr/tagquery/expression_not_has_tag.go @@ -34,7 +34,7 @@ func (e *expressionNotHasTag) RequiresNonEmptyValue() bool { return false } -func (e *expressionNotHasTag) ValuePasses(value string) bool { +func (e *expressionNotHasTag) Matches(value string) bool { return value == e.key } diff --git a/expr/tagquery/expression_not_match.go b/expr/tagquery/expression_not_match.go index f18cac5a02..9dece76f42 100644 --- a/expr/tagquery/expression_not_match.go +++ b/expr/tagquery/expression_not_match.go @@ -41,7 +41,7 @@ func (e *expressionNotMatch) RequiresNonEmptyValue() bool { return e.matchesEmpty } -func (e *expressionNotMatch) ValuePasses(value string) bool { +func (e *expressionNotMatch) Matches(value string) bool { return !e.valueRe.MatchString(value) } diff --git a/expr/tagquery/expression_prefix.go b/expr/tagquery/expression_prefix.go index fbfa5abadb..18155feee9 100644 --- a/expr/tagquery/expression_prefix.go +++ b/expr/tagquery/expression_prefix.go @@ -32,7 +32,7 @@ func (e *expressionPrefix) RequiresNonEmptyValue() bool { return true } -func (e *expressionPrefix) ValuePasses(value string) bool { +func (e *expressionPrefix) Matches(value string) bool { return strings.HasPrefix(value, e.value) } diff --git a/expr/tagquery/expression_prefix_tag.go b/expr/tagquery/expression_prefix_tag.go index 9724dda9a2..a2fb9ae354 100644 --- a/expr/tagquery/expression_prefix_tag.go +++ b/expr/tagquery/expression_prefix_tag.go @@ -36,7 +36,7 @@ func (e *expressionPrefixTag) 
RequiresNonEmptyValue() bool { return true } -func (e *expressionPrefixTag) ValuePasses(tag string) bool { +func (e *expressionPrefixTag) Matches(tag string) bool { return strings.HasPrefix(tag, e.value) } diff --git a/idx/memory/tag_query.go b/idx/memory/tag_query.go index 866571a440..f91a64958b 100644 --- a/idx/memory/tag_query.go +++ b/idx/memory/tag_query.go @@ -52,13 +52,13 @@ func (q *TagQueryContext) prepareExpressions(idx TagIndex) { costs[i].expressionIdx = i if expr.OperatesOnTag() { - if expr.ValueMatchesExactly() { + if expr.MatchesExactly() { costs[i].cost = uint32(len(idx[expr.GetKey()])) * expr.GetCostMultiplier() } else { costs[i].cost = uint32(len(idx)) * expr.GetCostMultiplier() } } else { - if expr.ValueMatchesExactly() { + if expr.MatchesExactly() { costs[i].cost = uint32(len(idx[expr.GetKey()][expr.GetValue()])) * expr.GetCostMultiplier() } else { costs[i].cost = uint32(len(idx[expr.GetKey()])) * expr.GetCostMultiplier() @@ -122,7 +122,7 @@ func (q *TagQueryContext) getInitialByTagValue(idCh chan schema.MKey, stopCh cha key := expr.GetKey() - if expr.ValueMatchesExactly() { + if expr.MatchesExactly() { value := expr.GetValue() for id := range q.index[key][value] { @@ -135,7 +135,7 @@ func (q *TagQueryContext) getInitialByTagValue(idCh chan schema.MKey, stopCh cha } else { OUTER: for value, ids := range q.index[key] { - if !expr.ValuePasses(value) { + if !expr.Matches(value) { continue } @@ -162,7 +162,7 @@ func (q *TagQueryContext) getInitialByTag(idCh chan schema.MKey, stopCh chan str defer close(idCh) defer q.wg.Done() - if expr.ValueMatchesExactly() { + if expr.MatchesExactly() { for _, ids := range q.index[expr.GetKey()] { for id := range ids { select { @@ -175,7 +175,7 @@ func (q *TagQueryContext) getInitialByTag(idCh chan schema.MKey, stopCh chan str } else { OUTER: for tag := range q.index { - if !expr.ValuePasses(tag) { + if !expr.Matches(tag) { continue } @@ -298,7 +298,7 @@ func (q *TagQueryContext) getMaxTagCount() int { var 
maxTagCount int for tag := range q.index { - if tagClause.ValuePasses(tag) { + if tagClause.Matches(tag) { maxTagCount++ } } @@ -346,7 +346,7 @@ IDS: continue } - if tagClause != nil && !tagClause.ValuePasses(key) { + if tagClause != nil && !tagClause.Matches(key) { continue } @@ -407,7 +407,7 @@ func (q *TagQueryContext) tagFilterMatchesName() bool { return true } - return tagClause.ValuePasses("name") + return tagClause.Matches("name") } // RunGetTags executes the tag query and returns all the tags of the From bc6c8b7748a7f6a4c35707b79f82ff5bd9dc2785 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Tue, 6 Aug 2019 17:54:56 -0400 Subject: [PATCH 36/40] performance tuning this changes the sortByCost logic so it puts more weight on the expression type, and only takes cardinality into account when it has to sort two expressions of the same operator cost. in the benchmarks this seems to lead to better results. --- expr/tagquery/expression.go | 5 ++++- expr/tagquery/expression_equal.go | 2 +- expr/tagquery/expression_has_tag.go | 4 ++-- expr/tagquery/expression_match.go | 4 ++-- expr/tagquery/expression_match_all.go | 2 +- expr/tagquery/expression_match_none.go | 2 +- expr/tagquery/expression_match_tag.go | 2 +- expr/tagquery/expression_not_equal.go | 2 +- expr/tagquery/expression_not_has_tag.go | 4 ++-- expr/tagquery/expression_not_match.go | 2 +- expr/tagquery/expression_prefix.go | 2 +- expr/tagquery/expression_prefix_tag.go | 4 ++-- idx/memory/tag_query.go | 22 ++++++++++++++++------ 13 files changed, 35 insertions(+), 22 deletions(-) diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index fc5fcb0bfc..75620a68c3 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -115,7 +115,10 @@ type Expression interface { // GetOperator returns the operator of this expression GetOperator() ExpressionOperator - GetCostMultiplier() uint32 + // GetOperatorCost returns a value which should roughly reflect the cost of this operator 
compared + // to other operators. F.e. = is cheaper than =~. Keep in mind that this is only a very rough + // estimate and will never be accurate. + GetOperatorCost() uint32 // OperatesOnTag returns whether this expression operators on the tag key // (if not, it operates on the value). diff --git a/expr/tagquery/expression_equal.go b/expr/tagquery/expression_equal.go index 66cb49ea6d..88aa1756fe 100644 --- a/expr/tagquery/expression_equal.go +++ b/expr/tagquery/expression_equal.go @@ -22,7 +22,7 @@ func (e *expressionEqual) GetOperator() ExpressionOperator { return EQUAL } -func (e *expressionEqual) GetCostMultiplier() uint32 { +func (e *expressionEqual) GetOperatorCost() uint32 { return 1 } diff --git a/expr/tagquery/expression_has_tag.go b/expr/tagquery/expression_has_tag.go index acaac5d550..71761932a0 100644 --- a/expr/tagquery/expression_has_tag.go +++ b/expr/tagquery/expression_has_tag.go @@ -22,8 +22,8 @@ func (e *expressionHasTag) GetOperator() ExpressionOperator { return HAS_TAG } -func (e *expressionHasTag) GetCostMultiplier() uint32 { - return 2 +func (e *expressionHasTag) GetOperatorCost() uint32 { + return 10 } func (e *expressionHasTag) OperatesOnTag() bool { diff --git a/expr/tagquery/expression_match.go b/expr/tagquery/expression_match.go index 37a61f2fc5..6a00b18fd4 100644 --- a/expr/tagquery/expression_match.go +++ b/expr/tagquery/expression_match.go @@ -33,8 +33,8 @@ func (e *expressionMatch) GetOperator() ExpressionOperator { return MATCH } -func (e *expressionMatch) GetCostMultiplier() uint32 { - return 0 +func (e *expressionMatch) GetOperatorCost() uint32 { + return 10 } func (e *expressionMatch) RequiresNonEmptyValue() bool { diff --git a/expr/tagquery/expression_match_all.go b/expr/tagquery/expression_match_all.go index 54a3390b02..8d581222d8 100644 --- a/expr/tagquery/expression_match_all.go +++ b/expr/tagquery/expression_match_all.go @@ -32,7 +32,7 @@ func (e *expressionMatchAll) GetOperator() ExpressionOperator { return MATCH_ALL } -func 
(e *expressionMatchAll) GetCostMultiplier() uint32 { +func (e *expressionMatchAll) GetOperatorCost() uint32 { return 50 } diff --git a/expr/tagquery/expression_match_none.go b/expr/tagquery/expression_match_none.go index 762c4a6208..d13bde2189 100644 --- a/expr/tagquery/expression_match_none.go +++ b/expr/tagquery/expression_match_none.go @@ -32,7 +32,7 @@ func (e *expressionMatchNone) GetOperator() ExpressionOperator { return MATCH_NONE } -func (e *expressionMatchNone) GetCostMultiplier() uint32 { +func (e *expressionMatchNone) GetOperatorCost() uint32 { return 0 } diff --git a/expr/tagquery/expression_match_tag.go b/expr/tagquery/expression_match_tag.go index 7d73653d82..027667a06a 100644 --- a/expr/tagquery/expression_match_tag.go +++ b/expr/tagquery/expression_match_tag.go @@ -24,7 +24,7 @@ func (e *expressionMatchTag) GetOperator() ExpressionOperator { return MATCH_TAG } -func (e *expressionMatchTag) GetCostMultiplier() uint32 { +func (e *expressionMatchTag) GetOperatorCost() uint32 { return 20 } diff --git a/expr/tagquery/expression_not_equal.go b/expr/tagquery/expression_not_equal.go index 351e74b605..7433e9461b 100644 --- a/expr/tagquery/expression_not_equal.go +++ b/expr/tagquery/expression_not_equal.go @@ -22,7 +22,7 @@ func (e *expressionNotEqual) GetOperator() ExpressionOperator { return NOT_EQUAL } -func (e *expressionNotEqual) GetCostMultiplier() uint32 { +func (e *expressionNotEqual) GetOperatorCost() uint32 { return 1 } diff --git a/expr/tagquery/expression_not_has_tag.go b/expr/tagquery/expression_not_has_tag.go index a1d542c7bf..19b4185fa0 100644 --- a/expr/tagquery/expression_not_has_tag.go +++ b/expr/tagquery/expression_not_has_tag.go @@ -22,8 +22,8 @@ func (e *expressionNotHasTag) GetOperator() ExpressionOperator { return NOT_HAS_TAG } -func (e *expressionNotHasTag) GetCostMultiplier() uint32 { - return 2 +func (e *expressionNotHasTag) GetOperatorCost() uint32 { + return 10 } func (e *expressionNotHasTag) OperatesOnTag() bool { diff --git 
a/expr/tagquery/expression_not_match.go b/expr/tagquery/expression_not_match.go index 9dece76f42..515e5c17c7 100644 --- a/expr/tagquery/expression_not_match.go +++ b/expr/tagquery/expression_not_match.go @@ -33,7 +33,7 @@ func (e *expressionNotMatch) GetOperator() ExpressionOperator { return NOT_MATCH } -func (e *expressionNotMatch) GetCostMultiplier() uint32 { +func (e *expressionNotMatch) GetOperatorCost() uint32 { return 10 } diff --git a/expr/tagquery/expression_prefix.go b/expr/tagquery/expression_prefix.go index 18155feee9..a4655a9319 100644 --- a/expr/tagquery/expression_prefix.go +++ b/expr/tagquery/expression_prefix.go @@ -22,7 +22,7 @@ func (e *expressionPrefix) GetOperator() ExpressionOperator { return PREFIX } -func (e *expressionPrefix) GetCostMultiplier() uint32 { +func (e *expressionPrefix) GetOperatorCost() uint32 { return 2 } diff --git a/expr/tagquery/expression_prefix_tag.go b/expr/tagquery/expression_prefix_tag.go index a2fb9ae354..d5aef8ecb7 100644 --- a/expr/tagquery/expression_prefix_tag.go +++ b/expr/tagquery/expression_prefix_tag.go @@ -22,8 +22,8 @@ func (e *expressionPrefixTag) GetOperator() ExpressionOperator { return PREFIX_TAG } -func (e *expressionPrefixTag) GetCostMultiplier() uint32 { - return 3 +func (e *expressionPrefixTag) GetOperatorCost() uint32 { + return 15 } func (e *expressionPrefixTag) OperatesOnTag() bool { diff --git a/idx/memory/tag_query.go b/idx/memory/tag_query.go index f91a64958b..883e825045 100644 --- a/idx/memory/tag_query.go +++ b/idx/memory/tag_query.go @@ -43,7 +43,8 @@ func NewTagQueryContext(query tagquery.Query) TagQueryContext { func (q *TagQueryContext) prepareExpressions(idx TagIndex) { type expressionCost struct { - cost uint32 + operatorCost uint32 + cardinality uint32 expressionIdx int } costs := make([]expressionCost, len(q.query.Expressions)) @@ -53,20 +54,29 @@ func (q *TagQueryContext) prepareExpressions(idx TagIndex) { if expr.OperatesOnTag() { if expr.MatchesExactly() { - costs[i].cost = 
uint32(len(idx[expr.GetKey()])) * expr.GetCostMultiplier() + costs[i].operatorCost = expr.GetOperatorCost() + costs[i].cardinality = uint32(len(idx[expr.GetKey()])) } else { - costs[i].cost = uint32(len(idx)) * expr.GetCostMultiplier() + costs[i].operatorCost = expr.GetOperatorCost() + costs[i].cardinality = uint32(len(idx)) } } else { if expr.MatchesExactly() { - costs[i].cost = uint32(len(idx[expr.GetKey()][expr.GetValue()])) * expr.GetCostMultiplier() + costs[i].operatorCost = expr.GetOperatorCost() + costs[i].cardinality = uint32(len(idx[expr.GetKey()][expr.GetValue()])) } else { - costs[i].cost = uint32(len(idx[expr.GetKey()])) * expr.GetCostMultiplier() + costs[i].operatorCost = expr.GetOperatorCost() + costs[i].cardinality = uint32(len(idx[expr.GetKey()])) } } } - sort.Slice(costs, func(i, j int) bool { return costs[i].cost < costs[j].cost }) + sort.Slice(costs, func(i, j int) bool { + if costs[i].operatorCost == costs[j].operatorCost { + return costs[i].cardinality < costs[j].cardinality + } + return costs[i].operatorCost < costs[j].operatorCost + }) // the number of filters / default decisions is equal to the number of expressions - 1 // because one of the expressions will be chosen to be the one that we start with. 
From 142a89333372153e21b36acfa0f861fdf6dea83c Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Tue, 6 Aug 2019 16:59:50 -0400 Subject: [PATCH 37/40] fix benchmark TagQueryKeysByPrefixSimple --- idx/memory/memory_find_test.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/idx/memory/memory_find_test.go b/idx/memory/memory_find_test.go index 83878d02a3..18d3c148ed 100644 --- a/idx/memory/memory_find_test.go +++ b/idx/memory/memory_find_test.go @@ -1219,14 +1219,12 @@ func benchmarkTagQueryKeysByPrefixSimple(b *testing.B) { type testCase struct { prefix string - expr []string from int64 expRes []string } tc := testCase{ prefix: "di", - expr: []string{}, from: 100, expRes: []string{"direction", "disk"}, } @@ -1235,7 +1233,7 @@ func benchmarkTagQueryKeysByPrefixSimple(b *testing.B) { b.ResetTimer() for n := 0; n < b.N; n++ { - autoCompleteTagsWithQueryAndCompare(b, n, tc.prefix, tc.expr, tc.from, 2, tc.expRes) + autoCompleteTagsAndCompare(b, n, tc.prefix, tc.from, 2, tc.expRes) } } From e565d559220c073f65f4d9e30f2038b5a134fe2f Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Thu, 8 Aug 2019 09:43:07 -0400 Subject: [PATCH 38/40] Apply suggestions from code review Co-Authored-By: Robert Milan <42070645+robert-milan@users.noreply.github.com> --- api/cluster.go | 2 +- api/graphite.go | 2 +- expr/tagquery/expression.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/api/cluster.go b/api/cluster.go index 140a01362c..6e751bba0c 100644 --- a/api/cluster.go +++ b/api/cluster.go @@ -268,7 +268,7 @@ func (s *Server) indexTagDelSeries(ctx *middleware.Context, request models.Index builder := strings.Builder{} for i := range tags { tags[i].StringIntoBuilder(&builder) - var err error + expressions[i], err = tagquery.ParseExpression(builder.String()) if err != nil { response.Write(ctx, response.WrapErrorForTagDB(err)) diff --git a/api/graphite.go b/api/graphite.go index a61b910524..e45172358d 100644 --- a/api/graphite.go +++ 
b/api/graphite.go @@ -1228,7 +1228,7 @@ func (s *Server) graphiteTagDelSeries(ctx *middleware.Context, request models.Gr builder := strings.Builder{} for i := range tags { tags[i].StringIntoBuilder(&builder) - var err error + expressions[i], err = tagquery.ParseExpression(builder.String()) if err != nil { response.Write(ctx, response.WrapErrorForTagDB(err)) diff --git a/expr/tagquery/expression.go b/expr/tagquery/expression.go index 75620a68c3..013aa5eff5 100644 --- a/expr/tagquery/expression.go +++ b/expr/tagquery/expression.go @@ -215,7 +215,7 @@ FIND_OPERATOR: valuePos := pos for ; pos < len(expr); pos++ { // disallow ; in value - if expr[pos] == 59 { + if expr[pos] == ';' { return nil, InvalidExpressionError(expr) } } From aefbc3c0696294448ffc0501b3da0138fc44c354 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Thu, 8 Aug 2019 10:31:56 -0400 Subject: [PATCH 39/40] initialize index settings into local variables --- idx/memory/memory.go | 9 +++++++-- idx/memory/memory_find_test.go | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/idx/memory/memory.go b/idx/memory/memory.go index b8d935ebda..515fbc3009 100755 --- a/idx/memory/memory.go +++ b/idx/memory/memory.go @@ -64,6 +64,8 @@ var ( writeQueueEnabled = false writeQueueDelay = 30 * time.Second writeMaxBatchSize = 5000 + matchCacheSize = 1000 + metaTagSupport = false ) func ConfigSetup() { @@ -82,8 +84,8 @@ func ConfigSetup() { memoryIdx.DurationVar(&findCacheBackoffTime, "find-cache-backoff-time", time.Minute, "amount of time to disable the findCache when the invalidate queue fills up.") memoryIdx.StringVar(&indexRulesFile, "rules-file", "/etc/metrictank/index-rules.conf", "path to index-rules.conf file") memoryIdx.StringVar(&maxPruneLockTimeStr, "max-prune-lock-time", "100ms", "Maximum duration each second a prune job can lock the index.") - memoryIdx.IntVar(&tagquery.MatchCacheSize, "match-cache-size", 1000, "size of regular expression cache in tag query evaluation") - 
memoryIdx.BoolVar(&tagquery.MetaTagSupport, "meta-tag-support", false, "enables/disables querying based on meta tags which get defined via meta tag rules") + memoryIdx.IntVar(&matchCacheSize, "match-cache-size", 1000, "size of regular expression cache in tag query evaluation") + memoryIdx.BoolVar(&metaTagSupport, "meta-tag-support", false, "enables/disables querying based on meta tags which get defined via meta tag rules") globalconf.Register("memory-idx", memoryIdx, flag.ExitOnError) } @@ -108,6 +110,9 @@ func ConfigProcess() { if findCacheInvalidateMaxSize >= findCacheInvalidateQueueSize { log.Fatal("find-cache-invalidate-max-size should be smaller than find-cache-invalidate-queue-size") } + + tagquery.MetaTagSupport = metaTagSupport + tagquery.MatchCacheSize = matchCacheSize } // interface implemented by both UnpartitionedMemoryIdx and PartitionedMemoryIdx diff --git a/idx/memory/memory_find_test.go b/idx/memory/memory_find_test.go index 18d3c148ed..e1faa59f9d 100644 --- a/idx/memory/memory_find_test.go +++ b/idx/memory/memory_find_test.go @@ -157,6 +157,7 @@ func TestMain(m *testing.M) { defer func(t bool) { TagSupport = t }(TagSupport) TagSupport = true TagQueryWorkers = 5 + matchCacheSize = 1000 tagquery.MatchCacheSize = 1000 // we dont need info logs in the test output log.SetLevel(log.ErrorLevel) From 31038544fc72c51eb98d9486547cd65fc2eca0a0 Mon Sep 17 00:00:00 2001 From: Mauro Stettler Date: Thu, 8 Aug 2019 17:58:54 -0400 Subject: [PATCH 40/40] bugfix in expression_not_has_tag --- expr/tagquery/expression_not_has_tag.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/expr/tagquery/expression_not_has_tag.go b/expr/tagquery/expression_not_has_tag.go index 19b4185fa0..f5ab8fc216 100644 --- a/expr/tagquery/expression_not_has_tag.go +++ b/expr/tagquery/expression_not_has_tag.go @@ -35,7 +35,7 @@ func (e *expressionNotHasTag) RequiresNonEmptyValue() bool { } func (e *expressionNotHasTag) Matches(value string) bool { - return value == e.key + 
return value != e.key } func (e *expressionNotHasTag) GetMetricDefinitionFilter(_ IdTagLookup) MetricDefinitionFilter {