Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

store: Added regex-set optimization to ExpandedPostings #2450

Merged
merged 9 commits into from
Apr 17, 2020
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ We use *breaking* word for marking changes that are not backward compatible (rel
### Added

### Changed
- [#2450](https://github.com/thanos-io/thanos/pull/2450) Store: regex-set optimization for `label=~"a|b|c"` matchers.

## [v0.12.0](https://github.com/thanos-io/thanos/releases/tag/v0.12.0) - 2020.04.15

Expand Down
9 changes: 9 additions & 0 deletions pkg/store/bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -1502,6 +1502,15 @@ func toPostingGroup(lvalsFn func(name string) ([]string, error), m *labels.Match
return emptyPostingsGroup, nil
}

if m.Type == labels.MatchRegexp && len(findSetMatches(m.Value)) > 0 {
vals := findSetMatches(m.Value)
toAdd := make([]labels.Label, 0, len(vals))
for _, val := range vals {
toAdd = append(toAdd, labels.Label{Name: m.Name, Value: val})
}
return newPostingGroup(false, toAdd, nil), nil
}

// If the matcher selects an empty value, it selects all the series which don't
// have the label name set too. See: https://github.com/prometheus/prometheus/issues/3575
// and https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555.
Expand Down
2 changes: 2 additions & 0 deletions pkg/store/bucket_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1005,6 +1005,7 @@ func benchmarkExpandedPostings(
iNotEmpty := labels.MustNewMatcher(labels.MatchNotEqual, "i", "")
iNot2 := labels.MustNewMatcher(labels.MatchNotEqual, "n", "2"+postingsBenchSuffix)
iNot2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^2.*$")
iRegexSet := labels.MustNewMatcher(labels.MatchRegexp, "i", "0"+postingsBenchSuffix+"|1"+postingsBenchSuffix+"|2"+postingsBenchSuffix)

series = series / 5
cases := []struct {
Expand All @@ -1029,6 +1030,7 @@ func benchmarkExpandedPostings(
{`n="1",i=~"1.+",j="foo"`, []*labels.Matcher{n1, i1Plus, jFoo}, int(float64(series) * 0.011111)},
{`n="1",i=~".+",i!="2",j="foo"`, []*labels.Matcher{n1, iPlus, iNot2, jFoo}, int(float64(series) * 0.1)},
{`n="1",i=~".+",i!~"2.*",j="foo"`, []*labels.Matcher{n1, iPlus, iNot2Star, jFoo}, int(1 + float64(series)*0.088888)},
{`i=~"0|1|2"`, []*labels.Matcher{iRegexSet}, 150}, // 50 series for "1", 50 for "2" and 50 for "3".
}

for _, c := range cases {
Expand Down
63 changes: 63 additions & 0 deletions pkg/store/opts.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package store

import (
"strings"
"unicode/utf8"
)

// Bitmap used by func isRegexMetaCharacter to check whether a character needs to be escaped.
var regexMetaCharacterBytes [16]byte

// isRegexMetaCharacter reports whether byte b needs to be escaped.
func isRegexMetaCharacter(b byte) bool {
return b < utf8.RuneSelf && regexMetaCharacterBytes[b%16]&(1<<(b/16)) != 0
}

func init() {
for _, b := range []byte(`.+*?()|[]{}^$`) {
regexMetaCharacterBytes[b%16] |= 1 << (b / 16)
}
}

// Copied from Prometheus querier.go, removed check for Prometheus wrapper.
// Returns list of values that can regex matches.
func findSetMatches(pattern string) []string {
escaped := false
sets := []*strings.Builder{{}}
for i := 0; i < len(pattern); i++ {
if escaped {
switch {
case isRegexMetaCharacter(pattern[i]):
sets[len(sets)-1].WriteByte(pattern[i])
case pattern[i] == '\\':
sets[len(sets)-1].WriteByte('\\')
default:
return nil
}
escaped = false
} else {
switch {
case isRegexMetaCharacter(pattern[i]):
if pattern[i] == '|' {
sets = append(sets, &strings.Builder{})
} else {
return nil
}
case pattern[i] == '\\':
escaped = true
default:
sets[len(sets)-1].WriteByte(pattern[i])
}
}
}
matches := make([]string, 0, len(sets))
for _, s := range sets {
if s.Len() > 0 {
matches = append(matches, s.String())
}
}
return matches
}
53 changes: 53 additions & 0 deletions pkg/store/opts_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package store

import (
"testing"

"github.com/thanos-io/thanos/pkg/testutil"
)

// Refer to https://github.com/prometheus/prometheus/issues/2651.
func TestFindSetMatches(t *testing.T) {
cases := []struct {
pattern string
exp []string
}{
// Simple sets.
{
pattern: "foo|bar|baz",
exp: []string{
"foo",
"bar",
"baz",
},
},
// Simple sets containing escaped characters.
{
pattern: "fo\\.o|bar\\?|\\^baz",
exp: []string{
"fo.o",
"bar?",
"^baz",
},
},
// Simple sets containing special characters without escaping.
{
pattern: "fo.o|bar?|^baz",
exp: nil,
},
{
pattern: "foo\\|bar\\|baz",
exp: []string{
"foo|bar|baz",
},
},
}

for _, c := range cases {
matches := findSetMatches(c.pattern)
testutil.Equals(t, c.exp, matches)
}
}