From 4960ec4c3d9bf33fdf0e7be48d99533fe95ea2cd Mon Sep 17 00:00:00 2001 From: Alan Protasio Date: Thu, 18 May 2023 09:24:49 -0700 Subject: [PATCH 1/2] Implementing Regex optimization on the MatchNotRegexp matcher type Signed-off-by: Alan Protasio --- pkg/store/bucket.go | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/pkg/store/bucket.go b/pkg/store/bucket.go index c2648ed400..8f711a67c1 100644 --- a/pkg/store/bucket.go +++ b/pkg/store/bucket.go @@ -2282,15 +2282,7 @@ func checkNilPosting(l labels.Label, p index.Postings) index.Postings { func toPostingGroup(ctx context.Context, lvalsFn func(name string) ([]string, error), m *labels.Matcher) (*postingGroup, error) { if m.Type == labels.MatchRegexp { if vals := findSetMatches(m.Value); len(vals) > 0 { - // Sorting will improve the performance dramatically if the dataset is relatively large - // since entries in the postings offset table was sorted by label name and value, - // the sequential reading is much faster. - sort.Strings(vals) - toAdd := make([]labels.Label, 0, len(vals)) - for _, val := range vals { - toAdd = append(toAdd, labels.Label{Name: m.Name, Value: val}) - } - return newPostingGroup(false, toAdd, nil), nil + return newPostingGroup(false, labelsFromSetMatchers(m.Name, vals), nil), nil } } @@ -2298,12 +2290,22 @@ func toPostingGroup(ctx context.Context, lvalsFn func(name string) ([]string, er // have the label name set too. See: https://github.com/prometheus/prometheus/issues/3575 // and https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555. if m.Matches("") { + var toRemove []labels.Label + + // Inverse of a MatchNotRegexp is MatchRegexp (double negation). + // Fast-path for set matching. 
+ if m.Type == labels.MatchNotRegexp { + if vals := findSetMatches(m.Value); len(vals) > 0 { + toRemove = labelsFromSetMatchers(m.Name, vals) + return newPostingGroup(true, nil, toRemove), nil + } + } + vals, err := lvalsFn(m.Name) if err != nil { return nil, err } - var toRemove []labels.Label for _, val := range vals { if ctx.Err() != nil { return nil, ctx.Err() @@ -2339,6 +2341,18 @@ func toPostingGroup(ctx context.Context, lvalsFn func(name string) ([]string, er return newPostingGroup(false, toAdd, nil), nil } +func labelsFromSetMatchers(name string, vals []string) []labels.Label { + // Sorting will improve the performance dramatically if the dataset is relatively large + // since entries in the postings offset table was sorted by label name and value, + // the sequential reading is much faster. + sort.Strings(vals) + toAdd := make([]labels.Label, 0, len(vals)) + for _, val := range vals { + toAdd = append(toAdd, labels.Label{Name: name, Value: val}) + } + return toAdd +} + type postingPtr struct { keyID int ptr index.Range From 039770d2ff3d6c7d86f851e1aa0ba66c24fcb768 Mon Sep 17 00:00:00 2001 From: Alan Protasio Date: Thu, 18 May 2023 10:34:02 -0700 Subject: [PATCH 2/2] Optimizing MatchNotEqual Signed-off-by: Alan Protasio --- pkg/store/bucket.go | 7 +++++++ pkg/store/bucket_test.go | 2 ++ 2 files changed, 9 insertions(+) diff --git a/pkg/store/bucket.go b/pkg/store/bucket.go index 8f711a67c1..147d12609c 100644 --- a/pkg/store/bucket.go +++ b/pkg/store/bucket.go @@ -2292,6 +2292,7 @@ func toPostingGroup(ctx context.Context, lvalsFn func(name string) ([]string, er if m.Matches("") { var toRemove []labels.Label + // Fast-path for MatchNotRegexp matching. // Inverse of a MatchNotRegexp is MatchRegexp (double negation). // Fast-path for set matching. if m.Type == labels.MatchNotRegexp { @@ -2301,6 +2302,12 @@ func toPostingGroup(ctx context.Context, lvalsFn func(name string) ([]string, er } } + // Fast-path for MatchNotEqual matching. 
+ // Inverse of a MatchNotEqual is MatchEqual (double negation). + if m.Type == labels.MatchNotEqual { + return newPostingGroup(true, nil, []labels.Label{{Name: m.Name, Value: m.Value}}), nil + } + vals, err := lvalsFn(m.Name) if err != nil { return nil, err diff --git a/pkg/store/bucket_test.go b/pkg/store/bucket_test.go index 2f1eb8ef93..186f7499ca 100644 --- a/pkg/store/bucket_test.go +++ b/pkg/store/bucket_test.go @@ -1387,6 +1387,8 @@ func benchBucketSeries(t testutil.TB, sampleType chunkenc.ValueType, skipChunk b matchersCase := []*labels.Matcher{ labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"), labels.MustNewMatcher(labels.MatchNotEqual, "foo", "bar"), + labels.MustNewMatcher(labels.MatchEqual, "j", "0"), + labels.MustNewMatcher(labels.MatchNotEqual, "j", "0"), labels.MustNewMatcher(labels.MatchRegexp, "j", "(0|1)"), labels.MustNewMatcher(labels.MatchRegexp, "j", "0|1"), labels.MustNewMatcher(labels.MatchNotRegexp, "j", "(0|1)"),