From b894301c3e1503025d777fea03805df88d30f760 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Wed, 20 Dec 2023 13:00:59 +0100 Subject: [PATCH] Fix FastRegexMatcher to skip nested capture groups Signed-off-by: Marco Pracucci --- model/labels/regexp.go | 3 ++- model/labels/regexp_test.go | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/model/labels/regexp.go b/model/labels/regexp.go index 17c9e5fcfe..de4cfc3b1c 100644 --- a/model/labels/regexp.go +++ b/model/labels/regexp.go @@ -284,7 +284,8 @@ func findSetMatchesFromAlternate(re *syntax.Regexp, base string) (matches []stri // clearCapture removes capture operation as they are not used for matching. func clearCapture(regs ...*syntax.Regexp) { for _, r := range regs { - if r.Op == syntax.OpCapture { + // Iterate on the regexp because capture groups could be nested. + for r.Op == syntax.OpCapture { *r = *r.Sub[0] } } diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go index 8b8525b6c9..f0e21b2d5c 100644 --- a/model/labels/regexp_test.go +++ b/model/labels/regexp_test.go @@ -286,6 +286,8 @@ func TestFindSetMatches(t *testing.T) { // Simple sets alternate and concat and alternates with empty matches // parsed as b(ar|(?:)|uzz) where b(?:) means literal b. {"bar|b|buzz", []string{"bar", "b", "buzz"}, true}, + // Skip nested capture groups. + {"^((bar|b|buzz))$", []string{"bar", "b", "buzz"}, true}, // Skip outer anchors (it's enforced anyway at the root). {"^(bar|b|buzz)$", []string{"bar", "b", "buzz"}, true}, {"^(?:prod|production)$", []string{"prod", "production"}, true}, @@ -395,6 +397,8 @@ func TestStringMatcherFromRegexp(t *testing.T) { {"^foo$", &equalStringMatcher{s: "foo", caseSensitive: true}}, {"^(?i:foo)$", &equalStringMatcher{s: "FOO", caseSensitive: false}}, {"^((?i:foo)|(bar))$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})}, + {`(?i:((foo|bar)))`, orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "BAR", caseSensitive: false}})}, + {`(?i:((foo1|foo2|bar)))`, orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})}, {"^((?i:foo|oo)|(bar))$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "OO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})}, {"(?i:(foo1|foo2|bar))", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})}, {".*foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}},