diff --git a/go.mod b/go.mod index 8072d9d08..9ad46c285 100644 --- a/go.mod +++ b/go.mod @@ -73,6 +73,7 @@ require ( github.com/google/pprof v0.0.0-20230510103437-eeec1cb781c3 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/google/uuid v1.6.0 // indirect + github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.1 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect diff --git a/go.sum b/go.sum index 8ad5a9732..5d39ca31d 100644 --- a/go.sum +++ b/go.sum @@ -233,6 +233,8 @@ github.com/googleapis/gnostic v0.5.1/go.mod h1:6U4PtQXGIEt/Z3h5MAT7FNofLnw9vXk2c github.com/googleapis/gnostic v0.5.5/go.mod h1:7+EbHbldMins07ALC74bsA81Ovc97DwqyJO1AENw9kA= github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd h1:PpuIBO5P3e9hpqBD0O/HjhShYuM6XE0i/lbE6J94kww= +github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd/go.mod h1:M5qHK+eWfAv8VR/265dIuEpL3fNfeC21tXXp9itM24A= github.com/grpc-ecosystem/grpc-gateway v1.14.6/go.mod h1:zdiPV4Yse/1gnckTHtghG4GkDEdKCRJduHpTxT3/jcw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.1 h1:/c3QmbOGMGTOumP2iT/rCwB7b0QDGLKzqOmktBjT+Is= github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.1/go.mod h1:5SN9VR2LTsRFsrEC6FHgRbTWrTHu6tqPeKxEQv15giM= diff --git a/pkg/monitoring/rules/rules.go b/pkg/monitoring/rules/rules.go index 0d33798eb..df5af7022 100644 --- a/pkg/monitoring/rules/rules.go +++ b/pkg/monitoring/rules/rules.go @@ -45,6 +45,10 @@ func BuildPrometheusRule(namespace string) (*promv1.PrometheusRule, error) { ) } +func ListAlerts() []promv1.Rule { + return operatorrules.ListAlerts() +} + func ListRecordingRules() []operatorrules.RecordingRule { return operatorrules.ListRecordingRules() } diff --git a/pkg/monitoring/rules/rules_test.go b/pkg/monitoring/rules/rules_test.go new file mode 100644 index 000000000..e49375dd5 --- /dev/null +++ b/pkg/monitoring/rules/rules_test.go @@ -0,0 +1,40 @@ +package rules + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/machadovilaca/operator-observability/pkg/testutil" +) + +func TestRules(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Rules Suite") +} + +var _ = Describe("Rules Validation", func() { + var linter *testutil.Linter + + BeforeEach(func() { + Expect(SetupRules()).To(Succeed()) + linter = testutil.New() + }) + + It("Should validate alerts", func() { + linter.AddCustomAlertValidations( + testutil.ValidateAlertNameLength, + testutil.ValidateAlertRunbookURLAnnotation, + testutil.ValidateAlertHealthImpactLabel, + testutil.ValidateAlertPartOfAndComponentLabels) + + problems := linter.LintAlerts(ListAlerts()) + Expect(problems).To(BeEmpty()) + }) + + It("Should validate recording rules", func() { + problems := linter.LintRecordingRules(ListRecordingRules()) + Expect(problems).To(BeEmpty()) + }) +}) diff --git a/vendor/github.com/grafana/regexp/.gitignore b/vendor/github.com/grafana/regexp/.gitignore new file mode 100644 index 000000000..66fd13c90 --- /dev/null +++ b/vendor/github.com/grafana/regexp/.gitignore @@ -0,0 +1,15 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ diff --git a/vendor/github.com/grafana/regexp/LICENSE b/vendor/github.com/grafana/regexp/LICENSE new file mode 100644 index 000000000..6a66aea5e --- /dev/null +++ b/vendor/github.com/grafana/regexp/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/grafana/regexp/README.md b/vendor/github.com/grafana/regexp/README.md new file mode 100644 index 000000000..756e60dcf --- /dev/null +++ b/vendor/github.com/grafana/regexp/README.md @@ -0,0 +1,12 @@ +# Grafana Go regexp package +This repo is a fork of the upstream Go `regexp` package, with some code optimisations to make it run faster. + +All the optimisations have been submitted upstream, but not yet merged. + +All semantics are the same, and the optimised code passes all tests from upstream. + +The `main` branch is non-optimised: switch over to [`speedup`](https://github.com/grafana/regexp/tree/speedup) branch for the improved code. + +## Benchmarks: + +![image](https://user-images.githubusercontent.com/8125524/152182951-856549ed-6044-4285-b799-69b31f598e32.png) diff --git a/vendor/github.com/grafana/regexp/backtrack.go b/vendor/github.com/grafana/regexp/backtrack.go new file mode 100644 index 000000000..0739f5ff5 --- /dev/null +++ b/vendor/github.com/grafana/regexp/backtrack.go @@ -0,0 +1,367 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// backtrack is a regular expression search with submatch +// tracking for small regular expressions and texts. It allocates +// a bit vector with (length of input) * (length of prog) bits, +// to make sure it never explores the same (character position, instruction) +// state multiple times. This limits the search to run in time linear in +// the length of the test. +// +// backtrack is a fast replacement for the NFA code on small +// regexps when onepass cannot be used. + +package regexp + +import ( + "regexp/syntax" + "sync" +) + +// A job is an entry on the backtracker's job stack. It holds +// the instruction pc and the position in the input. +type job struct { + pc uint32 + arg bool + pos int +} + +const ( + visitedBits = 32 + maxBacktrackProg = 500 // len(prog.Inst) <= max + maxBacktrackVector = 256 * 1024 // bit vector size <= max (bits) +) + +// bitState holds state for the backtracker. +type bitState struct { + end int + cap []int + matchcap []int + jobs []job + visited []uint32 + + inputs inputs +} + +var bitStatePool sync.Pool + +func newBitState() *bitState { + b, ok := bitStatePool.Get().(*bitState) + if !ok { + b = new(bitState) + } + return b +} + +func freeBitState(b *bitState) { + b.inputs.clear() + bitStatePool.Put(b) +} + +// maxBitStateLen returns the maximum length of a string to search with +// the backtracker using prog. +func maxBitStateLen(prog *syntax.Prog) int { + if !shouldBacktrack(prog) { + return 0 + } + return maxBacktrackVector / len(prog.Inst) +} + +// shouldBacktrack reports whether the program is too +// long for the backtracker to run. +func shouldBacktrack(prog *syntax.Prog) bool { + return len(prog.Inst) <= maxBacktrackProg +} + +// reset resets the state of the backtracker. +// end is the end position in the input. +// ncap is the number of captures. +func (b *bitState) reset(prog *syntax.Prog, end int, ncap int) { + b.end = end + + if cap(b.jobs) == 0 { + b.jobs = make([]job, 0, 256) + } else { + b.jobs = b.jobs[:0] + } + + visitedSize := (len(prog.Inst)*(end+1) + visitedBits - 1) / visitedBits + if cap(b.visited) < visitedSize { + b.visited = make([]uint32, visitedSize, maxBacktrackVector/visitedBits) + } else { + b.visited = b.visited[:visitedSize] + for i := range b.visited { + b.visited[i] = 0 + } + } + + if cap(b.cap) < ncap { + b.cap = make([]int, ncap) + } else { + b.cap = b.cap[:ncap] + } + for i := range b.cap { + b.cap[i] = -1 + } + + if cap(b.matchcap) < ncap { + b.matchcap = make([]int, ncap) + } else { + b.matchcap = b.matchcap[:ncap] + } + for i := range b.matchcap { + b.matchcap[i] = -1 + } +} + +// shouldVisit reports whether the combination of (pc, pos) has not +// been visited yet. +func (b *bitState) shouldVisit(pc uint32, pos int) bool { + n := uint(int(pc)*(b.end+1) + pos) + if b.visited[n/visitedBits]&(1<<(n&(visitedBits-1))) != 0 { + return false + } + b.visited[n/visitedBits] |= 1 << (n & (visitedBits - 1)) + return true +} + +// push pushes (pc, pos, arg) onto the job stack if it should be +// visited. +func (b *bitState) push(re *Regexp, pc uint32, pos int, arg bool) { + // Only check shouldVisit when arg is false. + // When arg is true, we are continuing a previous visit. + if re.prog.Inst[pc].Op != syntax.InstFail && (arg || b.shouldVisit(pc, pos)) { + b.jobs = append(b.jobs, job{pc: pc, arg: arg, pos: pos}) + } +} + +// tryBacktrack runs a backtracking search starting at pos. +func (re *Regexp) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { + longest := re.longest + + b.push(re, pc, pos, false) + for len(b.jobs) > 0 { + l := len(b.jobs) - 1 + // Pop job off the stack. + pc := b.jobs[l].pc + pos := b.jobs[l].pos + arg := b.jobs[l].arg + b.jobs = b.jobs[:l] + + // Optimization: rather than push and pop, + // code that is going to Push and continue + // the loop simply updates ip, p, and arg + // and jumps to CheckAndLoop. We have to + // do the ShouldVisit check that Push + // would have, but we avoid the stack + // manipulation. + goto Skip + CheckAndLoop: + if !b.shouldVisit(pc, pos) { + continue + } + Skip: + + inst := &re.prog.Inst[pc] + + switch inst.Op { + default: + panic("bad inst") + case syntax.InstFail: + panic("unexpected InstFail") + case syntax.InstAlt: + // Cannot just + // b.push(inst.Out, pos, false) + // b.push(inst.Arg, pos, false) + // If during the processing of inst.Out, we encounter + // inst.Arg via another path, we want to process it then. + // Pushing it here will inhibit that. Instead, re-push + // inst with arg==true as a reminder to push inst.Arg out + // later. + if arg { + // Finished inst.Out; try inst.Arg. + arg = false + pc = inst.Arg + goto CheckAndLoop + } else { + b.push(re, pc, pos, true) + pc = inst.Out + goto CheckAndLoop + } + + case syntax.InstAltMatch: + // One opcode consumes runes; the other leads to match. + switch re.prog.Inst[inst.Out].Op { + case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL: + // inst.Arg is the match. + b.push(re, inst.Arg, pos, false) + pc = inst.Arg + pos = b.end + goto CheckAndLoop + } + // inst.Out is the match - non-greedy + b.push(re, inst.Out, b.end, false) + pc = inst.Out + goto CheckAndLoop + + case syntax.InstRune: + r, width := i.step(pos) + if !inst.MatchRune(r) { + continue + } + pos += width + pc = inst.Out + goto CheckAndLoop + + case syntax.InstRune1: + r, width := i.step(pos) + if r != inst.Rune[0] { + continue + } + pos += width + pc = inst.Out + goto CheckAndLoop + + case syntax.InstRuneAnyNotNL: + r, width := i.step(pos) + if r == '\n' || r == endOfText { + continue + } + pos += width + pc = inst.Out + goto CheckAndLoop + + case syntax.InstRuneAny: + r, width := i.step(pos) + if r == endOfText { + continue + } + pos += width + pc = inst.Out + goto CheckAndLoop + + case syntax.InstCapture: + if arg { + // Finished inst.Out; restore the old value. + b.cap[inst.Arg] = pos + continue + } else { + if inst.Arg < uint32(len(b.cap)) { + // Capture pos to register, but save old value. + b.push(re, pc, b.cap[inst.Arg], true) // come back when we're done. + b.cap[inst.Arg] = pos + } + pc = inst.Out + goto CheckAndLoop + } + + case syntax.InstEmptyWidth: + flag := i.context(pos) + if !flag.match(syntax.EmptyOp(inst.Arg)) { + continue + } + pc = inst.Out + goto CheckAndLoop + + case syntax.InstNop: + pc = inst.Out + goto CheckAndLoop + + case syntax.InstMatch: + // We found a match. If the caller doesn't care + // where the match is, no point going further. + if len(b.cap) == 0 { + return true + } + + // Record best match so far. + // Only need to check end point, because this entire + // call is only considering one start position. + if len(b.cap) > 1 { + b.cap[1] = pos + } + if old := b.matchcap[1]; old == -1 || (longest && pos > 0 && pos > old) { + copy(b.matchcap, b.cap) + } + + // If going for first match, we're done. + if !longest { + return true + } + + // If we used the entire text, no longer match is possible. + if pos == b.end { + return true + } + + // Otherwise, continue on in hope of a longer match. + continue + } + } + + return longest && len(b.matchcap) > 1 && b.matchcap[1] >= 0 +} + +// backtrack runs a backtracking search of prog on the input starting at pos. +func (re *Regexp) backtrack(ib []byte, is string, pos int, ncap int, dstCap []int) []int { + startCond := re.cond + if startCond == ^syntax.EmptyOp(0) { // impossible + return nil + } + if startCond&syntax.EmptyBeginText != 0 && pos != 0 { + // Anchored match, past beginning of text. + return nil + } + + b := newBitState() + i, end := b.inputs.init(nil, ib, is) + b.reset(re.prog, end, ncap) + + // Anchored search must start at the beginning of the input + if startCond&syntax.EmptyBeginText != 0 { + if len(b.cap) > 0 { + b.cap[0] = pos + } + if !re.tryBacktrack(b, i, uint32(re.prog.Start), pos) { + freeBitState(b) + return nil + } + } else { + + // Unanchored search, starting from each possible text position. + // Notice that we have to try the empty string at the end of + // the text, so the loop condition is pos <= end, not pos < end. + // This looks like it's quadratic in the size of the text, + // but we are not clearing visited between calls to TrySearch, + // so no work is duplicated and it ends up still being linear. + width := -1 + for ; pos <= end && width != 0; pos += width { + if len(re.prefix) > 0 { + // Match requires literal prefix; fast search for it. + advance := i.index(re, pos) + if advance < 0 { + freeBitState(b) + return nil + } + pos += advance + } + + if len(b.cap) > 0 { + b.cap[0] = pos + } + if re.tryBacktrack(b, i, uint32(re.prog.Start), pos) { + // Match must be leftmost; done. + goto Match + } + _, width = i.step(pos) + } + freeBitState(b) + return nil + } + +Match: + dstCap = append(dstCap, b.matchcap...) + freeBitState(b) + return dstCap +} diff --git a/vendor/github.com/grafana/regexp/exec.go b/vendor/github.com/grafana/regexp/exec.go new file mode 100644 index 000000000..3fc4b684f --- /dev/null +++ b/vendor/github.com/grafana/regexp/exec.go @@ -0,0 +1,554 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package regexp + +import ( + "io" + "regexp/syntax" + "sync" +) + +// A queue is a 'sparse array' holding pending threads of execution. +// See https://research.swtch.com/2008/03/using-uninitialized-memory-for-fun-and.html +type queue struct { + sparse []uint32 + dense []entry +} + +// An entry is an entry on a queue. +// It holds both the instruction pc and the actual thread. +// Some queue entries are just place holders so that the machine +// knows it has considered that pc. Such entries have t == nil. +type entry struct { + pc uint32 + t *thread +} + +// A thread is the state of a single path through the machine: +// an instruction and a corresponding capture array. +// See https://swtch.com/~rsc/regexp/regexp2.html +type thread struct { + inst *syntax.Inst + cap []int +} + +// A machine holds all the state during an NFA simulation for p. +type machine struct { + re *Regexp // corresponding Regexp + p *syntax.Prog // compiled program + q0, q1 queue // two queues for runq, nextq + pool []*thread // pool of available threads + matched bool // whether a match was found + matchcap []int // capture information for the match + + inputs inputs +} + +type inputs struct { + // cached inputs, to avoid allocation + bytes inputBytes + string inputString + reader inputReader +} + +func (i *inputs) newBytes(b []byte) input { + i.bytes.str = b + return &i.bytes +} + +func (i *inputs) newString(s string) input { + i.string.str = s + return &i.string +} + +func (i *inputs) newReader(r io.RuneReader) input { + i.reader.r = r + i.reader.atEOT = false + i.reader.pos = 0 + return &i.reader +} + +func (i *inputs) clear() { + // We need to clear 1 of these. + // Avoid the expense of clearing the others (pointer write barrier). + if i.bytes.str != nil { + i.bytes.str = nil + } else if i.reader.r != nil { + i.reader.r = nil + } else { + i.string.str = "" + } +} + +func (i *inputs) init(r io.RuneReader, b []byte, s string) (input, int) { + if r != nil { + return i.newReader(r), 0 + } + if b != nil { + return i.newBytes(b), len(b) + } + return i.newString(s), len(s) +} + +func (m *machine) init(ncap int) { + for _, t := range m.pool { + t.cap = t.cap[:ncap] + } + m.matchcap = m.matchcap[:ncap] +} + +// alloc allocates a new thread with the given instruction. +// It uses the free pool if possible. +func (m *machine) alloc(i *syntax.Inst) *thread { + var t *thread + if n := len(m.pool); n > 0 { + t = m.pool[n-1] + m.pool = m.pool[:n-1] + } else { + t = new(thread) + t.cap = make([]int, len(m.matchcap), cap(m.matchcap)) + } + t.inst = i + return t +} + +// A lazyFlag is a lazily-evaluated syntax.EmptyOp, +// for checking zero-width flags like ^ $ \A \z \B \b. +// It records the pair of relevant runes and does not +// determine the implied flags until absolutely necessary +// (most of the time, that means never). +type lazyFlag uint64 + +func newLazyFlag(r1, r2 rune) lazyFlag { + return lazyFlag(uint64(r1)<<32 | uint64(uint32(r2))) +} + +func (f lazyFlag) match(op syntax.EmptyOp) bool { + if op == 0 { + return true + } + r1 := rune(f >> 32) + if op&syntax.EmptyBeginLine != 0 { + if r1 != '\n' && r1 >= 0 { + return false + } + op &^= syntax.EmptyBeginLine + } + if op&syntax.EmptyBeginText != 0 { + if r1 >= 0 { + return false + } + op &^= syntax.EmptyBeginText + } + if op == 0 { + return true + } + r2 := rune(f) + if op&syntax.EmptyEndLine != 0 { + if r2 != '\n' && r2 >= 0 { + return false + } + op &^= syntax.EmptyEndLine + } + if op&syntax.EmptyEndText != 0 { + if r2 >= 0 { + return false + } + op &^= syntax.EmptyEndText + } + if op == 0 { + return true + } + if syntax.IsWordChar(r1) != syntax.IsWordChar(r2) { + op &^= syntax.EmptyWordBoundary + } else { + op &^= syntax.EmptyNoWordBoundary + } + return op == 0 +} + +// match runs the machine over the input starting at pos. +// It reports whether a match was found. +// If so, m.matchcap holds the submatch information. +func (m *machine) match(i input, pos int) bool { + startCond := m.re.cond + if startCond == ^syntax.EmptyOp(0) { // impossible + return false + } + m.matched = false + for i := range m.matchcap { + m.matchcap[i] = -1 + } + runq, nextq := &m.q0, &m.q1 + r, r1 := endOfText, endOfText + width, width1 := 0, 0 + r, width = i.step(pos) + if r != endOfText { + r1, width1 = i.step(pos + width) + } + var flag lazyFlag + if pos == 0 { + flag = newLazyFlag(-1, r) + } else { + flag = i.context(pos) + } + for { + if len(runq.dense) == 0 { + if startCond&syntax.EmptyBeginText != 0 && pos != 0 { + // Anchored match, past beginning of text. + break + } + if m.matched { + // Have match; finished exploring alternatives. + break + } + if len(m.re.prefix) > 0 && r1 != m.re.prefixRune && i.canCheckPrefix() { + // Match requires literal prefix; fast search for it. + advance := i.index(m.re, pos) + if advance < 0 { + break + } + pos += advance + r, width = i.step(pos) + r1, width1 = i.step(pos + width) + } + } + if !m.matched { + if len(m.matchcap) > 0 { + m.matchcap[0] = pos + } + m.add(runq, uint32(m.p.Start), pos, m.matchcap, &flag, nil) + } + flag = newLazyFlag(r, r1) + m.step(runq, nextq, pos, pos+width, r, &flag) + if width == 0 { + break + } + if len(m.matchcap) == 0 && m.matched { + // Found a match and not paying attention + // to where it is, so any match will do. + break + } + pos += width + r, width = r1, width1 + if r != endOfText { + r1, width1 = i.step(pos + width) + } + runq, nextq = nextq, runq + } + m.clear(nextq) + return m.matched +} + +// clear frees all threads on the thread queue. +func (m *machine) clear(q *queue) { + for _, d := range q.dense { + if d.t != nil { + m.pool = append(m.pool, d.t) + } + } + q.dense = q.dense[:0] +} + +// step executes one step of the machine, running each of the threads +// on runq and appending new threads to nextq. +// The step processes the rune c (which may be endOfText), +// which starts at position pos and ends at nextPos. +// nextCond gives the setting for the empty-width flags after c. +func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond *lazyFlag) { + longest := m.re.longest + for j := 0; j < len(runq.dense); j++ { + d := &runq.dense[j] + t := d.t + if t == nil { + continue + } + if longest && m.matched && len(t.cap) > 0 && m.matchcap[0] < t.cap[0] { + m.pool = append(m.pool, t) + continue + } + i := t.inst + add := false + switch i.Op { + default: + panic("bad inst") + + case syntax.InstMatch: + if len(t.cap) > 0 && (!longest || !m.matched || m.matchcap[1] < pos) { + t.cap[1] = pos + copy(m.matchcap, t.cap) + } + if !longest { + // First-match mode: cut off all lower-priority threads. + for _, d := range runq.dense[j+1:] { + if d.t != nil { + m.pool = append(m.pool, d.t) + } + } + runq.dense = runq.dense[:0] + } + m.matched = true + + case syntax.InstRune: + add = i.MatchRune(c) + case syntax.InstRune1: + add = c == i.Rune[0] + case syntax.InstRuneAny: + add = true + case syntax.InstRuneAnyNotNL: + add = c != '\n' + } + if add { + t = m.add(nextq, i.Out, nextPos, t.cap, nextCond, t) + } + if t != nil { + m.pool = append(m.pool, t) + } + } + runq.dense = runq.dense[:0] +} + +// add adds an entry to q for pc, unless the q already has such an entry. +// It also recursively adds an entry for all instructions reachable from pc by following +// empty-width conditions satisfied by cond. pos gives the current position +// in the input. +func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond *lazyFlag, t *thread) *thread { +Again: + if pc == 0 { + return t + } + if j := q.sparse[pc]; j < uint32(len(q.dense)) && q.dense[j].pc == pc { + return t + } + + j := len(q.dense) + q.dense = q.dense[:j+1] + d := &q.dense[j] + d.t = nil + d.pc = pc + q.sparse[pc] = uint32(j) + + i := &m.p.Inst[pc] + switch i.Op { + default: + panic("unhandled") + case syntax.InstFail: + // nothing + case syntax.InstAlt, syntax.InstAltMatch: + t = m.add(q, i.Out, pos, cap, cond, t) + pc = i.Arg + goto Again + case syntax.InstEmptyWidth: + if cond.match(syntax.EmptyOp(i.Arg)) { + pc = i.Out + goto Again + } + case syntax.InstNop: + pc = i.Out + goto Again + case syntax.InstCapture: + if int(i.Arg) < len(cap) { + opos := cap[i.Arg] + cap[i.Arg] = pos + m.add(q, i.Out, pos, cap, cond, nil) + cap[i.Arg] = opos + } else { + pc = i.Out + goto Again + } + case syntax.InstMatch, syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL: + if t == nil { + t = m.alloc(i) + } else { + t.inst = i + } + if len(cap) > 0 && &t.cap[0] != &cap[0] { + copy(t.cap, cap) + } + d.t = t + t = nil + } + return t +} + +type onePassMachine struct { + inputs inputs + matchcap []int +} + +var onePassPool sync.Pool + +func newOnePassMachine() *onePassMachine { + m, ok := onePassPool.Get().(*onePassMachine) + if !ok { + m = new(onePassMachine) + } + return m +} + +func freeOnePassMachine(m *onePassMachine) { + m.inputs.clear() + onePassPool.Put(m) +} + +// doOnePass implements r.doExecute using the one-pass execution engine. +func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap int, dstCap []int) []int { + startCond := re.cond + if startCond == ^syntax.EmptyOp(0) { // impossible + return nil + } + + m := newOnePassMachine() + if cap(m.matchcap) < ncap { + m.matchcap = make([]int, ncap) + } else { + m.matchcap = m.matchcap[:ncap] + } + + matched := false + for i := range m.matchcap { + m.matchcap[i] = -1 + } + + i, _ := m.inputs.init(ir, ib, is) + + r, r1 := endOfText, endOfText + width, width1 := 0, 0 + r, width = i.step(pos) + if r != endOfText { + r1, width1 = i.step(pos + width) + } + var flag lazyFlag + if pos == 0 { + flag = newLazyFlag(-1, r) + } else { + flag = i.context(pos) + } + pc := re.onepass.Start + inst := &re.onepass.Inst[pc] + // If there is a simple literal prefix, skip over it. + if pos == 0 && flag.match(syntax.EmptyOp(inst.Arg)) && + len(re.prefix) > 0 && i.canCheckPrefix() { + // Match requires literal prefix; fast search for it. + if !i.hasPrefix(re) { + goto Return + } + pos += len(re.prefix) + r, width = i.step(pos) + r1, width1 = i.step(pos + width) + flag = i.context(pos) + pc = int(re.prefixEnd) + } + for { + inst = &re.onepass.Inst[pc] + pc = int(inst.Out) + switch inst.Op { + default: + panic("bad inst") + case syntax.InstMatch: + matched = true + if len(m.matchcap) > 0 { + m.matchcap[0] = 0 + m.matchcap[1] = pos + } + goto Return + case syntax.InstRune: + if !inst.MatchRune(r) { + goto Return + } + case syntax.InstRune1: + if r != inst.Rune[0] { + goto Return + } + case syntax.InstRuneAny: + // Nothing + case syntax.InstRuneAnyNotNL: + if r == '\n' { + goto Return + } + // peek at the input rune to see which branch of the Alt to take + case syntax.InstAlt, syntax.InstAltMatch: + pc = int(onePassNext(inst, r)) + continue + case syntax.InstFail: + goto Return + case syntax.InstNop: + continue + case syntax.InstEmptyWidth: + if !flag.match(syntax.EmptyOp(inst.Arg)) { + goto Return + } + continue + case syntax.InstCapture: + if int(inst.Arg) < len(m.matchcap) { + m.matchcap[inst.Arg] = pos + } + continue + } + if width == 0 { + break + } + flag = newLazyFlag(r, r1) + pos += width + r, width = r1, width1 + if r != endOfText { + r1, width1 = i.step(pos + width) + } + } + +Return: + if !matched { + freeOnePassMachine(m) + return nil + } + + dstCap = append(dstCap, m.matchcap...) + freeOnePassMachine(m) + return dstCap +} + +// doMatch reports whether either r, b or s match the regexp. +func (re *Regexp) doMatch(r io.RuneReader, b []byte, s string) bool { + return re.doExecute(r, b, s, 0, 0, nil) != nil +} + +// doExecute finds the leftmost match in the input, appends the position +// of its subexpressions to dstCap and returns dstCap. +// +// nil is returned if no matches are found and non-nil if matches are found. +func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int, dstCap []int) []int { + if dstCap == nil { + // Make sure 'return dstCap' is non-nil. + dstCap = arrayNoInts[:0:0] + } + + if r == nil && len(b)+len(s) < re.minInputLen { + return nil + } + + if re.onepass != nil { + return re.doOnePass(r, b, s, pos, ncap, dstCap) + } + if r == nil && len(b)+len(s) < re.maxBitStateLen { + return re.backtrack(b, s, pos, ncap, dstCap) + } + + m := re.get() + i, _ := m.inputs.init(r, b, s) + + m.init(ncap) + if !m.match(i, pos) { + re.put(m) + return nil + } + + dstCap = append(dstCap, m.matchcap...) + re.put(m) + return dstCap +} + +// arrayNoInts is returned by doExecute match if nil dstCap is passed +// to it with ncap=0. +var arrayNoInts [0]int diff --git a/vendor/github.com/grafana/regexp/onepass.go b/vendor/github.com/grafana/regexp/onepass.go new file mode 100644 index 000000000..bc47f4c4a --- /dev/null +++ b/vendor/github.com/grafana/regexp/onepass.go @@ -0,0 +1,507 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package regexp + +import ( + "regexp/syntax" + "sort" + "strings" + "unicode" + "unicode/utf8" +) + +// "One-pass" regexp execution. +// Some regexps can be analyzed to determine that they never need +// backtracking: they are guaranteed to run in one pass over the string +// without bothering to save all the usual NFA state. +// Detect those and execute them more quickly. + +// A onePassProg is a compiled one-pass regular expression program. +// It is the same as syntax.Prog except for the use of onePassInst. +type onePassProg struct { + Inst []onePassInst + Start int // index of start instruction + NumCap int // number of InstCapture insts in re +} + +// A onePassInst is a single instruction in a one-pass regular expression program. +// It is the same as syntax.Inst except for the new 'Next' field. +type onePassInst struct { + syntax.Inst + Next []uint32 +} + +// OnePassPrefix returns a literal string that all matches for the +// regexp must start with. Complete is true if the prefix +// is the entire match. Pc is the index of the last rune instruction +// in the string. The OnePassPrefix skips over the mandatory +// EmptyBeginText +func onePassPrefix(p *syntax.Prog) (prefix string, complete bool, pc uint32) { + i := &p.Inst[p.Start] + if i.Op != syntax.InstEmptyWidth || (syntax.EmptyOp(i.Arg))&syntax.EmptyBeginText == 0 { + return "", i.Op == syntax.InstMatch, uint32(p.Start) + } + pc = i.Out + i = &p.Inst[pc] + for i.Op == syntax.InstNop { + pc = i.Out + i = &p.Inst[pc] + } + // Avoid allocation of buffer if prefix is empty. + if iop(i) != syntax.InstRune || len(i.Rune) != 1 { + return "", i.Op == syntax.InstMatch, uint32(p.Start) + } + + // Have prefix; gather characters. + var buf strings.Builder + for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 && i.Rune[0] != utf8.RuneError { + buf.WriteRune(i.Rune[0]) + pc, i = i.Out, &p.Inst[i.Out] + } + if i.Op == syntax.InstEmptyWidth && + syntax.EmptyOp(i.Arg)&syntax.EmptyEndText != 0 && + p.Inst[i.Out].Op == syntax.InstMatch { + complete = true + } + return buf.String(), complete, pc +} + +// OnePassNext selects the next actionable state of the prog, based on the input character. +// It should only be called when i.Op == InstAlt or InstAltMatch, and from the one-pass machine. +// One of the alternates may ultimately lead without input to end of line. If the instruction +// is InstAltMatch the path to the InstMatch is in i.Out, the normal node in i.Next. +func onePassNext(i *onePassInst, r rune) uint32 { + next := i.MatchRunePos(r) + if next >= 0 { + return i.Next[next] + } + if i.Op == syntax.InstAltMatch { + return i.Out + } + return 0 +} + +func iop(i *syntax.Inst) syntax.InstOp { + op := i.Op + switch op { + case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL: + op = syntax.InstRune + } + return op +} + +// Sparse Array implementation is used as a queueOnePass. +type queueOnePass struct { + sparse []uint32 + dense []uint32 + size, nextIndex uint32 +} + +func (q *queueOnePass) empty() bool { + return q.nextIndex >= q.size +} + +func (q *queueOnePass) next() (n uint32) { + n = q.dense[q.nextIndex] + q.nextIndex++ + return +} + +func (q *queueOnePass) clear() { + q.size = 0 + q.nextIndex = 0 +} + +func (q *queueOnePass) contains(u uint32) bool { + if u >= uint32(len(q.sparse)) { + return false + } + return q.sparse[u] < q.size && q.dense[q.sparse[u]] == u +} + +func (q *queueOnePass) insert(u uint32) { + if !q.contains(u) { + q.insertNew(u) + } +} + +func (q *queueOnePass) insertNew(u uint32) { + if u >= uint32(len(q.sparse)) { + return + } + q.sparse[u] = q.size + q.dense[q.size] = u + q.size++ +} + +func newQueue(size int) (q *queueOnePass) { + return &queueOnePass{ + sparse: make([]uint32, size), + dense: make([]uint32, size), + } +} + +// mergeRuneSets merges two non-intersecting runesets, and returns the merged result, +// and a NextIp array. The idea is that if a rune matches the OnePassRunes at index +// i, NextIp[i/2] is the target. If the input sets intersect, an empty runeset and a +// NextIp array with the single element mergeFailed is returned. +// The code assumes that both inputs contain ordered and non-intersecting rune pairs. +const mergeFailed = uint32(0xffffffff) + +var ( + noRune = []rune{} + noNext = []uint32{mergeFailed} +) + +func mergeRuneSets(leftRunes, rightRunes *[]rune, leftPC, rightPC uint32) ([]rune, []uint32) { + leftLen := len(*leftRunes) + rightLen := len(*rightRunes) + if leftLen&0x1 != 0 || rightLen&0x1 != 0 { + panic("mergeRuneSets odd length []rune") + } + var ( + lx, rx int + ) + merged := make([]rune, 0) + next := make([]uint32, 0) + ok := true + defer func() { + if !ok { + merged = nil + next = nil + } + }() + + ix := -1 + extend := func(newLow *int, newArray *[]rune, pc uint32) bool { + if ix > 0 && (*newArray)[*newLow] <= merged[ix] { + return false + } + merged = append(merged, (*newArray)[*newLow], (*newArray)[*newLow+1]) + *newLow += 2 + ix += 2 + next = append(next, pc) + return true + } + + for lx < leftLen || rx < rightLen { + switch { + case rx >= rightLen: + ok = extend(&lx, leftRunes, leftPC) + case lx >= leftLen: + ok = extend(&rx, rightRunes, rightPC) + case (*rightRunes)[rx] < (*leftRunes)[lx]: + ok = extend(&rx, rightRunes, rightPC) + default: + ok = extend(&lx, leftRunes, leftPC) + } + if !ok { + return noRune, noNext + } + } + return merged, next +} + +// cleanupOnePass drops working memory, and restores certain shortcut instructions. +func cleanupOnePass(prog *onePassProg, original *syntax.Prog) { + for ix, instOriginal := range original.Inst { + switch instOriginal.Op { + case syntax.InstAlt, syntax.InstAltMatch, syntax.InstRune: + case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop, syntax.InstMatch, syntax.InstFail: + prog.Inst[ix].Next = nil + case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL: + prog.Inst[ix].Next = nil + prog.Inst[ix] = onePassInst{Inst: instOriginal} + } + } +} + +// onePassCopy creates a copy of the original Prog, as we'll be modifying it +func onePassCopy(prog *syntax.Prog) *onePassProg { + p := &onePassProg{ + Start: prog.Start, + NumCap: prog.NumCap, + Inst: make([]onePassInst, len(prog.Inst)), + } + for i, inst := range prog.Inst { + p.Inst[i] = onePassInst{Inst: inst} + } + + // rewrites one or more common Prog constructs that enable some otherwise + // non-onepass Progs to be onepass. A:BD (for example) means an InstAlt at + // ip A, that points to ips B & C. + // A:BC + B:DA => A:BC + B:CD + // A:BC + B:DC => A:DC + B:DC + for pc := range p.Inst { + switch p.Inst[pc].Op { + default: + continue + case syntax.InstAlt, syntax.InstAltMatch: + // A:Bx + B:Ay + p_A_Other := &p.Inst[pc].Out + p_A_Alt := &p.Inst[pc].Arg + // make sure a target is another Alt + instAlt := p.Inst[*p_A_Alt] + if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) { + p_A_Alt, p_A_Other = p_A_Other, p_A_Alt + instAlt = p.Inst[*p_A_Alt] + if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) { + continue + } + } + instOther := p.Inst[*p_A_Other] + // Analyzing both legs pointing to Alts is for another day + if instOther.Op == syntax.InstAlt || instOther.Op == syntax.InstAltMatch { + // too complicated + continue + } + // simple empty transition loop + // A:BC + B:DA => A:BC + B:DC + p_B_Alt := &p.Inst[*p_A_Alt].Out + p_B_Other := &p.Inst[*p_A_Alt].Arg + patch := false + if instAlt.Out == uint32(pc) { + patch = true + } else if instAlt.Arg == uint32(pc) { + patch = true + p_B_Alt, p_B_Other = p_B_Other, p_B_Alt + } + if patch { + *p_B_Alt = *p_A_Other + } + + // empty transition to common target + // A:BC + B:DC => A:DC + B:DC + if *p_A_Other == *p_B_Alt { + *p_A_Alt = *p_B_Other + } + } + } + return p +} + +// runeSlice exists to permit sorting the case-folded rune sets. +type runeSlice []rune + +func (p runeSlice) Len() int { return len(p) } +func (p runeSlice) Less(i, j int) bool { return p[i] < p[j] } +func (p runeSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune} +var anyRune = []rune{0, unicode.MaxRune} + +// makeOnePass creates a onepass Prog, if possible. It is possible if at any alt, +// the match engine can always tell which branch to take. The routine may modify +// p if it is turned into a onepass Prog. If it isn't possible for this to be a +// onepass Prog, the Prog nil is returned. makeOnePass is recursive +// to the size of the Prog. +func makeOnePass(p *onePassProg) *onePassProg { + // If the machine is very long, it's not worth the time to check if we can use one pass. + if len(p.Inst) >= 1000 { + return nil + } + + var ( + instQueue = newQueue(len(p.Inst)) + visitQueue = newQueue(len(p.Inst)) + check func(uint32, []bool) bool + onePassRunes = make([][]rune, len(p.Inst)) + ) + + // check that paths from Alt instructions are unambiguous, and rebuild the new + // program as a onepass program + check = func(pc uint32, m []bool) (ok bool) { + ok = true + inst := &p.Inst[pc] + if visitQueue.contains(pc) { + return + } + visitQueue.insert(pc) + switch inst.Op { + case syntax.InstAlt, syntax.InstAltMatch: + ok = check(inst.Out, m) && check(inst.Arg, m) + // check no-input paths to InstMatch + matchOut := m[inst.Out] + matchArg := m[inst.Arg] + if matchOut && matchArg { + ok = false + break + } + // Match on empty goes in inst.Out + if matchArg { + inst.Out, inst.Arg = inst.Arg, inst.Out + matchOut, matchArg = matchArg, matchOut + } + if matchOut { + m[pc] = true + inst.Op = syntax.InstAltMatch + } + + // build a dispatch operator from the two legs of the alt. + onePassRunes[pc], inst.Next = mergeRuneSets( + &onePassRunes[inst.Out], &onePassRunes[inst.Arg], inst.Out, inst.Arg) + if len(inst.Next) > 0 && inst.Next[0] == mergeFailed { + ok = false + break + } + case syntax.InstCapture, syntax.InstNop: + ok = check(inst.Out, m) + m[pc] = m[inst.Out] + // pass matching runes back through these no-ops. + onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...) + inst.Next = make([]uint32, len(onePassRunes[pc])/2+1) + for i := range inst.Next { + inst.Next[i] = inst.Out + } + case syntax.InstEmptyWidth: + ok = check(inst.Out, m) + m[pc] = m[inst.Out] + onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...) + inst.Next = make([]uint32, len(onePassRunes[pc])/2+1) + for i := range inst.Next { + inst.Next[i] = inst.Out + } + case syntax.InstMatch, syntax.InstFail: + m[pc] = inst.Op == syntax.InstMatch + case syntax.InstRune: + m[pc] = false + if len(inst.Next) > 0 { + break + } + instQueue.insert(inst.Out) + if len(inst.Rune) == 0 { + onePassRunes[pc] = []rune{} + inst.Next = []uint32{inst.Out} + break + } + runes := make([]rune, 0) + if len(inst.Rune) == 1 && syntax.Flags(inst.Arg)&syntax.FoldCase != 0 { + r0 := inst.Rune[0] + runes = append(runes, r0, r0) + for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) { + runes = append(runes, r1, r1) + } + sort.Sort(runeSlice(runes)) + } else { + runes = append(runes, inst.Rune...) + } + onePassRunes[pc] = runes + inst.Next = make([]uint32, len(onePassRunes[pc])/2+1) + for i := range inst.Next { + inst.Next[i] = inst.Out + } + inst.Op = syntax.InstRune + case syntax.InstRune1: + m[pc] = false + if len(inst.Next) > 0 { + break + } + instQueue.insert(inst.Out) + runes := []rune{} + // expand case-folded runes + if syntax.Flags(inst.Arg)&syntax.FoldCase != 0 { + r0 := inst.Rune[0] + runes = append(runes, r0, r0) + for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) { + runes = append(runes, r1, r1) + } + sort.Sort(runeSlice(runes)) + } else { + runes = append(runes, inst.Rune[0], inst.Rune[0]) + } + onePassRunes[pc] = runes + inst.Next = make([]uint32, len(onePassRunes[pc])/2+1) + for i := range inst.Next { + inst.Next[i] = inst.Out + } + inst.Op = syntax.InstRune + case syntax.InstRuneAny: + m[pc] = false + if len(inst.Next) > 0 { + break + } + instQueue.insert(inst.Out) + onePassRunes[pc] = append([]rune{}, anyRune...) + inst.Next = []uint32{inst.Out} + case syntax.InstRuneAnyNotNL: + m[pc] = false + if len(inst.Next) > 0 { + break + } + instQueue.insert(inst.Out) + onePassRunes[pc] = append([]rune{}, anyRuneNotNL...) + inst.Next = make([]uint32, len(onePassRunes[pc])/2+1) + for i := range inst.Next { + inst.Next[i] = inst.Out + } + } + return + } + + instQueue.clear() + instQueue.insert(uint32(p.Start)) + m := make([]bool, len(p.Inst)) + for !instQueue.empty() { + visitQueue.clear() + pc := instQueue.next() + if !check(pc, m) { + p = nil + break + } + } + if p != nil { + for i := range p.Inst { + p.Inst[i].Rune = onePassRunes[i] + } + } + return p +} + +// compileOnePass returns a new *syntax.Prog suitable for onePass execution if the original Prog +// can be recharacterized as a one-pass regexp program, or syntax.nil if the +// Prog cannot be converted. For a one pass prog, the fundamental condition that must +// be true is: at any InstAlt, there must be no ambiguity about what branch to take. +func compileOnePass(prog *syntax.Prog) (p *onePassProg) { + if prog.Start == 0 { + return nil + } + // onepass regexp is anchored + if prog.Inst[prog.Start].Op != syntax.InstEmptyWidth || + syntax.EmptyOp(prog.Inst[prog.Start].Arg)&syntax.EmptyBeginText != syntax.EmptyBeginText { + return nil + } + // every instruction leading to InstMatch must be EmptyEndText + for _, inst := range prog.Inst { + opOut := prog.Inst[inst.Out].Op + switch inst.Op { + default: + if opOut == syntax.InstMatch { + return nil + } + case syntax.InstAlt, syntax.InstAltMatch: + if opOut == syntax.InstMatch || prog.Inst[inst.Arg].Op == syntax.InstMatch { + return nil + } + case syntax.InstEmptyWidth: + if opOut == syntax.InstMatch { + if syntax.EmptyOp(inst.Arg)&syntax.EmptyEndText == syntax.EmptyEndText { + continue + } + return nil + } + } + } + // Creates a slightly optimized copy of the original Prog + // that cleans up some Prog idioms that block valid onepass programs + p = onePassCopy(prog) + + // checkAmbiguity on InstAlts, build onepass Prog if possible + p = makeOnePass(p) + + if p != nil { + cleanupOnePass(p, prog) + } + return p +} diff --git a/vendor/github.com/grafana/regexp/regexp.go b/vendor/github.com/grafana/regexp/regexp.go new file mode 100644 index 000000000..7958a3972 --- /dev/null +++ b/vendor/github.com/grafana/regexp/regexp.go @@ -0,0 +1,1285 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package regexp implements regular expression search. +// +// The syntax of the regular expressions accepted is the same +// general syntax used by Perl, Python, and other languages. +// More precisely, it is the syntax accepted by RE2 and described at +// https://golang.org/s/re2syntax, except for \C. +// For an overview of the syntax, run +// +// go doc regexp/syntax +// +// The regexp implementation provided by this package is +// guaranteed to run in time linear in the size of the input. +// (This is a property not guaranteed by most open source +// implementations of regular expressions.) For more information +// about this property, see +// +// https://swtch.com/~rsc/regexp/regexp1.html +// +// or any book about automata theory. +// +// All characters are UTF-8-encoded code points. +// Following utf8.DecodeRune, each byte of an invalid UTF-8 sequence +// is treated as if it encoded utf8.RuneError (U+FFFD). +// +// There are 16 methods of Regexp that match a regular expression and identify +// the matched text. Their names are matched by this regular expression: +// +// Find(All)?(String)?(Submatch)?(Index)? +// +// If 'All' is present, the routine matches successive non-overlapping +// matches of the entire expression. Empty matches abutting a preceding +// match are ignored. The return value is a slice containing the successive +// return values of the corresponding non-'All' routine. These routines take +// an extra integer argument, n. If n >= 0, the function returns at most n +// matches/submatches; otherwise, it returns all of them. +// +// If 'String' is present, the argument is a string; otherwise it is a slice +// of bytes; return values are adjusted as appropriate. +// +// If 'Submatch' is present, the return value is a slice identifying the +// successive submatches of the expression. Submatches are matches of +// parenthesized subexpressions (also known as capturing groups) within the +// regular expression, numbered from left to right in order of opening +// parenthesis. Submatch 0 is the match of the entire expression, submatch 1 is +// the match of the first parenthesized subexpression, and so on. +// +// If 'Index' is present, matches and submatches are identified by byte index +// pairs within the input string: result[2*n:2*n+2] identifies the indexes of +// the nth submatch. The pair for n==0 identifies the match of the entire +// expression. If 'Index' is not present, the match is identified by the text +// of the match/submatch. If an index is negative or text is nil, it means that +// subexpression did not match any string in the input. For 'String' versions +// an empty string means either no match or an empty match. +// +// There is also a subset of the methods that can be applied to text read +// from a RuneReader: +// +// MatchReader, FindReaderIndex, FindReaderSubmatchIndex +// +// This set may grow. Note that regular expression matches may need to +// examine text beyond the text returned by a match, so the methods that +// match text from a RuneReader may read arbitrarily far into the input +// before returning. +// +// (There are a few other methods that do not match this pattern.) +package regexp + +import ( + "bytes" + "io" + "regexp/syntax" + "strconv" + "strings" + "sync" + "unicode" + "unicode/utf8" +) + +// Regexp is the representation of a compiled regular expression. +// A Regexp is safe for concurrent use by multiple goroutines, +// except for configuration methods, such as Longest. +type Regexp struct { + expr string // as passed to Compile + prog *syntax.Prog // compiled program + onepass *onePassProg // onepass program or nil + numSubexp int + maxBitStateLen int + subexpNames []string + prefix string // required prefix in unanchored matches + prefixBytes []byte // prefix, as a []byte + prefixRune rune // first rune in prefix + prefixEnd uint32 // pc for last rune in prefix + mpool int // pool for machines + matchcap int // size of recorded match lengths + prefixComplete bool // prefix is the entire regexp + cond syntax.EmptyOp // empty-width conditions required at start of match + minInputLen int // minimum length of the input in bytes + + // This field can be modified by the Longest method, + // but it is otherwise read-only. + longest bool // whether regexp prefers leftmost-longest match +} + +// String returns the source text used to compile the regular expression. +func (re *Regexp) String() string { + return re.expr +} + +// Copy returns a new Regexp object copied from re. +// Calling Longest on one copy does not affect another. +// +// Deprecated: In earlier releases, when using a Regexp in multiple goroutines, +// giving each goroutine its own copy helped to avoid lock contention. +// As of Go 1.12, using Copy is no longer necessary to avoid lock contention. +// Copy may still be appropriate if the reason for its use is to make +// two copies with different Longest settings. +func (re *Regexp) Copy() *Regexp { + re2 := *re + return &re2 +} + +// Compile parses a regular expression and returns, if successful, +// a Regexp object that can be used to match against text. +// +// When matching against text, the regexp returns a match that +// begins as early as possible in the input (leftmost), and among those +// it chooses the one that a backtracking search would have found first. +// This so-called leftmost-first matching is the same semantics +// that Perl, Python, and other implementations use, although this +// package implements it without the expense of backtracking. +// For POSIX leftmost-longest matching, see CompilePOSIX. +func Compile(expr string) (*Regexp, error) { + return compile(expr, syntax.Perl, false) +} + +// CompilePOSIX is like Compile but restricts the regular expression +// to POSIX ERE (egrep) syntax and changes the match semantics to +// leftmost-longest. +// +// That is, when matching against text, the regexp returns a match that +// begins as early as possible in the input (leftmost), and among those +// it chooses a match that is as long as possible. +// This so-called leftmost-longest matching is the same semantics +// that early regular expression implementations used and that POSIX +// specifies. +// +// However, there can be multiple leftmost-longest matches, with different +// submatch choices, and here this package diverges from POSIX. +// Among the possible leftmost-longest matches, this package chooses +// the one that a backtracking search would have found first, while POSIX +// specifies that the match be chosen to maximize the length of the first +// subexpression, then the second, and so on from left to right. +// The POSIX rule is computationally prohibitive and not even well-defined. +// See https://swtch.com/~rsc/regexp/regexp2.html#posix for details. +func CompilePOSIX(expr string) (*Regexp, error) { + return compile(expr, syntax.POSIX, true) +} + +// Longest makes future searches prefer the leftmost-longest match. +// That is, when matching against text, the regexp returns a match that +// begins as early as possible in the input (leftmost), and among those +// it chooses a match that is as long as possible. +// This method modifies the Regexp and may not be called concurrently +// with any other methods. +func (re *Regexp) Longest() { + re.longest = true +} + +func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) { + re, err := syntax.Parse(expr, mode) + if err != nil { + return nil, err + } + maxCap := re.MaxCap() + capNames := re.CapNames() + + re = re.Simplify() + prog, err := syntax.Compile(re) + if err != nil { + return nil, err + } + matchcap := prog.NumCap + if matchcap < 2 { + matchcap = 2 + } + regexp := &Regexp{ + expr: expr, + prog: prog, + onepass: compileOnePass(prog), + numSubexp: maxCap, + subexpNames: capNames, + cond: prog.StartCond(), + longest: longest, + matchcap: matchcap, + minInputLen: minInputLen(re), + } + if regexp.onepass == nil { + regexp.prefix, regexp.prefixComplete = prog.Prefix() + regexp.maxBitStateLen = maxBitStateLen(prog) + } else { + regexp.prefix, regexp.prefixComplete, regexp.prefixEnd = onePassPrefix(prog) + } + if regexp.prefix != "" { + // TODO(rsc): Remove this allocation by adding + // IndexString to package bytes. + regexp.prefixBytes = []byte(regexp.prefix) + regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix) + } + + n := len(prog.Inst) + i := 0 + for matchSize[i] != 0 && matchSize[i] < n { + i++ + } + regexp.mpool = i + + return regexp, nil +} + +// Pools of *machine for use during (*Regexp).doExecute, +// split up by the size of the execution queues. +// matchPool[i] machines have queue size matchSize[i]. +// On a 64-bit system each queue entry is 16 bytes, +// so matchPool[0] has 16*2*128 = 4kB queues, etc. +// The final matchPool is a catch-all for very large queues. +var ( + matchSize = [...]int{128, 512, 2048, 16384, 0} + matchPool [len(matchSize)]sync.Pool +) + +// get returns a machine to use for matching re. +// It uses the re's machine cache if possible, to avoid +// unnecessary allocation. +func (re *Regexp) get() *machine { + m, ok := matchPool[re.mpool].Get().(*machine) + if !ok { + m = new(machine) + } + m.re = re + m.p = re.prog + if cap(m.matchcap) < re.matchcap { + m.matchcap = make([]int, re.matchcap) + for _, t := range m.pool { + t.cap = make([]int, re.matchcap) + } + } + + // Allocate queues if needed. + // Or reallocate, for "large" match pool. + n := matchSize[re.mpool] + if n == 0 { // large pool + n = len(re.prog.Inst) + } + if len(m.q0.sparse) < n { + m.q0 = queue{make([]uint32, n), make([]entry, 0, n)} + m.q1 = queue{make([]uint32, n), make([]entry, 0, n)} + } + return m +} + +// put returns a machine to the correct machine pool. +func (re *Regexp) put(m *machine) { + m.re = nil + m.p = nil + m.inputs.clear() + matchPool[re.mpool].Put(m) +} + +// minInputLen walks the regexp to find the minimum length of any matchable input +func minInputLen(re *syntax.Regexp) int { + switch re.Op { + default: + return 0 + case syntax.OpAnyChar, syntax.OpAnyCharNotNL, syntax.OpCharClass: + return 1 + case syntax.OpLiteral: + l := 0 + for _, r := range re.Rune { + if r == utf8.RuneError { + l++ + } else { + l += utf8.RuneLen(r) + } + } + return l + case syntax.OpCapture, syntax.OpPlus: + return minInputLen(re.Sub[0]) + case syntax.OpRepeat: + return re.Min * minInputLen(re.Sub[0]) + case syntax.OpConcat: + l := 0 + for _, sub := range re.Sub { + l += minInputLen(sub) + } + return l + case syntax.OpAlternate: + l := minInputLen(re.Sub[0]) + var lnext int + for _, sub := range re.Sub[1:] { + lnext = minInputLen(sub) + if lnext < l { + l = lnext + } + } + return l + } +} + +// MustCompile is like Compile but panics if the expression cannot be parsed. +// It simplifies safe initialization of global variables holding compiled regular +// expressions. +func MustCompile(str string) *Regexp { + regexp, err := Compile(str) + if err != nil { + panic(`regexp: Compile(` + quote(str) + `): ` + err.Error()) + } + return regexp +} + +// MustCompilePOSIX is like CompilePOSIX but panics if the expression cannot be parsed. +// It simplifies safe initialization of global variables holding compiled regular +// expressions. +func MustCompilePOSIX(str string) *Regexp { + regexp, err := CompilePOSIX(str) + if err != nil { + panic(`regexp: CompilePOSIX(` + quote(str) + `): ` + err.Error()) + } + return regexp +} + +func quote(s string) string { + if strconv.CanBackquote(s) { + return "`" + s + "`" + } + return strconv.Quote(s) +} + +// NumSubexp returns the number of parenthesized subexpressions in this Regexp. +func (re *Regexp) NumSubexp() int { + return re.numSubexp +} + +// SubexpNames returns the names of the parenthesized subexpressions +// in this Regexp. The name for the first sub-expression is names[1], +// so that if m is a match slice, the name for m[i] is SubexpNames()[i]. +// Since the Regexp as a whole cannot be named, names[0] is always +// the empty string. The slice should not be modified. +func (re *Regexp) SubexpNames() []string { + return re.subexpNames +} + +// SubexpIndex returns the index of the first subexpression with the given name, +// or -1 if there is no subexpression with that name. +// +// Note that multiple subexpressions can be written using the same name, as in +// (?Pa+)(?Pb+), which declares two subexpressions named "bob". +// In this case, SubexpIndex returns the index of the leftmost such subexpression +// in the regular expression. +func (re *Regexp) SubexpIndex(name string) int { + if name != "" { + for i, s := range re.subexpNames { + if name == s { + return i + } + } + } + return -1 +} + +const endOfText rune = -1 + +// input abstracts different representations of the input text. It provides +// one-character lookahead. +type input interface { + step(pos int) (r rune, width int) // advance one rune + canCheckPrefix() bool // can we look ahead without losing info? + hasPrefix(re *Regexp) bool + index(re *Regexp, pos int) int + context(pos int) lazyFlag +} + +// inputString scans a string. +type inputString struct { + str string +} + +func (i *inputString) step(pos int) (rune, int) { + if pos < len(i.str) { + c := i.str[pos] + if c < utf8.RuneSelf { + return rune(c), 1 + } + return utf8.DecodeRuneInString(i.str[pos:]) + } + return endOfText, 0 +} + +func (i *inputString) canCheckPrefix() bool { + return true +} + +func (i *inputString) hasPrefix(re *Regexp) bool { + return strings.HasPrefix(i.str, re.prefix) +} + +func (i *inputString) index(re *Regexp, pos int) int { + return strings.Index(i.str[pos:], re.prefix) +} + +func (i *inputString) context(pos int) lazyFlag { + r1, r2 := endOfText, endOfText + // 0 < pos && pos <= len(i.str) + if uint(pos-1) < uint(len(i.str)) { + r1 = rune(i.str[pos-1]) + if r1 >= utf8.RuneSelf { + r1, _ = utf8.DecodeLastRuneInString(i.str[:pos]) + } + } + // 0 <= pos && pos < len(i.str) + if uint(pos) < uint(len(i.str)) { + r2 = rune(i.str[pos]) + if r2 >= utf8.RuneSelf { + r2, _ = utf8.DecodeRuneInString(i.str[pos:]) + } + } + return newLazyFlag(r1, r2) +} + +// inputBytes scans a byte slice. +type inputBytes struct { + str []byte +} + +func (i *inputBytes) step(pos int) (rune, int) { + if pos < len(i.str) { + c := i.str[pos] + if c < utf8.RuneSelf { + return rune(c), 1 + } + return utf8.DecodeRune(i.str[pos:]) + } + return endOfText, 0 +} + +func (i *inputBytes) canCheckPrefix() bool { + return true +} + +func (i *inputBytes) hasPrefix(re *Regexp) bool { + return bytes.HasPrefix(i.str, re.prefixBytes) +} + +func (i *inputBytes) index(re *Regexp, pos int) int { + return bytes.Index(i.str[pos:], re.prefixBytes) +} + +func (i *inputBytes) context(pos int) lazyFlag { + r1, r2 := endOfText, endOfText + // 0 < pos && pos <= len(i.str) + if uint(pos-1) < uint(len(i.str)) { + r1 = rune(i.str[pos-1]) + if r1 >= utf8.RuneSelf { + r1, _ = utf8.DecodeLastRune(i.str[:pos]) + } + } + // 0 <= pos && pos < len(i.str) + if uint(pos) < uint(len(i.str)) { + r2 = rune(i.str[pos]) + if r2 >= utf8.RuneSelf { + r2, _ = utf8.DecodeRune(i.str[pos:]) + } + } + return newLazyFlag(r1, r2) +} + +// inputReader scans a RuneReader. +type inputReader struct { + r io.RuneReader + atEOT bool + pos int +} + +func (i *inputReader) step(pos int) (rune, int) { + if !i.atEOT && pos != i.pos { + return endOfText, 0 + + } + r, w, err := i.r.ReadRune() + if err != nil { + i.atEOT = true + return endOfText, 0 + } + i.pos += w + return r, w +} + +func (i *inputReader) canCheckPrefix() bool { + return false +} + +func (i *inputReader) hasPrefix(re *Regexp) bool { + return false +} + +func (i *inputReader) index(re *Regexp, pos int) int { + return -1 +} + +func (i *inputReader) context(pos int) lazyFlag { + return 0 // not used +} + +// LiteralPrefix returns a literal string that must begin any match +// of the regular expression re. It returns the boolean true if the +// literal string comprises the entire regular expression. +func (re *Regexp) LiteralPrefix() (prefix string, complete bool) { + return re.prefix, re.prefixComplete +} + +// MatchReader reports whether the text returned by the RuneReader +// contains any match of the regular expression re. +func (re *Regexp) MatchReader(r io.RuneReader) bool { + return re.doMatch(r, nil, "") +} + +// MatchString reports whether the string s +// contains any match of the regular expression re. +func (re *Regexp) MatchString(s string) bool { + return re.doMatch(nil, nil, s) +} + +// Match reports whether the byte slice b +// contains any match of the regular expression re. +func (re *Regexp) Match(b []byte) bool { + return re.doMatch(nil, b, "") +} + +// MatchReader reports whether the text returned by the RuneReader +// contains any match of the regular expression pattern. +// More complicated queries need to use Compile and the full Regexp interface. +func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) { + re, err := Compile(pattern) + if err != nil { + return false, err + } + return re.MatchReader(r), nil +} + +// MatchString reports whether the string s +// contains any match of the regular expression pattern. +// More complicated queries need to use Compile and the full Regexp interface. +func MatchString(pattern string, s string) (matched bool, err error) { + re, err := Compile(pattern) + if err != nil { + return false, err + } + return re.MatchString(s), nil +} + +// Match reports whether the byte slice b +// contains any match of the regular expression pattern. +// More complicated queries need to use Compile and the full Regexp interface. +func Match(pattern string, b []byte) (matched bool, err error) { + re, err := Compile(pattern) + if err != nil { + return false, err + } + return re.Match(b), nil +} + +// ReplaceAllString returns a copy of src, replacing matches of the Regexp +// with the replacement string repl. Inside repl, $ signs are interpreted as +// in Expand, so for instance $1 represents the text of the first submatch. +func (re *Regexp) ReplaceAllString(src, repl string) string { + n := 2 + if strings.Contains(repl, "$") { + n = 2 * (re.numSubexp + 1) + } + b := re.replaceAll(nil, src, n, func(dst []byte, match []int) []byte { + return re.expand(dst, repl, nil, src, match) + }) + return string(b) +} + +// ReplaceAllLiteralString returns a copy of src, replacing matches of the Regexp +// with the replacement string repl. The replacement repl is substituted directly, +// without using Expand. +func (re *Regexp) ReplaceAllLiteralString(src, repl string) string { + return string(re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte { + return append(dst, repl...) + })) +} + +// ReplaceAllStringFunc returns a copy of src in which all matches of the +// Regexp have been replaced by the return value of function repl applied +// to the matched substring. The replacement returned by repl is substituted +// directly, without using Expand. +func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string { + b := re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte { + return append(dst, repl(src[match[0]:match[1]])...) + }) + return string(b) +} + +func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst []byte, m []int) []byte) []byte { + lastMatchEnd := 0 // end position of the most recent match + searchPos := 0 // position where we next look for a match + var buf []byte + var endPos int + if bsrc != nil { + endPos = len(bsrc) + } else { + endPos = len(src) + } + if nmatch > re.prog.NumCap { + nmatch = re.prog.NumCap + } + + var dstCap [2]int + for searchPos <= endPos { + a := re.doExecute(nil, bsrc, src, searchPos, nmatch, dstCap[:0]) + if len(a) == 0 { + break // no more matches + } + + // Copy the unmatched characters before this match. + if bsrc != nil { + buf = append(buf, bsrc[lastMatchEnd:a[0]]...) + } else { + buf = append(buf, src[lastMatchEnd:a[0]]...) + } + + // Now insert a copy of the replacement string, but not for a + // match of the empty string immediately after another match. + // (Otherwise, we get double replacement for patterns that + // match both empty and nonempty strings.) + if a[1] > lastMatchEnd || a[0] == 0 { + buf = repl(buf, a) + } + lastMatchEnd = a[1] + + // Advance past this match; always advance at least one character. + var width int + if bsrc != nil { + _, width = utf8.DecodeRune(bsrc[searchPos:]) + } else { + _, width = utf8.DecodeRuneInString(src[searchPos:]) + } + if searchPos+width > a[1] { + searchPos += width + } else if searchPos+1 > a[1] { + // This clause is only needed at the end of the input + // string. In that case, DecodeRuneInString returns width=0. + searchPos++ + } else { + searchPos = a[1] + } + } + + // Copy the unmatched characters after the last match. + if bsrc != nil { + buf = append(buf, bsrc[lastMatchEnd:]...) + } else { + buf = append(buf, src[lastMatchEnd:]...) + } + + return buf +} + +// ReplaceAll returns a copy of src, replacing matches of the Regexp +// with the replacement text repl. Inside repl, $ signs are interpreted as +// in Expand, so for instance $1 represents the text of the first submatch. +func (re *Regexp) ReplaceAll(src, repl []byte) []byte { + n := 2 + if bytes.IndexByte(repl, '$') >= 0 { + n = 2 * (re.numSubexp + 1) + } + srepl := "" + b := re.replaceAll(src, "", n, func(dst []byte, match []int) []byte { + if len(srepl) != len(repl) { + srepl = string(repl) + } + return re.expand(dst, srepl, src, "", match) + }) + return b +} + +// ReplaceAllLiteral returns a copy of src, replacing matches of the Regexp +// with the replacement bytes repl. The replacement repl is substituted directly, +// without using Expand. +func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte { + return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte { + return append(dst, repl...) + }) +} + +// ReplaceAllFunc returns a copy of src in which all matches of the +// Regexp have been replaced by the return value of function repl applied +// to the matched byte slice. The replacement returned by repl is substituted +// directly, without using Expand. +func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { + return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte { + return append(dst, repl(src[match[0]:match[1]])...) + }) +} + +// Bitmap used by func special to check whether a character needs to be escaped. +var specialBytes [16]byte + +// special reports whether byte b needs to be escaped by QuoteMeta. +func special(b byte) bool { + return b < utf8.RuneSelf && specialBytes[b%16]&(1<<(b/16)) != 0 +} + +func init() { + for _, b := range []byte(`\.+*?()|[]{}^$`) { + specialBytes[b%16] |= 1 << (b / 16) + } +} + +// QuoteMeta returns a string that escapes all regular expression metacharacters +// inside the argument text; the returned string is a regular expression matching +// the literal text. +func QuoteMeta(s string) string { + // A byte loop is correct because all metacharacters are ASCII. + var i int + for i = 0; i < len(s); i++ { + if special(s[i]) { + break + } + } + // No meta characters found, so return original string. + if i >= len(s) { + return s + } + + b := make([]byte, 2*len(s)-i) + copy(b, s[:i]) + j := i + for ; i < len(s); i++ { + if special(s[i]) { + b[j] = '\\' + j++ + } + b[j] = s[i] + j++ + } + return string(b[:j]) +} + +// The number of capture values in the program may correspond +// to fewer capturing expressions than are in the regexp. +// For example, "(a){0}" turns into an empty program, so the +// maximum capture in the program is 0 but we need to return +// an expression for \1. Pad appends -1s to the slice a as needed. +func (re *Regexp) pad(a []int) []int { + if a == nil { + // No match. + return nil + } + n := (1 + re.numSubexp) * 2 + for len(a) < n { + a = append(a, -1) + } + return a +} + +// allMatches calls deliver at most n times +// with the location of successive matches in the input text. +// The input text is b if non-nil, otherwise s. +func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) { + var end int + if b == nil { + end = len(s) + } else { + end = len(b) + } + + for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; { + matches := re.doExecute(nil, b, s, pos, re.prog.NumCap, nil) + if len(matches) == 0 { + break + } + + accept := true + if matches[1] == pos { + // We've found an empty match. + if matches[0] == prevMatchEnd { + // We don't allow an empty match right + // after a previous match, so ignore it. + accept = false + } + var width int + if b == nil { + is := inputString{str: s} + _, width = is.step(pos) + } else { + ib := inputBytes{str: b} + _, width = ib.step(pos) + } + if width > 0 { + pos += width + } else { + pos = end + 1 + } + } else { + pos = matches[1] + } + prevMatchEnd = matches[1] + + if accept { + deliver(re.pad(matches)) + i++ + } + } +} + +// Find returns a slice holding the text of the leftmost match in b of the regular expression. +// A return value of nil indicates no match. +func (re *Regexp) Find(b []byte) []byte { + var dstCap [2]int + a := re.doExecute(nil, b, "", 0, 2, dstCap[:0]) + if a == nil { + return nil + } + return b[a[0]:a[1]:a[1]] +} + +// FindIndex returns a two-element slice of integers defining the location of +// the leftmost match in b of the regular expression. The match itself is at +// b[loc[0]:loc[1]]. +// A return value of nil indicates no match. +func (re *Regexp) FindIndex(b []byte) (loc []int) { + a := re.doExecute(nil, b, "", 0, 2, nil) + if a == nil { + return nil + } + return a[0:2] +} + +// FindString returns a string holding the text of the leftmost match in s of the regular +// expression. If there is no match, the return value is an empty string, +// but it will also be empty if the regular expression successfully matches +// an empty string. Use FindStringIndex or FindStringSubmatch if it is +// necessary to distinguish these cases. +func (re *Regexp) FindString(s string) string { + var dstCap [2]int + a := re.doExecute(nil, nil, s, 0, 2, dstCap[:0]) + if a == nil { + return "" + } + return s[a[0]:a[1]] +} + +// FindStringIndex returns a two-element slice of integers defining the +// location of the leftmost match in s of the regular expression. The match +// itself is at s[loc[0]:loc[1]]. +// A return value of nil indicates no match. +func (re *Regexp) FindStringIndex(s string) (loc []int) { + a := re.doExecute(nil, nil, s, 0, 2, nil) + if a == nil { + return nil + } + return a[0:2] +} + +// FindReaderIndex returns a two-element slice of integers defining the +// location of the leftmost match of the regular expression in text read from +// the RuneReader. The match text was found in the input stream at +// byte offset loc[0] through loc[1]-1. +// A return value of nil indicates no match. +func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) { + a := re.doExecute(r, nil, "", 0, 2, nil) + if a == nil { + return nil + } + return a[0:2] +} + +// FindSubmatch returns a slice of slices holding the text of the leftmost +// match of the regular expression in b and the matches, if any, of its +// subexpressions, as defined by the 'Submatch' descriptions in the package +// comment. +// A return value of nil indicates no match. +func (re *Regexp) FindSubmatch(b []byte) [][]byte { + var dstCap [4]int + a := re.doExecute(nil, b, "", 0, re.prog.NumCap, dstCap[:0]) + if a == nil { + return nil + } + ret := make([][]byte, 1+re.numSubexp) + for i := range ret { + if 2*i < len(a) && a[2*i] >= 0 { + ret[i] = b[a[2*i]:a[2*i+1]:a[2*i+1]] + } + } + return ret +} + +// Expand appends template to dst and returns the result; during the +// append, Expand replaces variables in the template with corresponding +// matches drawn from src. The match slice should have been returned by +// FindSubmatchIndex. +// +// In the template, a variable is denoted by a substring of the form +// $name or ${name}, where name is a non-empty sequence of letters, +// digits, and underscores. A purely numeric name like $1 refers to +// the submatch with the corresponding index; other names refer to +// capturing parentheses named with the (?P...) syntax. A +// reference to an out of range or unmatched index or a name that is not +// present in the regular expression is replaced with an empty slice. +// +// In the $name form, name is taken to be as long as possible: $1x is +// equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0. +// +// To insert a literal $ in the output, use $$ in the template. +func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte { + return re.expand(dst, string(template), src, "", match) +} + +// ExpandString is like Expand but the template and source are strings. +// It appends to and returns a byte slice in order to give the calling +// code control over allocation. +func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte { + return re.expand(dst, template, nil, src, match) +} + +func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, match []int) []byte { + for len(template) > 0 { + before, after, ok := strings.Cut(template, "$") + if !ok { + break + } + dst = append(dst, before...) + template = after + if template != "" && template[0] == '$' { + // Treat $$ as $. + dst = append(dst, '$') + template = template[1:] + continue + } + name, num, rest, ok := extract(template) + if !ok { + // Malformed; treat $ as raw text. + dst = append(dst, '$') + continue + } + template = rest + if num >= 0 { + if 2*num+1 < len(match) && match[2*num] >= 0 { + if bsrc != nil { + dst = append(dst, bsrc[match[2*num]:match[2*num+1]]...) + } else { + dst = append(dst, src[match[2*num]:match[2*num+1]]...) + } + } + } else { + for i, namei := range re.subexpNames { + if name == namei && 2*i+1 < len(match) && match[2*i] >= 0 { + if bsrc != nil { + dst = append(dst, bsrc[match[2*i]:match[2*i+1]]...) + } else { + dst = append(dst, src[match[2*i]:match[2*i+1]]...) + } + break + } + } + } + } + dst = append(dst, template...) + return dst +} + +// extract returns the name from a leading "name" or "{name}" in str. +// (The $ has already been removed by the caller.) +// If it is a number, extract returns num set to that number; otherwise num = -1. +func extract(str string) (name string, num int, rest string, ok bool) { + if str == "" { + return + } + brace := false + if str[0] == '{' { + brace = true + str = str[1:] + } + i := 0 + for i < len(str) { + rune, size := utf8.DecodeRuneInString(str[i:]) + if !unicode.IsLetter(rune) && !unicode.IsDigit(rune) && rune != '_' { + break + } + i += size + } + if i == 0 { + // empty name is not okay + return + } + name = str[:i] + if brace { + if i >= len(str) || str[i] != '}' { + // missing closing brace + return + } + i++ + } + + // Parse number. + num = 0 + for i := 0; i < len(name); i++ { + if name[i] < '0' || '9' < name[i] || num >= 1e8 { + num = -1 + break + } + num = num*10 + int(name[i]) - '0' + } + // Disallow leading zeros. + if name[0] == '0' && len(name) > 1 { + num = -1 + } + + rest = str[i:] + ok = true + return +} + +// FindSubmatchIndex returns a slice holding the index pairs identifying the +// leftmost match of the regular expression in b and the matches, if any, of +// its subexpressions, as defined by the 'Submatch' and 'Index' descriptions +// in the package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindSubmatchIndex(b []byte) []int { + return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap, nil)) +} + +// FindStringSubmatch returns a slice of strings holding the text of the +// leftmost match of the regular expression in s and the matches, if any, of +// its subexpressions, as defined by the 'Submatch' description in the +// package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindStringSubmatch(s string) []string { + var dstCap [4]int + a := re.doExecute(nil, nil, s, 0, re.prog.NumCap, dstCap[:0]) + if a == nil { + return nil + } + ret := make([]string, 1+re.numSubexp) + for i := range ret { + if 2*i < len(a) && a[2*i] >= 0 { + ret[i] = s[a[2*i]:a[2*i+1]] + } + } + return ret +} + +// FindStringSubmatchIndex returns a slice holding the index pairs +// identifying the leftmost match of the regular expression in s and the +// matches, if any, of its subexpressions, as defined by the 'Submatch' and +// 'Index' descriptions in the package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindStringSubmatchIndex(s string) []int { + return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap, nil)) +} + +// FindReaderSubmatchIndex returns a slice holding the index pairs +// identifying the leftmost match of the regular expression of text read by +// the RuneReader, and the matches, if any, of its subexpressions, as defined +// by the 'Submatch' and 'Index' descriptions in the package comment. A +// return value of nil indicates no match. +func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int { + return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap, nil)) +} + +const startSize = 10 // The size at which to start a slice in the 'All' routines. + +// FindAll is the 'All' version of Find; it returns a slice of all successive +// matches of the expression, as defined by the 'All' description in the +// package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindAll(b []byte, n int) [][]byte { + if n < 0 { + n = len(b) + 1 + } + var result [][]byte + re.allMatches("", b, n, func(match []int) { + if result == nil { + result = make([][]byte, 0, startSize) + } + result = append(result, b[match[0]:match[1]:match[1]]) + }) + return result +} + +// FindAllIndex is the 'All' version of FindIndex; it returns a slice of all +// successive matches of the expression, as defined by the 'All' description +// in the package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindAllIndex(b []byte, n int) [][]int { + if n < 0 { + n = len(b) + 1 + } + var result [][]int + re.allMatches("", b, n, func(match []int) { + if result == nil { + result = make([][]int, 0, startSize) + } + result = append(result, match[0:2]) + }) + return result +} + +// FindAllString is the 'All' version of FindString; it returns a slice of all +// successive matches of the expression, as defined by the 'All' description +// in the package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindAllString(s string, n int) []string { + if n < 0 { + n = len(s) + 1 + } + var result []string + re.allMatches(s, nil, n, func(match []int) { + if result == nil { + result = make([]string, 0, startSize) + } + result = append(result, s[match[0]:match[1]]) + }) + return result +} + +// FindAllStringIndex is the 'All' version of FindStringIndex; it returns a +// slice of all successive matches of the expression, as defined by the 'All' +// description in the package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindAllStringIndex(s string, n int) [][]int { + if n < 0 { + n = len(s) + 1 + } + var result [][]int + re.allMatches(s, nil, n, func(match []int) { + if result == nil { + result = make([][]int, 0, startSize) + } + result = append(result, match[0:2]) + }) + return result +} + +// FindAllSubmatch is the 'All' version of FindSubmatch; it returns a slice +// of all successive matches of the expression, as defined by the 'All' +// description in the package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte { + if n < 0 { + n = len(b) + 1 + } + var result [][][]byte + re.allMatches("", b, n, func(match []int) { + if result == nil { + result = make([][][]byte, 0, startSize) + } + slice := make([][]byte, len(match)/2) + for j := range slice { + if match[2*j] >= 0 { + slice[j] = b[match[2*j]:match[2*j+1]:match[2*j+1]] + } + } + result = append(result, slice) + }) + return result +} + +// FindAllSubmatchIndex is the 'All' version of FindSubmatchIndex; it returns +// a slice of all successive matches of the expression, as defined by the +// 'All' description in the package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int { + if n < 0 { + n = len(b) + 1 + } + var result [][]int + re.allMatches("", b, n, func(match []int) { + if result == nil { + result = make([][]int, 0, startSize) + } + result = append(result, match) + }) + return result +} + +// FindAllStringSubmatch is the 'All' version of FindStringSubmatch; it +// returns a slice of all successive matches of the expression, as defined by +// the 'All' description in the package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string { + if n < 0 { + n = len(s) + 1 + } + var result [][]string + re.allMatches(s, nil, n, func(match []int) { + if result == nil { + result = make([][]string, 0, startSize) + } + slice := make([]string, len(match)/2) + for j := range slice { + if match[2*j] >= 0 { + slice[j] = s[match[2*j]:match[2*j+1]] + } + } + result = append(result, slice) + }) + return result +} + +// FindAllStringSubmatchIndex is the 'All' version of +// FindStringSubmatchIndex; it returns a slice of all successive matches of +// the expression, as defined by the 'All' description in the package +// comment. +// A return value of nil indicates no match. +func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int { + if n < 0 { + n = len(s) + 1 + } + var result [][]int + re.allMatches(s, nil, n, func(match []int) { + if result == nil { + result = make([][]int, 0, startSize) + } + result = append(result, match) + }) + return result +} + +// Split slices s into substrings separated by the expression and returns a slice of +// the substrings between those expression matches. +// +// The slice returned by this method consists of all the substrings of s +// not contained in the slice returned by FindAllString. When called on an expression +// that contains no metacharacters, it is equivalent to strings.SplitN. +// +// Example: +// +// s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5) +// // s: ["", "b", "b", "c", "cadaaae"] +// +// The count determines the number of substrings to return: +// +// n > 0: at most n substrings; the last substring will be the unsplit remainder. +// n == 0: the result is nil (zero substrings) +// n < 0: all substrings +func (re *Regexp) Split(s string, n int) []string { + + if n == 0 { + return nil + } + + if len(re.expr) > 0 && len(s) == 0 { + return []string{""} + } + + matches := re.FindAllStringIndex(s, n) + strings := make([]string, 0, len(matches)) + + beg := 0 + end := 0 + for _, match := range matches { + if n > 0 && len(strings) >= n-1 { + break + } + + end = match[0] + if match[1] != 0 { + strings = append(strings, s[beg:end]) + } + beg = match[1] + } + + if end != len(s) { + strings = append(strings, s[beg:]) + } + + return strings +} diff --git a/vendor/github.com/machadovilaca/operator-observability/pkg/testutil/alert_custom_validations.go b/vendor/github.com/machadovilaca/operator-observability/pkg/testutil/alert_custom_validations.go new file mode 100644 index 000000000..de6628654 --- /dev/null +++ b/vendor/github.com/machadovilaca/operator-observability/pkg/testutil/alert_custom_validations.go @@ -0,0 +1,86 @@ +package testutil + +import ( + promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" +) + +func ValidateAlertNameLength(alert *promv1.Rule) []Problem { + var result []Problem + + if len(alert.Alert) > 50 { + result = append(result, Problem{ + ResourceName: alert.Alert, + Description: "alert name exceeds 50 characters", + }) + } + + return result +} + +func ValidateAlertHasDescriptionAnnotation(alert *promv1.Rule) []Problem { + var result []Problem + + description := alert.Annotations["description"] + if description == "" { + result = append(result, Problem{ + ResourceName: alert.Alert, + Description: "alert must have a description annotation", + }) + } + + return result +} + +func ValidateAlertRunbookURLAnnotation(alert *promv1.Rule) []Problem { + var result []Problem + + runbookURL := alert.Annotations["runbook_url"] + if runbookURL == "" { + result = append(result, Problem{ + ResourceName: alert.Alert, + Description: "alert must have a runbook_url annotation", + }) + } + + return result +} + +func ValidateAlertHealthImpactLabel(alert *promv1.Rule) []Problem { + var result []Problem + + healthImpact := alert.Labels["operator_health_impact"] + if !isValidHealthImpact(healthImpact) { + result = append(result, Problem{ + ResourceName: alert.Alert, + Description: "alert must have a operator_health_impact label with value critical, warning, or none", + }) + } + + return result +} + +func ValidateAlertPartOfAndComponentLabels(alert *promv1.Rule) []Problem { + var result []Problem + + partOf := alert.Labels["kubernetes_operator_part_of"] + if partOf == "" { + result = append(result, Problem{ + ResourceName: alert.Alert, + Description: "alert must have a kubernetes_operator_part_of label", + }) + } + + component := alert.Labels["kubernetes_operator_component"] + if component == "" { + result = append(result, Problem{ + ResourceName: alert.Alert, + Description: "alert must have a kubernetes_operator_component label", + }) + } + + return result +} + +func isValidHealthImpact(healthImpact string) bool { + return healthImpact == "critical" || healthImpact == "warning" || healthImpact == "none" +} diff --git a/vendor/github.com/machadovilaca/operator-observability/pkg/testutil/alert_validation.go b/vendor/github.com/machadovilaca/operator-observability/pkg/testutil/alert_validation.go new file mode 100644 index 000000000..954567261 --- /dev/null +++ b/vendor/github.com/machadovilaca/operator-observability/pkg/testutil/alert_validation.go @@ -0,0 +1,80 @@ +package testutil + +import ( + "github.com/grafana/regexp" + promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" +) + +type AlertValidation = func(alert *promv1.Rule) []Problem + +// based on https://sdk.operatorframework.io/docs/best-practices/observability-best-practices/#alerts-style-guide +var defaultAlertValidations = []AlertValidation{ + validateAlertName, + validateAlertHasExpression, + validateAlertHasSeverityLabel, + validateAlertHasSummaryAnnotation, +} + +func validateAlertName(alert *promv1.Rule) []Problem { + var result []Problem + + if alert.Alert == "" || !isPascalCase(alert.Alert) { + result = append(result, Problem{ + ResourceName: alert.Alert, + Description: "alert must have a name in PascalCase format", + }) + } + + return result +} + +func validateAlertHasExpression(alert *promv1.Rule) []Problem { + var result []Problem + + if alert.Expr.StrVal == "" { + result = append(result, Problem{ + ResourceName: alert.Alert, + Description: "alert must have an expression", + }) + } + + return result +} + +func validateAlertHasSeverityLabel(alert *promv1.Rule) []Problem { + var result []Problem + + severity := alert.Labels["severity"] + if !isValidSeverity(severity) { + result = append(result, Problem{ + ResourceName: alert.Alert, + Description: "alert must have a severity label with value critical, warning, or info", + }) + } + + return result +} + +func validateAlertHasSummaryAnnotation(alert *promv1.Rule) []Problem { + var result []Problem + + summary := alert.Annotations["summary"] + if summary == "" { + result = append(result, Problem{ + ResourceName: alert.Alert, + Description: "alert must have a summary annotation", + }) + } + + return result +} + +func isPascalCase(s string) bool { + pascalCasePattern := `^[A-Z][a-zA-Z0-9]*(?:[A-Z][a-zA-Z0-9]*)*$` + pascalCaseRegex := regexp.MustCompile(pascalCasePattern) + return pascalCaseRegex.MatchString(s) +} + +func isValidSeverity(severity string) bool { + return severity == "critical" || severity == "warning" || severity == "info" +} diff --git a/vendor/github.com/machadovilaca/operator-observability/pkg/testutil/linter.go b/vendor/github.com/machadovilaca/operator-observability/pkg/testutil/linter.go new file mode 100644 index 000000000..eba0e2232 --- /dev/null +++ b/vendor/github.com/machadovilaca/operator-observability/pkg/testutil/linter.go @@ -0,0 +1,75 @@ +package testutil + +import ( + promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + + "github.com/machadovilaca/operator-observability/pkg/operatorrules" +) + +type Linter struct { + customAlertValidations []AlertValidation + customRecordRuleValidations []RecordRuleValidation +} + +func New() *Linter { + return &Linter{ + customAlertValidations: []AlertValidation{}, + customRecordRuleValidations: []RecordRuleValidation{}, + } +} + +func (linter *Linter) AddCustomAlertValidations(validations ...AlertValidation) { + linter.customAlertValidations = append(linter.customAlertValidations, validations...) +} + +func (linter *Linter) AddCustomRecordRuleValidations(validations ...RecordRuleValidation) { + linter.customRecordRuleValidations = append(linter.customRecordRuleValidations, validations...) +} + +func (linter *Linter) LintAlerts(alerts []promv1.Rule) []Problem { + var result []Problem + + for _, alert := range alerts { + result = append(result, linter.LintAlert(&alert)...) + } + + return result +} + +func (linter *Linter) LintAlert(alert *promv1.Rule) []Problem { + var result []Problem + + for _, alertValidation := range defaultAlertValidations { + result = append(result, alertValidation(alert)...) + } + + for _, alertValidation := range linter.customAlertValidations { + result = append(result, alertValidation(alert)...) + } + + return result +} + +func (linter *Linter) LintRecordingRules(recordingRules []operatorrules.RecordingRule) []Problem { + var result []Problem + + for _, recordingRule := range recordingRules { + result = append(result, linter.LintRecordingRule(&recordingRule)...) + } + + return result +} + +func (linter *Linter) LintRecordingRule(recordingRule *operatorrules.RecordingRule) []Problem { + var result []Problem + + for _, recordingRuleValidation := range defaultRecordRuleValidations { + result = append(result, recordingRuleValidation(recordingRule)...) + } + + for _, recordingRuleValidation := range linter.customRecordRuleValidations { + result = append(result, recordingRuleValidation(recordingRule)...) + } + + return result +} diff --git a/vendor/github.com/machadovilaca/operator-observability/pkg/testutil/problem.go b/vendor/github.com/machadovilaca/operator-observability/pkg/testutil/problem.go new file mode 100644 index 000000000..7c1d5e586 --- /dev/null +++ b/vendor/github.com/machadovilaca/operator-observability/pkg/testutil/problem.go @@ -0,0 +1,9 @@ +package testutil + +type Problem struct { + // The name of the Metric, Recording Rule, or Alert indicated by the Problem + ResourceName string + + // The description of the problem found + Description string +} diff --git a/vendor/github.com/machadovilaca/operator-observability/pkg/testutil/recording_rule_validation.go b/vendor/github.com/machadovilaca/operator-observability/pkg/testutil/recording_rule_validation.go new file mode 100644 index 000000000..ab9447bc4 --- /dev/null +++ b/vendor/github.com/machadovilaca/operator-observability/pkg/testutil/recording_rule_validation.go @@ -0,0 +1,38 @@ +package testutil + +import ( + "github.com/machadovilaca/operator-observability/pkg/operatorrules" +) + +type RecordRuleValidation = func(rr *operatorrules.RecordingRule) []Problem + +var defaultRecordRuleValidations = []RecordRuleValidation{ + validateRecordingRuleName, + validateRecordingRuleExpression, +} + +func validateRecordingRuleName(recordingRule *operatorrules.RecordingRule) []Problem { + var result []Problem + + if recordingRule.MetricsOpts.Name == "" { + result = append(result, Problem{ + ResourceName: recordingRule.MetricsOpts.Name, + Description: "recording rule must have a name", + }) + } + + return result +} + +func validateRecordingRuleExpression(recordingRule *operatorrules.RecordingRule) []Problem { + var result []Problem + + if recordingRule.Expr.StrVal == "" { + result = append(result, Problem{ + ResourceName: recordingRule.MetricsOpts.Name, + Description: "recording rule must have an expression", + }) + } + + return result +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 8ab049a85..9eadceab6 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -147,6 +147,9 @@ github.com/google/shlex # github.com/google/uuid v1.6.0 ## explicit github.com/google/uuid +# github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd +## explicit; go 1.17 +github.com/grafana/regexp # github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.1 ## explicit; go 1.19 github.com/grpc-ecosystem/grpc-gateway/v2/internal/httprule @@ -184,6 +187,7 @@ github.com/kubevirt/monitoring/pkg/metrics/parser github.com/machadovilaca/operator-observability/pkg/docs github.com/machadovilaca/operator-observability/pkg/operatormetrics github.com/machadovilaca/operator-observability/pkg/operatorrules +github.com/machadovilaca/operator-observability/pkg/testutil # github.com/mailru/easyjson v0.7.7 ## explicit; go 1.12 github.com/mailru/easyjson/buffer