diff --git a/CHANGELOG.md b/CHANGELOG.md index 807131bc..3fba9241 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## v0.8.1 +### Added + +- Added `--max-problems` flag to `pint watch` command. + ### Changed - Updated Prometheus modules to [v2.33.0](https://github.com/prometheus/prometheus/releases/tag/v2.33.0). diff --git a/README.md b/README.md index 632ea4de..b2fa3044 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,14 @@ scrape_configs: - targets: ['localhost:8080'] ``` +Available metrics: + +- `pint_problem` - exported for every problem detected by pint. + To avoid exposing too many metrics at once, pass the `--max-problems` flag to the watch command. + When set, pint will expose at most `--max-problems` `pint_problem` metrics. +- `pint_problems` - the total number of all problems detected by pint, + including those not exported due to the `--max-problems` flag. + ## Release Notes See [CHANGELOG.md](/CHANGELOG.md) for history of changes. diff --git a/cmd/pint/tests/0042_watch_metrics.txt b/cmd/pint/tests/0042_watch_metrics.txt index ed674e5f..571113f1 100644 --- a/cmd/pint/tests/0042_watch_metrics.txt +++ b/cmd/pint/tests/0042_watch_metrics.txt @@ -275,6 +275,9 @@ pint_check_iterations_total pint_problem{kind="alerting",lines="8",name="comparison",problem="alert query doesn't have any condition, it will always fire if the metric exists",reporter="promql/comparison",severity="warning"} pint_problem{kind="recording",lines="2",name="broken",problem="syntax error: no arguments for aggregate expression provided",reporter="promql/syntax",severity="fatal"} pint_problem{kind="recording",lines="5",name="aggregate",problem="job label is required and should be preserved when aggregating \"^.+$\" rules, remove job from without()",reporter="promql/aggregate",severity="warning"} +# HELP pint_problems Total number of problems reported by pint +# TYPE pint_problems gauge +pint_problems # HELP 
prometheus_template_text_expansion_failures_total The total number of template text expansion failures. # TYPE prometheus_template_text_expansion_failures_total counter prometheus_template_text_expansion_failures_total diff --git a/cmd/pint/tests/0048_watch_limit.txt b/cmd/pint/tests/0048_watch_limit.txt new file mode 100644 index 00000000..5a69fdf6 --- /dev/null +++ b/cmd/pint/tests/0048_watch_limit.txt @@ -0,0 +1,34 @@ +exec bash -x ./test.sh & + +pint.ok watch --listen=:6048 --max-problems=2 --pidfile=pint.pid rules +cmp curl.txt metrics.txt + +-- test.sh -- +sleep 5 +curl -s http://127.0.0.1:6048/metrics | grep -E '^pint_problem' > curl.txt +cat pint.pid | xargs kill + +-- rules/1.yml -- +- record: broken + expr: foo / count()) + +- record: aggregate + expr: sum(foo) without(job) + +- alert: comparison + expr: foo + +-- .pint.hcl -- +rule { + match { + kind = "recording" + } + aggregate ".+" { + keep = [ "job" ] + } +} + +-- metrics.txt -- +pint_problem{kind="alerting",lines="8",name="comparison",problem="alert query doesn't have any condition, it will always fire if the metric exists",reporter="promql/comparison",severity="warning"} 1 +pint_problem{kind="recording",lines="2",name="broken",problem="syntax error: no arguments for aggregate expression provided",reporter="promql/syntax",severity="fatal"} 1 +pint_problems 3 diff --git a/cmd/pint/watch.go b/cmd/pint/watch.go index faf3d448..a8d79896 100644 --- a/cmd/pint/watch.go +++ b/cmd/pint/watch.go @@ -9,6 +9,7 @@ import ( _ "net/http/pprof" "os" "os/signal" + "sort" "strings" "sync" "syscall" @@ -27,9 +28,10 @@ import ( ) const ( - intervalFlag = "interval" - listenFlag = "listen" - pidfileFlag = "pidfile" + intervalFlag = "interval" + listenFlag = "listen" + pidfileFlag = "pidfile" + maxProblemsFlag = "max-problems" ) var watchCmd = &cli.Command{ @@ -54,6 +56,12 @@ var watchCmd = &cli.Command{ Aliases: []string{"p"}, Usage: "Write pid file to this path", }, + &cli.IntFlag{ + Name: maxProblemsFlag, + Aliases: 
[]string{"m"}, + Value: 0, + Usage: "Maximum number of problems to report on metrics, 0 - no limit", + }, }, } @@ -100,7 +108,7 @@ func actionWatch(c *cli.Context) (err error) { } // start HTTP server for metrics - collector := newProblemCollector(cfg, paths) + collector := newProblemCollector(cfg, paths, c.Int(maxProblemsFlag)) prometheus.MustRegister(collector) prometheus.MustRegister(checkDuration) prometheus.MustRegister(checkIterationsTotal) @@ -179,14 +187,16 @@ func startTimer(ctx context.Context, cfg config.Config, workers int, interval ti } type problemCollector struct { - lock sync.Mutex - cfg config.Config - paths []string - summary *reporter.Summary - problem *prometheus.Desc + lock sync.Mutex + cfg config.Config + paths []string + summary *reporter.Summary + problem *prometheus.Desc + problems *prometheus.Desc + maxProblems int } -func newProblemCollector(cfg config.Config, paths []string) *problemCollector { +func newProblemCollector(cfg config.Config, paths []string, maxProblems int) *problemCollector { return &problemCollector{ cfg: cfg, paths: paths, @@ -196,6 +206,13 @@ func newProblemCollector(cfg config.Config, paths []string) *problemCollector { []string{"kind", "name", "severity", "reporter", "problem", "lines"}, prometheus.Labels{}, ), + problems: prometheus.NewDesc( + "pint_problems", + "Total number of problems reported by pint", + []string{}, + prometheus.Labels{}, + ), + maxProblems: maxProblems, } } @@ -231,7 +248,8 @@ func (c *problemCollector) Collect(ch chan<- prometheus.Metric) { return } - done := map[string]struct{}{} + done := map[string]prometheus.Metric{} + keys := []string{} for _, report := range c.summary.Reports { kind := "invalid" @@ -264,11 +282,21 @@ func (c *problemCollector) Collect(ch chan<- prometheus.Metric) { } key := out.String() - if _, ok := done[key]; ok { - continue + if _, ok := done[key]; !ok { + done[key] = metric + keys = append(keys, key) } + } + + ch <- prometheus.MustNewConstMetric(c.problems, 
prometheus.GaugeValue, float64(len(done))) - ch <- metric - done[key] = struct{}{} + sort.Strings(keys) + var reported int + for _, key := range keys { + ch <- done[key] + reported++ + if c.maxProblems > 0 && reported >= c.maxProblems { + break + } } }