Skip to content

Commit

Permalink
Add extra options to promql/series check
Browse files Browse the repository at this point in the history
  • Loading branch information
prymitive committed Jan 11, 2023
1 parent b86550d commit 877d0ca
Show file tree
Hide file tree
Showing 13 changed files with 103 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.19.4
go-version: 1.19.5
cache: true

- name: Test
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.19.4
go-version: 1.19.5
cache: true

- name: Compile pint
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/compile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.19.4
go-version: 1.19.5
cache: true

- name: Set up QEMU
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.19.4
go-version: 1.19.5
cache: true

- name: Build binary
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/go-mod-tidy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.19.4
go-version: 1.19.5

- name: Run go mod tidy
run: go mod tidy
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.19.4
go-version: 1.19.5
cache: true
cache-dependency-path: tools/golangci-lint/go.sum

Expand All @@ -34,7 +34,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.19.4
go-version: 1.19.5
cache: true
cache-dependency-path: tools/gofumpt/go.sum

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.19.4
go-version: 1.19.5
cache: true

- name: Test
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM golang:1.19.4-alpine
FROM golang:1.19.5-alpine
COPY . /src
WORKDIR /src
RUN apk add make git
Expand Down
1 change: 1 addition & 0 deletions cmd/pint/tests/0025_config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ level=info msg="Loading configuration file" path=.pint.hcl
},
"check": [
{
"LookbackStepDuration": 300000000000,
"ignoreMetrics": [
".*_error",
".*_error_.*",
Expand Down
2 changes: 2 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
### Added

- Allow snoozing checks for entire file using `# pint file/snooze ...` comments.
- Added `lookbackRange` and `lookbackStep` configration option to the
[promql/series](checks/promql/series.md) check - #493.

## v0.39.0

Expand Down
13 changes: 13 additions & 0 deletions docs/checks/promql/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,17 @@ check "promql/series" {
}
```

- `lookbackRange` - how far back to query when checking if given metric was ever
present in Prometheus.
Default is `7d`, meaning that if a metric is missing pint will query last 7 days
of metrics to tell you if this metric was ever present and if so, when was it last
seen.
- `lookbackStep` - lookback query resulution.
Default is `5m` which matches Prometheus default
[staleness](https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness)
checks.
If you have a custom `--query.lookback-delta` flag passed to Prometheus you might want
to set this option to the same value.
- `ignoreMetrics` - list of regexp matchers, if a metric is missing from Prometheus
but the name matches any of provided regexp matchers then pint will only report a
warning, instead of a bug level report.
Expand All @@ -139,6 +150,8 @@ Example:

```js
check "promql/series" {
lookbackRange = "5d"
lookbackStep = "1m"
ignoreMetrics = [
".*_error",
".*_error_.*",
Expand Down
59 changes: 40 additions & 19 deletions internal/checks/promql_series.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,12 @@ import (
)

type PromqlSeriesSettings struct {
IgnoreMetrics []string `hcl:"ignoreMetrics,optional" json:"ignoreMetrics,omitempty"`
ignoreMetricsRe []*regexp.Regexp
LookbackRange string `hcl:"lookbackRange,optional" json:"lookbackRange,omitempty"`
lookbackRangeDuration time.Duration
LookbackStep string `hcl:"lookbackStep,optional" json:"lookbackStep,omitempty"`
LookbackStepDuration time.Duration
IgnoreMetrics []string `hcl:"ignoreMetrics,optional" json:"ignoreMetrics,omitempty"`
ignoreMetricsRe []*regexp.Regexp
}

func (c *PromqlSeriesSettings) Validate() error {
Expand All @@ -34,6 +38,24 @@ func (c *PromqlSeriesSettings) Validate() error {
c.ignoreMetricsRe = append(c.ignoreMetricsRe, re)
}

c.lookbackRangeDuration = time.Hour * 24 * 7
if c.LookbackRange != "" {
dur, err := model.ParseDuration(c.LookbackRange)
if err != nil {
return err
}
c.lookbackRangeDuration = time.Duration(dur)
}

c.LookbackStepDuration = time.Minute * 5
if c.LookbackStep != "" {
dur, err := model.ParseDuration(c.LookbackStep)
if err != nil {
return err
}
c.LookbackStepDuration = time.Duration(dur)
}

return nil
}

Expand Down Expand Up @@ -66,16 +88,17 @@ func (c SeriesCheck) Check(ctx context.Context, path string, rule parser.Rule, e
if s := ctx.Value(SettingsKey(c.Reporter())); s != nil {
settings = s.(*PromqlSeriesSettings)
}
if settings == nil {
settings = &PromqlSeriesSettings{}
_ = settings.Validate()
}

expr := rule.Expr()

if expr.SyntaxError != nil {
return
}

rangeLookback := time.Hour * 24 * 7
rangeStep := time.Minute * 5

done := map[string]bool{}
for _, selector := range getSelectors(expr.Query) {
if _, ok := done[selector.String()]; ok {
Expand Down Expand Up @@ -160,7 +183,7 @@ func (c SeriesCheck) Check(ctx context.Context, path string, rule parser.Rule, e
continue
}

promUptime, err := c.prom.RangeQuery(ctx, fmt.Sprintf("count(%s)", c.prom.UptimeMetric()), promapi.NewRelativeRange(rangeLookback, rangeStep))
promUptime, err := c.prom.RangeQuery(ctx, fmt.Sprintf("count(%s)", c.prom.UptimeMetric()), promapi.NewRelativeRange(settings.lookbackRangeDuration, settings.LookbackStepDuration))
if err != nil {
log.Warn().Err(err).Str("name", c.prom.Name()).Msg("Cannot detect Prometheus uptime gaps")
}
Expand All @@ -175,7 +198,7 @@ func (c SeriesCheck) Check(ctx context.Context, path string, rule parser.Rule, e

// 2. If foo was NEVER there -> BUG
log.Debug().Str("check", c.Reporter()).Stringer("selector", &bareSelector).Msg("Checking if base metric has historical series")
trs, err := c.prom.RangeQuery(ctx, fmt.Sprintf("count(%s)", bareSelector.String()), promapi.NewRelativeRange(rangeLookback, rangeStep))
trs, err := c.prom.RangeQuery(ctx, fmt.Sprintf("count(%s)", bareSelector.String()), promapi.NewRelativeRange(settings.lookbackRangeDuration, settings.LookbackStepDuration))
if err != nil {
problems = append(problems, c.queryProblem(err, bareSelector.String(), expr))
continue
Expand Down Expand Up @@ -230,7 +253,7 @@ func (c SeriesCheck) Check(ctx context.Context, path string, rule parser.Rule, e
l := stripLabels(selector)
l.LabelMatchers = append(l.LabelMatchers, labels.MustNewMatcher(labels.MatchRegexp, name, ".+"))
log.Debug().Str("check", c.Reporter()).Stringer("selector", &l).Str("label", name).Msg("Checking if base metric has historical series with required label")
trsLabelCount, err := c.prom.RangeQuery(ctx, fmt.Sprintf("absent(%s)", l.String()), promapi.NewRelativeRange(rangeLookback, rangeStep))
trsLabelCount, err := c.prom.RangeQuery(ctx, fmt.Sprintf("absent(%s)", l.String()), promapi.NewRelativeRange(settings.lookbackRangeDuration, settings.LookbackStepDuration))
if err != nil {
problems = append(problems, c.queryProblem(err, selector.String(), expr))
continue
Expand All @@ -256,8 +279,8 @@ func (c SeriesCheck) Check(ctx context.Context, path string, rule parser.Rule, e

// 4. If foo was ALWAYS there but it's NO LONGER there (for more than min-age) -> BUG
if len(trs.Series.Ranges) == 1 &&
!oldest(trs.Series.Ranges).After(trs.Series.From.Add(rangeStep)) &&
newest(trs.Series.Ranges).Before(trs.Series.Until.Add(rangeStep*-1)) {
!oldest(trs.Series.Ranges).After(trs.Series.From.Add(settings.LookbackStepDuration)) &&
newest(trs.Series.Ranges).Before(trs.Series.Until.Add(settings.LookbackStepDuration*-1)) {

minAge, p := c.getMinAge(rule, selector)
if len(p) > 0 {
Expand Down Expand Up @@ -310,7 +333,7 @@ func (c SeriesCheck) Check(ctx context.Context, path string, rule parser.Rule, e
}
log.Debug().Str("check", c.Reporter()).Stringer("selector", &labelSelector).Stringer("matcher", lm).Msg("Checking if there are historical series matching filter")

trsLabel, err := c.prom.RangeQuery(ctx, fmt.Sprintf("count(%s)", labelSelector.String()), promapi.NewRelativeRange(rangeLookback, rangeStep))
trsLabel, err := c.prom.RangeQuery(ctx, fmt.Sprintf("count(%s)", labelSelector.String()), promapi.NewRelativeRange(settings.lookbackRangeDuration, settings.LookbackStepDuration))
if err != nil {
problems = append(problems, c.queryProblem(err, labelSelector.String(), expr))
continue
Expand Down Expand Up @@ -340,8 +363,8 @@ func (c SeriesCheck) Check(ctx context.Context, path string, rule parser.Rule, e

// 6. If foo is ALWAYS/SOMETIMES there AND {bar OR baz} used to be there ALWAYS BUT it's NO LONGER there -> BUG
if len(trsLabel.Series.Ranges) == 1 &&
!oldest(trsLabel.Series.Ranges).After(trsLabel.Series.Until.Add(rangeLookback-1).Add(rangeStep)) &&
newest(trsLabel.Series.Ranges).Before(trsLabel.Series.Until.Add(rangeStep*-1)) {
!oldest(trsLabel.Series.Ranges).After(trsLabel.Series.Until.Add(settings.lookbackRangeDuration-1).Add(settings.LookbackStepDuration)) &&
newest(trsLabel.Series.Ranges).Before(trsLabel.Series.Until.Add(settings.LookbackStepDuration*-1)) {

minAge, p := c.getMinAge(rule, selector)
if len(p) > 0 {
Expand Down Expand Up @@ -482,12 +505,10 @@ func (c SeriesCheck) isLabelValueIgnored(rule parser.Rule, selector promParser.V
}

func (c SeriesCheck) textAndSeverity(settings *PromqlSeriesSettings, name, text string, s Severity) (string, Severity) {
if settings != nil {
for _, re := range settings.ignoreMetricsRe {
if name != "" && re.MatchString(name) {
log.Debug().Str("check", c.Reporter()).Str("metric", name).Stringer("regexp", re).Msg("Metric matches check ignore rules")
return fmt.Sprintf("%s. Metric name %q matches %q check ignore regexp %q", text, name, c.Reporter(), re), Warning
}
for _, re := range settings.ignoreMetricsRe {
if name != "" && re.MatchString(name) {
log.Debug().Str("check", c.Reporter()).Str("metric", name).Stringer("regexp", re).Msg("Metric matches check ignore rules")
return fmt.Sprintf("%s. Metric name %q matches %q check ignore regexp %q", text, name, c.Reporter(), re), Warning
}
}
return text, s
Expand Down
38 changes: 38 additions & 0 deletions internal/checks/promql_series_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,44 @@ func TestSeriesCheck(t *testing.T) {
},
},
},
{
description: "#2 series never present, custom range",
content: "- record: foo\n expr: sum(notfound)\n",
ctx: func() context.Context {
s := checks.PromqlSeriesSettings{
LookbackRange: "3d",
LookbackStep: "6m",
}
if err := s.Validate(); err != nil {
t.Error(err)
t.FailNow()
}
return context.WithValue(context.Background(), checks.SettingsKey(checks.SeriesCheckName), &s)
},
checker: newSeriesCheck,
prometheus: newSimpleProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Fragment: "notfound",
Lines: []int{2},
Reporter: checks.SeriesCheckName,
Text: noMetricText("prom", uri, "notfound", "3d"),
Severity: checks.Bug,
},
}
},
mocks: []*prometheusMock{
{
conds: []requestCondition{requireQueryPath},
resp: respondWithEmptyVector(),
},
{
conds: []requestCondition{requireRangeQueryPath},
resp: respondWithEmptyMatrix(),
},
},
},
{
description: "#2 series never present but recording rule provides it correctly",
content: "- record: foo\n expr: sum(foo:bar{job=\"xxx\"})\n",
Expand Down

0 comments on commit 877d0ca

Please sign in to comment.