Skip to content

Commit

Permalink
Merge pull request cloudflare#451 from cloudflare/rule/duplicate
Browse files Browse the repository at this point in the history
Add rule/duplicate check
  • Loading branch information
prymitive authored Nov 17, 2022
2 parents ac23533 + f666298 commit d7a2e93
Show file tree
Hide file tree
Showing 43 changed files with 804 additions and 38 deletions.
2 changes: 1 addition & 1 deletion cmd/pint/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ func scanWorker(ctx context.Context, jobs <-chan scanJob, results chan<- reporte
}
default:
start := time.Now()
problems := job.check.Check(ctx, job.entry.Rule, job.allEntries)
problems := job.check.Check(ctx, job.entry.ReportedPath, job.entry.Rule, job.allEntries)
checkDuration.WithLabelValues(job.check.Reporter()).Observe(time.Since(start).Seconds())
for _, problem := range problems {
results <- reporter.Report{
Expand Down
8 changes: 4 additions & 4 deletions cmd/pint/tests/0023_enabled_checks.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
pint.error -l debug --no-color lint rules
! stdout .
stderr 'level=debug msg="Configured checks for rule" enabled=\["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp","promql/rate\(prom\)","promql/series\(prom\)","promql/vector_matching\(prom\)"\,"promql/range_query\(prom\)"] path=rules/1.yaml rule=one'
stderr 'level=debug msg="Configured checks for rule" enabled=\["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp","promql/rate\(prom\)","promql/series\(prom\)","promql/vector_matching\(prom\)"\,"promql/range_query\(prom\)"] path=rules/1.yaml rule=two'
stderr 'level=debug msg="Configured checks for rule" enabled=\["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp","promql/rate\(prom\)","promql/series\(prom\)","promql/vector_matching\(prom\)"\,"promql/range_query\(prom\)"] path=rules/2.yaml rule=one'
stderr 'level=debug msg="Configured checks for rule" enabled=\["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp","promql/rate\(prom\)","promql/series\(prom\)","promql/vector_matching\(prom\)"\,"promql/range_query\(prom\)"] path=rules/2.yaml rule=two'
stderr 'level=debug msg="Configured checks for rule" enabled=\["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp","promql/rate\(prom\)","promql/series\(prom\)","promql/vector_matching\(prom\)"\,"promql/range_query\(prom\)","rule/duplicate\(prom\)"] path=rules/1.yaml rule=one'
stderr 'level=debug msg="Configured checks for rule" enabled=\["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp","promql/rate\(prom\)","promql/series\(prom\)","promql/vector_matching\(prom\)"\,"promql/range_query\(prom\)","rule/duplicate\(prom\)"] path=rules/1.yaml rule=two'
stderr 'level=debug msg="Configured checks for rule" enabled=\["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp","promql/rate\(prom\)","promql/series\(prom\)","promql/vector_matching\(prom\)"\,"promql/range_query\(prom\)","rule/duplicate\(prom\)"] path=rules/2.yaml rule=one'
stderr 'level=debug msg="Configured checks for rule" enabled=\["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp","promql/rate\(prom\)","promql/series\(prom\)","promql/vector_matching\(prom\)"\,"promql/range_query\(prom\)","rule/duplicate\(prom\)"] path=rules/2.yaml rule=two'

-- rules/1.yaml --
- record: one
Expand Down
1 change: 1 addition & 0 deletions cmd/pint/tests/0025_config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ level=info msg="Loading configuration file" path=.pint.hcl
"promql/vector_matching",
"query/cost",
"promql/series",
"rule/duplicate",
"rule/label",
"rule/link",
"rule/reject"
Expand Down
2 changes: 2 additions & 0 deletions cmd/pint/tests/0054_watch_metrics_prometheus.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ pint_check_duration_seconds_sum{check="promql/syntax"}
pint_check_duration_seconds_count{check="promql/syntax"}
pint_check_duration_seconds_sum{check="promql/vector_matching"}
pint_check_duration_seconds_count{check="promql/vector_matching"}
pint_check_duration_seconds_sum{check="rule/duplicate"}
pint_check_duration_seconds_count{check="rule/duplicate"}
# HELP pint_check_iterations_total Total number of completed check iterations since pint start
# TYPE pint_check_iterations_total counter
pint_check_iterations_total
Expand Down
8 changes: 8 additions & 0 deletions cmd/pint/tests/0056_prometheus_required.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,21 @@ stderr 'level=info msg="Problems found" Warning=12'
-- rules/1.yaml --
- record: one
expr: up == 0
labels:
path: a
- record: two
expr: up == 0
labels:
path: a
-- rules/2.yaml --
- record: one
expr: up == 0
labels:
path: b
- record: two
expr: up == 0
labels:
path: b

-- .pint.hcl --
prometheus "prom" {
Expand Down
2 changes: 2 additions & 0 deletions cmd/pint/tests/0057_watch_metrics_prometheus_ignore.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ pint_check_duration_seconds_sum{check="promql/syntax"}
pint_check_duration_seconds_count{check="promql/syntax"}
pint_check_duration_seconds_sum{check="promql/vector_matching"}
pint_check_duration_seconds_count{check="promql/vector_matching"}
pint_check_duration_seconds_sum{check="rule/duplicate"}
pint_check_duration_seconds_count{check="rule/duplicate"}
# HELP pint_check_iterations_total Total number of completed check iterations since pint start
# TYPE pint_check_iterations_total counter
pint_check_iterations_total
Expand Down
1 change: 1 addition & 0 deletions cmd/pint/tests/0103_file_disable.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ level=debug msg="Stopping query workers" name=prom uri=http://127.0.0.1:7103
- record: "colo:test1"
expr: sum(foo) without(job)

# pint file/disable rule/duplicate

-- .pint.hcl --
prometheus "prom" {
Expand Down
2 changes: 2 additions & 0 deletions cmd/pint/tests/0104_file_ignore_prom.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ stderr 'level=info msg="Problems found" Bug=3'
- record: "colo:test2"
expr: sum(foo) without(job)

# pint file/disable rule/duplicate

-- .pint.hcl --
prometheus "prom" {
uri = "http://127.0.0.1:7104"
Expand Down
60 changes: 60 additions & 0 deletions cmd/pint/tests/0108_rule_duplicate.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
pint.error --no-color -d 'promql/.*' -d alerts/count lint rules
! stdout .
cmp stderr stderr.txt

-- stderr.txt --
level=info msg="Loading configuration file" path=.pint.hcl
rules/0001.yml:1-2: duplicated rule, identical rule found at rules/0002.yml:1 (rule/duplicate)
1 | - record: "colo:duplicate"
2 | expr: sum(foo) without(job)

rules/0001.yml:9-12: duplicated rule, identical rule found at rules/0002.yml:11 (rule/duplicate)
9 | - record: "colo:labels:equal"
10 | expr: sum(foo) without(job)
11 | labels:
12 | same: yes

level=info msg="Problems found" Bug=2
level=fatal msg="Fatal error" error="problems found"
-- rules/0001.yml --
- record: "colo:duplicate"
expr: sum(foo) without(job)
- record: "colo:labels:empty"
expr: sum(foo) without(job)
- record: "colo:labels:mismatch"
expr: sum(foo) without(job)
labels:
file: a
- record: "colo:labels:equal"
expr: sum(foo) without(job)
labels:
same: yes

-- rules/0002.yml --
- record: "colo:duplicate"
expr: sum(foo) without(job)
- record: "colo:labels:empty"
expr: sum(foo) without(job)
labels:
empty: nope
- record: "colo:labels:mismatch"
expr: sum(foo) without(job)
labels:
file: b
- record: "colo:labels:equal"
expr: sum(foo) without(job)
labels:
same: yes

# pint file/disable rule/duplicate

-- .pint.hcl --
prometheus "prom" {
uri = "http://127.0.0.1:7108"
failover = []
timeout = "5s"
required = true
}
parser {
relaxed = [".*"]
}
56 changes: 56 additions & 0 deletions cmd/pint/tests/0109_rule_duplicate_multiple_proms_include.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
pint.ok --no-color -d 'promql/.*' -d alerts/count lint rules
! stdout .
cmp stderr stderr.txt

-- stderr.txt --
level=info msg="Loading configuration file" path=.pint.hcl
-- rules/0001.yml --
- record: "colo:duplicate"
expr: sum(foo) without(job)
- record: "colo:labels:empty"
expr: sum(foo) without(job)
- record: "colo:labels:mismatch"
expr: sum(foo) without(job)
labels:
file: a
- record: "colo:labels:equal"
expr: sum(foo) without(job)
labels:
same: yes

-- rules/0002.yml --
- record: "colo:duplicate"
expr: sum(foo) without(job)
- record: "colo:labels:empty"
expr: sum(foo) without(job)
labels:
empty: nope
- record: "colo:labels:mismatch"
expr: sum(foo) without(job)
labels:
file: b
- record: "colo:labels:equal"
expr: sum(foo) without(job)
labels:
same: yes

# pint file/disable rule/duplicate

-- .pint.hcl --
prometheus "prom1" {
uri = "http://127.0.0.1:7109/1"
failover = []
timeout = "5s"
required = true
include = ["rules/0001.yml"]
}
prometheus "prom2" {
uri = "http://127.0.0.1:7109/2"
failover = []
timeout = "5s"
required = true
include = ["rules/0002.yml"]
}
parser {
relaxed = [".*"]
}
56 changes: 56 additions & 0 deletions cmd/pint/tests/0110_rule_duplicate_multiple_proms_exclude.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
pint.ok --no-color -d 'promql/.*' -d alerts/count lint rules
! stdout .
cmp stderr stderr.txt

-- stderr.txt --
level=info msg="Loading configuration file" path=.pint.hcl
-- rules/0001.yml --
- record: "colo:duplicate"
expr: sum(foo) without(job)
- record: "colo:labels:empty"
expr: sum(foo) without(job)
- record: "colo:labels:mismatch"
expr: sum(foo) without(job)
labels:
file: a
- record: "colo:labels:equal"
expr: sum(foo) without(job)
labels:
same: yes

-- rules/0002.yml --
- record: "colo:duplicate"
expr: sum(foo) without(job)
- record: "colo:labels:empty"
expr: sum(foo) without(job)
labels:
empty: nope
- record: "colo:labels:mismatch"
expr: sum(foo) without(job)
labels:
file: b
- record: "colo:labels:equal"
expr: sum(foo) without(job)
labels:
same: yes

# pint file/disable rule/duplicate

-- .pint.hcl --
prometheus "prom1" {
uri = "http://127.0.0.1:7110/1"
failover = []
timeout = "5s"
required = true
exclude = ["rules/0002.yml"]
}
prometheus "prom2" {
uri = "http://127.0.0.1:7110/2"
failover = []
timeout = "5s"
required = true
exclude = ["rules/0001.yml"]
}
parser {
relaxed = [".*"]
}
6 changes: 6 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## v0.34.0

### Added

- Added [rule/duplicate](checks/rule/duplicate.md) check.

## v0.33.1

### Fixed
Expand Down
81 changes: 81 additions & 0 deletions docs/checks/rule/duplicate.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
---
layout: default
parent: Checks
grand_parent: Documentation
---

# rule/duplicate

This check will find and report any duplicated recording rules.

When Prometheus is configured with two identical recording rules that
are producing the exact time series it will discard results from one
of them. When that happens you will see warnings in logs, example:

```
msg="Rule evaluation result discarded" err="duplicate sample for timestamp"
```

Duplicated rule itself is not catastrophic but it will cause constant unnecessary
logs that might hide other issues and can lead to other problems if the
duplicated rule is later updated, but only in one place, not in both.

## Configuration

This check doesn't have any configuration options.

## How to enable it

This check is enabled by default for all configured Prometheus servers.

Example:

```js
prometheus "prod" {
uri = "https://prometheus-prod.example.com"
timeout = "60s"
include = [
"rules/prod/.*",
"rules/common/.*",
]
}

prometheus "dev" {
uri = "https://prometheus-dev.example.com"
timeout = "30s"
include = [
"rules/dev/.*",
"rules/common/.*",
]
}
```

## How to disable it

You can disable this check globally by adding this config block:

```js
checks {
disabled = ["rule/duplicate"]
}
```

You can also disable it for all rules inside given file by adding
a comment anywhere in that file. Example:

`# pint file/disable rule/duplicate`

Or you can disable it per rule by adding a comment to it. Example:

`# pint disable rule/duplicate`

If you want to disable only individual instances of this check
you can add a more specific comment.

`# pint disable rule/duplicate($prometheus)`

Where `$prometheus` is the name of Prometheus server to disable.

Example:

`# pint disable rule/duplicate(prod)`
2 changes: 1 addition & 1 deletion internal/checks/alerts_annotation.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func (c AnnotationCheck) Reporter() string {
return AnnotationCheckName
}

func (c AnnotationCheck) Check(ctx context.Context, rule parser.Rule, entries []discovery.Entry) (problems []Problem) {
func (c AnnotationCheck) Check(ctx context.Context, path string, rule parser.Rule, entries []discovery.Entry) (problems []Problem) {
if rule.AlertingRule == nil {
return nil
}
Expand Down
2 changes: 1 addition & 1 deletion internal/checks/alerts_comparison.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func (c ComparisonCheck) Reporter() string {
return ComparisonCheckName
}

func (c ComparisonCheck) Check(ctx context.Context, rule parser.Rule, entries []discovery.Entry) (problems []Problem) {
func (c ComparisonCheck) Check(ctx context.Context, path string, rule parser.Rule, entries []discovery.Entry) (problems []Problem) {
if rule.AlertingRule == nil {
return
}
Expand Down
2 changes: 1 addition & 1 deletion internal/checks/alerts_count.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func (c AlertsCheck) Reporter() string {
return AlertsCheckName
}

func (c AlertsCheck) Check(ctx context.Context, rule parser.Rule, entries []discovery.Entry) (problems []Problem) {
func (c AlertsCheck) Check(ctx context.Context, path string, rule parser.Rule, entries []discovery.Entry) (problems []Problem) {
if rule.AlertingRule == nil {
return
}
Expand Down
2 changes: 1 addition & 1 deletion internal/checks/alerts_for.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func (c AlertsForChecksFor) Reporter() string {
return AlertForCheckName
}

func (c AlertsForChecksFor) Check(ctx context.Context, rule parser.Rule, entries []discovery.Entry) (problems []Problem) {
func (c AlertsForChecksFor) Check(ctx context.Context, path string, rule parser.Rule, entries []discovery.Entry) (problems []Problem) {
if rule.AlertingRule == nil || rule.AlertingRule.For == nil {
return
}
Expand Down
Loading

0 comments on commit d7a2e93

Please sign in to comment.