Skip to content

Commit

Permalink
mixin: Added critical Rules alerts.
Browse files Browse the repository at this point in the history
Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com>
  • Loading branch information
bwplotka committed Apr 3, 2020
1 parent 84495fa commit 355ab19
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 3 deletions.
21 changes: 20 additions & 1 deletion examples/alerts/alerts.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ rules:
)
for: 5m
labels:
severity: warning
severity: critical
- alert: ThanosRuleHighRuleEvaluationWarnings
annotations:
message: Thanos Rule {{$labels.job}} {{$labels.pod}} has high number of evaluation
Expand Down Expand Up @@ -170,6 +170,25 @@ rules:
for: 15m
labels:
severity: warning
# Fires when the time elapsed since a rule group's last evaluation timestamp
# exceeds 10x that group's configured interval, sustained for 5 minutes.
# Name matches the alert defined in mixin/thanos/alerts/rule.libsonnet.
- alert: ThanosRuleNoEvaluationForLast10Intervals
  annotations:
    # The expression's value is seconds since the last evaluation, not a
    # percentage, so it is not rendered as one in the message.
    message: Thanos Rule {{$labels.job}} has rule groups that did not evaluate
      for at least 10x of their expected interval.
  expr: |
    time() - prometheus_rule_group_last_evaluation_timestamp_seconds{job=~"thanos-rule.*"}
    > 10 * prometheus_rule_group_interval_seconds{job=~"thanos-rule.*"}
  for: 5m
  labels:
    severity: critical
# Fires when the 5m rate of samples appended to the TSDB head is zero (or
# below) and stays that way for 10 minutes.
- alert: ThanosRuleTSDBNotIngestingSamples
  annotations:
    message: Thanos Rule {{$labels.job}} did not ingest any samples for the last
      15 minutes.
  expr: |
    rate(prometheus_tsdb_head_samples_appended_total{job=~"thanos-rule.*"}[5m]) <= 0
  for: 10m
  labels:
    severity: critical
```
## Store Gateway
Expand Down
21 changes: 20 additions & 1 deletion examples/alerts/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ groups:
)
for: 5m
labels:
severity: warning
severity: critical
- alert: ThanosRuleHighRuleEvaluationWarnings
annotations:
message: Thanos Rule {{$labels.job}} {{$labels.pod}} has high number of evaluation
Expand Down Expand Up @@ -389,6 +389,25 @@ groups:
for: 15m
labels:
severity: warning
# Fires when the time elapsed since a rule group's last evaluation timestamp
# exceeds 10x that group's configured interval, sustained for 5 minutes.
# Name matches the alert defined in mixin/thanos/alerts/rule.libsonnet.
- alert: ThanosRuleNoEvaluationForLast10Intervals
  annotations:
    # The expression's value is seconds since the last evaluation, not a
    # percentage, so it is not rendered as one in the message.
    message: Thanos Rule {{$labels.job}} has rule groups that did not evaluate
      for at least 10x of their expected interval.
  expr: |
    time() - prometheus_rule_group_last_evaluation_timestamp_seconds{job=~"thanos-rule.*"}
    > 10 * prometheus_rule_group_interval_seconds{job=~"thanos-rule.*"}
  for: 5m
  labels:
    severity: critical
# Fires when the 5m rate of samples appended to the TSDB head is zero (or
# below) and stays that way for 10 minutes.
- alert: ThanosRuleTSDBNotIngestingSamples
  annotations:
    message: Thanos Rule {{$labels.job}} did not ingest any samples for the last
      15 minutes.
  expr: |
    rate(prometheus_tsdb_head_samples_appended_total{job=~"thanos-rule.*"}[5m]) <= 0
  for: 10m
  labels:
    severity: critical
- name: thanos-component-absent.rules
rules:
- alert: ThanosCompactIsDown
Expand Down
29 changes: 28 additions & 1 deletion mixin/thanos/alerts/rule.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@

'for': '5m',
labels: {
severity: 'warning',
severity: 'critical',
},
},
{
Expand Down Expand Up @@ -153,6 +153,33 @@
severity: 'warning',
},
},
{
  // Fires when the time elapsed since a rule group's last evaluation
  // timestamp exceeds 10x that group's configured interval, sustained for
  // 5 minutes. The selector is substituted from thanos.rule.
  alert: 'ThanosRuleNoEvaluationForLast10Intervals',
  annotations: {
    // The expression's value is seconds since the last evaluation, not a
    // percentage, so it is not rendered as one in the message.
    message: 'Thanos Rule {{$labels.job}} has rule groups that did not evaluate for at least 10x of their expected interval.',
  },
  expr: |||
    time() - prometheus_rule_group_last_evaluation_timestamp_seconds{%(selector)s}
    > 10 * prometheus_rule_group_interval_seconds{%(selector)s}
  ||| % thanos.rule,
  'for': '5m',
  labels: {
    severity: 'critical',
  },
},
{
  // Fires when the 5m rate of samples appended to the TSDB head is zero (or
  // below) and stays that way for 10 minutes. The selector is substituted
  // from thanos.rule.
  alert: 'ThanosRuleTSDBNotIngestingSamples',
  annotations: {
    message: 'Thanos Rule {{$labels.job}} did not ingest any samples for the last 15 minutes.',
  },
  expr: |||
    rate(prometheus_tsdb_head_samples_appended_total{%(selector)s}[5m]) <= 0
  ||| % thanos.rule,
  'for': '10m',
  labels: {
    severity: 'critical',
  },
},
],
},
],
Expand Down

0 comments on commit 355ab19

Please sign in to comment.