Skip to content

Commit

Permalink
enhance: add missing cluster label to mixins
Browse files (browse the repository at this point in the history)
Signed-off-by: QuentinBisson <quentin@giantswarm.io>
  • Loading branch information
QuentinBisson committed May 3, 2024
1 parent a03846b commit 07ac356
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 15 deletions.
6 changes: 3 additions & 3 deletions production/loki-mixin-compiled-ssd/alerts.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,7 +4,7 @@ groups:
- alert: LokiRequestErrors
annotations:
description: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
{{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
summary: Loki request error rate is high.
expr: |
100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route)
Expand All @@ -17,7 +17,7 @@ groups:
- alert: LokiRequestPanics
annotations:
description: |
{{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
{{ $labels.cluster }} {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
summary: Loki requests are causing code panics.
expr: |
sum(increase(loki_panic_total[10m])) by (namespace, job) > 0
Expand All @@ -26,7 +26,7 @@ groups:
- alert: LokiRequestLatency
annotations:
description: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
{{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
summary: Loki request error latency is high.
expr: |
cluster_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1
Expand Down
6 changes: 3 additions & 3 deletions production/loki-mixin-compiled/alerts.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,7 +4,7 @@ groups:
- alert: LokiRequestErrors
annotations:
description: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
{{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
summary: Loki request error rate is high.
expr: |
100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route)
Expand All @@ -17,7 +17,7 @@ groups:
- alert: LokiRequestPanics
annotations:
description: |
{{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
{{ $labels.cluster }} {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
summary: Loki requests are causing code panics.
expr: |
sum(increase(loki_panic_total[10m])) by (namespace, job) > 0
Expand All @@ -26,7 +26,7 @@ groups:
- alert: LokiRequestLatency
annotations:
description: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
{{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
summary: Loki request error latency is high.
expr: |
cluster_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1
Expand Down
18 changes: 9 additions & 9 deletions production/loki-mixin/alerts.libsonnet
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,9 +18,9 @@
},
annotations: {
summary: 'Loki request error rate is high.',
description: |||
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
|||,
description: std.strReplace(|||
{{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
|||, 'cluster', $._config.per_cluster_label),
},
},
{
Expand All @@ -33,9 +33,9 @@
},
annotations: {
summary: 'Loki requests are causing code panics.',
description: |||
{{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
|||,
description: std.strReplace(|||
{{ $labels.cluster }} {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
|||, 'cluster', $._config.per_cluster_label),
},
},
{
Expand All @@ -49,9 +49,9 @@
},
annotations: {
summary: 'Loki request error latency is high.',
description: |||
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
|||,
description: std.strReplace(|||
{{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
|||, 'cluster', $._config.per_cluster_label),
},
},
{
Expand Down

0 comments on commit 07ac356

Please sign in to comment.