Skip to content

Commit

Permalink
Also fix alert
Browse files Browse the repository at this point in the history
Signed-off-by: Dimitar Dimitrov <dimitar.dimitrov@grafana.com>
  • Loading branch information
dimitarvdimitrov committed Sep 25, 2024
1 parent 609aa91 commit 02920a5
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@
* [BUGFIX] Dashboards: avoid over-counting of ingesters metrics when migrating to experimental ingest storage. #9170
* [BUGFIX] Dashboards: fix `job_prefix` not utilized in `jobSelector`. #9155
* [BUGFIX] Dashboards: Fix autoscaling metrics joins when series churn. #9412
* [BUGFIX] Alerts: Fix autoscaling metrics joins in `MimirAutoscalerNotActive` when series churn. #9412

### Jsonnet

Expand Down
13 changes: 10 additions & 3 deletions operations/mimir-mixin-compiled-baremetal/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -964,16 +964,23 @@ groups:
# Match only Mimir namespaces.
* on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info)
# Add "metric" label.
+ on(cluster, namespace, horizontalpodautoscaler) group_right label_replace(kube_horizontalpodautoscaler_spec_target_metric*0, "metric", "$1", "metric_name", "(.+)")
+ on(cluster, namespace, horizontalpodautoscaler) group_right
# Using `max by ()` so that series churn doesn't break the promQL join
max by (cluster, namespace, horizontalpodautoscaler) (
label_replace(kube_horizontalpodautoscaler_spec_target_metric*0, "metric", "$1", "metric_name", "(.+)")
)
> 0),
"scaledObject", "$1", "horizontalpodautoscaler", "keda-hpa-(.*)"
)
)
# Alert only if the scaling metric exists and is > 0. If the KEDA ScaledObject is configured to scale down to 0,
# then HPA ScalingActive may be false when expected to run 0 replicas. In this case, the scaling metric exported
# by KEDA may not exist at all or may be exposed with a value of 0.
and on (cluster, namespace, metric, scaledObject)
(label_replace(keda_scaler_metrics_value, "namespace", "$0", "exported_namespace", ".+") > 0)
and on (cluster, namespace, metric, scaledObject) (
max by (cluster, namespace, metric, scaledObject) (
label_replace(keda_scaler_metrics_value, "namespace", "$0", "exported_namespace", ".+") > 0
)
)
for: 1h
labels:
severity: critical
Expand Down
13 changes: 10 additions & 3 deletions operations/mimir-mixin-compiled/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -978,16 +978,23 @@ groups:
# Match only Mimir namespaces.
* on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info)
# Add "metric" label.
+ on(cluster, namespace, horizontalpodautoscaler) group_right label_replace(kube_horizontalpodautoscaler_spec_target_metric*0, "metric", "$1", "metric_name", "(.+)")
+ on(cluster, namespace, horizontalpodautoscaler) group_right
# Using `max by ()` so that series churn doesn't break the promQL join
max by (cluster, namespace, horizontalpodautoscaler) (
label_replace(kube_horizontalpodautoscaler_spec_target_metric*0, "metric", "$1", "metric_name", "(.+)")
)
> 0),
"scaledObject", "$1", "horizontalpodautoscaler", "keda-hpa-(.*)"
)
)
# Alert only if the scaling metric exists and is > 0. If the KEDA ScaledObject is configured to scale down to 0,
# then HPA ScalingActive may be false when expected to run 0 replicas. In this case, the scaling metric exported
# by KEDA may not exist at all or may be exposed with a value of 0.
and on (cluster, namespace, metric, scaledObject)
(label_replace(keda_scaler_metrics_value, "namespace", "$0", "exported_namespace", ".+") > 0)
and on (cluster, namespace, metric, scaledObject) (
max by (cluster, namespace, metric, scaledObject) (
label_replace(keda_scaler_metrics_value, "namespace", "$0", "exported_namespace", ".+") > 0
)
)
for: 1h
labels:
severity: critical
Expand Down
13 changes: 10 additions & 3 deletions operations/mimir-mixin/alerts/autoscaling.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,23 @@
# Match only Mimir namespaces.
* on(%(aggregation_labels)s) group_left max by(%(aggregation_labels)s) (cortex_build_info)
# Add "metric" label.
+ on(%(aggregation_labels)s, horizontalpodautoscaler) group_right label_replace(kube_horizontalpodautoscaler_spec_target_metric*0, "metric", "$1", "metric_name", "(.+)")
+ on(%(aggregation_labels)s, horizontalpodautoscaler) group_right
# Using `max by ()` so that series churn doesn't break the promQL join
max by (%(aggregation_labels)s, horizontalpodautoscaler) (
label_replace(kube_horizontalpodautoscaler_spec_target_metric*0, "metric", "$1", "metric_name", "(.+)")
)
> 0),
"scaledObject", "$1", "horizontalpodautoscaler", "%(hpa_prefix)s(.*)"
)
)
# Alert only if the scaling metric exists and is > 0. If the KEDA ScaledObject is configured to scale down to 0,
# then HPA ScalingActive may be false when expected to run 0 replicas. In this case, the scaling metric exported
# by KEDA may not exist at all or may be exposed with a value of 0.
and on (%(aggregation_labels)s, metric, scaledObject)
(label_replace(keda_scaler_metrics_value, "namespace", "$0", "exported_namespace", ".+") > 0)
and on (%(aggregation_labels)s, metric, scaledObject) (
max by (%(aggregation_labels)s, metric, scaledObject) (
label_replace(keda_scaler_metrics_value, "namespace", "$0", "exported_namespace", ".+") > 0
)
)
||| % {
hpa_prefix: $._config.autoscaling_hpa_prefix,
aggregation_labels: $._config.alert_aggregation_labels,
Expand Down

0 comments on commit 02920a5

Please sign in to comment.