From 5f9816dc87636aa9f261218a9decdc875b1041e4 Mon Sep 17 00:00:00 2001
From: ArthurSens
Date: Thu, 1 Sep 2022 17:25:04 +0000
Subject: [PATCH] Add alerts for kubernetes nodes

Signed-off-by: ArthurSens
---
 .../{rules.yaml => kubernetes.yaml}           |  0
 .../platform/rules/kubernetes/nodes.yaml      | 53 +++++++++++++++++++
 2 files changed, 53 insertions(+)
 rename operations/observability/mixins/platform/rules/kubernetes/{rules.yaml => kubernetes.yaml} (100%)
 create mode 100644 operations/observability/mixins/platform/rules/kubernetes/nodes.yaml

Review note: nodes.yaml below was edited during review (comments added, mountpoint
added to the alert descriptions); the blob id on its "index" line predates those edits.

diff --git a/operations/observability/mixins/platform/rules/kubernetes/rules.yaml b/operations/observability/mixins/platform/rules/kubernetes/kubernetes.yaml
similarity index 100%
rename from operations/observability/mixins/platform/rules/kubernetes/rules.yaml
rename to operations/observability/mixins/platform/rules/kubernetes/kubernetes.yaml
diff --git a/operations/observability/mixins/platform/rules/kubernetes/nodes.yaml b/operations/observability/mixins/platform/rules/kubernetes/nodes.yaml
new file mode 100644
index 00000000000000..a75d9690fb0261
--- /dev/null
+++ b/operations/observability/mixins/platform/rules/kubernetes/nodes.yaml
@@ -0,0 +1,53 @@
+# Copyright (c) 2022 Gitpod GmbH. All rights reserved.
+# Licensed under the GNU Affero General Public License (AGPL).
+# See License-AGPL.txt in the project root for license information.
+
+# PrometheusRule with node-exporter based filesystem alerts for Kubernetes nodes.
+# Both alerts skip shiftfs mounts and filesystems reported as read-only.
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  labels:
+    app.kubernetes.io/name: kubernetes
+    app.kubernetes.io/part-of: kube-prometheus
+    prometheus: k8s
+    role: alert-rules
+  name: node-monitoring-rules
+  namespace: monitoring-satellite
+spec:
+  groups:
+  - name: node
+    rules:
+    # Fires when a writable filesystem stays below 3% available space for 15 minutes.
+    - alert: NodeFilesystemAlmostOutOfSpace
+      annotations:
+        # $value is the percentage computed by the left-hand operand of "and" in expr.
+        description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.'
+        runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFilesystemAlmostOutOfSpace.md
+        summary: Filesystem has less than 3% space left.
+      expr: |
+        (
+          node_filesystem_avail_bytes{job="node-exporter",fstype!="shiftfs"} / node_filesystem_size_bytes{job="node-exporter",fstype!="shiftfs"} * 100 < 3
+        and
+          node_filesystem_readonly{job="node-exporter",fstype!="shiftfs"} == 0
+        )
+      for: 15m
+      labels:
+        severity: critical
+        team: platform
+    # Fires when a writable filesystem stays below 3% free inodes for one hour.
+    - alert: NodeFilesystemAlmostOutOfFiles
+      annotations:
+        description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.'
+        runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFilesystemAlmostOutOfFiles.md
+        summary: Filesystem has less than 3% inodes left.
+      expr: |
+        (
+          node_filesystem_files_free{job="node-exporter",fstype!="shiftfs"} / node_filesystem_files{job="node-exporter",fstype!="shiftfs"} * 100 < 3
+        and
+          node_filesystem_readonly{job="node-exporter",fstype!="shiftfs"} == 0
+        )
+      for: 1h
+      labels:
+        severity: critical
+        team: platform