Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[kube-prometheus-stack] Resolve Issue 3340 #3351

Merged
merged 22 commits into from
May 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
7ff4ef5
update add_custom_labels to apply labels to all PrometheusRules
scott-grimes May 8, 2023
c9da672
update rules templates compiled from sync_prometheus_rules.py
scott-grimes May 8, 2023
ee386c2
bump chart version
scott-grimes May 8, 2023
33ca886
fix node-exporter listen address with kubeRBACProxy enabled (#3336)
r0bj May 7, 2023
52fa579
[prometheus] Bump Prometheus release (#3335)
zeritti May 7, 2023
7f213f9
Fix RoleBinding subject namespace reference (#3342)
walker-tom May 7, 2023
029332a
Update extraManifests example to be multiline string instead of YAML …
walker-tom May 7, 2023
c438c01
[prometheus-stackdriver-exporter] Add prometheusRule support (#3115)
BapRx May 8, 2023
6846dc7
[kube-prometheus-stack] add prometheus config reloader liveness and r…
dongjiang1989 May 8, 2023
948abbf
[prometheus] env vars for configmap-reload (#3344)
zeritti May 8, 2023
a6f7cf6
[prometheus] Fix volume name evaluation (#3348)
zeritti May 8, 2023
7f3b601
Merge branch 'main' into issue-3340
scott-grimes May 8, 2023
9fec572
formatting
scott-grimes May 8, 2023
9b307ad
Merge branch 'main' into issue-3340
zanhsieh May 9, 2023
5c50e1d
Merge branch 'main' into issue-3340
scott-grimes May 9, 2023
1d45c24
Merge branch 'main' into issue-3340
scott-grimes May 11, 2023
04746d2
Update Chart.yaml
scott-grimes May 11, 2023
7269cb5
Merge branch 'main' into issue-3340
scott-grimes May 11, 2023
be34fcf
Update sync_prometheus_rules.py
scott-grimes May 11, 2023
f20fb4f
Merge branch 'main' into issue-3340
scott-grimes May 11, 2023
6a0fb39
Merge branch 'main' into issue-3340
scott-grimes May 12, 2023
ffb19c6
Merge branch 'main' into issue-3340
scott-grimes May 15, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion charts/kube-prometheus-stack/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ name: kube-prometheus-stack
sources:
- https://github.com/prometheus-community/helm-charts
- https://github.com/prometheus-operator/kube-prometheus
version: 45.28.0
version: 45.28.1
appVersion: v0.65.1
kubeVersion: ">=1.16.0-0"
home: https://github.com/prometheus-operator/kube-prometheus
Expand Down
59 changes: 41 additions & 18 deletions charts/kube-prometheus-stack/hack/sync_prometheus_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,26 +264,49 @@ def add_rules_per_rule_conditions(rules, group, indent=4):
return rules


def add_custom_labels(rules_str, indent=4):
    """Add the ``additionalRuleLabels`` Helm template to every rule.

    ``rules_str`` is the YAML text of a rule group whose list items start
    with a dash at column ``indent``. The text is split at every such item
    and each item is patched individually:

    * if the item already has a ``labels:`` key, the template is inserted at
      the end of that mapping (before the next sibling key, or at the end of
      the item when ``labels:`` is its last key);
    * otherwise a ``labels:`` key holding only the template is appended.

    Args:
        rules_str: YAML text containing the rule list.
        indent: column of the ``-`` that starts each rule item.

    Returns:
        The patched text, always terminated by a single added newline. If no
        rule item is found, ``rules_str`` is returned unchanged.
    """
    rule_condition = '{{- if .Values.defaultRules.additionalRuleLabels }}\n{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}\n{{- end }}'

    # Keys of a rule mapping sit two columns right of the dash ("- " is two
    # characters wide), so `labels:` and its siblings live at indent + 2.
    key_indent = " " * (indent + 2)
    labels_header = "\n" + key_indent + "labels:"

    rule_start = re.compile("\n" + " " * indent + "-")
    labels_key = re.compile(labels_header)
    # A non-blank character exactly at key_indent marks the next sibling key;
    # entries inside `labels:` are indented deeper and therefore do not match.
    sibling_key = re.compile("\n" + key_indent + r"\S")

    starts = [match.start() for match in rule_start.finditer(rules_str)]
    if not starts:
        # Nothing that looks like a rule item: leave the text alone instead
        # of failing on an empty match list.
        return rules_str

    # The trailing newline is re-added on return; only strip it when it is
    # really there so input without one does not lose its last character.
    body = rules_str[:-1] if rules_str.endswith("\n") else rules_str
    ends = starts[1:] + [len(body)]

    pieces = [body[:starts[0]]]
    for begin, end in zip(starts, ends):
        rule = body[begin:end]
        labels = labels_key.search(rule)
        if labels is None:
            # No `labels:` key yet: create it and put the template inside.
            rule += labels_header + "\n" + rule_condition
        else:
            # Insert after the existing label entries, i.e. right before the
            # next sibling key, or at the end when `labels:` is the last key.
            sibling = sibling_key.search(rule, labels.end())
            cut = sibling.start() if sibling else len(rule)
            rule = rule[:cut] + "\n" + rule_condition + rule[cut:]
        pieces.append(rule)

    return "".join(pieces) + "\n"


def add_custom_annotations(rules, indent=4):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,36 +31,60 @@ spec:
1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
)
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: |-
container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,
max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
)
record: node_namespace_pod_container:container_memory_working_set_bytes
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: |-
container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,
max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
)
record: node_namespace_pod_container:container_memory_rss
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: |-
container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,
max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
)
record: node_namespace_pod_container:container_memory_cache
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: |-
container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,
max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
)
record: node_namespace_pod_container:container_memory_swap
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: |-
kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster)
group_left() max by (namespace, pod, cluster) (
(kube_pod_status_phase{phase=~"Pending|Running"} == 1)
)
record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: |-
sum by (namespace, cluster) (
sum by (namespace, pod, cluster) (
Expand All @@ -72,12 +96,20 @@ spec:
)
)
record: namespace_memory:kube_pod_container_resource_requests:sum
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: |-
kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster)
group_left() max by (namespace, pod, cluster) (
(kube_pod_status_phase{phase=~"Pending|Running"} == 1)
)
record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: |-
sum by (namespace, cluster) (
sum by (namespace, pod, cluster) (
Expand All @@ -89,12 +121,20 @@ spec:
)
)
record: namespace_cpu:kube_pod_container_resource_requests:sum
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: |-
kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster)
group_left() max by (namespace, pod, cluster) (
(kube_pod_status_phase{phase=~"Pending|Running"} == 1)
)
record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: |-
sum by (namespace, cluster) (
sum by (namespace, pod, cluster) (
Expand All @@ -106,12 +146,20 @@ spec:
)
)
record: namespace_memory:kube_pod_container_resource_limits:sum
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: |-
kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster)
group_left() max by (namespace, pod, cluster) (
(kube_pod_status_phase{phase=~"Pending|Running"} == 1)
)
record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: |-
sum by (namespace, cluster) (
sum by (namespace, pod, cluster) (
Expand All @@ -123,6 +171,10 @@ spec:
)
)
record: namespace_cpu:kube_pod_container_resource_limits:sum
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: |-
max by (cluster, namespace, workload, pod) (
label_replace(
Expand All @@ -139,6 +191,9 @@ spec:
)
labels:
workload_type: deployment
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
record: namespace_workload_pod:kube_pod_owner:relabel
- expr: |-
max by (cluster, namespace, workload, pod) (
Expand All @@ -149,6 +204,9 @@ spec:
)
labels:
workload_type: daemonset
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
record: namespace_workload_pod:kube_pod_owner:relabel
- expr: |-
max by (cluster, namespace, workload, pod) (
Expand All @@ -159,6 +217,9 @@ spec:
)
labels:
workload_type: statefulset
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
record: namespace_workload_pod:kube_pod_owner:relabel
- expr: |-
max by (cluster, namespace, workload, pod) (
Expand All @@ -169,5 +230,8 @@ spec:
)
labels:
workload_type: job
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
record: namespace_workload_pod:kube_pod_owner:relabel
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,48 @@ spec:
rules:
- expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30
record: code_verb:apiserver_request_total:increase30d
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
labels:
verb: read
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
record: code:apiserver_request_total:increase30d
- expr: sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
labels:
verb: write
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
record: code:apiserver_request_total:increase30d
- expr: sum by (cluster, verb, scope) (increase(apiserver_request_slo_duration_seconds_count[1h]))
record: cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: sum by (cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h[30d]) * 24 * 30)
record: cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: sum by (cluster, verb, scope, le) (increase(apiserver_request_slo_duration_seconds_bucket[1h]))
record: cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: sum by (cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h[30d]) * 24 * 30)
record: cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: |-
1 - (
(
Expand Down Expand Up @@ -74,6 +100,9 @@ spec:
sum by (cluster) (code:apiserver_request_total:increase30d)
labels:
verb: all
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
record: apiserver_request:availability30d
- expr: |-
1 - (
Expand All @@ -99,6 +128,9 @@ spec:
sum by (cluster) (code:apiserver_request_total:increase30d{verb="read"})
labels:
verb: read
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
record: apiserver_request:availability30d
- expr: |-
1 - (
Expand All @@ -116,21 +148,46 @@ spec:
sum by (cluster) (code:apiserver_request_total:increase30d{verb="write"})
labels:
verb: write
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
record: apiserver_request:availability30d
- expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
labels:
verb: read
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
record: code_resource:apiserver_request_total:rate5m
- expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
labels:
verb: write
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
record: code_resource:apiserver_request_total:rate5m
- expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
labels:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
Loading