# metric-federation-rule-template.yaml
# Forked from rhobs/configuration.
# OpenShift Template named "metric-federation-rule".
# The entries under `objects:` are the resources it instantiates; `${PARAM}`
# (string) and `${{PARAM}}` (non-string) placeholders inside them are filled
# from the `parameters:` list at the end of the file.
apiVersion: template.openshift.io/v1
kind: Template
metadata:
  name: metric-federation-rule
objects:
# ConfigMap holding the rule file `observatorium.yaml`. The StatefulSet in this
# template mounts it at /etc/thanos/rules/metric-federation-rules and passes it
# to thanos-rule through --rule-file.
#
# The embedded rule file defines one group ("telemeter-rhacs.rules", 1m
# interval) with a single recording rule that re-records
# rhacs:rox_central_cluster_metrics_cpu_capacity:avg_over_time1h under the same
# name with a fixed tenant_id label attached.
#
# Do not add comments inside the `|-` block: everything indented under it is
# the literal content of the rule file.
- apiVersion: v1
  data:
    observatorium.yaml: |-
      "groups":
      - "interval": "1m"
        "name": "telemeter-rhacs.rules"
        "rules":
        - "expr": |
            rhacs:rox_central_cluster_metrics_cpu_capacity:avg_over_time1h
          "labels":
            "tenant_id": "FB870BF3-9F3A-44FF-9BF7-D7A047A52F43"
          "record": "rhacs:rox_central_cluster_metrics_cpu_capacity:avg_over_time1h"
  kind: ConfigMap
  metadata:
    annotations:
      # NOTE(review): presumably read by app-interface/qontract tooling, not by
      # Kubernetes itself - confirm the exact effect with that tooling's docs.
      qontract.recycle: "true"
    labels:
      app.kubernetes.io/instance: observatorium
      app.kubernetes.io/part-of: observatorium
    # Must match the configMap volume name referenced by the StatefulSet.
    name: metric-federation-rules
# Headless Service (clusterIP: None) selecting the thanos-rule pods of the
# metric-federation instance. Its name is the same string the StatefulSet uses
# as `serviceName`.
- apiVersion: v1
  kind: Service
  metadata:
    labels:
      app.kubernetes.io/component: rule-evaluation-engine
      app.kubernetes.io/instance: metric-federation
      app.kubernetes.io/name: thanos-rule
      app.kubernetes.io/part-of: observatorium
      app.kubernetes.io/version: ${THANOS_IMAGE_TAG}
    name: observatorium-thanos-metric-federation-rule
  spec:
    clusterIP: None
    ports:
    # Same port numbers as the thanos-rule --grpc-address / --http-address
    # flags and the container's declared `reloader` port.
    - name: grpc
      port: 10901
      targetPort: 10901
    - name: http
      port: 10902
      targetPort: 10902
    - name: reloader
      port: 9533
      targetPort: 9533
    # Matches the pod template labels of the StatefulSet (the version and
    # tracing labels are deliberately not part of the selector).
    selector:
      app.kubernetes.io/component: rule-evaluation-engine
      app.kubernetes.io/instance: metric-federation
      app.kubernetes.io/name: thanos-rule
      app.kubernetes.io/part-of: observatorium
# ServiceAccount created alongside the ruler.
# NOTE(review): the StatefulSet sets `serviceAccountName: ${SERVICE_ACCOUNT_NAME}`
# and that parameter defaults to "prometheus-telemeter", so with default
# parameters the pods do not run under this account - confirm this is intended.
- apiVersion: v1
  kind: ServiceAccount
  metadata:
    annotations: {}
    labels:
      app.kubernetes.io/component: rule-evaluation-engine
      app.kubernetes.io/instance: metric-federation
      app.kubernetes.io/name: thanos-rule
      app.kubernetes.io/part-of: observatorium
      app.kubernetes.io/version: ${THANOS_IMAGE_TAG}
    name: observatorium-thanos-metric-federation-rule
# ServiceMonitor targeting the Service above (same label selector), with two
# endpoints: the `http` port and the `reloader` port.
- apiVersion: monitoring.coreos.com/v1
  kind: ServiceMonitor
  metadata:
    labels:
      app.kubernetes.io/component: rule-evaluation-engine
      app.kubernetes.io/instance: metric-federation
      app.kubernetes.io/name: thanos-rule
      app.kubernetes.io/part-of: observatorium
      # NOTE(review): presumably the label the app-sre Prometheus uses to pick
      # up this monitor - confirm against that Prometheus' selector.
      prometheus: app-sre
    name: observatorium-thanos-metric-federation-rule
  spec:
    endpoints:
    - port: http
      relabelings:
      # Overwrite `instance` with "<namespace>/<pod>".
      - action: replace
        separator: /
        sourceLabels:
        - namespace
        - pod
        targetLabel: instance
    # The reloader endpoint is scraped without relabeling.
    - port: reloader
    namespaceSelector:
      # `${{...}}` substitutes the parameter as a non-string value, so the
      # JSON array in NAMESPACES becomes a real list here.
      matchNames: ${{NAMESPACES}}
    selector:
      matchLabels:
        app.kubernetes.io/component: rule-evaluation-engine
        app.kubernetes.io/instance: metric-federation
        app.kubernetes.io/name: thanos-rule
        app.kubernetes.io/part-of: observatorium
# StatefulSet running the metric-federation Thanos ruler.
# Each pod has three containers:
#   * thanos-rule        - runs `thanos rule` against the mounted rule file
#   * configmap-reloader - watches the rule directory and calls the ruler's
#                          /-/reload endpoint on localhost:10902
#   * jaeger-agent       - tracing agent reporting over gRPC to the collector
# Rule data is kept on a per-pod PersistentVolumeClaim named `data`.
- apiVersion: apps/v1
  kind: StatefulSet
  metadata:
    labels:
      app.kubernetes.io/component: rule-evaluation-engine
      app.kubernetes.io/instance: metric-federation
      app.kubernetes.io/name: thanos-rule
      app.kubernetes.io/part-of: observatorium
      app.kubernetes.io/version: ${THANOS_IMAGE_TAG}
    name: observatorium-thanos-metric-federation-rule
  spec:
    # `${{...}}` keeps the substituted value a number rather than a string.
    replicas: ${{THANOS_RULER_REPLICAS}}
    selector:
      matchLabels:
        app.kubernetes.io/component: rule-evaluation-engine
        app.kubernetes.io/instance: metric-federation
        app.kubernetes.io/name: thanos-rule
        app.kubernetes.io/part-of: observatorium
    # Name of the headless Service defined earlier in this template.
    serviceName: observatorium-thanos-metric-federation-rule
    template:
      metadata:
        labels:
          app.kubernetes.io/component: rule-evaluation-engine
          app.kubernetes.io/instance: metric-federation
          app.kubernetes.io/name: thanos-rule
          app.kubernetes.io/part-of: observatorium
          app.kubernetes.io/tracing: jaeger-agent
          app.kubernetes.io/version: ${THANOS_IMAGE_TAG}
      spec:
        affinity:
          # Soft (preferred, weight 100) anti-affinity: try not to place two
          # metric-federation thanos-rule pods on the same node.
          podAntiAffinity:
            preferredDuringSchedulingIgnoredDuringExecution:
            - podAffinityTerm:
                labelSelector:
                  matchExpressions:
                  - key: app.kubernetes.io/name
                    operator: In
                    values:
                    - thanos-rule
                  - key: app.kubernetes.io/instance
                    operator: In
                    values:
                    - metric-federation
                namespaces:
                - ${NAMESPACE}
                topologyKey: kubernetes.io/hostname
              weight: 100
        containers:
        # --- thanos-rule ---------------------------------------------------
        # `${...}` is replaced when the template is processed; `$(...)` is left
        # in place and refers to this container's `env` entries below.
        - args:
          - rule
          - --log.level=${THANOS_RULER_LOG_LEVEL}
          - --log.format=logfmt
          - --grpc-address=0.0.0.0:10901
          - --http-address=0.0.0.0:10902
          - --objstore.config=$(OBJSTORE_CONFIG)
          - --data-dir=/var/thanos/rule
          # Each replica labels its series with its own pod name (NAME), and
          # that label is dropped again from alerts.
          - --label=rule_replica="$(NAME)"
          - --alert.label-drop=rule_replica
          - --tsdb.retention=48h
          - --tsdb.block-duration=2h
          - --query=dnssrv+_http._tcp.observatorium-ruler-query.${THANOS_QUERIER_NAMESPACE}.svc.cluster.local
          # File provided by the metric-federation-rules ConfigMap volume.
          - --rule-file=/etc/thanos/rules/metric-federation-rules/observatorium.yaml
          # Single multi-line argument: the flag followed by an inline YAML
          # document. The indentation inside the block is part of the value.
          - |-
            --tracing.config="config":
              "sampler_param": 2
              "sampler_type": "ratelimiting"
              "service_name": "thanos-rule"
            "type": "JAEGER"
          env:
          - name: NAME
            valueFrom:
              fieldRef:
                fieldPath: metadata.name
          - name: OBJSTORE_CONFIG
            valueFrom:
              secretKeyRef:
                key: thanos.yaml
                name: ${THANOS_CONFIG_SECRET}
          - name: HOST_IP_ADDRESS
            valueFrom:
              fieldRef:
                fieldPath: status.hostIP
          - name: AWS_ACCESS_KEY_ID
            valueFrom:
              secretKeyRef:
                key: aws_access_key_id
                name: ${THANOS_S3_SECRET}
          - name: AWS_SECRET_ACCESS_KEY
            valueFrom:
              secretKeyRef:
                key: aws_secret_access_key
                name: ${THANOS_S3_SECRET}
          image: ${THANOS_IMAGE}:${THANOS_IMAGE_TAG}
          imagePullPolicy: IfNotPresent
          # 24 failures x 5s period = about 120s of failed checks tolerated.
          livenessProbe:
            failureThreshold: 24
            httpGet:
              path: /-/healthy
              port: 10902
              scheme: HTTP
            periodSeconds: 5
          name: thanos-rule
          ports:
          - containerPort: 10901
            name: grpc
          - containerPort: 10902
            name: http
          # Declared on this container although no thanos-rule flag binds it.
          # NOTE(review): presumably the configmap-reloader's listen port -
          # confirm.
          - containerPort: 9533
            name: reloader
          # 18 failures x 5s period = about 90s, after a 10s initial delay.
          readinessProbe:
            failureThreshold: 18
            httpGet:
              path: /-/ready
              port: 10902
              scheme: HTTP
            initialDelaySeconds: 10
            periodSeconds: 5
          resources:
            limits:
              cpu: ${THANOS_RULER_CPU_LIMIT}
              memory: ${THANOS_RULER_MEMORY_LIMIT}
            requests:
              cpu: ${THANOS_RULER_CPU_REQUEST}
              memory: ${THANOS_RULER_MEMORY_REQUEST}
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          # Backed by the `data` volumeClaimTemplate; matches --data-dir.
          - mountPath: /var/thanos/rule
            name: data
            readOnly: false
          - mountPath: /etc/thanos/rules/metric-federation-rules
            name: metric-federation-rules
        # --- configmap-reloader --------------------------------------------
        # Watches the mounted rule directory and posts to the ruler's reload
        # endpoint on the shared pod network (localhost:10902).
        - args:
          - -webhook-url=http://localhost:10902/-/reload
          - -volume-dir=/etc/thanos/rules/metric-federation-rules
          image: ${CONFIGMAP_RELOADER_IMAGE}:${CONFIGMAP_RELOADER_IMAGE_TAG}
          imagePullPolicy: IfNotPresent
          name: configmap-reloader
          volumeMounts:
          - mountPath: /etc/thanos/rules/metric-federation-rules
            name: metric-federation-rules
        # --- jaeger-agent --------------------------------------------------
        # With the default JAEGER_COLLECTOR_NAMESPACE of "$(NAMESPACE)", the
        # first argument ends up referencing this container's NAMESPACE env
        # var, i.e. the pod's own namespace.
        - args:
          - --reporter.grpc.host-port=dns:///jaeger-collector-headless.${JAEGER_COLLECTOR_NAMESPACE}.svc:14250
          - --reporter.type=grpc
          - --agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD)
          env:
          - name: NAMESPACE
            valueFrom:
              fieldRef:
                fieldPath: metadata.namespace
          - name: POD
            valueFrom:
              fieldRef:
                fieldPath: metadata.name
          image: ${JAEGER_AGENT_IMAGE}:${JAEGER_AGENT_IMAGE_TAG}
          livenessProbe:
            failureThreshold: 5
            httpGet:
              path: /
              port: 14271
              scheme: HTTP
          name: jaeger-agent
          ports:
          - containerPort: 5778
            name: configs
          - containerPort: 6831
            name: jaeger-thrift
          - containerPort: 14271
            name: metrics
          readinessProbe:
            httpGet:
              path: /
              port: 14271
              scheme: HTTP
            initialDelaySeconds: 1
          resources:
            limits:
              cpu: 128m
              memory: 128Mi
            requests:
              cpu: 32m
              memory: 64Mi
        nodeSelector:
          kubernetes.io/os: linux
        securityContext: {}
        # NOTE(review): defaults to "prometheus-telemeter", not the
        # ServiceAccount defined in this template - confirm.
        serviceAccountName: ${SERVICE_ACCOUNT_NAME}
        volumes:
        - configMap:
            name: metric-federation-rules
          name: metric-federation-rules
    # One PVC per replica, mounted by thanos-rule at /var/thanos/rule.
    volumeClaimTemplates:
    - metadata:
        labels:
          app.kubernetes.io/component: rule-evaluation-engine
          app.kubernetes.io/instance: metric-federation
          app.kubernetes.io/name: thanos-rule
          app.kubernetes.io/part-of: observatorium
        name: data
      spec:
        accessModes:
        - ReadWriteOnce
        resources:
          requests:
            storage: ${THANOS_RULER_PVC_REQUEST}
        storageClassName: ${STORAGE_CLASS}
# Template parameters and their defaults. Objects above reference them as
# `${NAME}` (inserted as a string) or `${{NAME}}` (inserted as a raw,
# non-string value).
parameters:
# Namespace used in the pod anti-affinity term.
- name: NAMESPACE
  value: observatorium-metrics
# JSON array, consumed via `${{NAMESPACES}}` in the ServiceMonitor's
# namespaceSelector.matchNames.
- name: NAMESPACES
  value: '["observatorium-metrics"]'
- name: CONFIGMAP_RELOADER_IMAGE
  value: quay.io/openshift/origin-configmap-reloader
- name: CONFIGMAP_RELOADER_IMAGE_TAG
  value: 4.5.0
- name: JAEGER_AGENT_IMAGE_TAG
  value: 1.29.0
- name: JAEGER_AGENT_IMAGE
  value: quay.io/app-sre/jaegertracing-jaeger-agent
# Deliberately `$(NAMESPACE)` (container env-var reference syntax), not
# `${NAMESPACE}`: it is pasted verbatim into the jaeger-agent argument, and
# that container defines a NAMESPACE env var from metadata.namespace.
- name: JAEGER_COLLECTOR_NAMESPACE
  value: $(NAMESPACE)
# NOTE(review): differs from the ServiceAccount created by this template
# (observatorium-thanos-metric-federation-rule) - confirm this is intended.
- name: SERVICE_ACCOUNT_NAME
  value: prometheus-telemeter
- name: STORAGE_CLASS
  value: gp2
# Secret whose `thanos.yaml` key is exposed to thanos-rule as OBJSTORE_CONFIG.
- name: THANOS_CONFIG_SECRET
  value: thanos-objectstorage
# Also used as the app.kubernetes.io/version label value.
- name: THANOS_IMAGE_TAG
  value: v0.30.2
- name: THANOS_IMAGE
  value: quay.io/thanos/thanos
# Namespace part of the --query DNS SRV address.
- name: THANOS_QUERIER_NAMESPACE
  value: observatorium-mst
- name: THANOS_RULER_CPU_LIMIT
  value: "1"
- name: THANOS_RULER_CPU_REQUEST
  value: 500m
- name: THANOS_RULER_LOG_LEVEL
  value: info
- name: THANOS_RULER_MEMORY_LIMIT
  value: 4Gi
- name: THANOS_RULER_MEMORY_REQUEST
  value: 4Gi
- name: THANOS_RULER_PVC_REQUEST
  value: 50Gi
# Quoted because parameter values are strings; `${{...}}` turns it back into
# a number at the point of use.
- name: THANOS_RULER_REPLICAS
  value: "2"
# Secret providing aws_access_key_id / aws_secret_access_key to thanos-rule.
- name: THANOS_S3_SECRET
  value: telemeter-thanos-stage-s3