Skip to content

Commit

Permalink
feat: email receiver for alertmanager
Browse files Browse the repository at this point in the history
refactor: alertmanager receivers

refactor: no-keycloak now correctly handled, cleaned up
  • Loading branch information
Maurice Faber committed Nov 20, 2020
1 parent edb31f7 commit b8b4198
Show file tree
Hide file tree
Showing 27 changed files with 290 additions and 202 deletions.
2 changes: 2 additions & 0 deletions .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@
"esbenp",
"foxundermoon",
"gcloud",
"gitea",
"gitops",
"gogs",
"grafana",
"grpc",
"helmfile",
Expand Down
5 changes: 4 additions & 1 deletion .demo/env/secrets.settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,8 @@ clouds:
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/dnsmanager%40otomi-cloud.iam.gserviceaccount.com"
}
alerts:
home:
slack:
url: https://hooks.slack.com/services/id
slack:
url: https://hooks.slack.com/services/x/y/z
url: https://hooks.slack.com/services/id
12 changes: 9 additions & 3 deletions .demo/env/settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ otomi:
mode: ce
isManaged: true
isMultitenant: true
isRedkubesMonitored: true
isHomeMonitored: true
teamPrefix: team-
hasCloudLB: false
customer:
name: demo
oidc:
clientID: demo
clientID: otomi
idp:
issuer: https://login.microsoftonline.com/57a3f6ea-7e70-4260-acb4-e06ce452f695
tenantID: 57a3f6ea-7e70-4260-acb4-e06ce452f695
Expand All @@ -17,4 +17,10 @@ oidc:
teamAdminGroupID: someTeamAdminGroupID
scope: openid email profile
alerts:
receiver: slack
drone: slack
home:
receivers: [slack]
slack:
channel: mon-otomi
channelCrit: mon-otomi-crit
receivers: [slack]
4 changes: 2 additions & 2 deletions .values/.prettierrc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ singleQuote: true
printWidth: 120
overrides:
- files:
- '*.yaml'
- '*.dec'
- 'env/**/*.yaml'
- 'env/**/*.dec'
options:
tabWidth: 4
useTabs: true
5 changes: 3 additions & 2 deletions .values/.vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,19 @@
"[docker,shell]": {
"editor.defaultFormatter": "foxundermoon.shell-format"
},
"[json,md,yaml,enc]": {
"[json,md,yaml,dec]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"editor.formatOnPaste": false,
"editor.formatOnSave": true,
"files.associations": {
"*.yml": "yaml",
"*.yaml.enc": "yaml",
"*.yaml.dec": "yaml",
".secrets*": "shellscript",
"otomi": "shellscript",
"aliases": "shellscript"
},
"prettier.enable": true,
"sops.defaults.gcpCredentialsPath": "gcp-key.json",
"yaml.schemas": {
".vscode/values-schema.yaml": "env/*.yaml"
Expand Down
2 changes: 1 addition & 1 deletion bin/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ hf() {
}

hf_values() {
[ "${VERBOSE-'true'}" = 'true' ] && quiet='--quiet'
[ "${VERBOSE-'false'}" = 'false' ] && quiet='--quiet'
helmfile ${quiet-} -e "$CLOUD-$CLUSTER" -f helmfile.tpl/helmfile-dump.yaml build | grep -Ev $helmfileOutputHide | sed -e $replacePathsPattern |
yq read -P - 'releases[0].values[0]'
}
Expand Down
2 changes: 1 addition & 1 deletion bin/crypt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ command=$1

function rotate() {
cd $ENV_DIR/env >/dev/null
find . -type f -name '*.secrets.yaml.enc' -exec bash -c "sops --input-type=yaml --output-type yaml -r {} > {}" \;
find . -type f -name 'secrets.*.yaml' -exec bash -c "sops --input-type=yaml --output-type yaml -r {} > {}" \;
cd - >/dev/null
}

Expand Down
4 changes: 3 additions & 1 deletion bin/gen-drone.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ ENV_DIR=${ENV_DIR:-./env}
. bin/common.sh
. bin/colors.sh

prepare_crypt
readonly values=$(hf_values)
readonly receiver=$(echo "$values" | yq r - alerts.receiver)
readonly raw_receiver=$(echo "$values" | yq r - alerts.drone)
readonly receiver=${raw_receiver:-'slack'}
readonly templatePath=$PWD/tpl/.drone.tpl.$receiver.yml
readonly customer_name=$(customer_name)

Expand Down
2 changes: 1 addition & 1 deletion bin/otomi
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ validate_cluster_env() {
local err
[[ -z "$CLOUD" ]] && echo "Error: The CLOUD environment variable is not set" >&2 && err=1
[[ -z "$CLUSTER" ]] && echo "Error: The CLUSTER environment variable is not set" >&2 && err=1
[[ ! -z "$err" ]] && exit 2
[[ -n "$err" ]] && exit 2
return 0
}

Expand Down
2 changes: 1 addition & 1 deletion charts/team-ns/templates/istio-virtualservices.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ spec:
set:
X-Forwarded-Proto: https
---
{{- if and (not (hasKey $s "isPublic")) (hasKey $s "authz") }}
{{- if and $v.hasKeycloak (not (hasKey $s "isPublic")) (hasKey $s "authz") }}
{{- $workload := ($s.authz.workload | toYaml | replace "__TEAM" $v.teamId) }}
apiVersion: security.istio.io/v1beta1
kind: RequestAuthentication
Expand Down
6 changes: 4 additions & 2 deletions helmfile.d/helmfile-05.init.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
bases:
- snippets/defaults.gotmpl

---
{{ readFile "snippets/templates.gotmpl" }}
{{- $v := .Environment.Values }}
{{- $c := $v.charts }}

releases:
- name: istio-operator
Expand All @@ -11,6 +13,6 @@ releases:
pkg: istio-operator
<<: *default
- name: keycloak
installed: true
installed: {{ $c | get "keycloak.enabled" true }}
namespace: keycloak
<<: *default
2 changes: 1 addition & 1 deletion helmfile.d/helmfile-10.monitoring.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ releases:
namespace: monitoring
<<: *default
- name: prometheus-msteams
installed: {{ eq ($c | get "prometheus-operator.alertmanager.receiver" "") "msteams" }}
installed: {{ or (eq ($v.alerts | get "receiver" "slack") "msteams") (eq ($v.alerts | get "home.receiver" "slack") "msteams") }}
namespace: monitoring
<<: *default
- name: sitespeed
Expand Down
2 changes: 1 addition & 1 deletion helmfile.d/helmfile-19.ingress-init.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ bases:

releases:
- name: jobs-keycloak
installed: true
installed: {{ $c | get "keycloak.enabled" true }}
<<: *jobs

2 changes: 1 addition & 1 deletion helmfile.d/helmfile-20.ingress.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ releases:
namespace: ingress
<<: *default
- name: ingress-azure
installed: {{ eq $v.cluster.provider "azure" }}
installed: {{ and (eq $v.cluster.provider "azure") $v.otomi.hasCloudLB }}
namespace: ingress
labels:
tag: ingress
Expand Down
2 changes: 1 addition & 1 deletion helmfile.d/helmfile-30.admin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ releases:
namespace: team-admin
<<: *default
- name: drone-admit-members
installed: {{ and ($c | get "drone.enabled" true) (eq ($c | get "drone.sourceControl.provider") "github") }}
installed: {{ and ($c | get "drone.enabled" true) (eq ($c | get "drone.sourceControl.provider" "github") "github") }}
namespace: team-admin
chart: ../charts/drone-admit-members
values:
Expand Down
64 changes: 13 additions & 51 deletions helmfile.d/helmfile-60.teams.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ bases:
{{- $c := $v.charts }}
{{- $cm := index $v.charts "cert-manager" }}
{{- $po := index $v.charts "prometheus-operator" }}
{{- $slackTpl := tpl (readFile "../values/prometheus-operator/slack-configs.gotmpl") $v | toString }}
{{- $slackTpl := tpl (readFile "../helmfile.d/snippets/slack.gotmpl") $v | toString }}

releases:
{{- range $teamId, $team := $tc.teams }}
{{- if hasKey $team "services" }}
Expand All @@ -24,6 +25,7 @@ releases:
values:
- cluster: {{- $v.cluster | toYaml | nindent 10 }}
otomi: {{- $v.otomi | toYaml | nindent 10 }}
hasKeycloak: {{ $v.charts.keycloak | get "enabled" true }}
domain: {{ $domain }}
certStage: {{ $cm.stage }}
knative:
Expand Down Expand Up @@ -62,43 +64,7 @@ releases:
alertmanager:
alertmanagerSpec:
externalUrl: https://{{ $appsDomain }}/alertmanager
config:
{{- $receiver := ($team | get "receiver" ($v.alerts | get "receiver" "slack")) }}
{{- if eq $receiver "slack" }}
global:
slack_api_url: {{ $team | get "slack.url" ($v.alerts.slack.url) }}
{{- end }}
receivers:
- name: "null"
{{- $channel := $team | get "slack.channel" "mon-otomi" }}
{{- if eq $receiver "slack" }}
- name: default
slack_configs:
- channel: "#{{ $team | get "slack.channel" "mon-otomi" }}"
{{- $slackTpl | nindent 20 }}
- name: critical
slack_configs:
- channel: "#{{ $team | get "slack.channel" "mon-otomi" }}-crit"
{{- $slackTpl | nindent 20 }}
{{- else }}
{{ $suffix := (hasKey $team "receiver") | ternary "" ".monitoring.svc.cluster.local" }}
- name: default
webhook_configs:
- url: "http://prometheus-msteams{{ $suffix }}:2000/low_priority_channel"
send_resolved: true
- name: critical
webhook_configs:
- url: "http://prometheus-msteams{{ $suffix }}:2000/high_priority_channel"
send_resolved: true
{{- end }}
- name: critical-redkubes
{{- if and $v.otomi.isRedkubesMonitored }}
# sending team criticals also to redkubes to be aware of team issues
slack_configs:
- channel: "#{{ $v.alerts | get "slack.channel" "mon-otomi" }}-crit"
api_url: {{ $v.alerts.slack.url }}
{{- $slackTpl | nindent 20 }}
{{- end }}
config: {{- tpl (readFile "../helmfile.d/snippets/alertmanager.gotmpl") (dict "instance" $team "root" $v "slackTpl" $slackTpl) | nindent 12 }}
commonLabels:
prometheus: team-{{ $teamId }}
prometheus:
Expand Down Expand Up @@ -152,11 +118,6 @@ releases:
grafana.ini:
server:
root_url: https://{{ $appsDomain }}/grafana
{{- if ($team | get "oidc.custom" false) }}
"auth.generic_oauth":
client_id: {{ $team.oidc.clientID }}
client_secret: {{ $team.oidc.clientSecret }}
{{- end }}
additionalDataSources:
- name: Prometheus-admin
editable: false
Expand All @@ -183,18 +144,19 @@ releases:
access: proxy
url: http://graphite.monitoring:80
{{- end }}
{{- if and (eq $v.cluster.provider "azure") ($team | get "azure.monitor" false) }}
{{- $monitor := (($team | get "azure.monitor.useAdmin" false) | ternary ($v.azure | getOrNil "monitor") ($team | getOrNil "azure.monitor")) }}
{{- if and (eq $v.cluster.provider "azure") }}
{{- $monitor := ($team | get "azure.monitor" ($v.clouds.azure | get "monitor" nil)) }}
{{- with $monitor }}
{{- $a := $v.clouds.azure }}
- name: Azure Monitor
type: grafana-azure-monitor-datasource
access: proxy
jsonData:
cloudName: azuremonitor
subscriptionId: {{ $v.azure.subscriptionId }}
tenantId: {{ $v.azure.tenantId }}
subscriptionId: {{ $a.subscriptionId }}
tenantId: {{ $a.tenantId }}
clientId: {{ .clientId }}
logAnalyticsTenantId: {{ . | get "logAnalyticsTenantId" $v.azure.tenantId }}
logAnalyticsTenantId: {{ . | get "logAnalyticsTenantId" $a.tenantId }}
logAnalyticsClientId: {{ . | get "logAnalyticsClientId" .clientId }}
logAnalyticsDefaultWorkspace: {{ .logAnalyticsWorkspace }}
appInsightsAppId: {{ . | get "appInsightsAppId" .clientId }}
Expand All @@ -208,7 +170,7 @@ releases:
editable: false
{{- end }}
{{- end }}
{{ if eq ($team | get "receiver" "slack") "msteams" }}
{{ if has "msteams" ($team | get "receivers" list) }}
- name: prometheus-msteams-{{ $teamId }}
installed: true
namespace: team-{{ $teamId }}
Expand All @@ -225,8 +187,8 @@ releases:
additionalLabels:
release: prometheus-{{ $teamId }}
connectors:
- high_priority_channel: {{ $team | get "msteams.highPrio" ($po | get "alertmanager.msteams.highPrio") }}
- low_priority_channel: {{ $team | get "msteams.lowPrio" ($po | get "alertmanager.msteams.lowPrio") }}
- high_priority_channel: {{ $team | get "msteams.highPrio" }}
- low_priority_channel: {{ $team | get "msteams.lowPrio" }}
{{- end }}

- name: grafana-dashboards-{{ $teamId }}
Expand Down
Loading

0 comments on commit b8b4198

Please sign in to comment.