diff --git a/.cspell.json b/.cspell.json index 8960dabc5d..3ee521d6a6 100644 --- a/.cspell.json +++ b/.cspell.json @@ -57,7 +57,9 @@ "esbenp", "foxundermoon", "gcloud", + "gitea", "gitops", + "gogs", "grafana", "grpc", "helmfile", diff --git a/.demo/env/secrets.settings.yaml b/.demo/env/secrets.settings.yaml index 61f223f0b1..b59525dc6f 100644 --- a/.demo/env/secrets.settings.yaml +++ b/.demo/env/secrets.settings.yaml @@ -21,5 +21,8 @@ clouds: "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/dnsmanager%40otomi-cloud.iam.gserviceaccount.com" } alerts: + home: + slack: + url: https://hooks.slack.com/services/id slack: - url: https://hooks.slack.com/services/x/y/z + url: https://hooks.slack.com/services/id diff --git a/.demo/env/settings.yaml b/.demo/env/settings.yaml index 6b65153390..b9fd4c9453 100644 --- a/.demo/env/settings.yaml +++ b/.demo/env/settings.yaml @@ -2,13 +2,13 @@ otomi: mode: ce isManaged: true isMultitenant: true - isRedkubesMonitored: true + isHomeMonitored: true teamPrefix: team- hasCloudLB: false customer: name: demo oidc: - clientID: demo + clientID: otomi idp: issuer: https://login.microsoftonline.com/57a3f6ea-7e70-4260-acb4-e06ce452f695 tenantID: 57a3f6ea-7e70-4260-acb4-e06ce452f695 @@ -17,4 +17,10 @@ oidc: teamAdminGroupID: someTeamAdminGroupID scope: openid email profile alerts: - receiver: slack + drone: slack + home: + receivers: [slack] + slack: + channel: mon-otomi + channelCrit: mon-otomi-crit + receivers: [slack] diff --git a/.values/.prettierrc.yml b/.values/.prettierrc.yml index ec5645f260..e9a2106fdc 100644 --- a/.values/.prettierrc.yml +++ b/.values/.prettierrc.yml @@ -5,8 +5,8 @@ singleQuote: true printWidth: 120 overrides: - files: - - '*.yaml' - - '*.dec' + - 'env/**/*.yaml' + - 'env/**/*.dec' options: tabWidth: 4 useTabs: true diff --git a/.values/.vscode/settings.json b/.values/.vscode/settings.json index a4a120d0b4..92b628ae72 100644 --- a/.values/.vscode/settings.json +++ 
b/.values/.vscode/settings.json @@ -2,18 +2,19 @@ "[docker,shell]": { "editor.defaultFormatter": "foxundermoon.shell-format" }, - "[json,md,yaml,enc]": { + "[json,md,yaml,dec]": { "editor.defaultFormatter": "esbenp.prettier-vscode" }, "editor.formatOnPaste": false, "editor.formatOnSave": true, "files.associations": { "*.yml": "yaml", - "*.yaml.enc": "yaml", + "*.yaml.dec": "yaml", ".secrets*": "shellscript", "otomi": "shellscript", "aliases": "shellscript" }, + "prettier.enable": true, "sops.defaults.gcpCredentialsPath": "gcp-key.json", "yaml.schemas": { ".vscode/values-schema.yaml": "env/*.yaml" diff --git a/bin/common.sh b/bin/common.sh index 00c48e9925..19524a15d5 100644 --- a/bin/common.sh +++ b/bin/common.sh @@ -33,7 +33,8 @@ hf() { } hf_values() { - [ "${VERBOSE-'true'}" = 'true' ] && quiet='--quiet' + # Pass --quiet to helmfile unless VERBOSE is set to something other than 'false' (unset or empty counts as 'false'). No quotes around the default: inside double quotes they would become part of the value. + [ "${VERBOSE:-false}" = 'false' ] && quiet='--quiet' helmfile ${quiet-} -e "$CLOUD-$CLUSTER" -f helmfile.tpl/helmfile-dump.yaml build | grep -Ev $helmfileOutputHide | sed -e $replacePathsPattern | yq read -P - 'releases[0].values[0]' } diff --git a/bin/crypt.sh b/bin/crypt.sh index 885690f731..b6f63998b4 100755 --- a/bin/crypt.sh +++ b/bin/crypt.sh @@ -8,7 +8,8 @@ command=$1 function rotate() { cd $ENV_DIR/env >/dev/null - find . -type f -name '*.secrets.yaml.enc' -exec bash -c "sops --input-type=yaml --output-type yaml -r {} > {}" \; + # Rotate the data key of every secrets file in place (-i). Redirecting sops output onto its own input file would truncate the file before sops reads it. + find . -type f -name 'secrets.*.yaml' -exec sops --input-type=yaml --output-type=yaml -r -i {} \; cd - >/dev/null } diff --git a/bin/gen-drone.sh b/bin/gen-drone.sh index 4724f79ad6..5f75ffab64 100755 --- a/bin/gen-drone.sh +++ b/bin/gen-drone.sh @@ -8,8 +8,10 @@ ENV_DIR=${ENV_DIR:-./env} . bin/common.sh .
bin/colors.sh +prepare_crypt readonly values=$(hf_values) -readonly receiver=$(echo "$values" | yq r - alerts.receiver) +readonly raw_receiver=$(echo "$values" | yq r - alerts.drone) +readonly receiver=${raw_receiver:-'slack'} readonly templatePath=$PWD/tpl/.drone.tpl.$receiver.yml readonly customer_name=$(customer_name) diff --git a/bin/otomi b/bin/otomi index 93a63d7055..45e0e2b42b 100755 --- a/bin/otomi +++ b/bin/otomi @@ -157,7 +157,7 @@ validate_cluster_env() { local err [[ -z "$CLOUD" ]] && echo "Error: The CLOUD environment variable is not set" >&2 && err=1 [[ -z "$CLUSTER" ]] && echo "Error: The CLUSTER environment variable is not set" >&2 && err=1 - [[ ! -z "$err" ]] && exit 2 + [[ -n "$err" ]] && exit 2 return 0 } diff --git a/charts/team-ns/templates/istio-virtualservices.yaml b/charts/team-ns/templates/istio-virtualservices.yaml index 6b0fa1a486..7ca9941022 100644 --- a/charts/team-ns/templates/istio-virtualservices.yaml +++ b/charts/team-ns/templates/istio-virtualservices.yaml @@ -79,7 +79,7 @@ spec: set: X-Forwarded-Proto: https --- -{{- if and (not (hasKey $s "isPublic")) (hasKey $s "authz") }} +{{- if and $v.hasKeycloak (not (hasKey $s "isPublic")) (hasKey $s "authz") }} {{- $workload := ($s.authz.workload | toYaml | replace "__TEAM" $v.teamId) }} apiVersion: security.istio.io/v1beta1 kind: RequestAuthentication diff --git a/helmfile.d/helmfile-05.init.yaml b/helmfile.d/helmfile-05.init.yaml index c4fd2b3d54..cc069bfe43 100644 --- a/helmfile.d/helmfile-05.init.yaml +++ b/helmfile.d/helmfile-05.init.yaml @@ -1,7 +1,9 @@ bases: - snippets/defaults.gotmpl - +--- {{ readFile "snippets/templates.gotmpl" }} +{{- $v := .Environment.Values }} +{{- $c := $v.charts }} releases: - name: istio-operator @@ -11,6 +13,6 @@ releases: pkg: istio-operator <<: *default - name: keycloak - installed: true + installed: {{ $c | get "keycloak.enabled" true }} namespace: keycloak <<: *default diff --git a/helmfile.d/helmfile-10.monitoring.yaml 
b/helmfile.d/helmfile-10.monitoring.yaml index 517e9be428..bab2ab501f 100644 --- a/helmfile.d/helmfile-10.monitoring.yaml +++ b/helmfile.d/helmfile-10.monitoring.yaml @@ -23,7 +23,8 @@ releases: namespace: monitoring <<: *default - name: prometheus-msteams - installed: {{ eq ($c | get "prometheus-operator.alertmanager.receiver" "") "msteams" }} + # installed when msteams is listed in alerts.receivers or alerts.home.receivers (both default to [slack]) + installed: {{ or (has "msteams" ($v.alerts | get "receivers" (list "slack"))) (has "msteams" ($v.alerts | get "home.receivers" (list "slack"))) }} namespace: monitoring <<: *default - name: sitespeed diff --git a/helmfile.d/helmfile-19.ingress-init.yaml b/helmfile.d/helmfile-19.ingress-init.yaml index 5e2cf63c4c..e0ee76919c 100644 --- a/helmfile.d/helmfile-19.ingress-init.yaml +++ b/helmfile.d/helmfile-19.ingress-init.yaml @@ -7,6 +7,6 @@ bases: releases: - name: jobs-keycloak - installed: true + installed: {{ $c | get "keycloak.enabled" true }} <<: *jobs diff --git a/helmfile.d/helmfile-20.ingress.yaml b/helmfile.d/helmfile-20.ingress.yaml index e360d94e9f..e8c2fe7d54 100644 --- a/helmfile.d/helmfile-20.ingress.yaml +++ b/helmfile.d/helmfile-20.ingress.yaml @@ -28,7 +28,7 @@ releases: namespace: ingress <<: *default - name: ingress-azure - installed: {{ eq $v.cluster.provider "azure" }} + installed: {{ and (eq $v.cluster.provider "azure") $v.otomi.hasCloudLB }} namespace: ingress labels: tag: ingress diff --git a/helmfile.d/helmfile-30.admin.yaml b/helmfile.d/helmfile-30.admin.yaml index 1977772d96..f84fcdae38 100644 --- a/helmfile.d/helmfile-30.admin.yaml +++ b/helmfile.d/helmfile-30.admin.yaml @@ -11,7 +11,7 @@ releases: namespace: team-admin <<: *default - name: drone-admit-members - installed: {{ and ($c | get "drone.enabled" true) (eq ($c | get "drone.sourceControl.provider") "github") }} + installed: {{ and ($c | get "drone.enabled" true) (eq ($c | get "drone.sourceControl.provider" "github") "github") }} namespace: team-admin chart: ../charts/drone-admit-members values: diff --git a/helmfile.d/helmfile-60.teams.yaml
b/helmfile.d/helmfile-60.teams.yaml index 1878b55b90..3b1e91ce10 100644 --- a/helmfile.d/helmfile-60.teams.yaml +++ b/helmfile.d/helmfile-60.teams.yaml @@ -7,7 +7,8 @@ bases: {{- $c := $v.charts }} {{- $cm := index $v.charts "cert-manager" }} {{- $po := index $v.charts "prometheus-operator" }} -{{- $slackTpl := tpl (readFile "../values/prometheus-operator/slack-configs.gotmpl") $v | toString }} +{{- $slackTpl := tpl (readFile "../helmfile.d/snippets/slack.gotmpl") $v | toString }} + releases: {{- range $teamId, $team := $tc.teams }} {{- if hasKey $team "services" }} @@ -24,6 +25,7 @@ releases: values: - cluster: {{- $v.cluster | toYaml | nindent 10 }} otomi: {{- $v.otomi | toYaml | nindent 10 }} + hasKeycloak: {{ $v.charts.keycloak | get "enabled" true }} domain: {{ $domain }} certStage: {{ $cm.stage }} knative: @@ -62,43 +64,7 @@ releases: alertmanager: alertmanagerSpec: externalUrl: https://{{ $appsDomain }}/alertmanager - config: - {{- $receiver := ($team | get "receiver" ($v.alerts | get "receiver" "slack")) }} - {{- if eq $receiver "slack" }} - global: - slack_api_url: {{ $team | get "slack.url" ($v.alerts.slack.url) }} - {{- end }} - receivers: - - name: "null" - {{- $channel := $team | get "slack.channel" "mon-otomi" }} - {{- if eq $receiver "slack" }} - - name: default - slack_configs: - - channel: "#{{ $team | get "slack.channel" "mon-otomi" }}" - {{- $slackTpl | nindent 20 }} - - name: critical - slack_configs: - - channel: "#{{ $team | get "slack.channel" "mon-otomi" }}-crit" - {{- $slackTpl | nindent 20 }} - {{- else }} - {{ $suffix := (hasKey $team "receiver") | ternary "" ".monitoring.svc.cluster.local" }} - - name: default - webhook_configs: - - url: "http://prometheus-msteams{{ $suffix }}:2000/low_priority_channel" - send_resolved: true - - name: critical - webhook_configs: - - url: "http://prometheus-msteams{{ $suffix }}:2000/high_priority_channel" - send_resolved: true - {{- end }} - - name: critical-redkubes - {{- if and 
$v.otomi.isRedkubesMonitored }} - # sending team criticals also to redkubes to be aware of team issues - slack_configs: - - channel: "#{{ $v.alerts | get "slack.channel" "mon-otomi" }}-crit" - api_url: {{ $v.alerts.slack.url }} - {{- $slackTpl | nindent 20 }} - {{- end }} + config: {{- tpl (readFile "../helmfile.d/snippets/alertmanager.gotmpl") (dict "instance" $team "root" $v "slackTpl" $slackTpl) | nindent 12 }} commonLabels: prometheus: team-{{ $teamId }} prometheus: @@ -152,11 +118,6 @@ releases: grafana.ini: server: root_url: https://{{ $appsDomain }}/grafana - {{- if ($team | get "oidc.custom" false) }} - "auth.generic_oauth": - client_id: {{ $team.oidc.clientID }} - client_secret: {{ $team.oidc.clientSecret }} - {{- end }} additionalDataSources: - name: Prometheus-admin editable: false @@ -183,18 +144,19 @@ releases: access: proxy url: http://graphite.monitoring:80 {{- end }} - {{- if and (eq $v.cluster.provider "azure") ($team | get "azure.monitor" false) }} - {{- $monitor := (($team | get "azure.monitor.useAdmin" false) | ternary ($v.azure | getOrNil "monitor") ($team | getOrNil "azure.monitor")) }} + {{- if and (eq $v.cluster.provider "azure") }} + {{- $monitor := ($team | get "azure.monitor" ($v.clouds.azure | get "monitor" nil)) }} {{- with $monitor }} + {{- $a := $v.clouds.azure }} - name: Azure Monitor type: grafana-azure-monitor-datasource access: proxy jsonData: cloudName: azuremonitor - subscriptionId: {{ $v.azure.subscriptionId }} - tenantId: {{ $v.azure.tenantId }} + subscriptionId: {{ $a.subscriptionId }} + tenantId: {{ $a.tenantId }} clientId: {{ .clientId }} - logAnalyticsTenantId: {{ . | get "logAnalyticsTenantId" $v.azure.tenantId }} + logAnalyticsTenantId: {{ . | get "logAnalyticsTenantId" $a.tenantId }} logAnalyticsClientId: {{ . | get "logAnalyticsClientId" .clientId }} logAnalyticsDefaultWorkspace: {{ .logAnalyticsWorkspace }} appInsightsAppId: {{ . 
| get "appInsightsAppId" .clientId }} @@ -208,7 +170,7 @@ releases: editable: false {{- end }} {{- end }} - {{ if eq ($team | get "receiver" "slack") "msteams" }} + {{ if has "msteams" ($team | get "alerts.receivers" list) }} - name: prometheus-msteams-{{ $teamId }} installed: true namespace: team-{{ $teamId }} @@ -225,8 +187,8 @@ releases: additionalLabels: release: prometheus-{{ $teamId }} connectors: - - high_priority_channel: {{ $team | get "msteams.highPrio" ($po | get "alertmanager.msteams.highPrio") }} - - low_priority_channel: {{ $team | get "msteams.lowPrio" ($po | get "alertmanager.msteams.lowPrio") }} + - high_priority_channel: {{ $team | get "alerts.msteams.highPrio" }} + - low_priority_channel: {{ $team | get "alerts.msteams.lowPrio" }} {{- end }} - name: grafana-dashboards-{{ $teamId }} diff --git a/helmfile.d/snippets/alertmanager.gotmpl b/helmfile.d/snippets/alertmanager.gotmpl new file mode 100644 index 0000000000..4ed57d1446 --- /dev/null +++ b/helmfile.d/snippets/alertmanager.gotmpl @@ -0,0 +1,102 @@ +{{- /* Alertmanager config shared by the platform and the team Prometheus stacks. Expects a dict with "instance" (values whose alerts.* settings are read first), "root" (environment values used as fallback) and "slackTpl" (pre-rendered slack_configs fields). */}} +{{- $receivers := .instance | get "alerts.receivers" (.root | get "alerts.receivers" (list "slack")) }} +{{- /* An instance that declares its own alerts.receivers is assumed to run prometheus-msteams next to it (as the previous inline team config did); any other instance talks to the shared one in the monitoring namespace. */}} +{{- $suffix := (hasKey (.instance | get "alerts" dict) "receivers") | ternary "" ".monitoring.svc.cluster.local" }} +{{- /* NOTE(review): the inline configs this snippet replaces used port 2000 for prometheus-msteams; confirm that 800 below matches the deployed service port. */}} +global: +{{- if or (has "slack" $receivers ) (and .root.otomi.isHomeMonitored (has "slack" (.root.alerts | get "home.receivers" (list "slack")))) }} + slack_api_url: {{ .instance | get "alerts.slack.url" (.root | get "alerts.slack.url" (.root | get "alerts.home.slack.url")) }} +{{- end }} +{{- if has "email" $receivers }} + smtp_smarthost: {{ .instance | get "alerts.email.smarthost" (.root | get "alerts.email.smarthost") }} + smtp_hello: {{ .instance | get "alerts.email.hello" (.root | get "alerts.email.hello" .root.cluster.domain) }} + smtp_auth_username: {{ .instance | get "alerts.email.auth_username" (.root | get "alerts.email.auth_username" nil) }} + smtp_auth_password: {{ .instance | get "alerts.email.auth_password" (.root | get "alerts.email.auth_password" nil) }} + smtp_auth_secret: {{ .instance | get
"alerts.email.auth_secret" (.root | get "alerts.email.auth_secret" nil) }} + smtp_auth_identity: {{ .instance | get "alerts.email.auth_identity" (.root | get "alerts.email.auth_identity" nil) }} +{{- end }} +route: + receiver: default + group_by: [alertname] + group_interval: {{ .instance | get "alerts.groupInterval" (.root | get "alerts.groupInterval" "5m") }} + repeat_interval: {{ .instance | get "alerts.repeatInterval" (.root | get "alerts.repeatInterval" "3h") }} + routes: + - match: + alertname: Watchdog + receiver: "null" + - match: + alertname: CPUThrottlingHigh + receiver: "null" + {{- if eq .root.cluster.provider "azure" }} + - match: + alertname: KubeAPILatencyHigh + receiver: "null" + {{- end }} + - match: + severity: critical + receiver: critical + {{- if .root.otomi.isHomeMonitored }} + continue: true + - match: + severity: critical + receiver: critical-home + {{- end }} +receivers: + - name: "null" +{{- /* Receiver names must be unique in Alertmanager, so every enabled channel is attached to the same default / critical receiver instead of emitting one receiver per channel. */}} + - name: default +{{- if has "slack" $receivers }} + slack_configs: + - channel: "#{{ .instance | get "alerts.slack.channel" (.root | get "alerts.slack.channel" "mon-otomi") }}" + {{- .slackTpl | nindent 8 }} +{{- end }} +{{- if has "msteams" $receivers }} + webhook_configs: + - url: "http://prometheus-msteams{{ $suffix }}:800/low_priority_channel" + send_resolved: true +{{- end }} +{{- if has "email" $receivers }} + email_configs: + - to: {{ .instance | get "alerts.email.to" (.root | get "alerts.email.to") }} + from: {{ .instance | get "alerts.email.from" (.root | get "alerts.email.from" (print "alerts@" .root.cluster.domain)) }} + send_resolved: true +{{- end }} + - name: critical +{{- if has "slack" $receivers }} + slack_configs: + - channel: "#{{ .instance | get "alerts.slack.channelCrit" (.root | get "alerts.slack.channelCrit" "mon-otomi-crit") }}" + {{- .slackTpl | nindent 8 }} +{{- end }} +{{- if has "msteams" $receivers }} + webhook_configs: + - url: "http://prometheus-msteams{{ $suffix }}:800/high_priority_channel" + send_resolved: true +{{- end }} +{{- if has "email" $receivers }} + email_configs: + - to: {{ .instance | get "alerts.email.to" (.root | get "alerts.email.to") }} + from: {{ .instance | get "alerts.email.from" (.root | get "alerts.email.from" (print "alerts@" .root.cluster.domain)) }} + send_resolved: true +{{- end }} +{{- if .root.otomi.isHomeMonitored }} + - name: critical-home + {{- $receivers := .root.alerts | get "home.receivers" (list "slack") }} + # sending criticals also to home to be aware of issues + {{- if has "slack" $receivers }} + slack_configs: + - channel: "#{{ .root | get "alerts.home.slack.channelCrit" "mon-otomi-crit" }}" + api_url: {{ .root | get "alerts.home.slack.url" }} + {{- .slackTpl | nindent 8 }} + {{- end }} + {{- if has "msteams" $receivers }} + webhook_configs: + - url: "http://prometheus-msteams.monitoring.svc.cluster.local:800/high_priority_channel" + send_resolved: true + {{- end }} + {{- if has "email" $receivers }} + email_configs: + - to: {{ .root | get "alerts.home.email.to" }} + from: {{ .root | get "alerts.home.email.from" (print "alerts@" .root.cluster.domain) }} + send_resolved: true + {{- end }} +{{- end }} diff --git a/values/prometheus-operator/slack-configs.gotmpl b/helmfile.d/snippets/slack.gotmpl similarity index 100% rename from values/prometheus-operator/slack-configs.gotmpl rename to helmfile.d/snippets/slack.gotmpl diff --git a/values-schema.yaml b/values-schema.yaml index 6e7dd50d41..ffca5ff154 100644 --- a/values-schema.yaml +++ b/values-schema.yaml @@ -11,6 +11,11 @@ definitions: domain: type: string pattern: ^(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.)*([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9\-]*[A-Za-z0-9])$ + droneGit: + clientID: + type: string + clientSecretValue: + type: string registry: type: string pattern: ^[a-z0-9]+(?:[._-][a-z0-9]+)*$ @@ -169,6 +174,12 @@ definitions: pattern: ^(https:\/\/)([\w\-])+\.{1}([a-zA-Z]{2,63})([\/\w-]*)*\/?\??([^#\n\r]*)?#?([^\n\r]*)$ alerts: properties: + drone: + type: string + enum: + - slack + - msteams + default: slack groupInterval: type: string default: 5m @@ -177,13 +188,15 @@ definitions: type: string default: 3h description: How long to wait before sending a notification again if it has already been sent successfully for an alert.
(Usually ~3h or more). - receiver: - type: string - enum: - - slack - - msteams - default: slack - description: A notification receiver. You can only choose one of Slack or MS Teams in simple configuration mode. + receivers: + type: array + items: + type: string + enum: + - slack + - msteams + - email + description: Notification receivers. slack: type: object additionalProperties: false @@ -194,7 +207,11 @@ definitions: channel: type: string default: mon-otomi - description: 'The Slack channel for non-critical notifications. This channel, as well as one with the suffix "-crit" (for criticals) should exist.' + description: 'The Slack channel for non-critical notifications.' + channelCrit: + type: string + default: mon-otomi-crit + description: 'The Slack channel for critical notifications.' required: [url] msteams: type: object @@ -207,7 +224,27 @@ definitions: type: string description: The high prio web hook required: [highPrio, lowPrio] - required: [receiver] + email: + type: object + properties: + from: + type: string + to: + type: string + smarthost: + type: string + hello: + type: string + auth_username: + type: string + auth_password: + type: string + auth_secret: + type: string + auth_identity: + type: string + + required: [receivers] cloud: description: A common cloud configuration type: object @@ -240,6 +277,15 @@ definitions: Standard_RAGZRS, ] description: An azure disk type (SKU Type) + monitor: + type: object + properties: + clientId: + type: string + description: An azure client id + clientSecret: + type: string + description: An azure client secret resourceGroup: type: string description: An azure resource group @@ -311,8 +357,14 @@ definitions: type: object additionalProperties: false properties: + alerts: + '$ref': '#/definitions/alerts' azure: - type: object + '$ref': '#/definitions/cloudAzure' + additionalProperties: + useAdmin: + type: boolean + default: true id: '$ref': '#/definitions/idName' description: Must be the same as the name.
@@ -367,6 +417,10 @@ definitions: description: Details for a knative service that will be deployed and operated. additionalProperties: false properties: + predeployed: + description: Has this service been predeployed? Otherwise otomi will start it with the configuration given. + type: boolean + default: false scaleToZero: title: Scale to zero description: Scales to zero after 60 seconds and needs approximately 8 seconds to start back up. @@ -485,6 +539,10 @@ definitions: properties: alerts: '$ref': '#/definitions/alerts' + additionalProperties: + home: + description: Configuration to phone home. Used when otomi.isHomeMonitored is set. + '$ref': '#/definitions/alerts' charts: type: object additionalProperties: false @@ -632,17 +690,52 @@ properties: additionalProperties: false properties: github: - type: object - additionalProperties: false - properties: - clientID: + '$ref': '#/definitions/droneGit' + additionalProperties: + server: type: string - clientSecretValue: + default: https://github.com + gitlab: + '$ref': '#/definitions/droneGit' + additionalProperties: + server: type: string + gitea: + '$ref': '#/definitions/droneGit' + additionalProperties: server: type: string + gogs: + properties: + server: + type: string + bitbucketCloud: + '$ref': '#/definitions/droneGit' + bitbucketServer: + properties: + server: + type: string + consumerKey: + type: string + default: consumerKey + privateKey: + type: string + default: privateKey + username: + type: string + passwordKey: + type: string + default: password provider: type: string + enum: + - github + - gitlab + - gitea + - gogs + - bitbucketCloud + - bitbucketServer + default: github external-dns: type: object additionalProperties: false @@ -720,6 +813,10 @@ properties: name: description: A name of the Application Gateway type: string + usePrivateIP: + description: Wether to use a private ip range or not + type: boolean + default: false resourceGroup: description: A name of the Azure Resource Group in which 
Application Gateway was created type: string @@ -934,6 +1031,7 @@ properties: properties: enabled: type: boolean + default: true resources: '$ref': '#/definitions/resources' oauth2-proxy: @@ -988,24 +1086,6 @@ properties: type: object properties: additionalProperties: false - alertmanager: - type: object - additionalProperties: false - properties: - groupInterval: - type: string - receiver: - type: string - repeatInterval: - type: string - slack: - additionalProperties: false - type: object - properties: - channel: - type: string - url: - '$ref': '#/definitions/url' grafana: type: object additionalProperties: false @@ -1112,13 +1192,13 @@ properties: type: boolean description: Wether to separate team metrics and logs. Disabling this lets everybody be admin and see everything. default: true - isRedkubesMonitored: + isHomeMonitored: type: boolean - description: Wether this cluster is monitored under a RedKubes Premium SLA. Sends all notifications to RedKubes. + description: Wether this cluster is home monitored (like when under a Premium SLA). Sends criticals home. default: false mode: type: string - default: ce + default: ee description: The otomi-core edition. 
Either community edition (ce) or enterprise edition (ee) enum: [ce, ee] pullSecret: diff --git a/values/cloud/cloud-raw.gotmpl b/values/cloud/cloud-raw.gotmpl index 3417f10f2b..e7558a59ae 100644 --- a/values/cloud/cloud-raw.gotmpl +++ b/values/cloud/cloud-raw.gotmpl @@ -1,4 +1,4 @@ {{- $v := .Environment.Values }} -{{- $azureDiskType := (index $v | get "azure.diskType" "") }} +{{- $azureDiskType := (index $v | get "clouds.azure.diskType" "") }} resources: {{ tpl (readFile (printf "pv-%s.gotmpl" $v.cluster.provider)) (dict "type" $azureDiskType) }} \ No newline at end of file diff --git a/values/cluster-autoscaler/cluster-autoscaler.gotmpl b/values/cluster-autoscaler/cluster-autoscaler.gotmpl index 59c02d6553..dc3d972130 100644 --- a/values/cluster-autoscaler/cluster-autoscaler.gotmpl +++ b/values/cluster-autoscaler/cluster-autoscaler.gotmpl @@ -12,11 +12,12 @@ autoDiscovery: cloudProvider: {{ $v.cluster.provider }} {{- if eq $v.cluster.provider "azure" }} -azureClientID: {{ $v.azure.clientId }} -azureClientSecret: {{ $v.azure.clientSecret }} -azureResourceGroup: {{ $v.azure.resourceGroup }} -azureSubscriptionID: {{ $v.azure.subscriptionId }} -azureTenantID: {{ $v.azure.tenantId }} +{{- $a := $v.clouds.azure }} +azureClientID: {{ $a.clientId }} +azureClientSecret: {{ $a.clientSecret }} +azureResourceGroup: {{ $a.resourceGroup }} +azureSubscriptionID: {{ $a.subscriptionId }} +azureTenantID: {{ $a.tenantId }} # if using AKS azureVMType should be set to "AKS" azureVMType: "AKS" azureClusterName: {{ $v.clusterName }} diff --git a/values/ingress-azure/ingress-azure.gotmpl b/values/ingress-azure/ingress-azure.gotmpl index 137cb706d1..6e82ae1833 100644 --- a/values/ingress-azure/ingress-azure.gotmpl +++ b/values/ingress-azure/ingress-azure.gotmpl @@ -24,7 +24,7 @@ appgw: name: {{ $i.appgw.name }} subnetName: {{ $i.appgw.subnetName }} subnetPrefix: {{ $i.appgw.subnetPrefix }} - usePrivateIP: false + usePrivateIP: {{ $i.appgw | get "usePrivateIP" false }} rbac: enabled: true 
diff --git a/values/istio-operator/istio-operator-raw.gotmpl b/values/istio-operator/istio-operator-raw.gotmpl index 63850839d0..fa40e9a9de 100644 --- a/values/istio-operator/istio-operator-raw.gotmpl +++ b/values/istio-operator/istio-operator-raw.gotmpl @@ -31,10 +31,10 @@ resources: {{- $i.resources.pilot | toYaml | nindent 14 }} {{- else }} requests: - memory: 128Mi + memory: 192Mi cpu: 100m limits: - memory: 256Mi + memory: 768Mi cpu: 500m {{- end }} ingressGateways: diff --git a/values/oauth2-proxy/oauth2-proxy.gotmpl b/values/oauth2-proxy/oauth2-proxy.gotmpl index b12abe351f..a1c26595ee 100644 --- a/values/oauth2-proxy/oauth2-proxy.gotmpl +++ b/values/oauth2-proxy/oauth2-proxy.gotmpl @@ -9,15 +9,10 @@ {{- $keycloakIssuer := printf "https://keycloak.%s/realms/%s" $v.cluster.domain $realm }} {{- $joinTpl := readFile "../../helmfile.d/utils/joinListWithSep.gotmpl" }} image: - tag: "v6.0.0" + tag: "v6.1.1" repository: quay.io/oauth2-proxy/oauth2-proxy - # tag: latest - # pullPolicy: Always config: - # clientID: oidc-auth-client - # clientSecret: bladibladi - # cookieSecret: QkVwdy9MSkU0N3VYS2haZkVqZTdyUzExeFZheTM3YXk= clientID: {{ $o.clientID }} clientSecret: {{ $o.clientSecret }} cookieSecret: {{ $v | getOrNil "charts.oauth2-proxy.config.cookieSecret" | default "blajajaaa" }} diff --git a/values/otomi-api/otomi-api.gotmpl b/values/otomi-api/otomi-api.gotmpl index 880996c490..17a467e98c 100644 --- a/values/otomi-api/otomi-api.gotmpl +++ b/values/otomi-api/otomi-api.gotmpl @@ -28,6 +28,7 @@ secrets: OIDC_CLIENT_SECRET: {{ $v.oidc.clientSecret }} env: + # DEBUG: '*' GIT_REPO_URL: {{ $o.git.repoUrl }} GIT_BRANCH: {{ $o | get "git.branch" "master" }} CLUSTER_ID: {{ printf "%s/%s" $c.provider $c.name }} @@ -38,6 +39,10 @@ env: {{- end }} USE_SOPS: {{ $v.sops.enabled }} CORE_VERSION: '{{ $version }}' + {{- /* keycloak counts as enabled unless charts.keycloak.enabled is explicitly false (same default as the helmfile releases); the lookup is parenthesised so that "not" applies to its result instead of being piped into "get" */}} + {{- if not ($v.charts | get "keycloak.enabled" true) }} + NO_AUTHZ: true + {{- end }} core: k8s: {{- toYaml $v.k8s | nindent 4 }} diff --git
a/values/prometheus-operator/prometheus-operator.gotmpl b/values/prometheus-operator/prometheus-operator.gotmpl index 2352c9cb37..b384fd3458 100644 --- a/values/prometheus-operator/prometheus-operator.gotmpl +++ b/values/prometheus-operator/prometheus-operator.gotmpl @@ -5,10 +5,9 @@ {{- $hasKeycloak := $k | get "enabled" true }} {{- $realm := $k | get "realm" "master" }} {{- $keycloakBase := printf "https://keycloak.%s/realms/%s" $v.cluster.domain $realm }} -{{- $o := $v.oidc }} -{{- $hasOIDC := or $hasKeycloak (hasKey $o "grafana") }} +{{- $hasOIDC := or $hasKeycloak (hasKey $v.oidc "grafana") }} {{- $appsDomain := printf "apps.%s" $v.cluster.domain }} -{{- $slackTpl := tpl (readFile "./slack-configs.gotmpl") $v | toString }} +{{- $slackTpl := tpl (readFile "../../helmfile.d/snippets/slack.gotmpl") $v | toString }} nameOverride: po fullnameOverride: po coreDns: @@ -66,9 +65,6 @@ prometheus: priorityClassName: "otomi-critical" externalLabels: cluster: {{ printf "%s/%s/%s" $v.customer.name $v.cluster.provider $v.cluster.name | lower | quote }} - # podMetadata: - # annotations: - # sidecar.istio.io/inject: "false" portName: http-web storageSpec: volumeClaimTemplate: @@ -82,10 +78,12 @@ prometheus: storage: {{ $p | get "prometheus.storageSize" "5Gi" }} enableAdminAPI: true externalUrl: https://{{ $appsDomain }}/prometheus - additionalServiceMonitors: {{- readFile "service-monitors.yaml" | nindent 4 }} + additionalServiceMonitors: {{- readFile "service-monitors.yaml" | nindent 4 }} +{{ if eq $v.cluster.provider "aws" }} additionalPrometheusRules: - name: cluster-autoscaler {{- readFile "rules/cluster-autoscaler.yaml" | nindent 4 }} +{{- end }} alertmanager: alertmanagerSpec: priorityClassName: "otomi-critical" @@ -96,73 +94,9 @@ alertmanager: limits: memory: 256Mi cpu: 500m - # podMetadata: - # annotations: - # sidecar.istio.io/inject: "false" portName: http-web externalUrl: https://{{ $appsDomain }}/alertmanager - config: - {{- $hasSlack := eq ($p.alertmanager | get 
"receiver" "slack") "slack" }} - global: - slack_api_url: {{ $p.alertmanager | get "slack.url" }} - route: - receiver: default - group_by: [alertname] - group_interval: {{ $p.alertmanager.groupInterval }} - repeat_interval: {{ $p.alertmanager.repeatInterval }} - routes: - - match: - alertname: Watchdog - receiver: "null" - - match: - alertname: CPUThrottlingHigh - receiver: "null" - {{- if eq $v.cluster.provider "azure" }} - - match: - alertname: KubeAPILatencyHigh - receiver: "null" - {{- end }} - # redkubes monitoring: only alerts that are for teams AND non-critical should go to the configured receiver - # > so NO config here for admins - - match: - severity: critical - receiver: critical - {{- if $v.otomi.isRedkubesMonitored }} - continue: true - - match: - severity: critical - receiver: critical-redkubes - {{- end }} - receivers: - - name: "null" - {{- $channel := $p.alertmanager | get "slack.channel" "mon-otomi" }} - {{- if or $v.otomi.isRedkubesMonitored $hasSlack }} - - name: default - slack_configs: - - channel: "#{{ $channel }}" - {{ $slackTpl | nindent 12 }} - - name: critical - slack_configs: - - channel: "#{{ $channel }}-crit" - {{ $slackTpl | nindent 12 }} - {{- else }} - - name: default - webhook_configs: - - url: "http://prometheus-msteams.monitoring.svc.cluster.local:2000/low_priority_channel" - send_resolved: true - - name: critical - webhook_configs: - - url: "http://prometheus-msteams.monitoring.svc.cluster.local:2000/high_priority_channel" - send_resolved: true - {{- end }} - {{- if $v.otomi.isRedkubesMonitored }} - - name: critical-redkubes - # sending criticals also to redkubes to be aware of customer issues - slack_configs: - - channel: "#{{ $channel }}-crit" - api_url: {{ $v.alerts.slack.url }} - {{ $slackTpl | nindent 12 }} - {{- end }} + config: {{- tpl (readFile "../../helmfile.d/snippets/alertmanager.gotmpl") (dict "instance" $v "root" $v "slackTpl" $slackTpl) | nindent 4 }} grafana: image: tag: 7.1.5 @@ -199,16 +133,17 @@ grafana: {{- 
else }} url: http://loki:3100 {{- end }} - {{- with $v | getOrNil "azure.monitor" }} + {{- with $v | getOrNil "clouds.azure.monitor" }} + {{- $a := $v.clouds.azure }} - name: Azure Monitor type: grafana-azure-monitor-datasource access: proxy jsonData: cloudName: azuremonitor - subscriptionId: {{ $v.azure.subscriptionId }} - tenantId: {{ $v.azure.tenantId }} + subscriptionId: {{ $a.subscriptionId }} + tenantId: {{ $a.tenantId }} clientId: {{ .clientId }} - logAnalyticsTenantId: {{ . | get "logAnalyticsTenantId" $v.azure.tenantId }} + logAnalyticsTenantId: {{ . | get "logAnalyticsTenantId" $a.tenantId }} logAnalyticsClientId: {{ . | get "logAnalyticsClientId" .clientId }} logAnalyticsDefaultWorkspace: {{ .logAnalyticsWorkspace }} appInsightsAppId: {{ . | get "appInsightsAppId" .clientId }} @@ -233,14 +168,12 @@ grafana: portName: http-service grafana.ini: "auth.anonymous": - enabled: false - # enabled: true + enabled: {{ not $hasOIDC }} org_role: Admin org_name: Main Org. "auth.generic_oauth": tls_skip_verify_insecure: {{ eq ($v.charts | get "cert-manager.stage") "staging" }} - # enabled: false - enabled: true + enabled: {{ $hasOIDC }} name: OAuth org_role: Admin allow_sign_up: true @@ -249,9 +182,9 @@ grafana: client_id: {{ $v.oidc.clientID }} client_secret: {{ $v.oidc.clientSecret }} scopes: openid - auth_url: {{ $hasKeycloak | ternary (printf "%s/protocol/openid-connect/auth" $keycloakBase) ($o | getOrNil "grafana.authUrl") }} - token_url: {{ $hasKeycloak | ternary (printf "%s/protocol/openid-connect/token" $keycloakBase) ($o | getOrNil "grafana.tokenUrl") }} - api_url: {{ $hasKeycloak | ternary (printf "%s/protocol/openid-connect/userinfo" $keycloakBase) ($o | getOrNil "grafana.apiUrl") }} + auth_url: {{ $hasKeycloak | ternary (printf "%s/protocol/openid-connect/auth" $keycloakBase) ($v.oidc | getOrNil "grafana.authUrl") }} + token_url: {{ $hasKeycloak | ternary (printf "%s/protocol/openid-connect/token" $keycloakBase) ($v.oidc | getOrNil "grafana.tokenUrl") }} + 
api_url: {{ $hasKeycloak | ternary (printf "%s/protocol/openid-connect/userinfo" $keycloakBase) ($v.oidc | getOrNil "grafana.apiUrl") }} role_attribute_path: contains(groups[*], 'admin') && 'Admin' || contains(groups[*], 'team-admin') && 'Admin' || 'Editor' log: level: error diff --git a/values/promitor-agent-scraper/promitor-agent-scraper.gotmpl b/values/promitor-agent-scraper/promitor-agent-scraper.gotmpl index 52c81e8f82..65dec392a1 100644 --- a/values/promitor-agent-scraper/promitor-agent-scraper.gotmpl +++ b/values/promitor-agent-scraper/promitor-agent-scraper.gotmpl @@ -1,5 +1,5 @@ {{- $v := .Environment.Values }} -{{- $a := $v.azure }} +{{- $a := $v.clouds.azure }} azureMetadata: tenantId: {{ $a.tenantId }} subscriptionId: {{ $a.subscriptionId }}