Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(operator): add proper health checks #608

Merged
merged 1 commit into from
Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ e2e-helm-certmanager: e2e
e2e-helm-custom-configuration: KUTTL_CONFIG = kuttl-test-helm-custom-configuration.yaml
e2e-helm-custom-configuration: e2e

# Blocks until the operator in the tailing-sidecar-system namespace is usable by the e2e tests:
# waits (up to 300s each) for all deployments to become available and for all pods to become ready.
# We sleep for 10 seconds here because webhooks can mysteriously be unavailable even though the readiness check passes
.PHONY: e2e-wait-until-operator-ready
e2e-wait-until-operator-ready:
kubectl wait --for=condition=available --timeout 300s deploy --all -n tailing-sidecar-system
kubectl wait --for=condition=ready --timeout 300s pod --all -n tailing-sidecar-system
sleep 10

build-push-deploy: build-push-sidecar build-push-deploy-operator

build-push-sidecar:
Expand Down
25 changes: 9 additions & 16 deletions helm/tailing-sidecar-operator/templates/resources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -474,29 +474,22 @@ spec:
protocol: TCP
startupProbe:
httpGet:
scheme: HTTPS
path: /add-tailing-sidecars-v1-pod
port: 9443
httpHeaders:
- name: Accept
value: application/json
- name: Content-Type
value: application/json
path: /readyz
port: 8081
{{- if .Values.operator.startupProbe}}
{{ toYaml .Values.operator.startupProbe | indent 10 }}
{{ else }}
periodSeconds: 3
{{ end }}
readinessProbe:
httpGet:
path: /readyz
port: 8081
periodSeconds: 10
livenessProbe:
httpGet:
scheme: HTTPS
path: /add-tailing-sidecars-v1-pod
port: 9443
httpHeaders:
- name: Accept
value: application/json
- name: Content-Type
value: application/json
path: /healthz
port: 8081
{{- if .Values.operator.livenessProbe}}
{{ toYaml .Values.operator.livenessProbe | indent 10 }}
{{ else }}
Expand Down
3 changes: 1 addition & 2 deletions kuttl-test-helm-certmanager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,4 @@ kindContainers:
commands:
- command: make -C ./operator deploy-cert-manager
- command: helm upgrade --install test-release ./helm/tailing-sidecar-operator -f ./helm/tests/values.withCertManager.yaml -n tailing-sidecar-system --create-namespace
- command: kubectl wait --for=condition=available --timeout 300s deploy -l app.kubernetes.io/name=tailing-sidecar-operator -n tailing-sidecar-system
- command: kubectl wait --for=condition=ready --timeout 300s pod -l app.kubernetes.io/name=tailing-sidecar-operator -n tailing-sidecar-system
- command: make e2e-wait-until-operator-ready
3 changes: 1 addition & 2 deletions kuttl-test-helm-custom-configuration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,4 @@ kindContainers:
- registry.localhost:5000/sumologic/tailing-sidecar:test
commands:
- command: helm upgrade --install test-release ./helm/tailing-sidecar-operator -f ./helm/tests/values.withCustomConfiguration.yaml -n tailing-sidecar-system --create-namespace
- command: kubectl wait --for=condition=available --timeout 300s deploy -l app.kubernetes.io/name=tailing-sidecar-operator -n tailing-sidecar-system
- command: kubectl wait --for=condition=ready --timeout 300s pod -l app.kubernetes.io/name=tailing-sidecar-operator -n tailing-sidecar-system
- command: make e2e-wait-until-operator-ready
4 changes: 2 additions & 2 deletions kuttl-test-helm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ kindContainers:
- registry.localhost:5000/sumologic/tailing-sidecar:test
commands:
- command: helm upgrade --install test-release ./helm/tailing-sidecar-operator -f ./helm/tests/values.yaml -n tailing-sidecar-system --create-namespace
- command: kubectl wait --for=condition=available --timeout 300s deploy -l app.kubernetes.io/name=tailing-sidecar-operator -n tailing-sidecar-system
- command: kubectl wait --for=condition=ready --timeout 300s pod -l app.kubernetes.io/name=tailing-sidecar-operator -n tailing-sidecar-system

- command: make e2e-wait-until-operator-ready
2 changes: 1 addition & 1 deletion kuttl-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ kindContainers:
commands:
- command: make -C ./operator deploy-cert-manager
- command: make -C ./operator deploy IMG="registry.localhost:5000/sumologic/tailing-sidecar-operator:test" TAILING_SIDECAR_IMG="registry.localhost:5000/sumologic/tailing-sidecar:test"
- command: kubectl wait --for=condition=ready --timeout 300s pod -l control-plane=tailing-sidecar-operator -n tailing-sidecar-system
- command: make e2e-wait-until-operator-ready
25 changes: 9 additions & 16 deletions operator/config/default/manager_webhook_patch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,18 @@ spec:
protocol: TCP
startupProbe:
httpGet:
scheme: HTTPS
path: /add-tailing-sidecars-v1-pod
port: 9443
httpHeaders:
- name: Accept
value: application/json
- name: Content-Type
value: application/json
path: /readyz
port: 8081
periodSeconds: 3
readinessProbe:
httpGet:
path: /readyz
port: 8081
periodSeconds: 10
livenessProbe:
httpGet:
scheme: HTTPS
path: /add-tailing-sidecars-v1-pod
port: 9443
httpHeaders:
- name: Accept
value: application/json
- name: Content-Type
value: application/json
path: /healthz
port: 8081
initialDelaySeconds: 1
periodSeconds: 10
volumeMounts:
Expand Down
24 changes: 19 additions & 5 deletions operator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
"sigs.k8s.io/controller-runtime/pkg/webhook"
Expand Down Expand Up @@ -53,13 +54,15 @@ func init() {

func main() {
var metricsAddr string
var healthAddr string
var enableLeaderElection bool
var tailingSidecarImage string
var configPath string
var config Config
var err error

flag.StringVar(&metricsAddr, "metrics-addr", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&healthAddr, "health-addr", ":8081", "The address the health check endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "enable-leader-election", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
Expand Down Expand Up @@ -92,11 +95,12 @@ func main() {
Metrics: metricsserver.Options{
BindAddress: metricsAddr,
},
LeaderElection: enableLeaderElection,
LeaderElectionID: "7b555970.sumologic.com",
LeaseDuration: (*time.Duration)(&config.LeaderElection.LeaseDuration),
RenewDeadline: (*time.Duration)(&config.LeaderElection.RenewDeadline),
RetryPeriod: (*time.Duration)(&config.LeaderElection.RetryPeriod),
HealthProbeBindAddress: healthAddr,
LeaderElection: enableLeaderElection,
LeaderElectionID: "7b555970.sumologic.com",
LeaseDuration: (*time.Duration)(&config.LeaderElection.LeaseDuration),
RenewDeadline: (*time.Duration)(&config.LeaderElection.RenewDeadline),
RetryPeriod: (*time.Duration)(&config.LeaderElection.RetryPeriod),
})
if err != nil {
setupLog.Error(err, "unable to start manager")
Expand Down Expand Up @@ -129,6 +133,16 @@ func main() {
})
mgr.Add(webhookServer)

if err = mgr.AddReadyzCheck("readyz", webhookServer.StartedChecker()); err != nil {
setupLog.Error(err, "unable to set up readiness check")
os.Exit(1)
}

if err = mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up health check")
os.Exit(1)
}

setupLog.Info("starting manager")
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
setupLog.Error(err, "problem running manager")
Expand Down
Loading