From d99f71910739608150c26a3793812bba8da56932 Mon Sep 17 00:00:00 2001 From: Julie Vogelman Date: Mon, 11 Dec 2023 15:01:22 -0800 Subject: [PATCH 1/6] fix: don't use Informer Lister for Workflows which assumes *Workflow type Signed-off-by: Julie Vogelman --- workflow/controller/healthz.go | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/workflow/controller/healthz.go b/workflow/controller/healthz.go index b1d882b40c9c..cdc58545238e 100644 --- a/workflow/controller/healthz.go +++ b/workflow/controller/healthz.go @@ -6,15 +6,20 @@ import ( "time" log "github.com/sirupsen/logrus" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/tools/cache" - "github.com/argoproj/argo-workflows/v3/pkg/client/listers/workflow/v1alpha1" + //"github.com/argoproj/argo-workflows/v3/pkg/client/listers/workflow/v1alpha1" + + wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" "github.com/argoproj/argo-workflows/v3/util/env" "github.com/argoproj/argo-workflows/v3/workflow/common" + "github.com/argoproj/argo-workflows/v3/workflow/util" ) var ( - age = env.LookupEnvDurationOr("HEALTHZ_AGE", 5*time.Minute) + age = env.LookupEnvDurationOr("HEALTHZ_AGE", time.Minute) //5*time.Minute) ) // https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-http-request @@ -29,7 +34,7 @@ func (wfc *WorkflowController) Healthz(w http.ResponseWriter, r *http.Request) { }() labelSelector := "!" + common.LabelKeyPhase + "," + instanceIDSelector err := func() error { - seletor, err := labels.Parse(labelSelector) + selector, err := labels.Parse(labelSelector) if err != nil { return err } @@ -38,12 +43,26 @@ func (wfc *WorkflowController) Healthz(w http.ResponseWriter, r *http.Request) { log.Info("healthz: current pod is not the leader") return nil } - lister := v1alpha1.NewWorkflowLister(wfc.wfInformer.GetIndexer()) - list, err := lister.Workflows(wfc.managedNamespace).List(seletor) + + // establish a list of unreconciled workflows + unreconciledWorkflows := []*wfv1.Workflow{} + err = cache.ListAllByNamespace(wfc.wfInformer.GetIndexer(), wfc.managedNamespace, selector, func(m interface{}) { + // Informer holds Workflows as type *Unstructured + un := m.(*unstructured.Unstructured) + // verify it's of type *Workflow (if not, it's an incorrectly formatted Workflow spec) + wf, err := util.FromUnstructured(un) + if err != nil { + log.Warnf("Healthz check found an incorrectly formatted Workflow: %q (namespace %q)", un.GetName(), un.GetNamespace()) + return + } + + unreconciledWorkflows = append(unreconciledWorkflows, wf) + }) if err != nil { - return err + log.Errorf("Healthz check failed to list Workflows using Informer, err=%v", err) } - for _, wf := range list { + // go through the unreconciled workflows to determine if any of them exceed the max allowed age + for _, wf := range unreconciledWorkflows { if time.Since(wf.GetCreationTimestamp().Time) > age { return fmt.Errorf("workflow never reconciled: %s", wf.Name) } From 8f230d7a602c8a0b9d5d89959cc4db03c32ee9ae Mon Sep 17 00:00:00 2001 From: Julie Vogelman Date: Tue, 12 Dec 2023 10:23:46 -0800 Subject: [PATCH 2/6] fix: adding a unit test for healthz Signed-off-by: Julie Vogelman --- workflow/controller/healthz_test.go | 76 +++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 workflow/controller/healthz_test.go diff --git a/workflow/controller/healthz_test.go b/workflow/controller/healthz_test.go new file mode 100644 index 000000000000..8d587e525749 --- /dev/null +++ b/workflow/controller/healthz_test.go @@ -0,0 +1,76 @@ +package controller + +import ( + "net/http" + "net/http/httptest" + "testing" + "time" + + wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" + "github.com/argoproj/argo-workflows/v3/workflow/common" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestHealthz(t *testing.T) { + + veryOldWF := wfv1.MustUnmarshalWorkflow(helloWorldWf) + veryOldWF.SetCreationTimestamp(metav1.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC)) // a long time ago + veryOldWF.SetName(veryOldWF.Name + "-1") + + newWF := wfv1.MustUnmarshalWorkflow(helloWorldWf) + newWF.SetCreationTimestamp(metav1.Now()) + newWF.SetName(newWF.Name + "-2") + + veryOldButReconciledWF := wfv1.MustUnmarshalWorkflow(helloWorldWf) + veryOldButReconciledWF.SetCreationTimestamp(metav1.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC)) // a long time ago + veryOldButReconciledWF.SetName(veryOldWF.Name + "-3") + veryOldButReconciledWF.Labels = map[string]string{common.LabelKeyPhase: string(wfv1.WorkflowPending)} + + tests := []struct { + workflows []*wfv1.Workflow + expectedStatus int + }{ + { + []*wfv1.Workflow{veryOldWF}, + 500, + }, + { + []*wfv1.Workflow{newWF}, + 200, + }, + { + []*wfv1.Workflow{veryOldWF, newWF}, + 500, + }, + { + []*wfv1.Workflow{veryOldButReconciledWF}, + 200, + }, + } + + for _, tt := range tests { + workflowsAsInterfaceSlice := []interface{}{} + for _, wf := range tt.workflows { + workflowsAsInterfaceSlice = append(workflowsAsInterfaceSlice, wf) + } + cancel, controller := newController(workflowsAsInterfaceSlice...) + defer cancel() + + rr := httptest.NewRecorder() + + handler := http.HandlerFunc(controller.Healthz) + + req, err := http.NewRequest("GET", "/healthz", nil) + if err != nil { + t.Fatal(err) + } + handler.ServeHTTP(rr, req) + + // Check the status code is what we expect. + if status := rr.Code; status != tt.expectedStatus { + t.Errorf("handler returned wrong status code: got %v want %v", + status, tt.expectedStatus) + } + } + +} From 235e90aa0e5c47da0e69ea59b58382db37473b6d Mon Sep 17 00:00:00 2001 From: Julie Vogelman Date: Tue, 12 Dec 2023 10:35:02 -0800 Subject: [PATCH 3/6] fix: remove accidental commit Signed-off-by: Julie Vogelman --- workflow/controller/healthz.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/controller/healthz.go b/workflow/controller/healthz.go index cdc58545238e..0f6abd3ab796 100644 --- a/workflow/controller/healthz.go +++ b/workflow/controller/healthz.go @@ -19,7 +19,7 @@ import ( ) var ( - age = env.LookupEnvDurationOr("HEALTHZ_AGE", time.Minute) //5*time.Minute) + age = env.LookupEnvDurationOr("HEALTHZ_AGE", 5*time.Minute) ) // https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-http-request From e0d85558becfde1550b40978f5d02f201c3a8d33 Mon Sep 17 00:00:00 2001 From: Julie Vogelman Date: Tue, 12 Dec 2023 10:35:44 -0800 Subject: [PATCH 4/6] fix: cleanup Signed-off-by: Julie Vogelman --- workflow/controller/healthz.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/workflow/controller/healthz.go b/workflow/controller/healthz.go index 0f6abd3ab796..4edc56c31212 100644 --- a/workflow/controller/healthz.go +++ b/workflow/controller/healthz.go @@ -10,8 +10,6 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/tools/cache" - //"github.com/argoproj/argo-workflows/v3/pkg/client/listers/workflow/v1alpha1" - wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" "github.com/argoproj/argo-workflows/v3/util/env" "github.com/argoproj/argo-workflows/v3/workflow/common" From 91e106ddd7e95a7bd362305898a2332dea7f79e3 Mon Sep 17 00:00:00 2001 From: Julie Vogelman Date: Tue, 12 Dec 2023 10:38:59 -0800 Subject: [PATCH 5/6] fix: if informer list call fails, fail liveness check Signed-off-by: Julie Vogelman --- workflow/controller/healthz.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/controller/healthz.go b/workflow/controller/healthz.go index 4edc56c31212..a1138c0d9b5b 100644 --- a/workflow/controller/healthz.go +++ b/workflow/controller/healthz.go @@ -57,7 +57,7 @@ func (wfc *WorkflowController) Healthz(w http.ResponseWriter, r *http.Request) { unreconciledWorkflows = append(unreconciledWorkflows, wf) }) if err != nil { - log.Errorf("Healthz check failed to list Workflows using Informer, err=%v", err) + return fmt.Errorf("Healthz check failed to list Workflows using Informer, err=%v", err) } // go through the unreconciled workflows to determine if any of them exceed the max allowed age for _, wf := range unreconciledWorkflows { From c9b9eb632669aa40e613b001d0b7fe569b1a1f99 Mon Sep 17 00:00:00 2001 From: Julie Vogelman Date: Tue, 12 Dec 2023 10:48:11 -0800 Subject: [PATCH 6/6] fix: reorder importers for linter Signed-off-by: Julie Vogelman --- workflow/controller/healthz_test.go | 31 +++++++++++++++-------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/workflow/controller/healthz_test.go b/workflow/controller/healthz_test.go index 8d587e525749..351773c0578e 100644 --- a/workflow/controller/healthz_test.go +++ b/workflow/controller/healthz_test.go @@ -6,44 +6,45 @@ import ( "testing" "time" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" "github.com/argoproj/argo-workflows/v3/workflow/common" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) func TestHealthz(t *testing.T) { - veryOldWF := wfv1.MustUnmarshalWorkflow(helloWorldWf) - veryOldWF.SetCreationTimestamp(metav1.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC)) // a long time ago - veryOldWF.SetName(veryOldWF.Name + "-1") + veryOldUnreconciledWF := wfv1.MustUnmarshalWorkflow(helloWorldWf) + veryOldUnreconciledWF.SetCreationTimestamp(metav1.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC)) // a long time ago + veryOldUnreconciledWF.SetName(veryOldUnreconciledWF.Name + "-1") - newWF := wfv1.MustUnmarshalWorkflow(helloWorldWf) - newWF.SetCreationTimestamp(metav1.Now()) - newWF.SetName(newWF.Name + "-2") + newUnreconciledWF := wfv1.MustUnmarshalWorkflow(helloWorldWf) + newUnreconciledWF.SetCreationTimestamp(metav1.Now()) + newUnreconciledWF.SetName(newUnreconciledWF.Name + "-2") - veryOldButReconciledWF := wfv1.MustUnmarshalWorkflow(helloWorldWf) - veryOldButReconciledWF.SetCreationTimestamp(metav1.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC)) // a long time ago - veryOldButReconciledWF.SetName(veryOldWF.Name + "-3") - veryOldButReconciledWF.Labels = map[string]string{common.LabelKeyPhase: string(wfv1.WorkflowPending)} + veryOldReconciledWF := wfv1.MustUnmarshalWorkflow(helloWorldWf) + veryOldReconciledWF.SetCreationTimestamp(metav1.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC)) // a long time ago + veryOldReconciledWF.SetName(veryOldUnreconciledWF.Name + "-3") + veryOldReconciledWF.Labels = map[string]string{common.LabelKeyPhase: string(wfv1.WorkflowPending)} tests := []struct { workflows []*wfv1.Workflow expectedStatus int }{ { - []*wfv1.Workflow{veryOldWF}, + []*wfv1.Workflow{veryOldUnreconciledWF}, 500, }, { - []*wfv1.Workflow{newWF}, + []*wfv1.Workflow{newUnreconciledWF}, 200, }, { - []*wfv1.Workflow{veryOldWF, newWF}, + []*wfv1.Workflow{veryOldUnreconciledWF, newUnreconciledWF}, 500, }, { - []*wfv1.Workflow{veryOldButReconciledWF}, + []*wfv1.Workflow{veryOldReconciledWF}, 200, }, }