diff --git a/workflow/controller/healthz.go b/workflow/controller/healthz.go index b1d882b40c9c..a1138c0d9b5b 100644 --- a/workflow/controller/healthz.go +++ b/workflow/controller/healthz.go @@ -6,11 +6,14 @@ import ( "time" log "github.com/sirupsen/logrus" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/tools/cache" - "github.com/argoproj/argo-workflows/v3/pkg/client/listers/workflow/v1alpha1" + wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" "github.com/argoproj/argo-workflows/v3/util/env" "github.com/argoproj/argo-workflows/v3/workflow/common" + "github.com/argoproj/argo-workflows/v3/workflow/util" ) var ( @@ -29,7 +32,7 @@ func (wfc *WorkflowController) Healthz(w http.ResponseWriter, r *http.Request) { }() labelSelector := "!" + common.LabelKeyPhase + "," + instanceIDSelector err := func() error { - seletor, err := labels.Parse(labelSelector) + selector, err := labels.Parse(labelSelector) if err != nil { return err } @@ -38,12 +41,26 @@ func (wfc *WorkflowController) Healthz(w http.ResponseWriter, r *http.Request) { log.Info("healthz: current pod is not the leader") return nil } - lister := v1alpha1.NewWorkflowLister(wfc.wfInformer.GetIndexer()) - list, err := lister.Workflows(wfc.managedNamespace).List(seletor) + + // establish a list of unreconciled workflows + unreconciledWorkflows := []*wfv1.Workflow{} + err = cache.ListAllByNamespace(wfc.wfInformer.GetIndexer(), wfc.managedNamespace, selector, func(m interface{}) { + // Informer holds Workflows as type *Unstructured + un := m.(*unstructured.Unstructured) + // verify it's of type *Workflow (if not, it's an incorrectly formatted Workflow spec) + wf, err := util.FromUnstructured(un) + if err != nil { + log.Warnf("Healthz check found an incorrectly formatted Workflow: %q (namespace %q)", un.GetName(), un.GetNamespace()) + return + } + + unreconciledWorkflows = append(unreconciledWorkflows, wf) + }) if err != nil { - return err + return fmt.Errorf("Healthz check failed to list Workflows using Informer, err=%v", err) } - for _, wf := range list { + // go through the unreconciled workflows to determine if any of them exceed the max allowed age + for _, wf := range unreconciledWorkflows { if time.Since(wf.GetCreationTimestamp().Time) > age { return fmt.Errorf("workflow never reconciled: %s", wf.Name) } diff --git a/workflow/controller/healthz_test.go b/workflow/controller/healthz_test.go new file mode 100644 index 000000000000..351773c0578e --- /dev/null +++ b/workflow/controller/healthz_test.go @@ -0,0 +1,77 @@ +package controller + +import ( + "net/http" + "net/http/httptest" + "testing" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" + "github.com/argoproj/argo-workflows/v3/workflow/common" +) + +func TestHealthz(t *testing.T) { + + veryOldUnreconciledWF := wfv1.MustUnmarshalWorkflow(helloWorldWf) + veryOldUnreconciledWF.SetCreationTimestamp(metav1.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC)) // a long time ago + veryOldUnreconciledWF.SetName(veryOldUnreconciledWF.Name + "-1") + + newUnreconciledWF := wfv1.MustUnmarshalWorkflow(helloWorldWf) + newUnreconciledWF.SetCreationTimestamp(metav1.Now()) + newUnreconciledWF.SetName(newUnreconciledWF.Name + "-2") + + veryOldReconciledWF := wfv1.MustUnmarshalWorkflow(helloWorldWf) + veryOldReconciledWF.SetCreationTimestamp(metav1.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC)) // a long time ago + veryOldReconciledWF.SetName(veryOldUnreconciledWF.Name + "-3") + veryOldReconciledWF.Labels = map[string]string{common.LabelKeyPhase: string(wfv1.WorkflowPending)} + + tests := []struct { + workflows []*wfv1.Workflow + expectedStatus int + }{ + { + []*wfv1.Workflow{veryOldUnreconciledWF}, + 500, + }, + { + []*wfv1.Workflow{newUnreconciledWF}, + 200, + }, + { + []*wfv1.Workflow{veryOldUnreconciledWF, newUnreconciledWF}, + 500, + }, + { + []*wfv1.Workflow{veryOldReconciledWF}, + 200, + }, + } + + for _, tt := range tests { + workflowsAsInterfaceSlice := []interface{}{} + for _, wf := range tt.workflows { + workflowsAsInterfaceSlice = append(workflowsAsInterfaceSlice, wf) + } + cancel, controller := newController(workflowsAsInterfaceSlice...) + defer cancel() + + rr := httptest.NewRecorder() + + handler := http.HandlerFunc(controller.Healthz) + + req, err := http.NewRequest("GET", "/healthz", nil) + if err != nil { + t.Fatal(err) + } + handler.ServeHTTP(rr, req) + + // Check the status code is what we expect. + if status := rr.Code; status != tt.expectedStatus { + t.Errorf("handler returned wrong status code: got %v want %v", + status, tt.expectedStatus) + } + } + +}