From 134958db8e8cbbb9f6cec30fdbeaa05aa78587e7 Mon Sep 17 00:00:00 2001 From: Tim Usner Date: Fri, 4 Jun 2021 16:51:01 +0200 Subject: [PATCH] Handle Unknown -> Ready members --- api/v1alpha1/etcd_types.go | 2 + api/v1alpha1/zz_generated.deepcopy.go | 1 + .../crd/bases/druid.gardener.cloud_etcds.yaml | 10 + controllers/config/custodian.go | 13 +- controllers/controllers_suite_test.go | 9 +- controllers/etcd_custodian_controller.go | 10 +- main.go | 17 +- pkg/health/etcdmember/check_ready.go | 63 +++- pkg/health/etcdmember/check_ready_test.go | 273 ++++++++++++++---- pkg/health/etcdmember/types.go | 8 +- pkg/health/status/check.go | 26 +- pkg/health/status/check_test.go | 33 ++- .../etcd-druid/api/v1alpha1/etcd_types.go | 2 + .../api/v1alpha1/zz_generated.deepcopy.go | 1 + 14 files changed, 363 insertions(+), 105 deletions(-) diff --git a/api/v1alpha1/etcd_types.go b/api/v1alpha1/etcd_types.go index 8d8a76982..b109a1423 100644 --- a/api/v1alpha1/etcd_types.go +++ b/api/v1alpha1/etcd_types.go @@ -297,6 +297,8 @@ type EtcdMemberStatus struct { Name string `json:"name"` // ID is the ID of the etcd member. ID string `json:"id"` + // PodRef is the reference to the Pod which hosts the etcd member. + PodRef corev1.LocalObjectReference `json:"podRef"` // Role is the role in the etcd cluster, either `Member` or `Learner`. Role EtcdRole `json:"role"` // Status of the condition, one of True, False, Unknown. diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index c7604b824..24f8a33b5 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -268,6 +268,7 @@ func (in *EtcdList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *EtcdMemberStatus) DeepCopyInto(out *EtcdMemberStatus) { *out = *in + out.PodRef = in.PodRef in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) } diff --git a/config/crd/bases/druid.gardener.cloud_etcds.yaml b/config/crd/bases/druid.gardener.cloud_etcds.yaml index a1e375e2b..f6602fdfc 100644 --- a/config/crd/bases/druid.gardener.cloud_etcds.yaml +++ b/config/crd/bases/druid.gardener.cloud_etcds.yaml @@ -550,6 +550,15 @@ spec: name: description: Name is the name of the etcd member. type: string + podRef: + description: PodRef is the reference to the Pod which hosts + the etcd member. + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + type: object reason: description: The reason for the condition's last transition. type: string @@ -565,6 +574,7 @@ spec: - lastTransitionTime - lastUpdateTime - name + - podRef - reason - role - status diff --git a/controllers/config/custodian.go b/controllers/config/custodian.go index 9e1993212..2c7602ab2 100644 --- a/controllers/config/custodian.go +++ b/controllers/config/custodian.go @@ -18,6 +18,15 @@ import "time" // EtcdCustodianController contains configuration for the etcd custodian controller. type EtcdCustodianController struct { - EtcdStaleMemberThreshold time.Duration - SyncPeriod time.Duration + // EtcdMember holds configuration related to etcd members. + EtcdMember EtcdMemberConfig + // SyncPeriod is the duration after which re-enqueuing happens. + SyncPeriod time.Duration +} + +type EtcdMemberConfig struct { + // EtcdMemberUnknownThreshold is the duration after which a etcd member's state is considered `Unknown`. + EtcdMemberUnknownThreshold time.Duration + // EtcdMemberUnknownThreshold is the duration after which a etcd member's state is considered `NotReady`. + EtcdMemberNotReadyThreshold time.Duration } diff --git a/controllers/controllers_suite_test.go b/controllers/controllers_suite_test.go index f94f86f81..65362b8d0 100644 --- a/controllers/controllers_suite_test.go +++ b/controllers/controllers_suite_test.go @@ -22,7 +22,7 @@ import ( "time" druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" - "github.com/gardener/etcd-druid/controllers/config" + controllersconfig "github.com/gardener/etcd-druid/controllers/config" "github.com/gardener/gardener/pkg/utils/test" . "github.com/onsi/ginkgo" @@ -110,8 +110,11 @@ var _ = BeforeSuite(func(done Done) { err = er.SetupWithManager(mgr, 1, true) Expect(err).NotTo(HaveOccurred()) - custodian := NewEtcdCustodian(mgr, config.EtcdCustodianController{ - EtcdStaleMemberThreshold: 1 * time.Minute, + custodian := NewEtcdCustodian(mgr, controllersconfig.EtcdCustodianController{ + EtcdMember: controllersconfig.EtcdMemberConfig{ + EtcdMemberUnknownThreshold: 1 * time.Minute, + EtcdMemberNotReadyThreshold: 1 * time.Minute, + }, }) err = custodian.SetupWithManager(mgrCtx, mgr, 1) diff --git a/controllers/etcd_custodian_controller.go b/controllers/etcd_custodian_controller.go index 35b5099c1..cff0ee78f 100644 --- a/controllers/etcd_custodian_controller.go +++ b/controllers/etcd_custodian_controller.go @@ -37,7 +37,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" - "github.com/gardener/etcd-druid/controllers/config" + controllersconfig "github.com/gardener/etcd-druid/controllers/config" "github.com/gardener/etcd-druid/pkg/health/status" druidmapper "github.com/gardener/etcd-druid/pkg/mapper" druidpredicates "github.com/gardener/etcd-druid/pkg/predicate" @@ -48,11 +48,11 @@ type EtcdCustodian struct { client.Client Scheme *runtime.Scheme logger logr.Logger - config config.EtcdCustodianController + config controllersconfig.EtcdCustodianController } // NewEtcdCustodian creates a new EtcdCustodian object -func NewEtcdCustodian(mgr manager.Manager, config config.EtcdCustodianController) *EtcdCustodian { +func NewEtcdCustodian(mgr manager.Manager, config controllersconfig.EtcdCustodianController) *EtcdCustodian { return &EtcdCustodian{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), @@ -94,8 +94,8 @@ func (ec *EtcdCustodian) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. return ctrl.Result{}, err } - statusCheck := status.NewChecker(ec.config) - if err := statusCheck.Check(ctx, &etcd.Status); err != nil { + statusCheck := status.NewChecker(ec.Client, ec.config) + if err := statusCheck.Check(ctx, etcd); err != nil { logger.Error(err, "Error executing status checks") return ctrl.Result{}, err } diff --git a/main.go b/main.go index 352eff0cb..910f0c010 100644 --- a/main.go +++ b/main.go @@ -22,7 +22,7 @@ import ( druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" "github.com/gardener/etcd-druid/controllers" - "github.com/gardener/etcd-druid/controllers/config" + controllersconfig "github.com/gardener/etcd-druid/controllers/config" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -57,7 +57,8 @@ func main() { custodianSyncPeriod time.Duration ignoreOperationAnnotation bool - etcdStaleMemberThreshold time.Duration + etcdMemberUnknownThreshold time.Duration + etcdMemberNotReadyThreshold time.Duration // TODO: migrate default to `leases` in one of the next releases defaultLeaderElectionResourceLock = resourcelock.ConfigMapsLeasesResourceLock @@ -75,7 +76,8 @@ func main() { flag.StringVar(&leaderElectionResourceLock, "leader-election-resource-lock", defaultLeaderElectionResourceLock, "Which resource type to use for leader election. "+ "Supported options are 'endpoints', 'configmaps', 'leases', 'endpointsleases' and 'configmapsleases'.") flag.BoolVar(&ignoreOperationAnnotation, "ignore-operation-annotation", true, "Ignore the operation annotation or not.") - flag.DurationVar(&etcdStaleMemberThreshold, "etcd-member-threshold", 1*time.Minute, "Threshold after which an etcd member status is considered unknown if no heartbeat happened.") + flag.DurationVar(&etcdMemberUnknownThreshold, "etcd-member-unknown-threshold", 60*time.Second, "Threshold after which an etcd member status is considered unknown if no heartbeat happened.") + flag.DurationVar(&etcdMemberNotReadyThreshold, "etcd-member-notready-threshold", 5*time.Minute, "Threshold after which an etcd member is considered not ready if the status was unknown before.") flag.Parse() @@ -107,9 +109,12 @@ func main() { os.Exit(1) } - custodian := controllers.NewEtcdCustodian(mgr, config.EtcdCustodianController{ - EtcdStaleMemberThreshold: etcdStaleMemberThreshold, - SyncPeriod: custodianSyncPeriod, + custodian := controllers.NewEtcdCustodian(mgr, controllersconfig.EtcdCustodianController{ + EtcdMember: controllersconfig.EtcdMemberConfig{ + EtcdMemberUnknownThreshold: etcdMemberUnknownThreshold, + EtcdMemberNotReadyThreshold: etcdMemberNotReadyThreshold, + }, + SyncPeriod: custodianSyncPeriod, }) if err := custodian.SetupWithManager(ctx, mgr, custodianWorkers); err != nil { diff --git a/pkg/health/etcdmember/check_ready.go b/pkg/health/etcdmember/check_ready.go index 615a6daa7..42d19701e 100644 --- a/pkg/health/etcdmember/check_ready.go +++ b/pkg/health/etcdmember/check_ready.go @@ -15,42 +15,83 @@ package etcdmember import ( + "context" "time" - "github.com/gardener/etcd-druid/controllers/config" + kutil "github.com/gardener/gardener/pkg/utils/kubernetes" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "sigs.k8s.io/controller-runtime/pkg/client" druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" + controllersconfig "github.com/gardener/etcd-druid/controllers/config" ) type readyCheck struct { - etcdStaleMemberThreshold time.Duration + memberConfig controllersconfig.EtcdMemberConfig + cl client.Client } // TimeNow is the function used by this check to get the current time. var TimeNow = time.Now -func (r *readyCheck) Check(status druidv1alpha1.EtcdStatus) []Result { +func (r *readyCheck) Check(ctx context.Context, etcd druidv1alpha1.Etcd) []Result { var ( results []Result - threshold = TimeNow().UTC().Add(-1 * r.etcdStaleMemberThreshold) + checkTime = TimeNow().UTC() ) - for _, etcd := range status.Members { - if etcd.LastUpdateTime.Time.Before(threshold) { + for _, member := range etcd.Status.Members { + // Check if status must be changed from Unknown to NotReady. + if member.Status == druidv1alpha1.EtcdMemeberStatusUnknown && + member.LastTransitionTime.Time.Add(r.memberConfig.EtcdMemberNotReadyThreshold).Before(checkTime) { results = append(results, &result{ - id: etcd.ID, - status: druidv1alpha1.EtcdMemeberStatusUnknown, - reason: "UnknownMemberStatus", + id: member.ID, + status: druidv1alpha1.EtcdMemeberStatusNotReady, + reason: "UnkownStateTimeout", }) + continue } + + // Skip if status is not already Unknown and LastUpdateTime is within grace period. + if !member.LastUpdateTime.Time.Add(r.memberConfig.EtcdMemberUnknownThreshold).Before(checkTime) { + continue + } + + // If pod is not running or cannot be found then we deduce that the status is NotReady. + ready, err := r.checkPodIsRunning(ctx, etcd.Namespace, member) + if (err == nil && !ready) || apierrors.IsNotFound(err) { + results = append(results, &result{ + id: member.ID, + status: druidv1alpha1.EtcdMemeberStatusNotReady, + reason: "PodNotRunning", + }) + continue + } + + // For every other reason the status is Unknown. + results = append(results, &result{ + id: member.ID, + status: druidv1alpha1.EtcdMemeberStatusUnknown, + reason: "UnknownMemberStatus", + }) } return results } +func (r *readyCheck) checkPodIsRunning(ctx context.Context, namespace string, member druidv1alpha1.EtcdMemberStatus) (bool, error) { + pod := &corev1.Pod{} + if err := r.cl.Get(ctx, kutil.Key(namespace, member.PodRef.Name), pod); err != nil { + return false, err + } + return pod.Status.Phase == corev1.PodRunning, nil +} + // ReadyCheck returns a check for the "Ready" condition. -func ReadyCheck(config config.EtcdCustodianController) Checker { +func ReadyCheck(cl client.Client, config controllersconfig.EtcdCustodianController) Checker { return &readyCheck{ - etcdStaleMemberThreshold: config.EtcdStaleMemberThreshold, + cl: cl, + memberConfig: config.EtcdMember, } } diff --git a/pkg/health/etcdmember/check_ready_test.go b/pkg/health/etcdmember/check_ready_test.go index d05cfae35..d50995cf9 100644 --- a/pkg/health/etcdmember/check_ready_test.go +++ b/pkg/health/etcdmember/check_ready_test.go @@ -15,101 +15,274 @@ package etcdmember_test import ( + "context" + "errors" "time" + kutil "github.com/gardener/gardener/pkg/utils/kubernetes" "github.com/gardener/gardener/pkg/utils/test" + "github.com/golang/mock/gomock" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" "github.com/gardener/etcd-druid/controllers/config" + controllersconfig "github.com/gardener/etcd-druid/controllers/config" . "github.com/gardener/etcd-druid/pkg/health/etcdmember" + mockclient "github.com/gardener/etcd-druid/pkg/mock/controller-runtime/client" ) var _ = Describe("ReadyCheck", func() { Describe("#Check", func() { var ( - threshold time.Duration - now time.Time - check Checker + ctx context.Context + mockCtrl *gomock.Controller + cl *mockclient.MockClient + unknownThreshold, notReadyThreshold time.Duration + now time.Time + check Checker ) BeforeEach(func() { - threshold = 300 * time.Second + ctx = context.Background() + mockCtrl = gomock.NewController(GinkgoT()) + cl = mockclient.NewMockClient(mockCtrl) + unknownThreshold = 300 * time.Second + notReadyThreshold = 60 * time.Second now, _ = time.Parse(time.RFC3339, "2021-06-01T00:00:00Z") - check = ReadyCheck(config.EtcdCustodianController{ - EtcdStaleMemberThreshold: threshold, + check = ReadyCheck(cl, controllersconfig.EtcdCustodianController{ + EtcdMember: controllersconfig.EtcdMemberConfig{ + EtcdMemberUnknownThreshold: unknownThreshold, + EtcdMemberNotReadyThreshold: notReadyThreshold, + }, }) }) + AfterEach(func() { + mockCtrl.Finish() + }) + Context("when condition is outdated", func() { + var ( + podName string + etcd druidv1alpha1.Etcd + ) + + BeforeEach(func() { + podName = "etcd-main-0" + etcd = druidv1alpha1.Etcd{ + ObjectMeta: metav1.ObjectMeta{ + Name: "etcd", + Namespace: "etcd-test", + }, + Status: druidv1alpha1.EtcdStatus{ + Members: []druidv1alpha1.EtcdMemberStatus{ + { + Name: "member1", + ID: "1", + PodRef: corev1.LocalObjectReference{ + Name: podName, + }, + Role: druidv1alpha1.EtcdRoleMember, + Status: druidv1alpha1.EtcdMemeberStatusReady, + Reason: "foo reason", + LastTransitionTime: metav1.Now(), + LastUpdateTime: metav1.NewTime(now.Add(-301 * time.Second)), + }, + { + Name: "member2", + ID: "2", + PodRef: corev1.LocalObjectReference{ + Name: "etcd-main-1", + }, + Role: druidv1alpha1.EtcdRoleMember, + Status: druidv1alpha1.EtcdMemeberStatusReady, + Reason: "bar reason", + LastTransitionTime: metav1.Now(), + LastUpdateTime: metav1.NewTime(now.Add(-1 * unknownThreshold)), + }, + }, + }, + } + }) + It("should set the affected condition to UNKNOWN", func() { defer test.WithVar(&TimeNow, func() time.Time { return now })() - status := druidv1alpha1.EtcdStatus{ - Members: []druidv1alpha1.EtcdMemberStatus{ - { - Name: "member1", - ID: "1", - Role: druidv1alpha1.EtcdRoleMember, - Status: druidv1alpha1.EtcdMemeberStatusReady, - Reason: "foo reason", - LastTransitionTime: metav1.Now(), - LastUpdateTime: metav1.NewTime(now.Add(-301 * time.Second)), - }, - { - Name: "member2", - ID: "2", - Role: druidv1alpha1.EtcdRoleMember, - Status: druidv1alpha1.EtcdMemeberStatusReady, - Reason: "bar reason", - LastTransitionTime: metav1.Now(), - LastUpdateTime: metav1.NewTime(now.Add(-1 * threshold)), - }, + + check := ReadyCheck(cl, controllersconfig.EtcdCustodianController{ + EtcdMember: controllersconfig.EtcdMemberConfig{ + EtcdMemberUnknownThreshold: unknownThreshold, }, - } + }) - check := ReadyCheck(config.EtcdCustodianController{ - EtcdStaleMemberThreshold: threshold, + cl.EXPECT().Get(ctx, kutil.Key(etcd.Namespace, podName), gomock.AssignableToTypeOf(&corev1.Pod{})).DoAndReturn( + func(_ context.Context, _ client.ObjectKey, pod *corev1.Pod) error { + *pod = corev1.Pod{ + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + }, + } + return nil + }, + ) + + results := check.Check(ctx, etcd) + + Expect(results).To(HaveLen(1)) + Expect(results[0].Status()).To(Equal(druidv1alpha1.EtcdMemeberStatusUnknown)) + Expect(results[0].ID()).To(Equal("1")) + }) + + It("should set the affected condition to UNKNOWN because Pod cannot be received", func() { + defer test.WithVar(&TimeNow, func() time.Time { + return now + })() + + check := ReadyCheck(cl, controllersconfig.EtcdCustodianController{ + EtcdMember: controllersconfig.EtcdMemberConfig{ + EtcdMemberUnknownThreshold: unknownThreshold, + }, }) - results := check.Check(status) + cl.EXPECT().Get(ctx, kutil.Key(etcd.Namespace, podName), gomock.AssignableToTypeOf(&corev1.Pod{})).DoAndReturn( + func(_ context.Context, _ client.ObjectKey, pod *corev1.Pod) error { + return errors.New("foo") + }, + ) + + results := check.Check(ctx, etcd) Expect(results).To(HaveLen(1)) Expect(results[0].Status()).To(Equal(druidv1alpha1.EtcdMemeberStatusUnknown)) Expect(results[0].ID()).To(Equal("1")) }) + + It("should set the affected condition to FAILED because Pod is not running", func() { + defer test.WithVar(&TimeNow, func() time.Time { + return now + })() + + check := ReadyCheck(cl, controllersconfig.EtcdCustodianController{ + EtcdMember: controllersconfig.EtcdMemberConfig{ + EtcdMemberUnknownThreshold: unknownThreshold, + }, + }) + + cl.EXPECT().Get(ctx, kutil.Key(etcd.Namespace, podName), gomock.AssignableToTypeOf(&corev1.Pod{})).DoAndReturn( + func(_ context.Context, _ client.ObjectKey, pod *corev1.Pod) error { + *pod = corev1.Pod{ + Status: corev1.PodStatus{ + Phase: corev1.PodFailed, + }, + } + return nil + }, + ) + + results := check.Check(ctx, etcd) + + Expect(results).To(HaveLen(1)) + Expect(results[0].Status()).To(Equal(druidv1alpha1.EtcdMemeberStatusNotReady)) + Expect(results[0].ID()).To(Equal("1")) + }) + + It("should set the affected condition to FAILED because Pod is not found", func() { + defer test.WithVar(&TimeNow, func() time.Time { + return now + })() + + check := ReadyCheck(cl, config.EtcdCustodianController{ + EtcdMember: controllersconfig.EtcdMemberConfig{ + EtcdMemberUnknownThreshold: unknownThreshold, + }, + }) + + cl.EXPECT().Get(ctx, kutil.Key(etcd.Namespace, podName), gomock.AssignableToTypeOf(&corev1.Pod{})).DoAndReturn( + func(_ context.Context, _ client.ObjectKey, pod *corev1.Pod) error { + return apierrors.NewNotFound(corev1.Resource("pods"), podName) + }, + ) + + results := check.Check(ctx, etcd) + + Expect(results).To(HaveLen(1)) + Expect(results[0].Status()).To(Equal(druidv1alpha1.EtcdMemeberStatusNotReady)) + Expect(results[0].ID()).To(Equal("1")) + }) + + It("should set the affected condition to FAILED because status was Unknown for a while", func() { + defer test.WithVar(&TimeNow, func() time.Time { + return now + })() + + latestWithinGrace := now.Add(-1 * notReadyThreshold) + + // member with unknown state within grace period + etcd.Status.Members[0].Status = druidv1alpha1.EtcdMemeberStatusUnknown + etcd.Status.Members[0].LastTransitionTime = metav1.NewTime(latestWithinGrace) + + // member with unknown state outside grace period + etcd.Status.Members[1].Status = druidv1alpha1.EtcdMemeberStatusUnknown + etcd.Status.Members[1].LastTransitionTime = metav1.NewTime(latestWithinGrace.Add(-1 * time.Second)) + + cl.EXPECT().Get(ctx, kutil.Key(etcd.Namespace, podName), gomock.AssignableToTypeOf(&corev1.Pod{})).DoAndReturn( + func(_ context.Context, _ client.ObjectKey, pod *corev1.Pod) error { + return errors.New("foo") + }, + ) + + results := check.Check(ctx, etcd) + + Expect(results).To(HaveLen(2)) + Expect(results[0].Status()).To(Equal(druidv1alpha1.EtcdMemeberStatusUnknown)) + Expect(results[0].ID()).To(Equal("1")) + Expect(results[1].Status()).To(Equal(druidv1alpha1.EtcdMemeberStatusNotReady)) + Expect(results[1].ID()).To(Equal("2")) + }) }) + Context("when condition is not outdated", func() { It("should not return any results", func() { defer test.WithVar(&TimeNow, func() time.Time { return now })() - status := druidv1alpha1.EtcdStatus{ - Members: []druidv1alpha1.EtcdMemberStatus{ - { - Name: "member1", - ID: "1", - Role: druidv1alpha1.EtcdRoleMember, - Status: druidv1alpha1.EtcdMemeberStatusReady, - Reason: "foo reason", - LastTransitionTime: metav1.Now(), - LastUpdateTime: metav1.Now(), - }, - { - Name: "member2", - ID: "2", - Role: druidv1alpha1.EtcdRoleMember, - Status: druidv1alpha1.EtcdMemeberStatusReady, - Reason: "bar reason", - LastTransitionTime: metav1.Now(), - LastUpdateTime: metav1.NewTime(now.Add(-1 * threshold)), + etcd := druidv1alpha1.Etcd{ + Status: druidv1alpha1.EtcdStatus{ + Members: []druidv1alpha1.EtcdMemberStatus{ + { + Name: "member1", + ID: "1", + PodRef: corev1.LocalObjectReference{ + Name: "etcd-main-0", + }, + Role: druidv1alpha1.EtcdRoleMember, + Status: druidv1alpha1.EtcdMemeberStatusReady, + Reason: "foo reason", + LastTransitionTime: metav1.Now(), + LastUpdateTime: metav1.Now(), + }, + { + Name: "member2", + ID: "2", + PodRef: corev1.LocalObjectReference{ + Name: "etcd-main-1", + }, + Role: druidv1alpha1.EtcdRoleMember, + Status: druidv1alpha1.EtcdMemeberStatusReady, + Reason: "bar reason", + LastTransitionTime: metav1.Now(), + LastUpdateTime: metav1.NewTime(now.Add(-1 * unknownThreshold)), + }, }, }, } - results := check.Check(status) + results := check.Check(ctx, etcd) Expect(results).To(BeEmpty()) }) diff --git a/pkg/health/etcdmember/types.go b/pkg/health/etcdmember/types.go index 336afae10..c4d33abfd 100644 --- a/pkg/health/etcdmember/types.go +++ b/pkg/health/etcdmember/types.go @@ -14,11 +14,15 @@ package etcdmember -import druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" +import ( + "context" + + druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" +) // Checker is an interface to check the members of a etcd cluster. type Checker interface { - Check(status druidv1alpha1.EtcdStatus) []Result + Check(ctx context.Context, etcd druidv1alpha1.Etcd) []Result } type Result interface { diff --git a/pkg/health/status/check.go b/pkg/health/status/check.go index 3ace2b4b1..914c5aede 100644 --- a/pkg/health/status/check.go +++ b/pkg/health/status/check.go @@ -20,10 +20,10 @@ import ( "time" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - "github.com/gardener/etcd-druid/controllers/config" + "sigs.k8s.io/controller-runtime/pkg/client" druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" + controllersconfig "github.com/gardener/etcd-druid/controllers/config" "github.com/gardener/etcd-druid/pkg/health/condition" "github.com/gardener/etcd-druid/pkg/health/etcdmember" ) @@ -32,7 +32,7 @@ import ( type ConditionCheckFn func() condition.Checker // EtcdMemberCheckFn is a type alias for a function which returns an implementation of `Check`. -type EtcdMemberCheckFn func(config.EtcdCustodianController) etcdmember.Checker +type EtcdMemberCheckFn func(client.Client, controllersconfig.EtcdCustodianController) etcdmember.Checker // TimeNow is the function used to get the current time. var TimeNow = time.Now @@ -54,7 +54,8 @@ var ( ) type checker struct { - config config.EtcdCustodianController + cl client.Client + config controllersconfig.EtcdCustodianController conditionCheckFns []ConditionCheckFn conditionBuilderFn func() condition.Builder etcdMemberCheckFns []EtcdMemberCheckFn @@ -62,14 +63,14 @@ type checker struct { } // Check executes the status checks and mutates the passed status object with the corresponding results. -func (c *checker) Check(ctx context.Context, status *druidv1alpha1.EtcdStatus) error { +func (c *checker) Check(ctx context.Context, etcd *druidv1alpha1.Etcd) error { // First execute the etcd member checks for the status. - if err := c.executeEtcdMemberChecks(status); err != nil { + if err := c.executeEtcdMemberChecks(ctx, etcd); err != nil { return err } // Execute condition checks after the etcd member checks because we need their result here. - if err := c.executeConditionChecks(status); err != nil { + if err := c.executeConditionChecks(&etcd.Status); err != nil { return err } return nil @@ -115,27 +116,28 @@ func (c *checker) executeConditionChecks(status *druidv1alpha1.EtcdStatus) error // executeEtcdMemberChecks runs all registered etcd member checks **sequentially**. // The result of a check is passed via the `status` sub-resources to the next check. -func (c *checker) executeEtcdMemberChecks(status *druidv1alpha1.EtcdStatus) error { +func (c *checker) executeEtcdMemberChecks(ctx context.Context, etcd *druidv1alpha1.Etcd) error { // Run etcd member checks sequentially as most of them act on multiple elements. for _, newCheck := range c.etcdMemberCheckFns { - results := newCheck(c.config).Check(*status) + results := newCheck(c.cl, c.config).Check(ctx, *etcd) // Build and assign the results after each check, so that the next check // can act on the latest results. memberStatuses := c.etcdMemberBuilderFn(). WithNowFunc(func() metav1.Time { return metav1.NewTime(TimeNow()) }). - WithOldMembers(status.Members). + WithOldMembers(etcd.Status.Members). WithResults(results). Build() - status.Members = memberStatuses + etcd.Status.Members = memberStatuses } return nil } // NewChecker creates a new instance for checking the etcd status. -func NewChecker(config config.EtcdCustodianController) *checker { +func NewChecker(cl client.Client, config controllersconfig.EtcdCustodianController) *checker { return &checker{ + cl: cl, config: config, conditionCheckFns: ConditionChecks, conditionBuilderFn: NewDefaultConditionBuilder, diff --git a/pkg/health/status/check_test.go b/pkg/health/status/check_test.go index 81871b43c..48e49a3ed 100644 --- a/pkg/health/status/check_test.go +++ b/pkg/health/status/check_test.go @@ -18,26 +18,27 @@ import ( "context" "time" - "github.com/gardener/gardener/pkg/utils/test" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" - controllerconfig "github.com/gardener/etcd-druid/controllers/config" "github.com/gardener/etcd-druid/pkg/health/condition" "github.com/gardener/etcd-druid/pkg/health/etcdmember" + "github.com/gardener/gardener/pkg/utils/test" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" . "github.com/onsi/gomega/gstruct" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" + controllersconfig "github.com/gardener/etcd-druid/controllers/config" . "github.com/gardener/etcd-druid/pkg/health/status" ) var _ = Describe("Check", func() { Describe("#Check", func() { It("should correctly execute checks and fill status", func() { - config := controllerconfig.EtcdCustodianController{ - EtcdStaleMemberThreshold: 1 * time.Minute, + config := controllersconfig.EtcdCustodianController{ + EtcdMember: controllersconfig.EtcdMemberConfig{ + EtcdMemberUnknownThreshold: 1 * time.Minute, + }, } timeBefore, _ := time.Parse(time.RFC3339, "2021-06-01T00:00:00Z") timeNow := timeBefore.Add(1 * time.Hour) @@ -100,6 +101,10 @@ var _ = Describe("Check", func() { }, } + etcd := &druidv1alpha1.Etcd{ + Status: status, + } + defer test.WithVar(&ConditionChecks, []ConditionCheckFn{ func() condition.Checker { return createConditionCheck(druidv1alpha1.ConditionTypeReady, druidv1alpha1.ConditionFalse, "FailedConditionCheck", "check failed") @@ -110,18 +115,18 @@ var _ = Describe("Check", func() { })() defer test.WithVar(&EtcdMemberChecks, []EtcdMemberCheckFn{ - func(_ controllerconfig.EtcdCustodianController) etcdmember.Checker { + func(_ client.Client, _ controllersconfig.EtcdCustodianController) etcdmember.Checker { return createEtcdMemberCheck("1", druidv1alpha1.EtcdMemeberStatusUnknown, "Unknown") }, })() defer test.WithVar(&TimeNow, func() time.Time { return timeNow })() - checker := NewChecker(config) + checker := NewChecker(nil, config) - Expect(checker.Check(context.Background(), &status)).To(Succeed()) + Expect(checker.Check(context.Background(), etcd)).To(Succeed()) - Expect(status.Conditions).To(ConsistOf( + Expect(etcd.Status.Conditions).To(ConsistOf( MatchFields(IgnoreExtras, Fields{ "Type": Equal(druidv1alpha1.ConditionTypeReady), "Status": Equal(druidv1alpha1.ConditionFalse), @@ -148,7 +153,7 @@ var _ = Describe("Check", func() { }), )) - Expect(status.Members).To(ConsistOf( + Expect(etcd.Status.Members).To(ConsistOf( MatchFields(IgnoreExtras, Fields{ "ID": Equal("1"), "Name": Equal("Member1"), @@ -246,7 +251,7 @@ type etcdMemberTestChecker struct { result *etcdMemberResult } -func (t *etcdMemberTestChecker) Check(_ druidv1alpha1.EtcdStatus) []etcdmember.Result { +func (t *etcdMemberTestChecker) Check(_ context.Context, _ druidv1alpha1.Etcd) []etcdmember.Result { return []etcdmember.Result{ t.result, } diff --git a/vendor/github.com/gardener/etcd-druid/api/v1alpha1/etcd_types.go b/vendor/github.com/gardener/etcd-druid/api/v1alpha1/etcd_types.go index 8d8a76982..b109a1423 100644 --- a/vendor/github.com/gardener/etcd-druid/api/v1alpha1/etcd_types.go +++ b/vendor/github.com/gardener/etcd-druid/api/v1alpha1/etcd_types.go @@ -297,6 +297,8 @@ type EtcdMemberStatus struct { Name string `json:"name"` // ID is the ID of the etcd member. ID string `json:"id"` + // PodRef is the reference to the Pod which hosts the etcd member. + PodRef corev1.LocalObjectReference `json:"podRef"` // Role is the role in the etcd cluster, either `Member` or `Learner`. Role EtcdRole `json:"role"` // Status of the condition, one of True, False, Unknown. diff --git a/vendor/github.com/gardener/etcd-druid/api/v1alpha1/zz_generated.deepcopy.go b/vendor/github.com/gardener/etcd-druid/api/v1alpha1/zz_generated.deepcopy.go index c7604b824..24f8a33b5 100644 --- a/vendor/github.com/gardener/etcd-druid/api/v1alpha1/zz_generated.deepcopy.go +++ b/vendor/github.com/gardener/etcd-druid/api/v1alpha1/zz_generated.deepcopy.go @@ -268,6 +268,7 @@ func (in *EtcdList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *EtcdMemberStatus) DeepCopyInto(out *EtcdMemberStatus) { *out = *in + out.PodRef = in.PodRef in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) }