Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce new Etcd status condition AllMembersUpdated #987

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions api/v1alpha1/conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,12 @@ const (
ConditionUnknown ConditionStatus = "Unknown"
// ConditionProgressing means the condition was seen true, failed but stayed within a predefined failure threshold.
// In the future, we could add other intermediate conditions, e.g. ConditionDegraded.
// Deprecated: Will be removed in the future since druid conditions will be replaced by metav1.Condition
// which has only three status options: True, False, Unknown.
ConditionProgressing ConditionStatus = "Progressing"
// ConditionCheckError is a constant for a reason in condition.
// Deprecated: Will be removed in the future since druid conditions will be replaced by metav1.Condition
// which has only three status options: True, False, Unknown.
ConditionCheckError ConditionStatus = "ConditionCheckError"
)

Expand Down
3 changes: 3 additions & 0 deletions api/v1alpha1/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,9 @@ const (
ConditionTypeReady ConditionType = "Ready"
// ConditionTypeAllMembersReady is a constant for a condition type indicating that all members of the etcd cluster are ready.
ConditionTypeAllMembersReady ConditionType = "AllMembersReady"
// ConditionTypeAllMembersUpdated is a constant for a condition type indicating that all members
// of the etcd cluster have been updated with the desired spec changes.
ConditionTypeAllMembersUpdated ConditionType = "AllMembersUpdated"
// ConditionTypeBackupReady is a constant for a condition type indicating that the etcd backup is ready.
ConditionTypeBackupReady ConditionType = "BackupReady"
// ConditionTypeDataVolumesReady is a constant for a condition type indicating that the etcd data volumes are ready.
Expand Down
5 changes: 3 additions & 2 deletions docs/api-reference/etcd-druid-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,8 @@ _Appears in:_
| `True` | ConditionTrue means a resource is in the condition.<br /> |
| `False` | ConditionFalse means a resource is not in the condition.<br /> |
| `Unknown` | ConditionUnknown means Gardener can't decide if a resource is in the condition or not.<br /> |
| `Progressing` | ConditionProgressing means the condition was seen true, failed but stayed within a predefined failure threshold.<br />In the future, we could add other intermediate conditions, e.g. ConditionDegraded.<br /> |
| `ConditionCheckError` | ConditionCheckError is a constant for a reason in condition.<br /> |
| `Progressing` | ConditionProgressing means the condition was seen true, failed but stayed within a predefined failure threshold.<br />In the future, we could add other intermediate conditions, e.g. ConditionDegraded.<br />Deprecated: Will be removed in the future since druid conditions will be replaced by metav1.Condition<br />which has only three status options: True, False, Unknown.<br /> |
| `ConditionCheckError` | ConditionCheckError is a constant for a reason in condition.<br />Deprecated: Will be removed in the future since druid conditions will be replaced by metav1.Condition<br />which has only three status options: True, False, Unknown.<br /> |


#### ConditionType
Expand All @@ -175,6 +175,7 @@ _Appears in:_
| --- | --- |
| `Ready` | ConditionTypeReady is a constant for a condition type indicating that the etcd cluster is ready.<br /> |
| `AllMembersReady` | ConditionTypeAllMembersReady is a constant for a condition type indicating that all members of the etcd cluster are ready.<br /> |
| `AllMembersUpdated` | ConditionTypeAllMembersUpdated is a constant for a condition type indicating that all members<br />of the etcd cluster have been updated with the desired spec changes.<br /> |
| `BackupReady` | ConditionTypeBackupReady is a constant for a condition type indicating that the etcd backup is ready.<br /> |
| `DataVolumesReady` | ConditionTypeDataVolumesReady is a constant for a condition type indicating that the etcd data volumes are ready.<br /> |
| `Succeeded` | EtcdCopyBackupsTaskSucceeded is a condition type indicating that a EtcdCopyBackupsTask has succeeded.<br /> |
Expand Down
8 changes: 5 additions & 3 deletions internal/health/condition/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@ import (

// skipMergeConditions contain the list of conditions we don't want to add to the list if not recalculated
var skipMergeConditions = map[druidv1alpha1.ConditionType]struct{}{
druidv1alpha1.ConditionTypeReady: {},
druidv1alpha1.ConditionTypeAllMembersReady: {},
druidv1alpha1.ConditionTypeBackupReady: {},
druidv1alpha1.ConditionTypeReady: {},
druidv1alpha1.ConditionTypeAllMembersReady: {},
druidv1alpha1.ConditionTypeAllMembersUpdated: {},
druidv1alpha1.ConditionTypeBackupReady: {},
druidv1alpha1.ConditionTypeDataVolumesReady: {},
}

// Builder is an interface for building conditions.
Expand Down
59 changes: 59 additions & 0 deletions internal/health/condition/check_all_members_updated.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Gardener contributors
//
// SPDX-License-Identifier: Apache-2.0

package condition

import (
"context"
"fmt"

druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1"
"github.com/gardener/etcd-druid/internal/utils"

"sigs.k8s.io/controller-runtime/pkg/client"
)

type allMembersUpdated struct {
cl client.Client
}

func (a *allMembersUpdated) Check(ctx context.Context, etcd druidv1alpha1.Etcd) Result {
res := &result{
conType: druidv1alpha1.ConditionTypeAllMembersUpdated,
}

sts, err := utils.GetStatefulSet(ctx, a.cl, &etcd)
if err != nil {
res.status = druidv1alpha1.ConditionUnknown
res.reason = "UnableToFetchStatefulSet"
res.message = fmt.Sprintf("Unable to fetch StatefulSet for etcd: %s", err.Error())
return res
} else if sts == nil {
res.status = druidv1alpha1.ConditionUnknown
res.reason = "StatefulSetNotFound"
res.message = fmt.Sprintf("StatefulSet %s not found for etcd", etcd.Name)
return res
}

if sts.Status.ObservedGeneration == sts.Generation &&
sts.Status.UpdatedReplicas == *sts.Spec.Replicas &&
sts.Status.UpdateRevision == sts.Status.CurrentRevision {
res.status = druidv1alpha1.ConditionTrue
res.reason = "AllMembersUpdated"
res.message = "All members reflect latest desired spec"
return res
}

res.status = druidv1alpha1.ConditionFalse
res.reason = "NotAllMembersUpdated"
res.message = "At least one member is not yet updated"
return res
}

// AllMembersUpdatedCheck returns a check for the "AllMembersUpdated" condition.
func AllMembersUpdatedCheck(cl client.Client) Checker {
return &allMembersUpdated{
cl: cl,
}
}
148 changes: 148 additions & 0 deletions internal/health/condition/check_all_members_updated_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
// SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Gardener contributors
//
// SPDX-License-Identifier: Apache-2.0

package condition_test

import (
"context"
"k8s.io/utils/pointer"

druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1"
"github.com/gardener/etcd-druid/internal/health/condition"
testutils "github.com/gardener/etcd-druid/test/utils"

appsv1 "k8s.io/api/apps/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

var _ = Describe("AllMembersUpdatedCheck", func() {
Describe("#Check", func() {
var (
notFoundErr = apierrors.StatusError{
ErrStatus: metav1.Status{
Reason: metav1.StatusReasonNotFound,
},
}
internalErr = apierrors.StatusError{
ErrStatus: metav1.Status{
Reason: metav1.StatusReasonInternalError,
},
}

etcd = druidv1alpha1.Etcd{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
Namespace: "default",
},
Spec: druidv1alpha1.EtcdSpec{},
}
sts = &appsv1.StatefulSet{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
Namespace: "default",
OwnerReferences: []metav1.OwnerReference{druidv1alpha1.GetAsOwnerReference(etcd.ObjectMeta)},
},
Spec: appsv1.StatefulSetSpec{},
Status: appsv1.StatefulSetStatus{},
}
)

Context("when error in fetching statefulset", func() {
It("should return that the condition is unknown", func() {
cl := testutils.CreateTestFakeClientForObjects(&internalErr, nil, nil, nil, []client.Object{sts}, client.ObjectKeyFromObject(sts))
check := condition.AllMembersUpdatedCheck(cl)
result := check.Check(context.Background(), etcd)

Expect(result.ConditionType()).To(Equal(druidv1alpha1.ConditionTypeAllMembersUpdated))
Expect(result.Status()).To(Equal(druidv1alpha1.ConditionUnknown))
Expect(result.Reason()).To(Equal("UnableToFetchStatefulSet"))
})
})

Context("when statefulset not found", func() {
It("should return that the condition is unknown", func() {
cl := testutils.CreateTestFakeClientForObjects(&notFoundErr, nil, nil, nil, nil, client.ObjectKeyFromObject(sts))
check := condition.AllMembersUpdatedCheck(cl)
result := check.Check(context.Background(), etcd)

Expect(result.ConditionType()).To(Equal(druidv1alpha1.ConditionTypeAllMembersUpdated))
Expect(result.Status()).To(Equal(druidv1alpha1.ConditionUnknown))
Expect(result.Reason()).To(Equal("StatefulSetNotFound"))
})
})

Context("when statefulset is found", func() {
Context("when sts observed generation does not match sts generation", func() {
It("should return that the condition is false", func() {
sts.Status.ObservedGeneration = 1
sts.Generation = 2

cl := testutils.CreateTestFakeClientForObjects(nil, nil, nil, nil, []client.Object{sts}, client.ObjectKeyFromObject(sts))
check := condition.AllMembersUpdatedCheck(cl)
result := check.Check(context.Background(), etcd)

Expect(result.ConditionType()).To(Equal(druidv1alpha1.ConditionTypeAllMembersUpdated))
Expect(result.Status()).To(Equal(druidv1alpha1.ConditionFalse))
Expect(result.Reason()).To(Equal("NotAllMembersUpdated"))
})
})

Context("when sts updated replicas does not match spec replicas", func() {
It("should return that the condition is false", func() {
sts.Status.UpdatedReplicas = 1
sts.Spec.Replicas = pointer.Int32(2)

cl := testutils.CreateTestFakeClientForObjects(nil, nil, nil, nil, []client.Object{sts}, client.ObjectKeyFromObject(sts))
check := condition.AllMembersUpdatedCheck(cl)
result := check.Check(context.Background(), etcd)

Expect(result.ConditionType()).To(Equal(druidv1alpha1.ConditionTypeAllMembersUpdated))
Expect(result.Status()).To(Equal(druidv1alpha1.ConditionFalse))
Expect(result.Reason()).To(Equal("NotAllMembersUpdated"))
})
})

Context("when sts update revision does not match sts current revision", func() {
It("should return that the condition is false", func() {
sts.Spec.Replicas = pointer.Int32(2)
sts.Status.UpdatedReplicas = 2
sts.Status.UpdateRevision = "123456"
sts.Status.CurrentRevision = "654321"

cl := testutils.CreateTestFakeClientForObjects(nil, nil, nil, nil, []client.Object{sts}, client.ObjectKeyFromObject(sts))
check := condition.AllMembersUpdatedCheck(cl)
result := check.Check(context.Background(), etcd)

Expect(result.ConditionType()).To(Equal(druidv1alpha1.ConditionTypeAllMembersUpdated))
Expect(result.Status()).To(Equal(druidv1alpha1.ConditionFalse))
Expect(result.Reason()).To(Equal("NotAllMembersUpdated"))
})
})

Context("when all sts replicas are updated", func() {
It("should return that the condition is true", func() {
sts.Status.ObservedGeneration = 1
sts.Generation = 1
sts.Status.UpdatedReplicas = 1
sts.Spec.Replicas = pointer.Int32(1)
sts.Status.UpdateRevision = "123456"
sts.Status.CurrentRevision = "123456"

cl := testutils.CreateTestFakeClientForObjects(nil, nil, nil, nil, []client.Object{sts}, client.ObjectKeyFromObject(sts))
check := condition.AllMembersUpdatedCheck(cl)
result := check.Check(context.Background(), etcd)

Expect(result.ConditionType()).To(Equal(druidv1alpha1.ConditionTypeAllMembersUpdated))
Expect(result.Status()).To(Equal(druidv1alpha1.ConditionTrue))
Expect(result.Reason()).To(Equal("AllMembersUpdated"))
})
})
})
})
})
13 changes: 2 additions & 11 deletions internal/health/condition/check_backup_ready.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ package condition

import (
"context"
"fmt"
"time"

druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1"
Expand Down Expand Up @@ -54,8 +53,8 @@ func (a *backupReadyCheck) Check(ctx context.Context, etcd druidv1alpha1.Etcd) R
fullSnapshotInterval = 24 * time.Hour
deltaSnapLease = &coordinationv1.Lease{}
)
fullSnapErr = a.cl.Get(ctx, types.NamespacedName{Name: getFullSnapLeaseName(&etcd), Namespace: etcd.ObjectMeta.Namespace}, fullSnapLease)
incrSnapErr = a.cl.Get(ctx, types.NamespacedName{Name: getDeltaSnapLeaseName(&etcd), Namespace: etcd.ObjectMeta.Namespace}, deltaSnapLease)
fullSnapErr = a.cl.Get(ctx, types.NamespacedName{Name: druidv1alpha1.GetFullSnapshotLeaseName(etcd.ObjectMeta), Namespace: etcd.ObjectMeta.Namespace}, fullSnapLease)
incrSnapErr = a.cl.Get(ctx, types.NamespacedName{Name: druidv1alpha1.GetDeltaSnapshotLeaseName(etcd.ObjectMeta), Namespace: etcd.ObjectMeta.Namespace}, deltaSnapLease)

// Compute the full snapshot interval if full snapshot schedule is set
if etcd.Spec.Backup.FullSnapshotSchedule != nil {
Expand Down Expand Up @@ -125,14 +124,6 @@ func (a *backupReadyCheck) Check(ctx context.Context, etcd druidv1alpha1.Etcd) R
return result
}

func getDeltaSnapLeaseName(etcd *druidv1alpha1.Etcd) string {
return fmt.Sprintf("%s-delta-snap", string(etcd.ObjectMeta.Name))
}

func getFullSnapLeaseName(etcd *druidv1alpha1.Etcd) string {
return fmt.Sprintf("%s-full-snap", string(etcd.ObjectMeta.Name))
}

// BackupReadyCheck returns a check for the "BackupReady" condition.
func BackupReadyCheck(cl client.Client) Checker {
return &backupReadyCheck{
Expand Down
10 changes: 5 additions & 5 deletions internal/health/condition/check_data_volumes_ready.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ func (d *dataVolumesReady) Check(ctx context.Context, etcd druidv1alpha1.Etcd) R
}

sts, err := utils.GetStatefulSet(ctx, d.cl, &etcd)
if sts == nil && err == nil {
res.reason = "StatefulSetNotFound"
res.message = fmt.Sprintf("StatefulSet %s not found for etcd", etcd.Name)
return res
} else if err != nil {
if err != nil {
res.reason = "UnableToFetchStatefulSet"
res.message = fmt.Sprintf("Unable to fetch StatefulSet for etcd: %s", err.Error())
return res
} else if sts == nil {
res.reason = "StatefulSetNotFound"
res.message = fmt.Sprintf("StatefulSet %s not found for etcd", etcd.Name)
return res
}

pvcEvents, err := utils.FetchPVCWarningMessagesForStatefulSet(ctx, d.cl, sts)
Expand Down
3 changes: 2 additions & 1 deletion internal/health/status/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ var (
NewDefaultEtcdMemberBuilder = etcdmember.NewBuilder
// ConditionChecks Checks are the registered condition checks.
ConditionChecks = []ConditionCheckFn{
condition.AllMembersReadyCheck,
condition.ReadyCheck,
condition.AllMembersReadyCheck,
condition.AllMembersUpdatedCheck,
condition.BackupReadyCheck,
condition.DataVolumesReadyCheck,
}
Expand Down
19 changes: 19 additions & 0 deletions internal/health/status/check_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ var _ = Describe("Check", func() {
Reason: "bar reason",
Message: "bar message",
},
{
Type: druidv1alpha1.ConditionTypeAllMembersUpdated,
Status: druidv1alpha1.ConditionUnknown,
LastTransitionTime: metav1.NewTime(timeBefore),
LastUpdateTime: metav1.NewTime(timeBefore),
Reason: "foobar reason",
Message: "foobar message",
},
{
Type: druidv1alpha1.ConditionTypeBackupReady,
Status: druidv1alpha1.ConditionUnknown,
Expand Down Expand Up @@ -105,6 +113,9 @@ var _ = Describe("Check", func() {
func(client.Client) condition.Checker {
return createConditionCheck(druidv1alpha1.ConditionTypeAllMembersReady, druidv1alpha1.ConditionTrue, "bar reason", "bar message")
},
func(client.Client) condition.Checker {
return createConditionCheck(druidv1alpha1.ConditionTypeAllMembersUpdated, druidv1alpha1.ConditionUnknown, "foobar reason", "foobar message")
},
func(client.Client) condition.Checker {
return createConditionCheck(druidv1alpha1.ConditionTypeBackupReady, druidv1alpha1.ConditionUnknown, "foobar reason", "foobar message")
},
Expand Down Expand Up @@ -147,6 +158,14 @@ var _ = Describe("Check", func() {
"Reason": Equal("bar reason"),
"Message": Equal("bar message"),
}),
MatchFields(IgnoreExtras, Fields{
"Type": Equal(druidv1alpha1.ConditionTypeAllMembersUpdated),
"Status": Equal(druidv1alpha1.ConditionUnknown),
"LastTransitionTime": Equal(metav1.NewTime(timeBefore)),
"LastUpdateTime": Equal(metav1.NewTime(timeNow)),
"Reason": Equal("foobar reason"),
"Message": Equal("foobar message"),
}),
MatchFields(IgnoreExtras, Fields{
"Type": Equal(druidv1alpha1.ConditionTypeBackupReady),
"Status": Equal(druidv1alpha1.ConditionUnknown),
Expand Down
2 changes: 1 addition & 1 deletion internal/webhook/etcdcomponents/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ func (h *Handler) handleUpdate(req admission.Request, etcd *druidv1alpha1.Etcd,
}
}

return admission.Denied(fmt.Sprintf("changes from service account %s are disallowed at the moment", req.UserInfo.Username))
return admission.Denied(fmt.Sprintf("changes from service account %s are disallowed at the moment. Please consider disabling component protection by setting annotation `%s` on the parent Etcd resource", req.UserInfo.Username, druidv1alpha1.DisableEtcdComponentProtectionAnnotation))
}

func (h *Handler) handleDelete(req admission.Request, etcd *druidv1alpha1.Etcd) admission.Response {
Expand Down
Loading