Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix to allow etcd-druid to patch STS when there is only a change of mount-path even if all pods are not up and running #915

Merged
merged 8 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions internal/common/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,14 @@ const (
VolumeNameEtcdClientTLS = "etcd-client-tls"
// VolumeNameEtcdPeerCA is the name of the volume that contains the CA certificate bundle and CA certificate key used to sign certificates for peer communication.
VolumeNameEtcdPeerCA = "etcd-peer-ca"
// OldVolumeNameEtcdPeerCA is the old name of the volume that contains the CA certificate bundle and CA certificate key used to sign certificates for peer communication.
// TODO: (i062009) remove this when all clusters have started to use the new volume names.
OldVolumeNameEtcdPeerCA = "peer-url-ca-etcd"
// VolumeNameEtcdPeerServerTLS is the name of the volume that contains the server certificate-key pair used to set up the peer server.
VolumeNameEtcdPeerServerTLS = "etcd-peer-server-tls"
// OldVolumeNameEtcdPeerServerTLS is the old name of the volume that contains the server certificate-key pair used to set up the peer server.
// TODO: (i062009) remove this when all clusters have started to use the new volume names.
OldVolumeNameEtcdPeerServerTLS = "peer-url-etcd-server-tls"
// VolumeNameBackupRestoreCA is the name of the volume that contains the CA certificate bundle and CA certificate key used to sign certificates for backup-restore communication.
VolumeNameBackupRestoreCA = "backup-restore-ca"
// VolumeNameBackupRestoreServerTLS is the name of the volume that contains the server certificate-key pair used to set up the backup-restore server.
Expand Down
4 changes: 3 additions & 1 deletion internal/component/clientservice/clientservice.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,9 @@ func buildResource(etcd *druidv1alpha1.Etcd, svc *corev1.Service) {
svc.OwnerReferences = []metav1.OwnerReference{druidv1alpha1.GetAsOwnerReference(etcd.ObjectMeta)}
svc.Spec.Type = corev1.ServiceTypeClusterIP
svc.Spec.SessionAffinity = corev1.ServiceAffinityNone
svc.Spec.Selector = druidv1alpha1.GetDefaultLabels(etcd.ObjectMeta)
// Client service should only target StatefulSet pods. Default labels are going to be present on anything that is managed by etcd-druid and started for an etcd cluster.
// Therefore, only using default labels as label selector can cause issues as we have already seen in https://github.com/gardener/etcd-druid/issues/914
svc.Spec.Selector = utils.MergeMaps(druidv1alpha1.GetDefaultLabels(etcd.ObjectMeta), map[string]string{druidv1alpha1.LabelComponentKey: common.ComponentNameStatefulSet})
ishan16696 marked this conversation as resolved.
Show resolved Hide resolved
svc.Spec.Ports = getPorts(etcd)
}

Expand Down
3 changes: 2 additions & 1 deletion internal/component/clientservice/clientservice_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ func matchClientService(g *WithT, etcd *druidv1alpha1.Etcd, actualSvc corev1.Ser
expectedAnnotations = etcd.Spec.Etcd.ClientService.Annotations
expectedLabels = utils.MergeMaps(etcd.Spec.Etcd.ClientService.Labels, druidv1alpha1.GetDefaultLabels(etcdObjMeta))
}
expectedLabelSelector := utils.MergeMaps(druidv1alpha1.GetDefaultLabels(etcdObjMeta), map[string]string{druidv1alpha1.LabelComponentKey: common.ComponentNameStatefulSet})

g.Expect(actualSvc).To(MatchFields(IgnoreExtras, Fields{
"ObjectMeta": MatchFields(IgnoreExtras, Fields{
Expand All @@ -284,7 +285,7 @@ func matchClientService(g *WithT, etcd *druidv1alpha1.Etcd, actualSvc corev1.Ser
"Spec": MatchFields(IgnoreExtras, Fields{
"Type": Equal(corev1.ServiceTypeClusterIP),
"SessionAffinity": Equal(corev1.ServiceAffinityNone),
"Selector": Equal(druidv1alpha1.GetDefaultLabels(etcdObjMeta)),
"Selector": Equal(expectedLabelSelector),
"Ports": ConsistOf(
Equal(corev1.ServicePort{
Name: "client",
Expand Down
4 changes: 3 additions & 1 deletion internal/component/peerservice/peerservice.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,9 @@ func buildResource(etcd *druidv1alpha1.Etcd, svc *corev1.Service) {
svc.Spec.Type = corev1.ServiceTypeClusterIP
svc.Spec.ClusterIP = corev1.ClusterIPNone
svc.Spec.SessionAffinity = corev1.ServiceAffinityNone
svc.Spec.Selector = druidv1alpha1.GetDefaultLabels(etcd.ObjectMeta)
// Peer service should only target StatefulSet pods. Default labels are going to be present on anything that is managed by etcd-druid and started for an etcd cluster.
// Therefore, only using default labels as label selector can cause issues as we have already seen in https://github.com/gardener/etcd-druid/issues/914
svc.Spec.Selector = utils.MergeMaps(druidv1alpha1.GetDefaultLabels(etcd.ObjectMeta), map[string]string{druidv1alpha1.LabelComponentKey: common.ComponentNameStatefulSet})
ishan16696 marked this conversation as resolved.
Show resolved Hide resolved
svc.Spec.PublishNotReadyAddresses = true
svc.Spec.Ports = getPorts(etcd)
}
Expand Down
4 changes: 3 additions & 1 deletion internal/component/peerservice/peerservice_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/gardener/etcd-druid/internal/common"
"github.com/gardener/etcd-druid/internal/component"
druiderr "github.com/gardener/etcd-druid/internal/errors"
"github.com/gardener/etcd-druid/internal/utils"
testutils "github.com/gardener/etcd-druid/test/utils"

"github.com/go-logr/logr"
Expand Down Expand Up @@ -246,6 +247,7 @@ func newPeerService(etcd *druidv1alpha1.Etcd) *corev1.Service {
func matchPeerService(g *WithT, etcd *druidv1alpha1.Etcd, actualSvc corev1.Service) {
peerPort := ptr.Deref(etcd.Spec.Etcd.ServerPort, common.DefaultPortEtcdPeer)
etcdObjMeta := etcd.ObjectMeta
expectedLabelSelector := utils.MergeMaps(druidv1alpha1.GetDefaultLabels(etcdObjMeta), map[string]string{druidv1alpha1.LabelComponentKey: common.ComponentNameStatefulSet})
g.Expect(actualSvc).To(MatchFields(IgnoreExtras, Fields{
"ObjectMeta": MatchFields(IgnoreExtras, Fields{
"Name": Equal(druidv1alpha1.GetPeerServiceName(etcdObjMeta)),
Expand All @@ -257,7 +259,7 @@ func matchPeerService(g *WithT, etcd *druidv1alpha1.Etcd, actualSvc corev1.Servi
"Type": Equal(corev1.ServiceTypeClusterIP),
"ClusterIP": Equal(corev1.ClusterIPNone),
"SessionAffinity": Equal(corev1.ServiceAffinityNone),
"Selector": Equal(druidv1alpha1.GetDefaultLabels(etcdObjMeta)),
"Selector": Equal(expectedLabelSelector),
"Ports": ConsistOf(
Equal(corev1.ServicePort{
Name: "peer",
Expand Down
26 changes: 16 additions & 10 deletions internal/component/statefulset/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -708,8 +708,22 @@ func getEtcdContainerSecretVolumeMounts(etcd *druidv1alpha1.Etcd) []corev1.Volum
},
)
}
if etcd.Spec.Etcd.PeerUrlTLS != nil {
secretVolumeMounts = append(secretVolumeMounts, getEtcdContainerPeerVolumeMounts(etcd)...)
if etcd.Spec.Backup.TLS != nil {
secretVolumeMounts = append(secretVolumeMounts,
corev1.VolumeMount{
Name: common.VolumeNameBackupRestoreCA,
MountPath: common.VolumeMountPathBackupRestoreCA,
},
)
}
return secretVolumeMounts
}

func getEtcdContainerPeerVolumeMounts(etcd *druidv1alpha1.Etcd) []corev1.VolumeMount {
peerTLSVolMounts := make([]corev1.VolumeMount, 0, 2)
if etcd.Spec.Etcd.PeerUrlTLS != nil {
peerTLSVolMounts = append(peerTLSVolMounts,
corev1.VolumeMount{
Name: common.VolumeNameEtcdPeerCA,
MountPath: common.VolumeMountPathEtcdPeerCA,
Expand All @@ -720,15 +734,7 @@ func getEtcdContainerSecretVolumeMounts(etcd *druidv1alpha1.Etcd) []corev1.Volum
},
)
}
if etcd.Spec.Backup.TLS != nil {
secretVolumeMounts = append(secretVolumeMounts,
corev1.VolumeMount{
Name: common.VolumeNameBackupRestoreCA,
MountPath: common.VolumeMountPathBackupRestoreCA,
},
)
}
return secretVolumeMounts
return peerTLSVolMounts
}

// getPodVolumes gets volumes that needs to be mounted onto the etcd StatefulSet pods
Expand Down
13 changes: 10 additions & 3 deletions internal/component/statefulset/statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ func (r _resource) handleTLSChanges(ctx component.OperatorContext, etcd *druidv1
// and the cluster will be stuck in a bad state. Updating peer URL is a cluster wide operation as all members will need to know that a peer TLS has changed.
// If not all members are ready then rolling-update of StatefulSet can potentially cause a healthy node to be restarted causing loss of quorum from which
// there will not be an automatic recovery.
if shouldRequeueForMultiNodeEtcdIfPodsNotReady(existingSts) {
if !r.hasTLSEnablementForPeerURLReflectedOnSTS(etcd, existingSts) && shouldRequeueForMultiNodeEtcdIfPodsNotReady(existingSts) {
return druiderr.New(
druiderr.ErrRequeueAfter,
component.OperationSync,
Expand Down Expand Up @@ -309,10 +309,17 @@ func shouldRequeueForMultiNodeEtcdIfPodsNotReady(sts *appsv1.StatefulSet) bool {
sts.Status.ReadyReplicas < *sts.Spec.Replicas
}

func isStatefulSetTLSConfigInSync(etcd *druidv1alpha1.Etcd, sts *appsv1.StatefulSet) bool {
func (r _resource) hasTLSEnablementForPeerURLReflectedOnSTS(etcd *druidv1alpha1.Etcd, existingSts *appsv1.StatefulSet) bool {
newEtcdWrapperPeerTLSVolMounts := getEtcdContainerPeerVolumeMounts(etcd)
containerPeerTLSVolMounts := utils.GetEtcdContainerPeerTLSVolumeMounts(existingSts)
ishan16696 marked this conversation as resolved.
Show resolved Hide resolved
r.logger.Info("Checking if peer URL TLS enablement is reflected on StatefulSet", "len(newEtcdWrapperPeerTLSVolMounts)", len(newEtcdWrapperPeerTLSVolMounts), "len(containerPeerTLSVolMounts)", len(containerPeerTLSVolMounts))
return len(containerPeerTLSVolMounts) == len(newEtcdWrapperPeerTLSVolMounts)
}

func isStatefulSetTLSConfigInSync(etcd *druidv1alpha1.Etcd, existingSts *appsv1.StatefulSet) bool {
newEtcdbrTLSVolMounts := getBackupRestoreContainerSecretVolumeMounts(etcd)
newEtcdWrapperTLSVolMounts := getEtcdContainerSecretVolumeMounts(etcd)
containerTLSVolMounts := utils.GetStatefulSetContainerTLSVolumeMounts(sts)
containerTLSVolMounts := utils.GetStatefulSetContainerTLSVolumeMounts(existingSts)
return !hasTLSVolumeMountsChanged(containerTLSVolMounts[common.ContainerNameEtcd], newEtcdWrapperTLSVolMounts) &&
!hasTLSVolumeMountsChanged(containerTLSVolMounts[common.ContainerNameEtcdBackupRestore], newEtcdbrTLSVolMounts)
}
Expand Down
38 changes: 36 additions & 2 deletions internal/utils/statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,47 @@ func FetchPVCWarningMessagesForStatefulSet(ctx context.Context, cl client.Client
}

var (
etcdTLSVolumeMountNames = sets.New[string](common.VolumeNameEtcdCA, common.VolumeNameEtcdServerTLS, common.VolumeNameEtcdClientTLS, common.VolumeNameEtcdPeerCA, common.VolumeNameEtcdPeerServerTLS, common.VolumeNameBackupRestoreCA)
etcdbrTLSVolumeMountNames = sets.New[string](common.VolumeNameBackupRestoreServerTLS, common.VolumeNameEtcdCA, common.VolumeNameEtcdClientTLS)
etcdTLSVolumeMountNames = sets.New[string](common.VolumeNameEtcdCA,
common.VolumeNameEtcdServerTLS,
common.VolumeNameEtcdClientTLS,
common.VolumeNameEtcdPeerCA,
common.VolumeNameEtcdPeerServerTLS,
common.VolumeNameBackupRestoreCA)
possiblePeerTLSVolumeMountNames = sets.New[string](common.VolumeNameEtcdPeerCA,
common.OldVolumeNameEtcdPeerCA,
common.VolumeNameEtcdPeerServerTLS,
common.OldVolumeNameEtcdPeerServerTLS)
etcdbrTLSVolumeMountNames = sets.New[string](common.VolumeNameBackupRestoreServerTLS,
common.VolumeNameEtcdCA,
common.VolumeNameEtcdClientTLS)
)

// GetEtcdContainerPeerTLSVolumeMounts returns the volume mounts for the etcd container that are related to peer TLS.
// It will look at both older names (present in version <= v0.22) and new names (present in version >= v0.23) to create the slice.
func GetEtcdContainerPeerTLSVolumeMounts(sts *appsv1.StatefulSet) []corev1.VolumeMount {
volumeMounts := make([]corev1.VolumeMount, 0, 2)
if sts == nil {
return volumeMounts
}
for _, container := range sts.Spec.Template.Spec.Containers {
if container.Name == common.ContainerNameEtcd {
for _, volMount := range container.VolumeMounts {
if possiblePeerTLSVolumeMountNames.Has(volMount.Name) {
volumeMounts = append(volumeMounts, volMount)
}
}
break
}
}
return volumeMounts
}

// GetStatefulSetContainerTLSVolumeMounts returns a map of container name to TLS volume mounts for the given StatefulSet.
func GetStatefulSetContainerTLSVolumeMounts(sts *appsv1.StatefulSet) map[string][]corev1.VolumeMount {
containerVolMounts := make(map[string][]corev1.VolumeMount, 2) // each pod is a 2 container pod. Init containers are not counted as containers.
if sts == nil {
return containerVolMounts
}
for _, container := range sts.Spec.Template.Spec.Containers {
if _, ok := containerVolMounts[container.Name]; !ok {
// Assuming 6 volume mounts per container. If there are more in future then this map's capacity will be increased by the golang runtime.
Expand Down
128 changes: 128 additions & 0 deletions internal/utils/statefulset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"testing"

"github.com/gardener/etcd-druid/internal/client/kubernetes"
"github.com/gardener/etcd-druid/internal/common"
testutils "github.com/gardener/etcd-druid/test/utils"

appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -270,3 +271,130 @@ func TestFetchPVCWarningMessagesForStatefulSet(t *testing.T) {
})
}
}

func TestGetEtcdContainerPeerTLSVolumeMounts(t *testing.T) {
testCases := []struct {
name string
isSTSNil bool
oldVolMountNames bool
expectedVolumeMounts []corev1.VolumeMount
}{
{
name: "sts is nil",
isSTSNil: true,
expectedVolumeMounts: []corev1.VolumeMount{},
},
{
name: "sts with old volume mount names",
oldVolMountNames: true,
expectedVolumeMounts: []corev1.VolumeMount{
{Name: common.OldVolumeNameEtcdPeerCA, MountPath: common.VolumeMountPathEtcdPeerCA},
{Name: common.OldVolumeNameEtcdPeerServerTLS, MountPath: common.VolumeMountPathEtcdPeerServerTLS},
},
},
{
name: "sts with new volume mount names",
expectedVolumeMounts: []corev1.VolumeMount{
{Name: common.VolumeNameEtcdPeerCA, MountPath: common.VolumeMountPathEtcdPeerCA},
{Name: common.VolumeNameEtcdPeerServerTLS, MountPath: common.VolumeMountPathEtcdPeerServerTLS},
},
},
}
g := NewWithT(t)
t.Parallel()
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
var sts *appsv1.StatefulSet
if tc.isSTSNil {
g.Expect(GetEtcdContainerPeerTLSVolumeMounts(sts)).To(HaveLen(0))
} else {
sts = testutils.CreateStatefulSet("test-sts", "test-ns", uuid.NewUUID(), 3)
sts.Spec.Template.Spec.Containers = append(sts.Spec.Template.Spec.Containers, corev1.Container{Name: common.ContainerNameEtcd})
if tc.oldVolMountNames {
sts.Spec.Template.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{
{Name: common.OldVolumeNameEtcdPeerCA, MountPath: common.VolumeMountPathEtcdPeerCA},
{Name: common.OldVolumeNameEtcdPeerServerTLS, MountPath: common.VolumeMountPathEtcdPeerServerTLS},
}
} else {
sts.Spec.Template.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{
{Name: common.VolumeNameEtcdPeerCA, MountPath: common.VolumeMountPathEtcdPeerCA},
{Name: common.VolumeNameEtcdPeerServerTLS, MountPath: common.VolumeMountPathEtcdPeerServerTLS},
}
}
g.Expect(GetEtcdContainerPeerTLSVolumeMounts(sts)).To(Equal(tc.expectedVolumeMounts))
}
})
}
}

func TestGetStatefulSetContainerTLSVolumeMounts(t *testing.T) {
testCases := []struct {
name string
isSTSNil bool
peerTLSEnabled bool
expectedVolumeMounts map[string][]corev1.VolumeMount
}{
{
name: "sts is nil",
isSTSNil: true,
expectedVolumeMounts: map[string][]corev1.VolumeMount{},
},
{
name: "sts with peer TLS enabled",
peerTLSEnabled: true,
expectedVolumeMounts: map[string][]corev1.VolumeMount{
common.ContainerNameEtcd: {
{Name: common.VolumeNameEtcdPeerCA, MountPath: common.VolumeMountPathEtcdPeerCA},
{Name: common.VolumeNameEtcdPeerServerTLS, MountPath: common.VolumeMountPathEtcdPeerServerTLS},
},
common.ContainerNameEtcdBackupRestore: {
{Name: common.VolumeNameBackupRestoreServerTLS, MountPath: common.VolumeMountPathBackupRestoreServerTLS},
{Name: common.VolumeNameEtcdCA, MountPath: common.VolumeMountPathEtcdCA},
{Name: common.VolumeNameEtcdClientTLS, MountPath: common.VolumeMountPathEtcdClientTLS},
},
},
},
{
name: "sts with peer TLS disabled",
peerTLSEnabled: false,
expectedVolumeMounts: map[string][]corev1.VolumeMount{
common.ContainerNameEtcd: {},
common.ContainerNameEtcdBackupRestore: {
{Name: common.VolumeNameBackupRestoreServerTLS, MountPath: common.VolumeMountPathBackupRestoreServerTLS},
{Name: common.VolumeNameEtcdCA, MountPath: common.VolumeMountPathEtcdCA},
{Name: common.VolumeNameEtcdClientTLS, MountPath: common.VolumeMountPathEtcdClientTLS},
},
},
},
}
g := NewWithT(t)
t.Parallel()
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
var sts *appsv1.StatefulSet
if tc.isSTSNil {
g.Expect(GetStatefulSetContainerTLSVolumeMounts(sts)).To(HaveLen(0))
} else {
sts = testutils.CreateStatefulSet("test-sts", "test-ns", uuid.NewUUID(), 3)
sts.Spec.Template.Spec.Containers = []corev1.Container{
{Name: common.ContainerNameEtcd},
{Name: common.ContainerNameEtcdBackupRestore},
}
if tc.peerTLSEnabled {
sts.Spec.Template.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{
{Name: common.VolumeNameEtcdPeerCA, MountPath: common.VolumeMountPathEtcdPeerCA},
{Name: common.VolumeNameEtcdPeerServerTLS, MountPath: common.VolumeMountPathEtcdPeerServerTLS},
}
}
sts.Spec.Template.Spec.Containers[1].VolumeMounts = []corev1.VolumeMount{
{Name: common.VolumeNameBackupRestoreServerTLS, MountPath: common.VolumeMountPathBackupRestoreServerTLS},
{Name: common.VolumeNameEtcdCA, MountPath: common.VolumeMountPathEtcdCA},
{Name: common.VolumeNameEtcdClientTLS, MountPath: common.VolumeMountPathEtcdClientTLS},
}
g.Expect(GetStatefulSetContainerTLSVolumeMounts(sts)).To(Equal(tc.expectedVolumeMounts))
}
})
}
}