Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modularization & Status Enhancements #188

Merged
merged 7 commits into from
Jun 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .ci/test
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ function test_with_coverage() {
fetch_envtest_tools "$kb_root_dir"
setup_envtest_env "$kb_root_dir"

TEST_PACKAGES="api api_tests controllers"
TEST_PACKAGES="api api_tests controllers pkg"
GINKGO_COMMON_FLAGS="-r -timeout=1h0m0s --randomizeAllSpecs --randomizeSuites --failOnPending --progress"
if [ -z $COVER ] || [ "$COVER" = false ] ; then
echo "[INFO] Test coverage is disabled."
Expand Down
5 changes: 4 additions & 1 deletion api/v1alpha1/etcd_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ const (

// EtcdMemberStatus holds information about a etcd cluster membership.
type EtcdMemberStatus struct {
// Name is the name of the etcd member.
// Name is the name of the etcd member. It is the name of the backing `Pod`.
Name string `json:"name"`
// ID is the ID of the etcd member.
ID string `json:"id"`
Expand Down Expand Up @@ -325,6 +325,9 @@ type EtcdStatus struct {
// LastError represents the last occurred error.
// +optional
LastError *string `json:"lastError,omitempty"`
// Cluster size is the size of the etcd cluster.
// +optional
ClusterSize *int32 `json:"clusterSize,omitempty"`
// CurrentReplicas is the current replica count for the etcd cluster.
// +optional
CurrentReplicas int32 `json:"currentReplicas,omitempty"`
Expand Down
5 changes: 5 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion config/crd/bases/druid.gardener.cloud_etcds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,10 @@ spec:
status:
description: EtcdStatus defines the observed state of Etcd.
properties:
clusterSize:
description: Cluster size is the size of the etcd cluster.
format: int32
type: integer
conditions:
description: Conditions represents the latest available observations
of an etcd's current state.
Expand Down Expand Up @@ -548,7 +552,8 @@ spec:
format: date-time
type: string
name:
description: Name is the name of the etcd member.
description: Name is the name of the etcd member. It is the
name of the backing `Pod`.
type: string
reason:
description: The reason for the condition's last transition.
Expand Down
32 changes: 32 additions & 0 deletions controllers/config/custodian.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright (c) 2021 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package config

import "time"

// EtcdCustodianController contains configuration for the etcd custodian controller.
type EtcdCustodianController struct {
// EtcdMember holds configuration related to etcd members.
EtcdMember EtcdMemberConfig
// SyncPeriod is the duration after which re-enqueuing happens.
SyncPeriod time.Duration
}

type EtcdMemberConfig struct {
// EtcdMemberUnknownThreshold is the duration after which a etcd member's state is considered `Unknown`.
EtcdMemberUnknownThreshold time.Duration
// EtcdMemberUnknownThreshold is the duration after which a etcd member's state is considered `NotReady`.
EtcdMemberNotReadyThreshold time.Duration
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,12 @@ import (
"testing"
"time"

"github.com/gardener/gardener/pkg/utils/test"
druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1"
controllersconfig "github.com/gardener/etcd-druid/controllers/config"

"github.com/gardener/gardener/pkg/utils/test"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"

druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/client-go/kubernetes/scheme"
Expand Down Expand Up @@ -111,7 +110,12 @@ var _ = BeforeSuite(func(done Done) {
err = er.SetupWithManager(mgr, 1, true)
Expect(err).NotTo(HaveOccurred())

custodian := NewEtcdCustodian(mgr)
custodian := NewEtcdCustodian(mgr, controllersconfig.EtcdCustodianController{
EtcdMember: controllersconfig.EtcdMemberConfig{
EtcdMemberUnknownThreshold: 1 * time.Minute,
EtcdMemberNotReadyThreshold: 1 * time.Minute,
},
})

err = custodian.SetupWithManager(mgrCtx, mgr, 1)
Expect(err).NotTo(HaveOccurred())
Expand Down
89 changes: 57 additions & 32 deletions controllers/etcd_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ import (
"k8s.io/apimachinery/pkg/util/yaml"
"k8s.io/client-go/rest"
"k8s.io/client-go/util/retry"
"k8s.io/utils/pointer"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
Expand Down Expand Up @@ -209,7 +210,7 @@ func (r *EtcdReconciler) reconcile(ctx context.Context, etcd *druidv1alpha1.Etcd
finalizers.Insert(FinalizerName)
etcd.Finalizers = finalizers.UnsortedList()
if err := r.Update(ctx, etcd); err != nil {
if err := r.updateEtcdErrorStatus(ctx, etcd, nil, err); err != nil {
if err := r.updateEtcdErrorStatus(ctx, noOp, etcd, nil, err); err != nil {
return ctrl.Result{
Requeue: true,
}, err
Expand All @@ -220,7 +221,7 @@ func (r *EtcdReconciler) reconcile(ctx context.Context, etcd *druidv1alpha1.Etcd
}
}
if err := r.addFinalizersToDependantSecrets(ctx, logger, etcd); err != nil {
if err := r.updateEtcdErrorStatus(ctx, etcd, nil, err); err != nil {
if err := r.updateEtcdErrorStatus(ctx, noOp, etcd, nil, err); err != nil {
return ctrl.Result{
Requeue: true,
}, err
Expand All @@ -235,9 +236,9 @@ func (r *EtcdReconciler) reconcile(ctx context.Context, etcd *druidv1alpha1.Etcd
Requeue: true,
}, err
}
svc, ss, err := r.reconcileEtcd(ctx, logger, etcd)
op, svc, sts, err := r.reconcileEtcd(ctx, logger, etcd)
if err != nil {
if err := r.updateEtcdErrorStatus(ctx, etcd, ss, err); err != nil {
if err := r.updateEtcdErrorStatus(ctx, op, etcd, sts, err); err != nil {
logger.Error(err, "Error during reconciling ETCD")
return ctrl.Result{
Requeue: true,
Expand All @@ -247,8 +248,7 @@ func (r *EtcdReconciler) reconcile(ctx context.Context, etcd *druidv1alpha1.Etcd
Requeue: true,
}, err
}

if err := r.updateEtcdStatus(ctx, etcd, svc, ss); err != nil {
if err := r.updateEtcdStatus(ctx, op, etcd, svc, sts); err != nil {
return ctrl.Result{
Requeue: true,
}, err
Expand All @@ -264,7 +264,7 @@ func (r *EtcdReconciler) delete(ctx context.Context, etcd *druidv1alpha1.Etcd) (
logger.Info("Starting operation")

if err := r.removeDependantStatefulset(ctx, logger, etcd); err != nil {
if err := r.updateEtcdErrorStatus(ctx, etcd, nil, err); err != nil {
if err := r.updateEtcdErrorStatus(ctx, deleteOp, etcd, nil, err); err != nil {
return ctrl.Result{
Requeue: true,
}, err
Expand All @@ -275,7 +275,7 @@ func (r *EtcdReconciler) delete(ctx context.Context, etcd *druidv1alpha1.Etcd) (
}

if err := r.removeFinalizersToDependantSecrets(ctx, logger, etcd); err != nil {
if err := r.updateEtcdErrorStatus(ctx, etcd, nil, err); err != nil {
if err := r.updateEtcdErrorStatus(ctx, deleteOp, etcd, nil, err); err != nil {
return ctrl.Result{
Requeue: true,
}, err
Expand Down Expand Up @@ -557,7 +557,16 @@ func (r *EtcdReconciler) getConfigMapFromEtcd(etcd *druidv1alpha1.Etcd, rendered
return decoded, nil
}

func (r *EtcdReconciler) reconcileStatefulSet(ctx context.Context, logger logr.Logger, etcd *druidv1alpha1.Etcd, values map[string]interface{}) (*appsv1.StatefulSet, error) {
type operationResult string

const (
bootstrapOp operationResult = "bootstrap"
reconcileOp operationResult = "reconcile"
deleteOp operationResult = "delete"
noOp operationResult = "none"
)

func (r *EtcdReconciler) reconcileStatefulSet(ctx context.Context, logger logr.Logger, etcd *druidv1alpha1.Etcd, values map[string]interface{}) (operationResult, *appsv1.StatefulSet, error) {
logger.Info("Reconciling etcd statefulset")

// If any adoptions are attempted, we should first recheck for deletion with
Expand All @@ -582,19 +591,19 @@ func (r *EtcdReconciler) reconcileStatefulSet(ctx context.Context, logger logr.L
selector, err := metav1.LabelSelectorAsSelector(etcd.Spec.Selector)
if err != nil {
logger.Error(err, "Error converting etcd selector to selector")
return nil, err
return noOp, nil, err
}
dm := NewEtcdDruidRefManager(r.Client, r.Scheme, etcd, selector, etcdGVK, canAdoptFunc)
statefulSets, err := dm.FetchStatefulSet(ctx, etcd)
if err != nil {
logger.Error(err, "Error while fetching StatefulSet")
return nil, err
return noOp, nil, err
}

logger.Info("Claiming existing etcd StatefulSet")
claimedStatefulSets, err := dm.ClaimStatefulsets(ctx, statefulSets)
if err != nil {
return nil, err
return noOp, nil, err
}

if len(claimedStatefulSets) > 0 {
Expand All @@ -612,41 +621,42 @@ func (r *EtcdReconciler) reconcileStatefulSet(ctx context.Context, logger logr.L
// TODO: (timuthy) Check if this is really needed.
sts := &appsv1.StatefulSet{}
if err := r.Get(ctx, types.NamespacedName{Name: claimedStatefulSets[0].Name, Namespace: claimedStatefulSets[0].Namespace}, sts); err != nil {
return nil, err
return noOp, nil, err
}

// Statefulset is claimed by for this etcd. Just sync the specs
if sts, err = r.syncStatefulSetSpec(ctx, logger, sts, etcd, values); err != nil {
return nil, err
return noOp, nil, err
}

// restart etcd pods in crashloop backoff
selector, err := metav1.LabelSelectorAsSelector(sts.Spec.Selector)
if err != nil {
logger.Error(err, "error converting StatefulSet selector to selector")
return nil, err
return noOp, nil, err
}
podList := &v1.PodList{}
if err := r.List(ctx, podList, client.InNamespace(etcd.Namespace), client.MatchingLabelsSelector{Selector: selector}); err != nil {
return nil, err
return noOp, nil, err
}

for _, pod := range podList.Items {
if utils.IsPodInCrashloopBackoff(pod.Status) {
if err := r.Delete(ctx, &pod); err != nil {
logger.Error(err, fmt.Sprintf("error deleting etcd pod in crashloop: %s/%s", pod.Namespace, pod.Name))
return nil, err
return noOp, nil, err
}
}
}

return r.waitUntilStatefulSetReady(ctx, logger, etcd, sts)
sts, err = r.waitUntilStatefulSetReady(ctx, logger, etcd, sts)
return reconcileOp, sts, err
}

// Required statefulset doesn't exist. Create new
sts, err := r.getStatefulSetFromEtcd(etcd, values)
if err != nil {
return nil, err
return noOp, nil, err
}

err = r.Create(ctx, sts)
Expand All @@ -658,10 +668,11 @@ func (r *EtcdReconciler) reconcileStatefulSet(ctx context.Context, logger logr.L
err = nil
}
if err != nil {
return nil, err
return noOp, nil, err
}

return r.waitUntilStatefulSetReady(ctx, logger, etcd, sts)
sts, err = r.waitUntilStatefulSetReady(ctx, logger, etcd, sts)
return bootstrapOp, sts, err
}

func getContainerMapFromPodTemplateSpec(spec v1.PodSpec) map[string]v1.Container {
Expand Down Expand Up @@ -758,40 +769,39 @@ func (r *EtcdReconciler) getStatefulSetFromEtcd(etcd *druidv1alpha1.Etcd, values
return decoded, nil
}

func (r *EtcdReconciler) reconcileEtcd(ctx context.Context, logger logr.Logger, etcd *druidv1alpha1.Etcd) (*corev1.Service, *appsv1.StatefulSet, error) {

func (r *EtcdReconciler) reconcileEtcd(ctx context.Context, logger logr.Logger, etcd *druidv1alpha1.Etcd) (operationResult, *corev1.Service, *appsv1.StatefulSet, error) {
values, err := r.getMapFromEtcd(etcd)
if err != nil {
return nil, nil, err
return noOp, nil, nil, err
}

chartPath := getChartPath()
renderedChart, err := r.chartApplier.Render(chartPath, etcd.Name, etcd.Namespace, values)
if err != nil {
return nil, nil, err
return noOp, nil, nil, err
}
svc, err := r.reconcileServices(ctx, logger, etcd, renderedChart)
if err != nil {
return nil, nil, err
return noOp, nil, nil, err
}
if svc != nil {
values["serviceName"] = svc.Name
}

cm, err := r.reconcileConfigMaps(ctx, logger, etcd, renderedChart)
if err != nil {
return nil, nil, err
return noOp, nil, nil, err
}
if cm != nil {
values["configMapName"] = cm.Name
}

ss, err := r.reconcileStatefulSet(ctx, logger, etcd, values)
op, sts, err := r.reconcileStatefulSet(ctx, logger, etcd, values)
if err != nil {
return nil, nil, err
return noOp, nil, nil, err
}

return svc, ss, nil
return op, svc, sts, nil
}

func checkEtcdOwnerReference(refs []metav1.OwnerReference, etcd *druidv1alpha1.Etcd) bool {
Expand Down Expand Up @@ -1118,14 +1128,24 @@ func canDeleteStatefulset(sts *appsv1.StatefulSet, etcd *druidv1alpha1.Etcd) boo

}

func (r *EtcdReconciler) updateEtcdErrorStatus(ctx context.Context, etcd *druidv1alpha1.Etcd, sts *appsv1.StatefulSet, lastError error) error {
func bootstrapReset(etcd *druidv1alpha1.Etcd) {
etcd.Status.Members = nil
etcd.Status.ClusterSize = pointer.Int32Ptr(int32(etcd.Spec.Replicas))
}

func (r *EtcdReconciler) updateEtcdErrorStatus(ctx context.Context, op operationResult, etcd *druidv1alpha1.Etcd, sts *appsv1.StatefulSet, lastError error) error {
err := kutil.TryUpdateStatus(ctx, retry.DefaultBackoff, r.Client, etcd, func() error {
lastErrStr := fmt.Sprintf("%v", lastError)
etcd.Status.LastError = &lastErrStr
etcd.Status.ObservedGeneration = &etcd.Generation
if sts != nil {
ready := CheckStatefulSet(etcd, sts) == nil
etcd.Status.Ready = &ready

if op == bootstrapOp {
// Reset members in bootstrap phase to ensure depending conditions can be calculated correctly.
bootstrapReset(etcd)
}
}
return nil
})
Expand All @@ -1136,14 +1156,19 @@ func (r *EtcdReconciler) updateEtcdErrorStatus(ctx context.Context, etcd *druidv
return r.removeOperationAnnotation(ctx, etcd)
}

func (r *EtcdReconciler) updateEtcdStatus(ctx context.Context, etcd *druidv1alpha1.Etcd, svc *corev1.Service, sts *appsv1.StatefulSet) error {
func (r *EtcdReconciler) updateEtcdStatus(ctx context.Context, op operationResult, etcd *druidv1alpha1.Etcd, svc *corev1.Service, sts *appsv1.StatefulSet) error {
err := kutil.TryUpdateStatus(ctx, retry.DefaultBackoff, r.Client, etcd, func() error {
ready := CheckStatefulSet(etcd, sts) == nil
etcd.Status.Ready = &ready
svcName := svc.Name
etcd.Status.ServiceName = &svcName
etcd.Status.LastError = nil
etcd.Status.ObservedGeneration = &etcd.Generation

if op == bootstrapOp {
// Reset members in bootstrap phase to ensure depending conditions can be calculated correctly.
bootstrapReset(etcd)
}
return nil
})

Expand Down
Loading