Skip to content

Commit

Permalink
Failure domains support. Wait for the first controller
Browse files (browse the repository at this point in the history)
Signed-off-by: Alexey Makhov <amakhov@mirantis.com>
  • Loading branch information
makhov committed Nov 7, 2024
1 parent 7914b4c commit 2d911e5
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 143 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,7 @@ func (c *ControlPlaneController) getCerts(ctx context.Context, scope *Controller
err := c.Client.Get(ctx, client.ObjectKey{Namespace: scope.Cluster.Namespace, Name: secret.Name(scope.Cluster.Name, secret.Kubeconfig)}, s)
if err != nil {
if apierrors.IsNotFound(err) {
return nil, nil, fmt.Errorf("cluster's CA secret not found, waiting for secret")
return nil, nil, fmt.Errorf("cluster's kubeconfig secret not found, waiting for secret")
}
return nil, nil, err
}
Expand Down
50 changes: 0 additions & 50 deletions internal/controller/controlplane/failure_domain.go

This file was deleted.

68 changes: 0 additions & 68 deletions internal/controller/controlplane/failure_domain_test.go

This file was deleted.

12 changes: 5 additions & 7 deletions internal/controller/controlplane/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
cpv1beta1 "github.com/k0sproject/k0smotron/api/controlplane/v1beta1"
)

func (c *K0sController) createMachine(ctx context.Context, name string, cluster *clusterv1.Cluster, kcp *cpv1beta1.K0sControlPlane, infraRef corev1.ObjectReference, failureDomain string) (*clusterv1.Machine, error) {
func (c *K0sController) createMachine(ctx context.Context, name string, cluster *clusterv1.Cluster, kcp *cpv1beta1.K0sControlPlane, infraRef corev1.ObjectReference, failureDomain *string) (*clusterv1.Machine, error) {
machine, err := c.generateMachine(ctx, name, cluster, kcp, infraRef, failureDomain)
if err != nil {
return nil, fmt.Errorf("error generating machine: %w", err)
Expand Down Expand Up @@ -56,7 +56,7 @@ func (c *K0sController) deleteMachine(ctx context.Context, name string, kcp *cpv
return nil
}

func (c *K0sController) generateMachine(_ context.Context, name string, cluster *clusterv1.Cluster, kcp *cpv1beta1.K0sControlPlane, infraRef corev1.ObjectReference, failureDomain string) (*clusterv1.Machine, error) {
func (c *K0sController) generateMachine(_ context.Context, name string, cluster *clusterv1.Cluster, kcp *cpv1beta1.K0sControlPlane, infraRef corev1.ObjectReference, failureDomain *string) (*clusterv1.Machine, error) {
v := kcp.Spec.Version

labels := map[string]string{
Expand All @@ -83,8 +83,9 @@ func (c *K0sController) generateMachine(_ context.Context, name string, cluster
Labels: labels,
},
Spec: clusterv1.MachineSpec{
Version: &v,
ClusterName: cluster.Name,
Version: &v,
ClusterName: cluster.Name,
FailureDomain: failureDomain,
Bootstrap: clusterv1.Bootstrap{
ConfigRef: &corev1.ObjectReference{
APIVersion: "bootstrap.cluster.x-k8s.io/v1beta1",
Expand All @@ -95,9 +96,6 @@ func (c *K0sController) generateMachine(_ context.Context, name string, cluster
InfrastructureRef: infraRef,
},
}
if failureDomain != "" {
machine.Spec.FailureDomain = &failureDomain
}

return machine, nil
}
Expand Down
38 changes: 21 additions & 17 deletions internal/controller/controlplane/k0s_controlplane_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
capiutil "sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/collections"
"sigs.k8s.io/cluster-api/util/failuredomains"
"sigs.k8s.io/cluster-api/util/kubeconfig"
"sigs.k8s.io/cluster-api/util/secret"
ctrl "sigs.k8s.io/controller-runtime"
Expand All @@ -57,7 +58,10 @@ const (
defaultK0sVersion = "v1.27.9+k0s.0"
)

var ErrNewMachinesNotReady = fmt.Errorf("waiting for new machines")
// Sentinel errors that mark a reconcile pass as "not ready yet" rather than
// failed. Reconcile matches them with errors.Is(err, ErrNotReady) and answers
// with a requeue result and a nil error instead of returning the error.
var (
	// ErrNotReady is the base sentinel for every "still waiting" condition.
	// More specific waiting errors wrap it with %w so that one errors.Is
	// check against ErrNotReady matches all of them.
	ErrNotReady = errors.New("waiting for the state")

	// ErrNewMachinesNotReady reports that newly created machines are not
	// ready yet. It wraps ErrNotReady, so errors.Is(err, ErrNotReady) is true
	// for it as well.
	ErrNewMachinesNotReady = fmt.Errorf("waiting for new machines: %w", ErrNotReady)
)

type K0sController struct {
client.Client
Expand Down Expand Up @@ -169,7 +173,7 @@ func (c *K0sController) Reconcile(ctx context.Context, req ctrl.Request) (res ct

_, err = c.reconcile(ctx, cluster, kcp)
if err != nil {
if errors.Is(err, ErrNewMachinesNotReady) {
if errors.Is(err, ErrNotReady) {
return ctrl.Result{RequeueAfter: 10, Requeue: true}, nil
}
return res, err
Expand All @@ -181,7 +185,7 @@ func (c *K0sController) Reconcile(ctx context.Context, req ctrl.Request) (res ct

func (c *K0sController) reconcileKubeconfig(ctx context.Context, cluster *clusterv1.Cluster, kcp *cpv1beta1.K0sControlPlane) error {
if cluster.Spec.ControlPlaneEndpoint.IsZero() {
return errors.New("control plane endpoint is not set")
return fmt.Errorf("control plane endpoint is not set: %w", ErrNotReady)
}

secretName := secret.Name(cluster.Name, secret.Kubeconfig)
Expand Down Expand Up @@ -245,14 +249,14 @@ func (c *K0sController) reconcile(ctx context.Context, cluster *clusterv1.Cluste
return kcp.Status.Replicas, err
}

replicasToReport, err := c.reconcileMachines(ctx, cluster, kcp)
err = c.reconcileKubeconfig(ctx, cluster, kcp)
if err != nil {
return replicasToReport, err
return kcp.Status.Replicas, fmt.Errorf("error reconciling kubeconfig secret: %w", err)
}

err = c.reconcileKubeconfig(ctx, cluster, kcp)
replicasToReport, err := c.reconcileMachines(ctx, cluster, kcp)
if err != nil {
return replicasToReport, fmt.Errorf("error reconciling kubeconfig secret: %w", err)
return replicasToReport, err
}

return replicasToReport, nil
Expand Down Expand Up @@ -310,7 +314,7 @@ func (c *K0sController) reconcileMachines(ctx context.Context, cluster *clusterv
}

desiredReplicas += kcp.Spec.Replicas
machinesToDelete = int(kcp.Spec.Replicas)
machinesToDelete = oldMachines
replicasToReport = desiredReplicas
log.Log.Info("Calculated new replicas", "desiredReplicas", desiredReplicas, "machinesToDelete", machinesToDelete, "replicasToReport", replicasToReport, "currentReplicas", currentReplicas)
} else {
Expand All @@ -326,13 +330,9 @@ func (c *K0sController) reconcileMachines(ctx context.Context, cluster *clusterv
}
}

failureDomainsStats := NewFailureDomainsStats(cluster.Status.FailureDomains.FilterControlPlane())
machineNames := make(map[string]bool)
for _, m := range machines {
machineNames[m.Name] = true
if m.Spec.FailureDomain != nil {
failureDomainsStats.Add(*m.Spec.FailureDomain)
}
}

if len(machineNames) < int(desiredReplicas) {
Expand All @@ -347,6 +347,14 @@ func (c *K0sController) reconcileMachines(ctx context.Context, cluster *clusterv

for name, exists := range machineNames {
if !exists || kcp.Spec.UpdateStrategy == cpv1beta1.UpdateInPlace {

if machines.Len() == 1 && kcp.Spec.Replicas > 1 {
err := c.checkMachineIsReady(ctx, machines.Oldest().GetName(), cluster)
if err != nil {
return int32(len(machines)), ErrNewMachinesNotReady
}
}

// Wait for the previous machine to be created to avoid etcd issues
if clusterIsUpdating {
err := c.checkMachineIsReady(ctx, machines.Newest().Name, cluster)
Expand All @@ -367,11 +375,7 @@ func (c *K0sController) reconcileMachines(ctx context.Context, cluster *clusterv
Namespace: kcp.Namespace,
}

var selectedFailureDomain string
if len(cluster.Status.FailureDomains.FilterControlPlane()) > 0 {
selectedFailureDomain = failureDomainsStats.Select()
failureDomainsStats.Add(selectedFailureDomain)
}
selectedFailureDomain := failuredomains.PickFewest(cluster.Status.FailureDomains.FilterControlPlane(), machines)
machine, err := c.createMachine(ctx, name, cluster, kcp, infraRef, selectedFailureDomain)
if err != nil {
return replicasToReport, fmt.Errorf("error creating machine: %w", err)
Expand Down

0 comments on commit 2d911e5

Please sign in to comment.