🌱 Implement Cluster TopologyReconciled v1beta2 condition #11394

Merged
53 changes: 53 additions & 0 deletions api/v1beta1/cluster_types.go
@@ -56,6 +56,59 @@ const (
// ClusterTopologyReconciledV1Beta2Condition is true if the topology controller is working properly.
// Note: This condition is added only if the Cluster is referencing a ClusterClass / defining a managed Topology.
ClusterTopologyReconciledV1Beta2Condition = "TopologyReconciled"

// ClusterTopologyReconcileSucceededV1Beta2Reason documents that the reconciliation of a Cluster topology succeeded.
ClusterTopologyReconcileSucceededV1Beta2Reason = "TopologyReconcileSucceeded"

// ClusterTopologyReconciledFailedV1Beta2Reason documents that the reconciliation of a Cluster topology
// failed due to an error.
ClusterTopologyReconciledFailedV1Beta2Reason = "TopologyReconcileFailed"

// ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because the Control Plane is not yet updated to match the desired topology spec.
ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason = "ControlPlaneUpgradePending"

// ClusterTopologyReconciledMachineDeploymentsCreatePendingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachineDeployments has not been created yet.
// This generally happens because new MachineDeployment creations are held off while the ControlPlane is not stable.
ClusterTopologyReconciledMachineDeploymentsCreatePendingV1Beta2Reason = "MachineDeploymentsCreatePending"

// ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachineDeployments is not yet updated to match the desired topology spec.
ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason = "MachineDeploymentsUpgradePending"

// ClusterTopologyReconciledMachineDeploymentsUpgradeDeferredV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because the upgrade for at least one of the MachineDeployments has been deferred.
ClusterTopologyReconciledMachineDeploymentsUpgradeDeferredV1Beta2Reason = "MachineDeploymentsUpgradeDeferred"

// ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachinePools is not yet updated to match the desired topology spec.
ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason = "MachinePoolsUpgradePending"

// ClusterTopologyReconciledMachinePoolsCreatePendingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachinePools has not been created yet.
// This generally happens because new MachinePool creations are held off while the ControlPlane is not stable.
ClusterTopologyReconciledMachinePoolsCreatePendingV1Beta2Reason = "MachinePoolsCreatePending"

// ClusterTopologyReconciledMachinePoolsUpgradeDeferredV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because the upgrade for at least one of the MachinePools has been deferred.
ClusterTopologyReconciledMachinePoolsUpgradeDeferredV1Beta2Reason = "MachinePoolsUpgradeDeferred"

// ClusterTopologyReconciledHookBlockingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because at least one of the lifecycle hooks is blocking.
ClusterTopologyReconciledHookBlockingV1Beta2Reason = "LifecycleHookBlocking"

// ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason documents reconciliation of a Cluster topology not
// yet completed because the ClusterClass has not reconciled yet. If this condition persists there may be an issue
// with the ClusterClass surfaced in the ClusterClass status or controller logs.
ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason = "ClusterClassNotReconciled"

// ClusterTopologyReconciledDeletionTimestampSetV1Beta2Reason surfaces when the Cluster is deleting because the
// DeletionTimestamp is set.
ClusterTopologyReconciledDeletionTimestampSetV1Beta2Reason = DeletionTimestampSetV1Beta2Reason

// ClusterTopologyReconcilePausedV1Beta2Reason surfaces when the Cluster is paused.
ClusterTopologyReconcilePausedV1Beta2Reason = PausedV1Beta2Reason
)

// Cluster's InfrastructureReady condition and corresponding reasons that will be used in the v1Beta2 API version.
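
Note: the new condition is a standard metav1.Condition, so consumers can gate on it with the usual apimachinery helpers. A minimal sketch, assuming the v1beta2 conditions are surfaced under Status.V1Beta2.Conditions as in the rest of the v1beta2 work in this release; topologyReconciled is a hypothetical helper, not part of this PR.

package example

import (
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// topologyReconciled reports whether the v1beta2 TopologyReconciled condition
// is true on a Cluster; a missing condition is treated as "not reconciled".
func topologyReconciled(cluster *clusterv1.Cluster) bool {
	if cluster.Status.V1Beta2 == nil {
		return false
	}
	c := meta.FindStatusCondition(cluster.Status.V1Beta2.Conditions, clusterv1.ClusterTopologyReconciledV1Beta2Condition)
	return c != nil && c.Status == metav1.ConditionTrue
}
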
3 changes: 3 additions & 0 deletions api/v1beta1/condition_consts.go
@@ -337,6 +337,9 @@ const (
// yet completed because the ClusterClass has not reconciled yet. If this condition persists there may be an issue
// with the ClusterClass surfaced in the ClusterClass status or controller logs.
TopologyReconciledClusterClassNotReconciledReason = "ClusterClassNotReconciled"

// TopologyReconciledPausedReason (Severity=Info) surfaces when the Cluster is paused.
TopologyReconciledPausedReason = "Paused"
)

// Conditions and condition reasons for ClusterClass.
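
Note: the new legacy Paused reason can be asserted with the existing util/conditions getters, e.g. in a test. A short sketch; pausedByTopologyController is a hypothetical helper, not code from this PR.

package example

import (
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/conditions"
)

// pausedByTopologyController reports whether the legacy TopologyReconciled
// condition currently carries the new Paused reason.
func pausedByTopologyController(cluster *clusterv1.Cluster) bool {
	return conditions.GetReason(cluster, clusterv1.TopologyReconciledCondition) == clusterv1.TopologyReconciledPausedReason
}
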
18 changes: 9 additions & 9 deletions internal/controllers/topology/cluster/cluster_controller.go
@@ -120,7 +120,7 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
builder.WithPredicates(predicates.ResourceIsTopologyOwned(mgr.GetScheme(), predicateLog)),
).
WithOptions(options).
- WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(mgr.GetScheme(), predicateLog, r.WatchFilterValue)).
+ WithEventFilter(predicates.ResourceHasFilterLabel(mgr.GetScheme(), predicateLog, r.WatchFilterValue)).
Build(r)

if err != nil {
@@ -175,13 +175,6 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
return ctrl.Result{}, nil
}

- // Return early if the Cluster is paused.
- // TODO: What should we do if the cluster class is paused?
- if annotations.IsPaused(cluster, cluster) {
- log.Info("Reconciliation is paused for this object")
- return ctrl.Result{}, nil
- }

patchHelper, err := patch.NewHelper(cluster, r.Client)
if err != nil {
return ctrl.Result{}, err
Expand All @@ -200,14 +193,21 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
clusterv1.TopologyReconciledCondition,
}},
patch.WithForceOverwriteConditions{},
patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
clusterv1.ClusterTopologyReconciledV1Beta2Condition,
}},
}
if err := patchHelper.Patch(ctx, cluster, options...); err != nil {
reterr = kerrors.NewAggregate([]error{reterr, err})
return
}
}()

+ // Return early if the Cluster is paused.
+ if cluster.Spec.Paused || annotations.HasPaused(cluster) {
+ return ctrl.Result{}, nil
+ }

// In case the object is deleted, the managed topology stops reconciling;
// (the other controllers will take care of deletion).
if !cluster.ObjectMeta.DeletionTimestamp.IsZero() {
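
Note: the effect of this reordering is that the pause check now sits after the patch helper and its deferred Patch call, so conditions recorded for a paused Cluster are still persisted; removing the pause predicate in SetupWithManager above is what lets paused Clusters reach this code at all. A condensed sketch of that control flow; reconcileSketch and its signature are illustrative, not the PR's exact function.

package example

import (
	"context"

	kerrors "k8s.io/apimachinery/pkg/util/errors"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/annotations"
	"sigs.k8s.io/cluster-api/util/patch"
)

func reconcileSketch(ctx context.Context, c client.Client, cluster *clusterv1.Cluster) (_ ctrl.Result, reterr error) {
	patchHelper, err := patch.NewHelper(cluster, c)
	if err != nil {
		return ctrl.Result{}, err
	}
	defer func() {
		// Runs on every exit path, including the early "paused" return below,
		// so condition updates are not lost while the Cluster is paused.
		if err := patchHelper.Patch(ctx, cluster); err != nil {
			reterr = kerrors.NewAggregate([]error{reterr, err})
		}
	}()

	// Paused Clusters now reach this point (no pause predicate in the watch);
	// the early return only skips the actual topology reconciliation.
	if cluster.Spec.Paused || annotations.HasPaused(cluster) {
		return ctrl.Result{}, nil
	}

	// ... the actual topology reconciliation continues here ...
	return ctrl.Result{}, nil
}
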
94 changes: 81 additions & 13 deletions internal/controllers/topology/cluster/conditions.go
@@ -21,11 +21,14 @@ import (
"strings"

"github.com/pkg/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/exp/topology/scope"
"sigs.k8s.io/cluster-api/internal/contract"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/conditions"
v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
)

func (r *Reconciler) reconcileConditions(s *scope.Scope, cluster *clusterv1.Cluster, reconcileErr error) error {
@@ -36,32 +39,62 @@ func (r *Reconciler) reconcileConditions(s *scope.Scope, cluster *clusterv1.Clus
// The TopologyReconciled condition is considered true if the specs of all the objects associated with the
// cluster are in sync with the topology defined in the cluster.
// The condition is false in the following cases:
// - The cluster is paused.
// - An error occurred during the reconcile process of the cluster topology.
// - The ClusterClass has not been successfully reconciled with its current spec.
// - The cluster upgrade has not yet propagated to all the components of the cluster.
// - For a managed topology cluster the version upgrade is propagated one component at a time.
// In such a case, since some of the components' specs would be adrift from the topology,
// the topology cannot be considered fully reconciled.
func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluster *clusterv1.Cluster, reconcileErr error) error {
// Mark TopologyReconciled as false if the Cluster is paused.
if cluster.Spec.Paused || annotations.HasPaused(cluster) {
var messages []string
if cluster.Spec.Paused {
messages = append(messages, "Cluster spec.paused is set to true")
}
if annotations.HasPaused(cluster) {
messages = append(messages, "Cluster has the cluster.x-k8s.io/paused annotation")
}
conditions.Set(cluster,
conditions.FalseCondition(
clusterv1.TopologyReconciledCondition,
clusterv1.TopologyReconciledPausedReason,
clusterv1.ConditionSeverityInfo,
strings.Join(messages, ", "),
),
)
v1beta2conditions.Set(cluster, metav1.Condition{
Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.ClusterTopologyReconcilePausedV1Beta2Reason,
Message: strings.Join(messages, ", "),
})
return nil
}

// Mark TopologyReconciled as false due to cluster deletion.
if !cluster.ObjectMeta.DeletionTimestamp.IsZero() {
- conditions.Set(
- cluster,
+ conditions.Set(cluster,
conditions.FalseCondition(
clusterv1.TopologyReconciledCondition,
clusterv1.DeletedReason,
clusterv1.ConditionSeverityInfo,
"",
),
)
v1beta2conditions.Set(cluster, metav1.Condition{
Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.ClusterTopologyReconciledDeletionTimestampSetV1Beta2Reason,
})
return nil
}

// If an error occurred during reconciliation set the TopologyReconciled condition to false.
// Add the error message from the reconcile function to the message of the condition.
if reconcileErr != nil {
- conditions.Set(
- cluster,
+ conditions.Set(cluster,
conditions.FalseCondition(
clusterv1.TopologyReconciledCondition,
clusterv1.TopologyReconcileFailedReason,
Expand All @@ -70,15 +103,21 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
reconcileErr.Error(),
),
)
v1beta2conditions.Set(cluster, metav1.Condition{
Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.ClusterTopologyReconciledFailedV1Beta2Reason,
// TODO: Add a protection for messages continuously changing leading to Cluster object changes/reconcile.
Message: reconcileErr.Error(),
})
return nil
}

// If the ClusterClass `metadata.Generation` doesn't match the `status.ObservedGeneration`, requeue as the ClusterClass
// is not up to date.
if s.Blueprint != nil && s.Blueprint.ClusterClass != nil &&
s.Blueprint.ClusterClass.GetGeneration() != s.Blueprint.ClusterClass.Status.ObservedGeneration {
- conditions.Set(
- cluster,
+ conditions.Set(cluster,
conditions.FalseCondition(
clusterv1.TopologyReconciledCondition,
clusterv1.TopologyReconciledClusterClassNotReconciledReason,
Expand All @@ -87,14 +126,20 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused",
),
)
v1beta2conditions.Set(cluster, metav1.Condition{
Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason,
Message: "ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if" +
".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused",
})
return nil
}

// If any of the lifecycle hooks are blocking any part of the reconciliation then the topology
// is not considered fully reconciled.
if s.HookResponseTracker.AggregateRetryAfter() != 0 {
- conditions.Set(
- cluster,
+ conditions.Set(cluster,
conditions.FalseCondition(
clusterv1.TopologyReconciledCondition,
clusterv1.TopologyReconciledHookBlockingReason,
Expand All @@ -103,6 +148,13 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
s.HookResponseTracker.AggregateMessage(),
),
)
v1beta2conditions.Set(cluster, metav1.Condition{
Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.ClusterTopologyReconciledHookBlockingV1Beta2Reason,
// TODO: Add a protection for messages continuously changing leading to Cluster object changes/reconcile.
Message: s.HookResponseTracker.AggregateMessage(),
})
return nil
}

@@ -121,6 +173,7 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
s.UpgradeTracker.MachinePools.DeferredUpgrade() {
msgBuilder := &strings.Builder{}
var reason string
var v1beta2Reason string

// TODO(ykakarap): Evaluate potential improvements to building the condition. Multiple causes can trigger the
// condition to be false at the same time (Example: ControlPlane.IsPendingUpgrade and MachineDeployments.IsAnyPendingCreate can
@@ -130,40 +183,47 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
case s.UpgradeTracker.ControlPlane.IsPendingUpgrade:
fmt.Fprintf(msgBuilder, "Control plane rollout and upgrade to version %s on hold.", s.Blueprint.Topology.Version)
reason = clusterv1.TopologyReconciledControlPlaneUpgradePendingReason
v1beta2Reason = clusterv1.ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason
case s.UpgradeTracker.MachineDeployments.IsAnyPendingUpgrade():
fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s rollout and upgrade to version %s on hold.",
computeNameList(s.UpgradeTracker.MachineDeployments.PendingUpgradeNames()),
s.Blueprint.Topology.Version,
)
reason = clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason
v1beta2Reason = clusterv1.ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason
case s.UpgradeTracker.MachineDeployments.IsAnyPendingCreate():
fmt.Fprintf(msgBuilder, "MachineDeployment(s) for Topologies %s creation on hold.",
computeNameList(s.UpgradeTracker.MachineDeployments.PendingCreateTopologyNames()),
)
reason = clusterv1.TopologyReconciledMachineDeploymentsCreatePendingReason
v1beta2Reason = clusterv1.ClusterTopologyReconciledMachineDeploymentsCreatePendingV1Beta2Reason
case s.UpgradeTracker.MachineDeployments.DeferredUpgrade():
fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s rollout and upgrade to version %s deferred.",
computeNameList(s.UpgradeTracker.MachineDeployments.DeferredUpgradeNames()),
s.Blueprint.Topology.Version,
)
reason = clusterv1.TopologyReconciledMachineDeploymentsUpgradeDeferredReason
v1beta2Reason = clusterv1.ClusterTopologyReconciledMachineDeploymentsUpgradeDeferredV1Beta2Reason
case s.UpgradeTracker.MachinePools.IsAnyPendingUpgrade():
fmt.Fprintf(msgBuilder, "MachinePool(s) %s rollout and upgrade to version %s on hold.",
computeNameList(s.UpgradeTracker.MachinePools.PendingUpgradeNames()),
s.Blueprint.Topology.Version,
)
reason = clusterv1.TopologyReconciledMachinePoolsUpgradePendingReason
v1beta2Reason = clusterv1.ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason
case s.UpgradeTracker.MachinePools.IsAnyPendingCreate():
fmt.Fprintf(msgBuilder, "MachinePool(s) for Topologies %s creation on hold.",
computeNameList(s.UpgradeTracker.MachinePools.PendingCreateTopologyNames()),
)
reason = clusterv1.TopologyReconciledMachinePoolsCreatePendingReason
v1beta2Reason = clusterv1.ClusterTopologyReconciledMachinePoolsCreatePendingV1Beta2Reason
case s.UpgradeTracker.MachinePools.DeferredUpgrade():
fmt.Fprintf(msgBuilder, "MachinePool(s) %s rollout and upgrade to version %s deferred.",
computeNameList(s.UpgradeTracker.MachinePools.DeferredUpgradeNames()),
s.Blueprint.Topology.Version,
)
reason = clusterv1.TopologyReconciledMachinePoolsUpgradeDeferredReason
v1beta2Reason = clusterv1.ClusterTopologyReconciledMachinePoolsUpgradeDeferredV1Beta2Reason
}

switch {
@@ -191,26 +251,34 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
)
}

- conditions.Set(
- cluster,
+ conditions.Set(cluster,
conditions.FalseCondition(
clusterv1.TopologyReconciledCondition,
reason,
clusterv1.ConditionSeverityInfo,
msgBuilder.String(),
),
)
v1beta2conditions.Set(cluster, metav1.Condition{
Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: v1beta2Reason,
Message: msgBuilder.String(),
})
return nil
}

// If there are no errors while reconciling and if the topology is not holding back changes
// we can consider that the spec of all the objects is reconciled to match the topology. Set the
// TopologyReconciled condition to true.
- conditions.Set(
- cluster,
+ conditions.Set(cluster,
conditions.TrueCondition(clusterv1.TopologyReconciledCondition),
)

v1beta2conditions.Set(cluster, metav1.Condition{
Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: clusterv1.ClusterTopologyReconcileSucceededV1Beta2Reason,
})
return nil
}

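
Note: every branch above repeats the same pairing: one write to the legacy condition (reason, severity, message) and one to the v1beta2 condition (reason and message, no severity). A hedged sketch of that pattern as a helper; setTopologyReconciledFalse is a hypothetical refactor, not code from the PR, and it hardcodes Severity=Info where the reconcile-error path actually uses ConditionSeverityError.

package example

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/conditions"
	v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
)

// setTopologyReconciledFalse records the same outcome on both condition
// flavors, mirroring the dual writes in reconcileTopologyReconciledCondition.
func setTopologyReconciledFalse(cluster *clusterv1.Cluster, legacyReason, v1beta2Reason, message string) {
	conditions.Set(cluster,
		conditions.FalseCondition(
			clusterv1.TopologyReconciledCondition,
			legacyReason,
			clusterv1.ConditionSeverityInfo,
			"%s", message,
		),
	)
	v1beta2conditions.Set(cluster, metav1.Condition{
		Type:    clusterv1.ClusterTopologyReconciledV1Beta2Condition,
		Status:  metav1.ConditionFalse,
		Reason:  v1beta2Reason,
		Message: message,
	})
}
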