diff --git a/api/v1beta1/cluster_types.go b/api/v1beta1/cluster_types.go
index c07db3b89592..3abe40d7e975 100644
--- a/api/v1beta1/cluster_types.go
+++ b/api/v1beta1/cluster_types.go
@@ -56,6 +56,59 @@ const (
 	// ClusterTopologyReconciledV1Beta2Condition is true if the topology controller is working properly.
 	// Note: This condition is added only if the Cluster is referencing a ClusterClass / defining a managed Topology.
 	ClusterTopologyReconciledV1Beta2Condition = "TopologyReconciled"
+
+	// ClusterTopologyReconcileSucceededV1Beta2Reason documents the reconciliation of a Cluster topology succeeded.
+	ClusterTopologyReconcileSucceededV1Beta2Reason = "TopologyReconcileSucceeded"
+
+	// ClusterTopologyReconciledFailedV1Beta2Reason documents the reconciliation of a Cluster topology
+	// failing due to an error.
+	ClusterTopologyReconciledFailedV1Beta2Reason = "TopologyReconcileFailed"
+
+	// ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology
+	// not yet completed because Control Plane is not yet updated to match the desired topology spec.
+	ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason = "ControlPlaneUpgradePending"
+
+	// ClusterTopologyReconciledMachineDeploymentsCreatePendingV1Beta2Reason documents reconciliation of a Cluster topology
+	// not yet completed because at least one of the MachineDeployments is yet to be created.
+	// This generally happens because new MachineDeployment creations are held off while the ControlPlane is not stable.
+	ClusterTopologyReconciledMachineDeploymentsCreatePendingV1Beta2Reason = "MachineDeploymentsCreatePending"
+
+	// ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology
+	// not yet completed because at least one of the MachineDeployments is not yet updated to match the desired topology spec.
+	ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason = "MachineDeploymentsUpgradePending"
+
+	// ClusterTopologyReconciledMachineDeploymentsUpgradeDeferredV1Beta2Reason documents reconciliation of a Cluster topology
+	// not yet completed because the upgrade for at least one of the MachineDeployments has been deferred.
+	ClusterTopologyReconciledMachineDeploymentsUpgradeDeferredV1Beta2Reason = "MachineDeploymentsUpgradeDeferred"
+
+	// ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology
+	// not yet completed because at least one of the MachinePools is not yet updated to match the desired topology spec.
+	ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason = "MachinePoolsUpgradePending"
+
+	// ClusterTopologyReconciledMachinePoolsCreatePendingV1Beta2Reason documents reconciliation of a Cluster topology
+	// not yet completed because at least one of the MachinePools is yet to be created.
+	// This generally happens because new MachinePool creations are held off while the ControlPlane is not stable.
+	ClusterTopologyReconciledMachinePoolsCreatePendingV1Beta2Reason = "MachinePoolsCreatePending"
+
+	// ClusterTopologyReconciledMachinePoolsUpgradeDeferredV1Beta2Reason documents reconciliation of a Cluster topology
+	// not yet completed because the upgrade for at least one of the MachinePools has been deferred.
+	ClusterTopologyReconciledMachinePoolsUpgradeDeferredV1Beta2Reason = "MachinePoolsUpgradeDeferred"
+
+	// ClusterTopologyReconciledHookBlockingV1Beta2Reason documents reconciliation of a Cluster topology
+	// not yet completed because at least one of the lifecycle hooks is blocking.
+	ClusterTopologyReconciledHookBlockingV1Beta2Reason = "LifecycleHookBlocking"
+
+	// ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason documents reconciliation of a Cluster topology not
+	// yet completed because the ClusterClass has not reconciled yet. If this condition persists there may be an issue
+	// with the ClusterClass surfaced in the ClusterClass status or controller logs.
+	ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason = "ClusterClassNotReconciled"
+
+	// ClusterTopologyReconciledDeletionTimestampSetV1Beta2Reason surfaces when the Cluster is deleting because the
+	// DeletionTimestamp is set.
+	ClusterTopologyReconciledDeletionTimestampSetV1Beta2Reason = DeletionTimestampSetV1Beta2Reason
+
+	// ClusterTopologyReconcilePausedV1Beta2Reason surfaces when the Cluster is paused.
+	ClusterTopologyReconcilePausedV1Beta2Reason = PausedV1Beta2Reason
 )
 
 // Cluster's InfrastructureReady condition and corresponding reasons that will be used in v1Beta2 API version.
diff --git a/api/v1beta1/condition_consts.go b/api/v1beta1/condition_consts.go
index 059b4ccc74a9..19fe608b71d1 100644
--- a/api/v1beta1/condition_consts.go
+++ b/api/v1beta1/condition_consts.go
@@ -337,6 +337,9 @@ const (
 	// yet completed because the ClusterClass has not reconciled yet. If this condition persists there may be an issue
 	// with the ClusterClass surfaced in the ClusterClass status or controller logs.
 	TopologyReconciledClusterClassNotReconciledReason = "ClusterClassNotReconciled"
+
+	// TopologyReconciledPausedReason (Severity=Info) surfaces when the Cluster is paused.
+	TopologyReconciledPausedReason = "Paused"
 )
 
 // Conditions and condition reasons for ClusterClass.
diff --git a/internal/controllers/topology/cluster/cluster_controller.go b/internal/controllers/topology/cluster/cluster_controller.go
index 11cd135b7460..8eddb3df81b3 100644
--- a/internal/controllers/topology/cluster/cluster_controller.go
+++ b/internal/controllers/topology/cluster/cluster_controller.go
@@ -120,7 +120,7 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
 			builder.WithPredicates(predicates.ResourceIsTopologyOwned(mgr.GetScheme(), predicateLog)),
 		).
 		WithOptions(options).
-		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(mgr.GetScheme(), predicateLog, r.WatchFilterValue)).
+		WithEventFilter(predicates.ResourceHasFilterLabel(mgr.GetScheme(), predicateLog, r.WatchFilterValue)).
 		Build(r)
 
 	if err != nil {
@@ -175,13 +175,6 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
 		return ctrl.Result{}, nil
 	}
 
-	// Return early if the Cluster is paused.
-	// TODO: What should we do if the cluster class is paused?
-	if annotations.IsPaused(cluster, cluster) {
-		log.Info("Reconciliation is paused for this object")
-		return ctrl.Result{}, nil
-	}
-
 	patchHelper, err := patch.NewHelper(cluster, r.Client)
 	if err != nil {
 		return ctrl.Result{}, err
@@ -200,7 +193,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
 			patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
 				clusterv1.TopologyReconciledCondition,
 			}},
-			patch.WithForceOverwriteConditions{},
+			patch.WithOwnedV1Beta2Conditions{Conditions: []string{
+				clusterv1.ClusterTopologyReconciledV1Beta2Condition,
+			}},
 		}
 		if err := patchHelper.Patch(ctx, cluster, options...); err != nil {
 			reterr = kerrors.NewAggregate([]error{reterr, err})
@@ -208,6 +203,11 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
 		}
 	}()
 
+	// Return early if the Cluster is paused.
+	if cluster.Spec.Paused || annotations.HasPaused(cluster) {
+		return ctrl.Result{}, nil
+	}
+
 	// In case the object is deleted, the managed topology stops to reconcile;
 	// (the other controllers will take care of deletion).
 	if !cluster.ObjectMeta.DeletionTimestamp.IsZero() {
diff --git a/internal/controllers/topology/cluster/conditions.go b/internal/controllers/topology/cluster/conditions.go
index f9bafd9f968f..1670b5d9bbdf 100644
--- a/internal/controllers/topology/cluster/conditions.go
+++ b/internal/controllers/topology/cluster/conditions.go
@@ -21,11 +21,14 @@ import (
 	"strings"
 
 	"github.com/pkg/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	"sigs.k8s.io/cluster-api/exp/topology/scope"
 	"sigs.k8s.io/cluster-api/internal/contract"
+	"sigs.k8s.io/cluster-api/util/annotations"
 	"sigs.k8s.io/cluster-api/util/conditions"
+	v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
 )
 
 func (r *Reconciler) reconcileConditions(s *scope.Scope, cluster *clusterv1.Cluster, reconcileErr error) error {
@@ -36,6 +39,7 @@ func (r *Reconciler) reconcileConditions(s *scope.Scope, cluster *clusterv1.Clus
 // The TopologyReconciled condition is considered true if spec of all the objects associated with the
 // cluster are in sync with the topology defined in the cluster.
 // The condition is false under the following conditions:
+// - The cluster is paused.
 // - An error occurred during the reconcile process of the cluster topology.
 // - The ClusterClass has not been successfully reconciled with its current spec.
 // - The cluster upgrade has not yet propagated to all the components of the cluster.
@@ -43,10 +47,35 @@ func (r *Reconciler) reconcileConditions(s *scope.Scope, cluster *clusterv1.Clus
 // In such a case, since some of the component's spec would be adrift from the topology the
 // topology cannot be considered fully reconciled.
 func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluster *clusterv1.Cluster, reconcileErr error) error {
+	// Mark TopologyReconciled as false if the Cluster is paused.
+	if cluster.Spec.Paused || annotations.HasPaused(cluster) {
+		var messages []string
+		if cluster.Spec.Paused {
+			messages = append(messages, "Cluster spec.paused is set to true")
+		}
+		if annotations.HasPaused(cluster) {
+			messages = append(messages, "Cluster has the cluster.x-k8s.io/paused annotation")
+		}
+		conditions.Set(cluster,
+			conditions.FalseCondition(
+				clusterv1.TopologyReconciledCondition,
+				clusterv1.TopologyReconciledPausedReason,
+				clusterv1.ConditionSeverityInfo,
+				strings.Join(messages, ", "),
+			),
+		)
+		v1beta2conditions.Set(cluster, metav1.Condition{
+			Type:    clusterv1.ClusterTopologyReconciledV1Beta2Condition,
+			Status:  metav1.ConditionFalse,
+			Reason:  clusterv1.ClusterTopologyReconcilePausedV1Beta2Reason,
+			Message: strings.Join(messages, ", "),
+		})
+		return nil
+	}
+
 	// Mark TopologyReconciled as false due to cluster deletion.
 	if !cluster.ObjectMeta.DeletionTimestamp.IsZero() {
-		conditions.Set(
-			cluster,
+		conditions.Set(cluster,
 			conditions.FalseCondition(
 				clusterv1.TopologyReconciledCondition,
 				clusterv1.DeletedReason,
@@ -54,14 +83,18 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
 				"",
 			),
 		)
+		v1beta2conditions.Set(cluster, metav1.Condition{
+			Type:   clusterv1.ClusterTopologyReconciledV1Beta2Condition,
+			Status: metav1.ConditionFalse,
+			Reason: clusterv1.ClusterTopologyReconciledDeletionTimestampSetV1Beta2Reason,
+		})
 		return nil
 	}
 
 	// If an error occurred during reconciliation set the TopologyReconciled condition to false.
 	// Add the error message from the reconcile function to the message of the condition.
 	if reconcileErr != nil {
-		conditions.Set(
-			cluster,
+		conditions.Set(cluster,
 			conditions.FalseCondition(
 				clusterv1.TopologyReconciledCondition,
 				clusterv1.TopologyReconcileFailedReason,
@@ -70,6 +103,13 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
 				reconcileErr.Error(),
 			),
 		)
+		v1beta2conditions.Set(cluster, metav1.Condition{
+			Type:   clusterv1.ClusterTopologyReconciledV1Beta2Condition,
+			Status: metav1.ConditionFalse,
+			Reason: clusterv1.ClusterTopologyReconciledFailedV1Beta2Reason,
+			// TODO: Add a protection for messages continuously changing leading to Cluster object changes/reconcile.
+			Message: reconcileErr.Error(),
+		})
 		return nil
 	}
 
@@ -77,8 +117,7 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
 	// is not up to date.
 	if s.Blueprint != nil && s.Blueprint.ClusterClass != nil &&
 		s.Blueprint.ClusterClass.GetGeneration() != s.Blueprint.ClusterClass.Status.ObservedGeneration {
-		conditions.Set(
-			cluster,
+		conditions.Set(cluster,
 			conditions.FalseCondition(
 				clusterv1.TopologyReconciledCondition,
 				clusterv1.TopologyReconciledClusterClassNotReconciledReason,
@@ -87,14 +126,20 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
 				".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused",
 			),
 		)
+		v1beta2conditions.Set(cluster, metav1.Condition{
+			Type:   clusterv1.ClusterTopologyReconciledV1Beta2Condition,
+			Status: metav1.ConditionFalse,
+			Reason: clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason,
+			Message: "ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if " +
+				".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused",
+		})
 		return nil
 	}
 
 	// If any of the lifecycle hooks are blocking any part of the reconciliation then topology
 	// is not considered as fully reconciled.
 	if s.HookResponseTracker.AggregateRetryAfter() != 0 {
-		conditions.Set(
-			cluster,
+		conditions.Set(cluster,
 			conditions.FalseCondition(
 				clusterv1.TopologyReconciledCondition,
 				clusterv1.TopologyReconciledHookBlockingReason,
@@ -103,6 +148,13 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
 				s.HookResponseTracker.AggregateMessage(),
 			),
 		)
+		v1beta2conditions.Set(cluster, metav1.Condition{
+			Type:   clusterv1.ClusterTopologyReconciledV1Beta2Condition,
+			Status: metav1.ConditionFalse,
+			Reason: clusterv1.ClusterTopologyReconciledHookBlockingV1Beta2Reason,
+			// TODO: Add a protection for messages continuously changing leading to Cluster object changes/reconcile.
+			Message: s.HookResponseTracker.AggregateMessage(),
+		})
 		return nil
 	}
 
@@ -121,6 +173,7 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
 		s.UpgradeTracker.MachinePools.DeferredUpgrade() {
 		msgBuilder := &strings.Builder{}
 		var reason string
+		var v1beta2Reason string
 
 		// TODO(ykakarap): Evaluate potential improvements to building the condition. Multiple causes can trigger the
 		// condition to be false at the same time (Example: ControlPlane.IsPendingUpgrade and MachineDeployments.IsAnyPendingCreate can
@@ -130,40 +183,47 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
 		case s.UpgradeTracker.ControlPlane.IsPendingUpgrade:
 			fmt.Fprintf(msgBuilder, "Control plane rollout and upgrade to version %s on hold.", s.Blueprint.Topology.Version)
 			reason = clusterv1.TopologyReconciledControlPlaneUpgradePendingReason
+			v1beta2Reason = clusterv1.ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason
 		case s.UpgradeTracker.MachineDeployments.IsAnyPendingUpgrade():
 			fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s rollout and upgrade to version %s on hold.",
 				computeNameList(s.UpgradeTracker.MachineDeployments.PendingUpgradeNames()),
 				s.Blueprint.Topology.Version,
 			)
 			reason = clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason
+			v1beta2Reason = clusterv1.ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason
 		case s.UpgradeTracker.MachineDeployments.IsAnyPendingCreate():
 			fmt.Fprintf(msgBuilder, "MachineDeployment(s) for Topologies %s creation on hold.",
 				computeNameList(s.UpgradeTracker.MachineDeployments.PendingCreateTopologyNames()),
 			)
 			reason = clusterv1.TopologyReconciledMachineDeploymentsCreatePendingReason
+			v1beta2Reason = clusterv1.ClusterTopologyReconciledMachineDeploymentsCreatePendingV1Beta2Reason
 		case s.UpgradeTracker.MachineDeployments.DeferredUpgrade():
 			fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s rollout and upgrade to version %s deferred.",
 				computeNameList(s.UpgradeTracker.MachineDeployments.DeferredUpgradeNames()),
 				s.Blueprint.Topology.Version,
 			)
 			reason = clusterv1.TopologyReconciledMachineDeploymentsUpgradeDeferredReason
+			v1beta2Reason = clusterv1.ClusterTopologyReconciledMachineDeploymentsUpgradeDeferredV1Beta2Reason
 		case s.UpgradeTracker.MachinePools.IsAnyPendingUpgrade():
 			fmt.Fprintf(msgBuilder, "MachinePool(s) %s rollout and upgrade to version %s on hold.",
 				computeNameList(s.UpgradeTracker.MachinePools.PendingUpgradeNames()),
 				s.Blueprint.Topology.Version,
 			)
 			reason = clusterv1.TopologyReconciledMachinePoolsUpgradePendingReason
+			v1beta2Reason = clusterv1.ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason
 		case s.UpgradeTracker.MachinePools.IsAnyPendingCreate():
 			fmt.Fprintf(msgBuilder, "MachinePool(s) for Topologies %s creation on hold.",
 				computeNameList(s.UpgradeTracker.MachinePools.PendingCreateTopologyNames()),
 			)
 			reason = clusterv1.TopologyReconciledMachinePoolsCreatePendingReason
+			v1beta2Reason = clusterv1.ClusterTopologyReconciledMachinePoolsCreatePendingV1Beta2Reason
 		case s.UpgradeTracker.MachinePools.DeferredUpgrade():
 			fmt.Fprintf(msgBuilder, "MachinePool(s) %s rollout and upgrade to version %s deferred.",
 				computeNameList(s.UpgradeTracker.MachinePools.DeferredUpgradeNames()),
 				s.Blueprint.Topology.Version,
 			)
 			reason = clusterv1.TopologyReconciledMachinePoolsUpgradeDeferredReason
+			v1beta2Reason = clusterv1.ClusterTopologyReconciledMachinePoolsUpgradeDeferredV1Beta2Reason
 		}
 
 		switch {
@@ -191,8 +251,7 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
 			)
 		}
 
-		conditions.Set(
-			cluster,
+		conditions.Set(cluster,
 			conditions.FalseCondition(
 				clusterv1.TopologyReconciledCondition,
 				reason,
@@ -200,17 +259,26 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
 				msgBuilder.String(),
 			),
 		)
+		v1beta2conditions.Set(cluster, metav1.Condition{
+			Type:    clusterv1.ClusterTopologyReconciledV1Beta2Condition,
+			Status:  metav1.ConditionFalse,
+			Reason:  v1beta2Reason,
+			Message: msgBuilder.String(),
+		})
 		return nil
 	}
 
 	// If there are no errors while reconciling and if the topology is not holding out changes
 	// we can consider that spec of all the objects is reconciled to match the topology. Set the
 	// TopologyReconciled condition to true.
-	conditions.Set(
-		cluster,
+	conditions.Set(cluster,
 		conditions.TrueCondition(clusterv1.TopologyReconciledCondition),
 	)
-
+	v1beta2conditions.Set(cluster, metav1.Condition{
+		Type:   clusterv1.ClusterTopologyReconciledV1Beta2Condition,
+		Status: metav1.ConditionTrue,
+		Reason: clusterv1.ClusterTopologyReconcileSucceededV1Beta2Reason,
+	})
 	return nil
 }
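
The sketch below is illustrative only and not part of this diff: it shows how a client might consume the new v1beta2 `TopologyReconciled` condition and tell the paused state introduced here apart from other not-reconciled states. It uses the generic condition helpers from apimachinery rather than CAPI's `v1beta2conditions` package, and it assumes the Cluster's v1beta2 conditions have already been read into a plain `[]metav1.Condition` slice. The reason literals mirror the constants added in `cluster_types.go`; note that `ClusterTopologyReconcilePausedV1Beta2Reason` aliases the shared `PausedV1Beta2Reason`, which resolves to `"Paused"`.

```go
// Illustrative helper only; the field layout on the Cluster object and the
// helper packages may differ by CAPI version.
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// explainTopologyReconciled maps the TopologyReconciled condition to a short,
// human-readable summary, special-casing the Paused reason added in this PR.
func explainTopologyReconciled(conds []metav1.Condition) string {
	c := meta.FindStatusCondition(conds, "TopologyReconciled")
	if c == nil {
		return "TopologyReconciled not reported yet"
	}
	if c.Status == metav1.ConditionTrue {
		return "topology reconciled: " + c.Reason // "TopologyReconcileSucceeded"
	}
	switch c.Reason {
	case "Paused": // ClusterTopologyReconcilePausedV1Beta2Reason
		return "reconciliation intentionally on hold: " + c.Message
	case "TopologyReconcileFailed": // ClusterTopologyReconciledFailedV1Beta2Reason
		return "reconcile error: " + c.Message
	default:
		// Pending or deferred upgrades, blocking hooks, ClusterClass not reconciled, ...
		return fmt.Sprintf("not reconciled (%s): %s", c.Reason, c.Message)
	}
}

func main() {
	// Example input shaped like the condition written by the paused branch above.
	conds := []metav1.Condition{{
		Type:    "TopologyReconciled",
		Status:  metav1.ConditionFalse,
		Reason:  "Paused",
		Message: "Cluster spec.paused is set to true",
	}}
	fmt.Println(explainTopologyReconciled(conds))
}
```

Because the paused check now runs after the patch helper and its deferred patch are set up, instead of short-circuiting at the top of `Reconcile`, the paused condition is actually persisted to the API server, which is what makes this kind of client-side inspection possible.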