Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🌱 Implement Cluster TopologyReconciled v1beta2 condition #11394

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions api/v1beta1/cluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,59 @@ const (
// ClusterTopologyReconciledV1Beta2Condition is true if the topology controller is working properly.
// Note: This condition is added only if the Cluster is referencing a ClusterClass / defining a managed Topology.
ClusterTopologyReconciledV1Beta2Condition = "TopologyReconciled"

// ClusterTopologyReconcileSucceededV1Beta2Reason documents the reconciliation of a Cluster topology succeeded.
ClusterTopologyReconcileSucceededV1Beta2Reason = "TopologyReconcileSucceeded"

// ClusterTopologyReconciledFailedV1Beta2Reason documents the reconciliation of a Cluster topology
// failing due to an error.
ClusterTopologyReconciledFailedV1Beta2Reason = "TopologyReconcileFailed"

// ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because Control Plane is not yet updated to match the desired topology spec.
ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason = "ControlPlaneUpgradePending"
sbueringer marked this conversation as resolved.
Show resolved Hide resolved

// ClusterTopologyReconciledMachineDeploymentsCreatePendingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachineDeployments is yet to be created.
// This generally happens because new MachineDeployment creations are held off while the ControlPlane is not stable.
ClusterTopologyReconciledMachineDeploymentsCreatePendingV1Beta2Reason = "MachineDeploymentsCreatePending"

// ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachineDeployments is not yet updated to match the desired topology spec.
ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason = "MachineDeploymentsUpgradePending"

// ClusterTopologyReconciledMachineDeploymentsUpgradeDeferredV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because the upgrade for at least one of the MachineDeployments has been deferred.
ClusterTopologyReconciledMachineDeploymentsUpgradeDeferredV1Beta2Reason = "MachineDeploymentsUpgradeDeferred"

// ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachinePools is not yet updated to match the desired topology spec.
ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason = "MachinePoolsUpgradePending"

// ClusterTopologyReconciledMachinePoolsCreatePendingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachinePools is yet to be created.
// This generally happens because new MachinePool creations are held off while the ControlPlane is not stable.
ClusterTopologyReconciledMachinePoolsCreatePendingV1Beta2Reason = "MachinePoolsCreatePending"

// ClusterTopologyReconciledMachinePoolsUpgradeDeferredV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because the upgrade for at least one of the MachinePools has been deferred.
ClusterTopologyReconciledMachinePoolsUpgradeDeferredV1Beta2Reason = "MachinePoolsUpgradeDeferred"

// ClusterTopologyReconciledHookBlockingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because at least one of the lifecycle hooks is blocking.
ClusterTopologyReconciledHookBlockingV1Beta2Reason = "LifecycleHookBlocking"

// ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason documents reconciliation of a Cluster topology not
// yet completed because the ClusterClass has not reconciled yet. If this condition persists there may be an issue
// with the ClusterClass surfaced in the ClusterClass status or controller logs.
ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason = "ClusterClassNotReconciled"

// ClusterTopologyReconciledDeletionTimestampSetV1Beta2Reason surfaces when the Cluster is deleting because the
// DeletionTimestamp is set.
ClusterTopologyReconciledDeletionTimestampSetV1Beta2Reason = DeletionTimestampSetV1Beta2Reason

// ClusterTopologyReconcilePausedV1Beta2Reason surfaces when the Cluster is paused.
ClusterTopologyReconcilePausedV1Beta2Reason = PausedV1Beta2Reason
)

// Cluster's InfrastructureReady condition and corresponding reasons that will be used in v1Beta2 API version.
Expand Down
3 changes: 3 additions & 0 deletions api/v1beta1/condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,9 @@ const (
// yet completed because the ClusterClass has not reconciled yet. If this condition persists there may be an issue
// with the ClusterClass surfaced in the ClusterClass status or controller logs.
TopologyReconciledClusterClassNotReconciledReason = "ClusterClassNotReconciled"

// TopologyReconciledPausedReason (Severity=Info) surfaces when the Cluster is paused.
TopologyReconciledPausedReason = "Paused"
)

// Conditions and condition reasons for ClusterClass.
Expand Down
6 changes: 6 additions & 0 deletions internal/controllers/cluster/cluster_controller_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,12 @@ func (c clusterConditionCustomMergeStrategy) Merge(conditions []v1beta2condition
return v1beta2conditions.InfoMergePriority
}
}

// Treat all reasons except TopologyReconcileFailed and ClusterClassNotReconciled of TopologyReconciled condition as info.
if condition.Type == clusterv1.ClusterTopologyReconciledV1Beta2Condition && condition.Status == metav1.ConditionFalse &&
condition.Reason != clusterv1.ClusterTopologyReconciledFailedV1Beta2Reason && condition.Reason != clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason {
return v1beta2conditions.InfoMergePriority
}
return v1beta2conditions.GetDefaultMergePriorityFunc(c.negativePolarityConditionTypes)(condition)
}).Merge(conditions, conditionTypes)
}
Expand Down
225 changes: 225 additions & 0 deletions internal/controllers/cluster/cluster_controller_status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1949,6 +1949,231 @@ func TestSetAvailableCondition(t *testing.T) {
Reason: v1beta2conditions.MultipleInfoReportedReason,
},
},
{
name: "Surfaces message from TopologyReconciled for reason that doesn't affect availability (no other issues)",
cluster: &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "machine-test",
Namespace: metav1.NamespaceDefault,
},
Spec: clusterv1.ClusterSpec{
Topology: &clusterv1.Topology{}, // using CC
},
Status: clusterv1.ClusterStatus{
V1Beta2: &clusterv1.ClusterV1Beta2Status{
Conditions: []metav1.Condition{
{
Type: clusterv1.ClusterInfrastructureReadyV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterControlPlaneAvailableV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterDeletingV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: "Foo",
},
{
Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason,
Message: "Control plane rollout and upgrade to version v1.29.0 on hold.",
},
},
},
},
},
expectCondition: metav1.Condition{
Type: clusterv1.ClusterAvailableV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: v1beta2conditions.MultipleInfoReportedReason,
Message: "TopologyReconciled: Control plane rollout and upgrade to version v1.29.0 on hold.",
},
},
{
name: "Drops messages from TopologyReconciled for reason that doesn't affect availability (when there is another issue)",
cluster: &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "machine-test",
Namespace: metav1.NamespaceDefault,
},
Spec: clusterv1.ClusterSpec{
Topology: &clusterv1.Topology{}, // using CC
},
Status: clusterv1.ClusterStatus{
V1Beta2: &clusterv1.ClusterV1Beta2Status{
Conditions: []metav1.Condition{
{
Type: clusterv1.ClusterInfrastructureReadyV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterControlPlaneAvailableV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: v1beta2conditions.MultipleIssuesReportedReason,
Message: "3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1",
},
{
Type: clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterDeletingV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: "Foo",
},
{
Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason,
Message: "Control plane rollout and upgrade to version v1.29.0 on hold.",
},
},
},
},
},
expectCondition: metav1.Condition{
Type: clusterv1.ClusterAvailableV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: v1beta2conditions.MultipleIssuesReportedReason, // Note: There is only one condition that is an issue, but it has the MultipleIssuesReported reason.
Message: "WorkersAvailable: 3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1",
},
},
{
name: "Takes into account messages from TopologyReconciled for reason that affects availability (no other issues)",
cluster: &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "machine-test",
Namespace: metav1.NamespaceDefault,
},
Spec: clusterv1.ClusterSpec{
Topology: &clusterv1.Topology{}, // using CC
},
Status: clusterv1.ClusterStatus{
V1Beta2: &clusterv1.ClusterV1Beta2Status{
Conditions: []metav1.Condition{
{
Type: clusterv1.ClusterInfrastructureReadyV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterControlPlaneAvailableV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterDeletingV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: "Foo",
},
{
Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason,
Message: "ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if" +
".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused",
},
},
},
},
},
expectCondition: metav1.Condition{
Type: clusterv1.ClusterAvailableV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason,
Message: "TopologyReconciled: ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if" +
".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused",
},
},
{
name: "Takes into account messages from TopologyReconciled for reason that affects availability (when there is another issue)",
cluster: &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "machine-test",
Namespace: metav1.NamespaceDefault,
},
Spec: clusterv1.ClusterSpec{
Topology: &clusterv1.Topology{}, // using CC
},
Status: clusterv1.ClusterStatus{
V1Beta2: &clusterv1.ClusterV1Beta2Status{
Conditions: []metav1.Condition{
{
Type: clusterv1.ClusterInfrastructureReadyV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterControlPlaneAvailableV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: v1beta2conditions.MultipleIssuesReportedReason,
Message: "3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1",
},
{
Type: clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterDeletingV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: "Foo",
},
{
Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason,
Message: "ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if" +
".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused",
},
},
},
},
},
expectCondition: metav1.Condition{
Type: clusterv1.ClusterAvailableV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: v1beta2conditions.MultipleIssuesReportedReason,
Message: "WorkersAvailable: 3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1; TopologyReconciled: ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if.status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused",
},
},
}

for _, tc := range testCases {
Expand Down
18 changes: 9 additions & 9 deletions internal/controllers/topology/cluster/cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
builder.WithPredicates(predicates.ResourceIsTopologyOwned(mgr.GetScheme(), predicateLog)),
).
WithOptions(options).
WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(mgr.GetScheme(), predicateLog, r.WatchFilterValue)).
WithEventFilter(predicates.ResourceHasFilterLabel(mgr.GetScheme(), predicateLog, r.WatchFilterValue)).
Build(r)

if err != nil {
Expand Down Expand Up @@ -175,13 +175,6 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
return ctrl.Result{}, nil
}

// Return early if the Cluster is paused.
// TODO: What should we do if the cluster class is paused?
if annotations.IsPaused(cluster, cluster) {
log.Info("Reconciliation is paused for this object")
return ctrl.Result{}, nil
}

patchHelper, err := patch.NewHelper(cluster, r.Client)
if err != nil {
return ctrl.Result{}, err
Expand All @@ -200,14 +193,21 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
clusterv1.TopologyReconciledCondition,
}},
patch.WithForceOverwriteConditions{},
patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
clusterv1.ClusterTopologyReconciledV1Beta2Condition,
}},
}
if err := patchHelper.Patch(ctx, cluster, options...); err != nil {
reterr = kerrors.NewAggregate([]error{reterr, err})
return
}
}()

// Return early if the Cluster is paused.
if cluster.Spec.Paused || annotations.HasPaused(cluster) {
return ctrl.Result{}, nil
}

// In case the object is deleted, the managed topology stops to reconcile;
// (the other controllers will take care of deletion).
if !cluster.ObjectMeta.DeletionTimestamp.IsZero() {
Expand Down
Loading