Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🌱 Implement grace period for KCP remote conditions #11339

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 5 additions & 9 deletions api/v1beta1/machine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,15 +206,11 @@ const (
// during the deletion workflow, or by a user.
MachineNodeDeletedV1Beta2Reason = ObjectDeletedV1Beta2Reason

// MachineNodeRemoteConnectionFailedV1Beta2Reason surfaces that the remote connection failed.
// If the remote connection probe failed for longer than remote conditions grace period,
// this reason is used when setting NodeHealthy and NodeReady conditions to `Unknown`.
MachineNodeRemoteConnectionFailedV1Beta2Reason = RemoteConnectionFailedV1Beta2Reason

// MachineNodeRemoteConnectionDownV1Beta2Reason surfaces that the remote connection is down.
// This is used when setting NodeHealthy and NodeReady conditions to `Unknown`
// when the connection is down and they haven't been set yet.
MachineNodeRemoteConnectionDownV1Beta2Reason = RemoteConnectionDownV1Beta2Reason
// MachineNodeInspectionFailedV1Beta2Reason documents a failure when inspecting the status of a Node.
MachineNodeInspectionFailedV1Beta2Reason = InspectionFailedV1Beta2Reason

// MachineNodeConnectionDownV1Beta2Reason surfaces that the connection to the workload cluster is down.
MachineNodeConnectionDownV1Beta2Reason = ConnectionDownV1Beta2Reason
)

// Machine's HealthCheckSucceeded condition and corresponding reasons that will be used in v1Beta2 API version.
Expand Down
15 changes: 4 additions & 11 deletions api/v1beta1/v1beta2_condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ const (
// set to false and with the OwnerRemediated condition set to false by the MachineHealthCheck controller.
RemediatingV1Beta2Reason = "Remediating"

// NotRemediatingV1Beta2Reason surfaces when an object does not own any machines marked as not healthy
// by the MachineHealthCheck controller.
// NotRemediatingV1Beta2Reason surfaces when an object does not own any machines with HealthCheckSucceeded
// set to false and with the OwnerRemediated condition set to false by the MachineHealthCheck controller.
NotRemediatingV1Beta2Reason = "NotRemediating"

// NoReplicasV1Beta2Reason surfaces when an object that manages replicas does not have any.
Expand Down Expand Up @@ -142,15 +142,8 @@ const (
// PausedV1Beta2Reason surfaces when an object is paused.
PausedV1Beta2Reason = "Paused"

// RemoteConnectionFailedV1Beta2Reason surfaces that the remote connection failed.
// This is typically used when setting remote conditions (e.g. `NodeHealthy`) to `Unknown`
// after the remote connection probe didn't succeed for remote conditions grace period.
RemoteConnectionFailedV1Beta2Reason = "RemoteConnectionFailed"

// RemoteConnectionDownV1Beta2Reason surfaces that the remote connection is down.
// This is typically used when setting remote conditions (e.g. `NodeHealthy`) to `Unknown`
// when the connection is down and they haven't been set yet.
RemoteConnectionDownV1Beta2Reason = "RemoteConnectionDown"
// ConnectionDownV1Beta2Reason surfaces that the connection to the workload cluster is down.
ConnectionDownV1Beta2Reason = "ConnectionDown"

// DeletionTimestampNotSetV1Beta2Reason surfaces when an object is not deleting because the
// DeletionTimestamp is not set.
Expand Down
36 changes: 31 additions & 5 deletions controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,25 @@ import clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"

// KubeadmControlPlane's Available condition and corresponding reasons that will be used in v1Beta2 API version.
const (
// KubeadmControlPlaneAvailableV1Beta2Condition True if the control plane can be reached, EtcdClusterHealthy is true,
// KubeadmControlPlaneAvailableV1Beta2Condition is True if the control plane can be reached, EtcdClusterHealthy is true,
// and CertificatesAvailable is true.
KubeadmControlPlaneAvailableV1Beta2Condition = clusterv1.AvailableV1Beta2Condition
)

// KubeadmControlPlane's Initialized condition and corresponding reasons that will be used in v1Beta2 API version.
const (
	// KubeadmControlPlaneInitializedV1Beta2Condition reports True once the control plane is functional
	// enough to accept requests. It is usually consumed as the signal to start the provisioning
	// operations that depend on a functional API server but do not require a full HA control plane to exist.
	KubeadmControlPlaneInitializedV1Beta2Condition = "Initialized"

	// KubeadmControlPlaneInitializedV1Beta2Reason is the reason surfaced when the control plane is initialized.
	KubeadmControlPlaneInitializedV1Beta2Reason = "Initialized"

	// KubeadmControlPlaneNotInitializedV1Beta2Reason is the reason surfaced when the control plane is not initialized.
	KubeadmControlPlaneNotInitializedV1Beta2Reason = "NotInitialized"
)

// KubeadmControlPlane's CertificatesAvailable condition and corresponding reasons that will be used in v1Beta2 API version.
const (
// KubeadmControlPlaneCertificatesAvailableV1Beta2Condition is True if all the cluster certificates exist.
Expand Down Expand Up @@ -52,6 +66,10 @@ const (
// etcd cluster hosted on KubeadmControlPlane controlled machines.
KubeadmControlPlaneEtcdClusterInspectionFailedV1Beta2Reason = clusterv1.InspectionFailedV1Beta2Reason

// KubeadmControlPlaneEtcdClusterConnectionDownV1Beta2Reason surfaces that the connection to the workload
// cluster is down.
KubeadmControlPlaneEtcdClusterConnectionDownV1Beta2Reason = clusterv1.ConnectionDownV1Beta2Reason

// KubeadmControlPlaneEtcdClusterHealthyV1Beta2Reason surfaces when the etcd cluster hosted on KubeadmControlPlane
// machines is healthy.
KubeadmControlPlaneEtcdClusterHealthyV1Beta2Reason = "Healthy"
Expand All @@ -77,6 +95,10 @@ const (
// control plane components hosted on KubeadmControlPlane controlled machines.
KubeadmControlPlaneControlPlaneComponentsInspectionFailedV1Beta2Reason = clusterv1.InspectionFailedV1Beta2Reason

// KubeadmControlPlaneControlPlaneComponentsConnectionDownV1Beta2Reason surfaces that the connection to the workload
// cluster is down.
KubeadmControlPlaneControlPlaneComponentsConnectionDownV1Beta2Reason = clusterv1.ConnectionDownV1Beta2Reason

// KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Reason surfaces when the Kubernetes control plane components
// hosted on KubeadmControlPlane machines are healthy.
KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Reason = "Healthy"
Expand Down Expand Up @@ -233,13 +255,13 @@ const (
// pod hosted on a KubeadmControlPlane controlled machine.
KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason = clusterv1.InspectionFailedV1Beta2Reason

// KubeadmControlPlaneMachinePodConnectionDownV1Beta2Reason surfaces that the connection to the workload
// cluster is down.
KubeadmControlPlaneMachinePodConnectionDownV1Beta2Reason = clusterv1.ConnectionDownV1Beta2Reason

// KubeadmControlPlaneMachinePodDeletingV1Beta2Reason surfaces when the machine hosting control plane components
// is being deleted.
KubeadmControlPlaneMachinePodDeletingV1Beta2Reason = "Deleting"

// KubeadmControlPlaneMachinePodInternalErrorV1Beta2Reason surfaces unexpected failures when reading pod hosted
// on a KubeadmControlPlane controlled machine.
KubeadmControlPlaneMachinePodInternalErrorV1Beta2Reason = clusterv1.InternalErrorV1Beta2Reason
)

// EtcdMemberHealthy condition and corresponding reasons that will be used for KubeadmControlPlane controlled machines in v1Beta2 API version.
Expand All @@ -257,6 +279,10 @@ const (
// etcd member hosted on a KubeadmControlPlane controlled machine.
KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason = clusterv1.InspectionFailedV1Beta2Reason

// KubeadmControlPlaneMachineEtcdMemberConnectionDownV1Beta2Reason surfaces that the connection to the workload
// cluster is down.
KubeadmControlPlaneMachineEtcdMemberConnectionDownV1Beta2Reason = clusterv1.ConnectionDownV1Beta2Reason

// KubeadmControlPlaneMachineEtcdMemberDeletingV1Beta2Reason surfaces when the machine hosting an etcd member
// is being deleted.
KubeadmControlPlaneMachineEtcdMemberDeletingV1Beta2Reason = "Deleting"
Expand Down
3 changes: 3 additions & 0 deletions controlplane/kubeadm/controllers/alias.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ type KubeadmControlPlaneReconciler struct {
// WatchFilterValue is the label value used to filter events prior to reconciliation.
WatchFilterValue string

RemoteConditionsGracePeriod time.Duration

// Deprecated: DeprecatedInfraMachineNaming. Name the InfraStructureMachines after the InfraMachineTemplate.
DeprecatedInfraMachineNaming bool
}
Expand All @@ -53,6 +55,7 @@ func (r *KubeadmControlPlaneReconciler) SetupWithManager(ctx context.Context, mg
EtcdDialTimeout: r.EtcdDialTimeout,
EtcdCallTimeout: r.EtcdCallTimeout,
WatchFilterValue: r.WatchFilterValue,
RemoteConditionsGracePeriod: r.RemoteConditionsGracePeriod,
DeprecatedInfraMachineNaming: r.DeprecatedInfraMachineNaming,
}).SetupWithManager(ctx, mgr, options)
}
6 changes: 6 additions & 0 deletions controlplane/kubeadm/internal/control_plane.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,12 @@ func (c *ControlPlane) PatchMachines(ctx context.Context) error {
controlplanev1.MachineSchedulerPodHealthyCondition,
controlplanev1.MachineEtcdPodHealthyCondition,
controlplanev1.MachineEtcdMemberHealthyCondition,
}}, patch.WithOwnedV1Beta2Conditions{Conditions: []string{
sbueringer marked this conversation as resolved.
Show resolved Hide resolved
controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition,
controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition,
controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition,
controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition,
controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition,
}}); err != nil {
errList = append(errList, err)
}
Expand Down
Loading