From d65989a58b9272297385a888a304645243d7e7bd Mon Sep 17 00:00:00 2001 From: Vince Prignano Date: Thu, 20 Jun 2019 10:36:17 -0700 Subject: [PATCH] Use remote NodeRef in Machine and MachineSet controllers Signed-off-by: Vince Prignano --- pkg/controller/machine/BUILD.bazel | 1 + pkg/controller/machine/machine_controller.go | 41 +++++++++++++----- pkg/controller/machineset/BUILD.bazel | 1 + pkg/controller/machineset/status.go | 45 +++++++++++++++----- 4 files changed, 67 insertions(+), 21 deletions(-) diff --git a/pkg/controller/machine/BUILD.bazel b/pkg/controller/machine/BUILD.bazel index 2e84bb00a6de..38e73928346d 100644 --- a/pkg/controller/machine/BUILD.bazel +++ b/pkg/controller/machine/BUILD.bazel @@ -12,6 +12,7 @@ go_library( deps = [ "//pkg/apis/cluster/v1alpha1:go_default_library", "//pkg/controller/error:go_default_library", + "//pkg/controller/remote:go_default_library", "//pkg/util:go_default_library", "//vendor/github.com/pkg/errors:go_default_library", "//vendor/k8s.io/api/core/v1:go_default_library", diff --git a/pkg/controller/machine/machine_controller.go b/pkg/controller/machine/machine_controller.go index 3b29738ea4ac..5352e22217b1 100644 --- a/pkg/controller/machine/machine_controller.go +++ b/pkg/controller/machine/machine_controller.go @@ -28,6 +28,7 @@ import ( "k8s.io/klog" clusterv1 "sigs.k8s.io/cluster-api/pkg/apis/cluster/v1alpha1" controllerError "sigs.k8s.io/cluster-api/pkg/controller/error" + "sigs.k8s.io/cluster-api/pkg/controller/remote" "sigs.k8s.io/cluster-api/pkg/util" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" @@ -181,8 +182,8 @@ func (r *ReconcileMachine) Reconcile(request reconcile.Request) (reconcile.Resul if m.Status.NodeRef != nil { klog.Infof("Deleting node %q for machine %q", m.Status.NodeRef.Name, m.Name) - if err := r.deleteNode(ctx, m.Status.NodeRef.Name); err != nil { - klog.Errorf("Error deleting node %q for machine %q", name, err) + if err := r.deleteNode(ctx, cluster, m.Status.NodeRef.Name); err != nil && !apierrors.IsNotFound(err) { + klog.Errorf("Error deleting node %q for machine %q: %v", m.Status.NodeRef.Name, name, err) return reconcile.Result{}, err } } @@ -253,6 +254,9 @@ func (r *ReconcileMachine) getCluster(ctx context.Context, machine *clusterv1.Ma return cluster, nil } +// isDeletedAllowed returns false if the Machine we're trying to delete is the +// Machine hosting this controller. This method is meant to be functional +// only when the controllers are running in the workload cluster. func (r *ReconcileMachine) isDeleteAllowed(machine *clusterv1.Machine) bool { if r.nodeName == "" || machine.Status.NodeRef == nil { return true @@ -274,15 +278,30 @@ func (r *ReconcileMachine) isDeleteAllowed(machine *clusterv1.Machine) bool { return node.UID != machine.Status.NodeRef.UID } -func (r *ReconcileMachine) deleteNode(ctx context.Context, name string) error { - var node corev1.Node - if err := r.Client.Get(ctx, client.ObjectKey{Name: name}, &node); err != nil { - if apierrors.IsNotFound(err) { - klog.V(2).Infof("Node %q not found", name) - return nil +func (r *ReconcileMachine) deleteNode(ctx context.Context, cluster *clusterv1.Cluster, name string) error { + if cluster == nil { + // Try to retrieve the Node from the local cluster, if no Cluster reference is found. + var node corev1.Node + if err := r.Client.Get(ctx, client.ObjectKey{Name: name}, &node); err != nil { + return err } - klog.Errorf("Failed to get node %q: %v", name, err) - return err + return r.Client.Delete(ctx, &node) + } + + // Otherwise, proceed to get the remote cluster client and get the Node. + remoteClient, err := remote.NewClusterClient(r.Client, cluster) + if err != nil { + klog.Errorf("Error creating a remote client for cluster %q while deleting Machine %q, won't retry: %v", + cluster.Name, name, err) + return nil } - return r.Client.Delete(ctx, &node) + + corev1Remote, err := remoteClient.CoreV1() + if err != nil { + klog.Errorf("Error creating a remote client for cluster %q while deleting Machine %q, won't retry: %v", + cluster.Name, name, err) + return nil + } + + return corev1Remote.Nodes().Delete(name, &metav1.DeleteOptions{}) } diff --git a/pkg/controller/machineset/BUILD.bazel b/pkg/controller/machineset/BUILD.bazel index 3488d679f8e2..b7b90d990a44 100644 --- a/pkg/controller/machineset/BUILD.bazel +++ b/pkg/controller/machineset/BUILD.bazel @@ -13,6 +13,7 @@ go_library( deps = [ "//pkg/apis/cluster/v1alpha1:go_default_library", "//pkg/controller/noderefutil:go_default_library", + "//pkg/controller/remote:go_default_library", "//pkg/util:go_default_library", "//vendor/github.com/pkg/errors:go_default_library", "//vendor/k8s.io/api/core/v1:go_default_library", diff --git a/pkg/controller/machineset/status.go b/pkg/controller/machineset/status.go index 645d0dbb8c4f..1234ff3e8c0d 100644 --- a/pkg/controller/machineset/status.go +++ b/pkg/controller/machineset/status.go @@ -20,13 +20,13 @@ import ( "context" "fmt" - "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/klog" "sigs.k8s.io/cluster-api/pkg/apis/cluster/v1alpha1" "sigs.k8s.io/cluster-api/pkg/controller/noderefutil" + "sigs.k8s.io/cluster-api/pkg/controller/remote" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -37,6 +37,7 @@ const ( func (c *ReconcileMachineSet) calculateStatus(ms *v1alpha1.MachineSet, filteredMachines []*v1alpha1.Machine) v1alpha1.MachineSetStatus { newStatus := ms.Status + // Count the number of machines that have labels matching the labels of the machine // template of the replica set, the matching machines may have more // labels than are in the template. Because the label of machineTemplateSpec is @@ -46,15 +47,28 @@ func (c *ReconcileMachineSet) calculateStatus(ms *v1alpha1.MachineSet, filteredM readyReplicasCount := 0 availableReplicasCount := 0 templateLabel := labels.Set(ms.Spec.Template.Labels).AsSelectorPreValidated() + + // Retrieve Cluster, if any. + cluster, _ := c.getCluster(ms) + for _, machine := range filteredMachines { if templateLabel.Matches(labels.Set(machine.Labels)) { fullyLabeledReplicasCount++ } - node, err := c.getMachineNode(machine) + + if machine.Status.NodeRef == nil { + klog.Warningf("Unable to retrieve Node status for Machine %q in namespace %q: missing NodeRef", + machine.Name, machine.Namespace) + continue + } + + node, err := c.getMachineNode(cluster, machine) if err != nil { - klog.V(4).Infof("Unable to get node for machine %v, %v", machine.Name, err) + klog.Warningf("Unable to retrieve Node status for Machine %q in namespace %q: %v", + machine.Name, machine.Namespace, err) continue } + if noderefutil.IsNodeReady(node) { readyReplicasCount++ if noderefutil.IsNodeAvailable(node, ms.Spec.MinReadySeconds, metav1.Now()) { @@ -122,13 +136,24 @@ func updateMachineSetStatus(c client.Client, ms *v1alpha1.MachineSet, newStatus return nil, updateErr } -func (c *ReconcileMachineSet) getMachineNode(machine *v1alpha1.Machine) (*corev1.Node, error) { - nodeRef := machine.Status.NodeRef - if nodeRef == nil { - return nil, errors.New("machine has no node ref") +func (c *ReconcileMachineSet) getMachineNode(cluster *v1alpha1.Cluster, machine *v1alpha1.Machine) (*corev1.Node, error) { + if cluster == nil { + // Try to retrieve the Node from the local cluster, if no Cluster reference is found. + node := &corev1.Node{} + err := c.Client.Get(context.Background(), client.ObjectKey{Name: machine.Status.NodeRef.Name}, node) + return node, err + } + + // Otherwise, proceed to get the remote cluster client and get the Node. + remoteClient, err := remote.NewClusterClient(c.Client, cluster) + if err != nil { + return nil, err + } + + corev1Remote, err := remoteClient.CoreV1() + if err != nil { + return nil, err } - node := &corev1.Node{} - err := c.Client.Get(context.Background(), client.ObjectKey{Name: nodeRef.Name}, node) - return node, err + return corev1Remote.Nodes().Get(machine.Status.NodeRef.Name, metav1.GetOptions{}) }