diff --git a/pkg/controller/machine.go b/pkg/controller/machine.go index 11fb8051b..fbe496782 100644 --- a/pkg/controller/machine.go +++ b/pkg/controller/machine.go @@ -599,7 +599,9 @@ func (c *controller) machineDelete(machine *v1alpha1.Machine, driver driver.Driv } } - if machineID != "" { + if machineID != "" && nodeName != "" { + // Begin drain logic only when the nodeName & providerID exist's for the machine + var ( forceDeletePods = false forceDeleteMachine = false diff --git a/pkg/controller/machine_test.go b/pkg/controller/machine_test.go index ff60fa56d..2c55922ee 100644 --- a/pkg/controller/machine_test.go +++ b/pkg/controller/machine_test.go @@ -822,6 +822,7 @@ var _ = Describe("machine", func() { machine *machinev1.Machine errOccurred bool machineDeleted bool + nodeDeleted bool } type data struct { setup setup @@ -948,11 +949,14 @@ var _ = Describe("machine", func() { if data.expect.machineDeleted { Expect(machineErr).To(HaveOccurred()) - Expect(nodeErr).To(HaveOccurred()) } else { Expect(machineErr).ToNot(HaveOccurred()) Expect(machine).ToNot(BeNil()) + } + if data.expect.nodeDeleted { + Expect(nodeErr).To(HaveOccurred()) + } else { Expect(nodeErr).ToNot(HaveOccurred()) Expect(node).ToNot(BeNil()) } @@ -991,6 +995,7 @@ var _ = Describe("machine", func() { expect: expect{ errOccurred: false, machineDeleted: true, + nodeDeleted: true, }, }), Entry("Allow proper deletion of the machine object when providerID is missing but actual VM still exists in cloud", &data{ @@ -1030,6 +1035,7 @@ var _ = Describe("machine", func() { expect: expect{ errOccurred: false, machineDeleted: true, + nodeDeleted: true, }, }), Entry("Machine deletion when drain fails", &data{ @@ -1071,6 +1077,7 @@ var _ = Describe("machine", func() { expect: expect{ errOccurred: true, machineDeleted: false, + nodeDeleted: false, }, }), Entry("Machine force deletion label is present", &data{ @@ -1108,6 +1115,7 @@ var _ = Describe("machine", func() { expect: expect{ errOccurred: false, machineDeleted: true, + nodeDeleted: true, }, }), Entry("Machine force deletion label is present and when drain call fails (APIServer call fails)", &data{ @@ -1150,6 +1158,7 @@ var _ = Describe("machine", func() { expect: expect{ errOccurred: false, machineDeleted: true, + nodeDeleted: true, }, }), Entry("Machine deletion when timeout occurred", &data{ @@ -1201,6 +1210,7 @@ var _ = Describe("machine", func() { expect: expect{ errOccurred: false, machineDeleted: true, + nodeDeleted: true, }, }), Entry("Machine deletion when last drain failed", &data{ @@ -1251,6 +1261,7 @@ var _ = Describe("machine", func() { expect: expect{ errOccurred: false, machineDeleted: true, + nodeDeleted: true, }, }), Entry("Machine deletion when last drain failed and current drain call also fails (APIServer call fails)", &data{ @@ -1306,6 +1317,7 @@ var _ = Describe("machine", func() { expect: expect{ errOccurred: true, machineDeleted: false, + nodeDeleted: false, }, }), Entry("Machine force deletion if underlying Node is NotReady for a long time", &data{ @@ -1343,6 +1355,7 @@ var _ = Describe("machine", func() { expect: expect{ errOccurred: false, machineDeleted: true, + nodeDeleted: true, }, }), Entry("Machine do not force deletion if underlying Node is NotReady for a small period of time, a Machine deletion fails, since kubelet fails to evict Pods", &data{ @@ -1385,6 +1398,44 @@ var _ = Describe("machine", func() { expect: expect{ errOccurred: true, machineDeleted: false, + nodeDeleted: false, + }, + }), + Entry("Allow machine object deletion where nodeName doesn't exist", &data{ + setup: setup{ + secrets: []*corev1.Secret{ + { + ObjectMeta: *newObjectMeta(objMeta, 0), + }, + }, + aws: []*machinev1.AWSMachineClass{ + { + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: machinev1.AWSMachineClassSpec{ + SecretRef: newSecretReference(objMeta, 0), + }, + }, + }, + machines: newMachines(1, &machinev1.MachineTemplateSpec{ + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: machinev1.MachineSpec{ + Class: machinev1.ClassSpec{ + Kind: "AWSMachineClass", + Name: "machine-0", + }, + }, + }, nil, nil, nil, nil), + }, + action: action{ + machine: "machine-0", + fakeProviderID: "fakeID-0", + fakeNodeName: "", //NodeName is set to emptyString + fakeError: nil, + }, + expect: expect{ + errOccurred: false, + machineDeleted: true, + nodeDeleted: false, }, }), ) diff --git a/pkg/util/provider/machinecontroller/controller_suite_test.go b/pkg/util/provider/machinecontroller/controller_suite_test.go index 606305341..f87aec157 100644 --- a/pkg/util/provider/machinecontroller/controller_suite_test.go +++ b/pkg/util/provider/machinecontroller/controller_suite_test.go @@ -45,9 +45,9 @@ import ( "k8s.io/utils/pointer" ) -func TestMachineControllerManagerSuite(t *testing.T) { +func TestMachineControllerSuite(t *testing.T) { RegisterFailHandler(Fail) - RunSpecs(t, "Machine Controller Manager Suite") + RunSpecs(t, "Machine Controller Suite") } var ( diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index f9e732505..3dc2d26a5 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -1226,6 +1226,108 @@ var _ = Describe("machine", func() { ), }, }), + Entry("Drain skipping as nodeName is not valid", &data{ + setup: setup{ + secrets: []*corev1.Secret{ + { + ObjectMeta: *newObjectMeta(objMeta, 0), + }, + }, + machineClasses: []*v1alpha1.MachineClass{ + { + ObjectMeta: *newObjectMeta(objMeta, 0), + SecretRef: newSecretReference(objMeta, 0), + }, + }, + machines: newMachines( + 1, + &v1alpha1.MachineTemplateSpec{ + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: v1alpha1.MachineSpec{ + Class: v1alpha1.ClassSpec{ + Kind: "MachineClass", + Name: "machine-0", + }, + ProviderID: "fakeID", + }, + }, + &v1alpha1.MachineStatus{ + Node: "fakeNode", + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineTerminating, + LastUpdateTime: metav1.Now(), + }, + LastOperation: v1alpha1.LastOperation{ + Description: machineutils.InitiateDrain, + State: v1alpha1.MachineStateProcessing, + Type: v1alpha1.MachineOperationDelete, + LastUpdateTime: metav1.Now(), + }, + Conditions: []corev1.NodeCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionUnknown, + LastTransitionTime: metav1.NewTime(time.Now().Add(-6 * time.Minute)), + }, + }, + }, + nil, + map[string]string{ + machineutils.MachinePriority: "3", + }, + map[string]string{ + "node": "", + }, + true, + ), + }, + action: action{ + machine: "machine-0", + fakeDriver: &driver.FakeDriver{ + VMExists: true, + ProviderID: "fakeID-0", + NodeName: "", + Err: nil, + }, + }, + expect: expect{ + err: fmt.Errorf("Skipping drain as nodeName is not a valid one for machine. Initiate VM deletion"), + retry: machineutils.RetryOp, + machine: newMachine( + &v1alpha1.MachineTemplateSpec{ + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: v1alpha1.MachineSpec{ + Class: v1alpha1.ClassSpec{ + Kind: "MachineClass", + Name: "machine-0", + }, + ProviderID: "fakeID", + }, + }, + &v1alpha1.MachineStatus{ + Node: "fakeNode", + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineTerminating, + LastUpdateTime: metav1.Now(), + }, + LastOperation: v1alpha1.LastOperation{ + Description: fmt.Sprintf("Skipping drain as nodeName is not a valid one for machine. Initiate VM deletion"), + State: v1alpha1.MachineStateProcessing, + Type: v1alpha1.MachineOperationDelete, + LastUpdateTime: metav1.Now(), + }, + }, + nil, + map[string]string{ + machineutils.MachinePriority: "3", + }, + map[string]string{ + "node": "", + }, + true, + ), + }, + }), Entry("Drain skipping as machine is NotReady for a long time (5 minutes)", &data{ setup: setup{ secrets: []*corev1.Secret{ diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index e636e21f5..53ff198db 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -803,6 +803,16 @@ func (c *controller) getVMStatus(getMachineStatusRequest *driver.GetMachineStatu return retry, err } +// isValidNodeName checks if the nodeName is valid +func isValidNodeName(nodeName string) bool { + if nodeName == "" { + // if nodeName is empty + return false + } + + return true +} + // drainNode attempts to drain the node backed by the machine object func (c *controller) drainNode(deleteMachineRequest *driver.DeleteMachineRequest) (machineutils.Retry, error) { var ( @@ -826,20 +836,29 @@ func (c *controller) drainNode(deleteMachineRequest *driver.DeleteMachineRequest nodeNotReadyDuration = 5 * time.Minute ) - for _, condition := range machine.Status.Conditions { - if condition.Type == v1.NodeReady && condition.Status != corev1.ConditionTrue && (time.Since(condition.LastTransitionTime.Time) > nodeNotReadyDuration) { - klog.Warningf("Skipping drain for NotReady machine %q", machine.Name) - err = fmt.Errorf("Skipping drain as machine is NotReady for over 5minutes. %s", machineutils.InitiateVMDeletion) - skipDrain = true + if !isValidNodeName(nodeName) { + klog.Warningf("Skipping drain as nodeName is not a valid one for machine %q", machine.Name) + err = fmt.Errorf("Skipping drain as nodeName is not a valid one for machine. %s", machineutils.InitiateVMDeletion) + description = fmt.Sprintf("Skipping drain as nodeName is not a valid one for machine. %s", machineutils.InitiateVMDeletion) + skipDrain = true + } else { + for _, condition := range machine.Status.Conditions { + if condition.Type == v1.NodeReady { + if condition.Status != corev1.ConditionTrue && (time.Since(condition.LastTransitionTime.Time) > nodeNotReadyDuration) { + klog.Warningf("Skipping drain for NotReady machine %q", machine.Name) + err = fmt.Errorf("Skipping drain as machine is NotReady for over 5minutes. %s", machineutils.InitiateVMDeletion) + description = fmt.Sprintf("Skipping drain as machine is NotReady for over 5minutes. %s", machineutils.InitiateVMDeletion) + skipDrain = true + } + // break once the condition is found + break + } } } if skipDrain { - // If not is not ready for over 5 minutes, skip draining this machine - description = fmt.Sprintf("Skipping drain as machine is NotReady for over 5minutes. %s", machineutils.InitiateVMDeletion) state = v1alpha1.MachineStateProcessing phase = v1alpha1.MachineTerminating - } else { // Timeout value obtained by subtracting last operation with expected time out period timeOut := metav1.Now().Add(-timeOutDuration).Sub(machine.Status.CurrentStatus.LastUpdateTime.Time)