diff --git a/pkg/controller/pod_control.go b/pkg/controller/pod_control.go index f372ba85601..02874498aab 100644 --- a/pkg/controller/pod_control.go +++ b/pkg/controller/pod_control.go @@ -191,7 +191,7 @@ func (rpc *realPodControl) DeletePod(tc *v1alpha1.TidbCluster, pod *corev1.Pod) ns := tc.GetNamespace() tcName := tc.GetName() podName := pod.GetName() - preconditions := metav1.Preconditions{UID: &pod.UID} + preconditions := metav1.Preconditions{UID: &pod.UID, ResourceVersion: &pod.ResourceVersion} deleteOptions := metav1.DeleteOptions{Preconditions: &preconditions} err := rpc.kubeCli.CoreV1().Pods(ns).Delete(podName, &deleteOptions) if err != nil { diff --git a/pkg/manager/member/orphan_pods_cleaner.go b/pkg/manager/member/orphan_pods_cleaner.go index b914e649b4a..14aa5687a24 100644 --- a/pkg/manager/member/orphan_pods_cleaner.go +++ b/pkg/manager/member/orphan_pods_cleaner.go @@ -17,7 +17,6 @@ import ( "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" "github.com/pingcap/tidb-operator/pkg/controller" "github.com/pingcap/tidb-operator/pkg/label" - v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" @@ -26,12 +25,12 @@ import ( ) const ( - skipReasonOrphanPodsCleanerIsNotPDOrTiKV = "orphan pods cleaner: member type is not pd or tikv" - skipReasonOrphanPodsCleanerPVCNameIsEmpty = "orphan pods cleaner: pvcName is empty" - skipReasonOrphanPodsCleanerPVCIsFound = "orphan pods cleaner: pvc is found" - skipReasonOrphanPodsCleanerPodIsNotPending = "orphan pods cleaner: pod is not pending" - skipReasonOrphanPodsCleanerPodIsNotFound = "orphan pods cleaner: pod does not exist anymore" - skipReasonOrphanPodsCleanerPodChanged = "orphan pods cleaner: pod changed before deletion" + skipReasonOrphanPodsCleanerIsNotPDOrTiKV = "orphan pods cleaner: member type is not pd or tikv" + skipReasonOrphanPodsCleanerPVCNameIsEmpty = "orphan pods cleaner: pvcName is empty" + skipReasonOrphanPodsCleanerPVCIsFound = "orphan pods cleaner: pvc is found" + skipReasonOrphanPodsCleanerPodHasBeenScheduled = "orphan pods cleaner: pod has been scheduled" + skipReasonOrphanPodsCleanerPodIsNotFound = "orphan pods cleaner: pod does not exist anymore" + skipReasonOrphanPodsCleanerPodChanged = "orphan pods cleaner: pod changed before deletion" ) // OrphanPodsCleaner implements the logic for cleaning the orphan pods(has no pvc) @@ -88,8 +87,8 @@ func (opc *orphanPodsCleaner) Clean(tc *v1alpha1.TidbCluster) (map[string]string continue } - if pod.Status.Phase != v1.PodPending { - skipReason[podName] = skipReasonOrphanPodsCleanerPodIsNotPending + if len(pod.Spec.NodeName) > 0 { + skipReason[podName] = skipReasonOrphanPodsCleanerPodHasBeenScheduled continue } @@ -128,7 +127,7 @@ func (opc *orphanPodsCleaner) Clean(tc *v1alpha1.TidbCluster) (map[string]string } // if the PVC is not found in apiserver (also informer cache) and the - // phase of the Pod is Pending, delete it and let the stateful + // pod has not been scheduled, delete it and let the stateful // controller to create the pod and its PVC(s) again apiPod, err := opc.kubeCli.CoreV1().Pods(ns).Get(podName, metav1.GetOptions{}) if errors.IsNotFound(err) { @@ -138,8 +137,9 @@ func (opc *orphanPodsCleaner) Clean(tc *v1alpha1.TidbCluster) (map[string]string if err != nil { return skipReason, err } - // try our best to avoid deleting wrong object in apiserver - // TODO upgrade to use deleteOption.Preconditions.ResourceVersion in client-go 1.14+ + // In pre-1.14, kube-apiserver does not support + // deleteOption.Preconditions.ResourceVersion, we try our best to avoid + // deleting wrong object in apiserver. if apiPod.UID != pod.UID || apiPod.ResourceVersion != pod.ResourceVersion { skipReason[podName] = skipReasonOrphanPodsCleanerPodChanged continue diff --git a/pkg/manager/member/orphan_pods_cleaner_test.go b/pkg/manager/member/orphan_pods_cleaner_test.go index d7bc6251d38..f954b7c3d8a 100644 --- a/pkg/manager/member/orphan_pods_cleaner_test.go +++ b/pkg/manager/member/orphan_pods_cleaner_test.go @@ -227,7 +227,9 @@ func TestOrphanPodsCleanerClean(t *testing.T) { }, }, { - name: "pvc is not found but pod is not pending", + // in theory, this is is possible because we can't check the PVC + // and pod in an atomic operation. + name: "pvc is not found but pod has been scheduled", pods: []*corev1.Pod{ { ObjectMeta: metav1.ObjectMeta{ @@ -246,9 +248,7 @@ func TestOrphanPodsCleanerClean(t *testing.T) { }, }, }, - }, - Status: corev1.PodStatus{ - Phase: corev1.PodRunning, + NodeName: "foobar", }, }, }, @@ -256,7 +256,7 @@ func TestOrphanPodsCleanerClean(t *testing.T) { expectFn: func(g *GomegaWithT, skipReason map[string]string, opc *orphanPodsCleaner, err error) { g.Expect(err).NotTo(HaveOccurred()) g.Expect(len(skipReason)).To(Equal(1)) - g.Expect(skipReason["pod-1"]).To(Equal(skipReasonOrphanPodsCleanerPodIsNotPending)) + g.Expect(skipReason["pod-1"]).To(Equal(skipReasonOrphanPodsCleanerPodHasBeenScheduled)) }, }, {