Skip to content

Commit

Permalink
improve orphan pods clean logic (#2007)
Browse files Browse the repository at this point in the history
- check pod has been scheduled or not
- use ResourceVersion precondition
  • Loading branch information
cofyc authored Mar 23, 2020
1 parent b8ed43e commit 1f09144
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 18 deletions.
2 changes: 1 addition & 1 deletion pkg/controller/pod_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ func (rpc *realPodControl) DeletePod(tc *v1alpha1.TidbCluster, pod *corev1.Pod)
ns := tc.GetNamespace()
tcName := tc.GetName()
podName := pod.GetName()
preconditions := metav1.Preconditions{UID: &pod.UID}
preconditions := metav1.Preconditions{UID: &pod.UID, ResourceVersion: &pod.ResourceVersion}
deleteOptions := metav1.DeleteOptions{Preconditions: &preconditions}
err := rpc.kubeCli.CoreV1().Pods(ns).Delete(podName, &deleteOptions)
if err != nil {
Expand Down
24 changes: 12 additions & 12 deletions pkg/manager/member/orphan_pods_cleaner.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import (
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/label"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
Expand All @@ -26,12 +25,12 @@ import (
)

const (
skipReasonOrphanPodsCleanerIsNotPDOrTiKV = "orphan pods cleaner: member type is not pd or tikv"
skipReasonOrphanPodsCleanerPVCNameIsEmpty = "orphan pods cleaner: pvcName is empty"
skipReasonOrphanPodsCleanerPVCIsFound = "orphan pods cleaner: pvc is found"
skipReasonOrphanPodsCleanerPodIsNotPending = "orphan pods cleaner: pod is not pending"
skipReasonOrphanPodsCleanerPodIsNotFound = "orphan pods cleaner: pod does not exist anymore"
skipReasonOrphanPodsCleanerPodChanged = "orphan pods cleaner: pod changed before deletion"
skipReasonOrphanPodsCleanerIsNotPDOrTiKV = "orphan pods cleaner: member type is not pd or tikv"
skipReasonOrphanPodsCleanerPVCNameIsEmpty = "orphan pods cleaner: pvcName is empty"
skipReasonOrphanPodsCleanerPVCIsFound = "orphan pods cleaner: pvc is found"
skipReasonOrphanPodsCleanerPodHasBeenScheduled = "orphan pods cleaner: pod has been scheduled"
skipReasonOrphanPodsCleanerPodIsNotFound = "orphan pods cleaner: pod does not exist anymore"
skipReasonOrphanPodsCleanerPodChanged = "orphan pods cleaner: pod changed before deletion"
)

// OrphanPodsCleaner implements the logic for cleaning the orphan pods(has no pvc)
Expand Down Expand Up @@ -88,8 +87,8 @@ func (opc *orphanPodsCleaner) Clean(tc *v1alpha1.TidbCluster) (map[string]string
continue
}

if pod.Status.Phase != v1.PodPending {
skipReason[podName] = skipReasonOrphanPodsCleanerPodIsNotPending
if len(pod.Spec.NodeName) > 0 {
skipReason[podName] = skipReasonOrphanPodsCleanerPodHasBeenScheduled
continue
}

Expand Down Expand Up @@ -128,7 +127,7 @@ func (opc *orphanPodsCleaner) Clean(tc *v1alpha1.TidbCluster) (map[string]string
}

// if the PVC is not found in apiserver (also informer cache) and the
// phase of the Pod is Pending, delete it and let the stateful
// pod has not been scheduled, delete it and let the stateful
// controller to create the pod and its PVC(s) again
apiPod, err := opc.kubeCli.CoreV1().Pods(ns).Get(podName, metav1.GetOptions{})
if errors.IsNotFound(err) {
Expand All @@ -138,8 +137,9 @@ func (opc *orphanPodsCleaner) Clean(tc *v1alpha1.TidbCluster) (map[string]string
if err != nil {
return skipReason, err
}
// try our best to avoid deleting wrong object in apiserver
// TODO upgrade to use deleteOption.Preconditions.ResourceVersion in client-go 1.14+
// In pre-1.14, kube-apiserver does not support
// deleteOption.Preconditions.ResourceVersion, we try our best to avoid
// deleting wrong object in apiserver.
if apiPod.UID != pod.UID || apiPod.ResourceVersion != pod.ResourceVersion {
skipReason[podName] = skipReasonOrphanPodsCleanerPodChanged
continue
Expand Down
10 changes: 5 additions & 5 deletions pkg/manager/member/orphan_pods_cleaner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,9 @@ func TestOrphanPodsCleanerClean(t *testing.T) {
},
},
{
name: "pvc is not found but pod is not pending",
// in theory, this is is possible because we can't check the PVC
// and pod in an atomic operation.
name: "pvc is not found but pod has been scheduled",
pods: []*corev1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -246,17 +248,15 @@ func TestOrphanPodsCleanerClean(t *testing.T) {
},
},
},
},
Status: corev1.PodStatus{
Phase: corev1.PodRunning,
NodeName: "foobar",
},
},
},
pvcs: []*corev1.PersistentVolumeClaim{},
expectFn: func(g *GomegaWithT, skipReason map[string]string, opc *orphanPodsCleaner, err error) {
g.Expect(err).NotTo(HaveOccurred())
g.Expect(len(skipReason)).To(Equal(1))
g.Expect(skipReason["pod-1"]).To(Equal(skipReasonOrphanPodsCleanerPodIsNotPending))
g.Expect(skipReason["pod-1"]).To(Equal(skipReasonOrphanPodsCleanerPodHasBeenScheduled))
},
},
{
Expand Down

0 comments on commit 1f09144

Please sign in to comment.