diff --git a/docs/api-references/docs.md b/docs/api-references/docs.md
index 84e54e337e..a676271b16 100644
--- a/docs/api-references/docs.md
+++ b/docs/api-references/docs.md
@@ -5893,6 +5893,19 @@ Optional: Defaults to .spec.services
in favor of backward compatibility
maxFailoverCount
+
+int32
+
+MaxFailoverCount limits the maximum number of replicas that can be added in failover. 0 means no failover.
+Optional: Defaults to 3
+storageClassName
string
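The generated reference above documents maxFailoverCount as an optional int32 defaulting to 3. A minimal sketch of setting it from client code, assuming only the v1alpha1 types and the k8s.io/utils/pointer helper already used elsewhere in this patch (disablePDFailover itself is a hypothetical helper, not part of the change):

    package example

    import (
        "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
        "k8s.io/utils/pointer"
    )

    // disablePDFailover turns PD failover off entirely by setting MaxFailoverCount
    // to 0; leaving the field nil lets the defaulting code fill in 3 instead.
    func disablePDFailover(tc *v1alpha1.TidbCluster) {
        tc.Spec.PD.MaxFailoverCount = pointer.Int32Ptr(0)
    }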
diff --git a/manifests/crd.yaml b/manifests/crd.yaml
index 8945edfa9c..9201b43b04 100644
--- a/manifests/crd.yaml
+++ b/manifests/crd.yaml
@@ -1818,6 +1818,11 @@ spec:
description: 'Limits describes the maximum amount of compute resources
allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
+ maxFailoverCount:
+ description: 'MaxFailoverCount limits the maximum number of replicas that
+ can be added in failover. 0 means no failover. Optional: Defaults to 3'
+ format: int32
+ type: integer
nodeSelector:
description: 'NodeSelector of the component. Merged into the cluster-level
nodeSelector if non-empty Optional: Defaults to cluster-level
diff --git a/pkg/apis/pingcap/v1alpha1/defaulting/tidbcluster.go b/pkg/apis/pingcap/v1alpha1/defaulting/tidbcluster.go
index 772de9d63e..84533f80bc 100644
--- a/pkg/apis/pingcap/v1alpha1/defaulting/tidbcluster.go
+++ b/pkg/apis/pingcap/v1alpha1/defaulting/tidbcluster.go
@@ -107,6 +107,9 @@ func setPdSpecDefault(tc *v1alpha1.TidbCluster) {
tc.Spec.PD.BaseImage = defaultPDImage
}
}
+ if tc.Spec.PD.MaxFailoverCount == nil {
+ tc.Spec.PD.MaxFailoverCount = pointer.Int32Ptr(3)
+ }
}
func setPumpSpecDefault(tc *v1alpha1.TidbCluster) {
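The default is applied only when the field is nil, so an explicit 0 survives defaulting and keeps PD failover disabled. A hypothetical unit-test sketch of that behaviour, assuming SetTidbClusterDefault is the exported entry point of this package that calls setPdSpecDefault, and that tc.Spec.PD is addressable on a zero-value TidbCluster as the code above already assumes:

    package defaulting_test

    import (
        "testing"

        "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
        "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1/defaulting"
        "k8s.io/utils/pointer"
    )

    func TestPDMaxFailoverCountDefaulting(t *testing.T) {
        // A nil MaxFailoverCount is defaulted to 3.
        tc := &v1alpha1.TidbCluster{}
        defaulting.SetTidbClusterDefault(tc)
        if tc.Spec.PD.MaxFailoverCount == nil || *tc.Spec.PD.MaxFailoverCount != 3 {
            t.Errorf("expected MaxFailoverCount to default to 3, got %v", tc.Spec.PD.MaxFailoverCount)
        }

        // An explicit 0 is preserved, i.e. PD failover stays disabled.
        tc = &v1alpha1.TidbCluster{}
        tc.Spec.PD.MaxFailoverCount = pointer.Int32Ptr(0)
        defaulting.SetTidbClusterDefault(tc)
        if *tc.Spec.PD.MaxFailoverCount != 0 {
            t.Errorf("expected explicit 0 to be preserved, got %d", *tc.Spec.PD.MaxFailoverCount)
        }
    }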
diff --git a/pkg/apis/pingcap/v1alpha1/openapi_generated.go b/pkg/apis/pingcap/v1alpha1/openapi_generated.go
index 2e6bdb3af9..cd16aa5979 100644
--- a/pkg/apis/pingcap/v1alpha1/openapi_generated.go
+++ b/pkg/apis/pingcap/v1alpha1/openapi_generated.go
@@ -2728,6 +2728,13 @@ func schema_pkg_apis_pingcap_v1alpha1_PDSpec(ref common.ReferenceCallback) commo
Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.ServiceSpec"),
},
},
+ "maxFailoverCount": {
+ SchemaProps: spec.SchemaProps{
+ Description: "MaxFailoverCount limits the maximum number of replicas that can be added in failover. 0 means no failover. Optional: Defaults to 3",
+ Type: []string{"integer"},
+ Format: "int32",
+ },
+ },
"storageClassName": {
SchemaProps: spec.SchemaProps{
Description: "The storageClassName of the persistent volume for PD data storage. Defaults to Kubernetes default storage class.",
diff --git a/pkg/apis/pingcap/v1alpha1/types.go b/pkg/apis/pingcap/v1alpha1/types.go
index 5d9e4ce89c..3579491a89 100644
--- a/pkg/apis/pingcap/v1alpha1/types.go
+++ b/pkg/apis/pingcap/v1alpha1/types.go
@@ -234,6 +234,12 @@ type PDSpec struct {
// +optional
Service *ServiceSpec `json:"service,omitempty"`
+ // MaxFailoverCount limits the maximum number of replicas that can be added in failover. 0 means no failover.
+ // Optional: Defaults to 3
+ // +kubebuilder:validation:Minimum=0
+ // +optional
+ MaxFailoverCount *int32 `json:"maxFailoverCount,omitempty"`
+
// The storageClassName of the persistent volume for PD data storage.
// Defaults to Kubernetes default storage class.
// +optional
diff --git a/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go
index d2ddaf3da9..f0d0944c49 100644
--- a/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go
+++ b/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go
@@ -2105,6 +2105,11 @@ func (in *PDSpec) DeepCopyInto(out *PDSpec) {
*out = new(ServiceSpec)
(*in).DeepCopyInto(*out)
}
+ if in.MaxFailoverCount != nil {
+ in, out := &in.MaxFailoverCount, &out.MaxFailoverCount
+ *out = new(int32)
+ **out = **in
+ }
if in.StorageClassName != nil {
in, out := &in.StorageClassName, &out.StorageClassName
*out = new(string)
diff --git a/pkg/manager/member/pd_failover.go b/pkg/manager/member/pd_failover.go
index 51526f1926..b5ba116218 100644
--- a/pkg/manager/member/pd_failover.go
+++ b/pkg/manager/member/pd_failover.go
@@ -31,7 +31,6 @@ import (
"k8s.io/klog"
)
-// TODO add maxFailoverCount
type pdFailover struct {
cli versioned.Interface
pdControl pdapi.PDControlInterface
@@ -93,6 +92,12 @@ func (pf *pdFailover) Failover(tc *v1alpha1.TidbCluster) error {
ns, tcName, healthCount, tc.PDStsDesiredReplicas(), tc.Spec.PD.Replicas, len(tc.Status.PD.FailureMembers))
}
+ failureReplicas := getFailureReplicas(tc)
+ if failureReplicas >= int(*tc.Spec.PD.MaxFailoverCount) {
+ klog.Errorf("PD failover replicas (%d) reached the limit (%d), skip failover", failureReplicas, *tc.Spec.PD.MaxFailoverCount)
+ return nil
+ }
+
notDeletedCount := 0
for _, pdMember := range tc.Status.PD.FailureMembers {
if !pdMember.MemberDeleted {
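The guard added here relies on getFailureReplicas (introduced in pd_member_manager.go below), which counts only failure members whose MemberDeleted flag is set, so failures that have not yet produced a replacement replica do not consume the budget. It also dereferences MaxFailoverCount without a nil check, so it depends on the defaulting above having run. Stripped of the cluster types, the check reduces to this small predicate (failoverAllowed is a hypothetical name used only for illustration):

    // failoverAllowed reports whether another failover replica may still be created.
    // deletedFailureMembers is the number of failure members already replaced
    // (MemberDeleted == true); a maxFailoverCount of 0 disables failover entirely.
    func failoverAllowed(deletedFailureMembers int, maxFailoverCount int32) bool {
        return deletedFailureMembers < int(maxFailoverCount)
    }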
diff --git a/pkg/manager/member/pd_failover_test.go b/pkg/manager/member/pd_failover_test.go
index bbb4696a7d..953589830a 100644
--- a/pkg/manager/member/pd_failover_test.go
+++ b/pkg/manager/member/pd_failover_test.go
@@ -33,6 +33,7 @@ import (
kubefake "k8s.io/client-go/kubernetes/fake"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
+ "k8s.io/utils/pointer"
)
func TestPDFailoverFailover(t *testing.T) {
@@ -42,6 +43,7 @@ func TestPDFailoverFailover(t *testing.T) {
type testcase struct {
name string
update func(*v1alpha1.TidbCluster)
+ maxFailoverCount int32
hasPVC bool
hasPod bool
podWithDeletionTimestamp bool
@@ -53,53 +55,12 @@ func TestPDFailoverFailover(t *testing.T) {
errExpectFn func(*GomegaWithT, error)
expectFn func(*v1alpha1.TidbCluster, *pdFailover)
}
- testFn := func(test *testcase, t *testing.T) {
- t.Log(test.name)
- tc := newTidbClusterForPD()
- test.update(tc)
-
- pdFailover, pvcIndexer, podIndexer, fakePDControl, fakePodControl, fakePVCControl := newFakePDFailover()
- pdClient := controller.NewFakePDClient(fakePDControl, tc)
- pdFailover.recorder = recorder
-
- pdClient.AddReaction(pdapi.DeleteMemberByIDActionType, func(action *pdapi.Action) (interface{}, error) {
- if test.delMemberFailed {
- return nil, fmt.Errorf("failed to delete member")
- }
- return nil, nil
- })
-
- if test.hasPVC {
- pvc := newPVCForPDFailover(tc, v1alpha1.PDMemberType, 1)
- if test.pvcWithDeletionTimestamp {
- pvc.DeletionTimestamp = &metav1.Time{Time: time.Now()}
- }
- pvcIndexer.Add(pvc)
- }
- if test.hasPod {
- pod := newPodForPDFailover(tc, v1alpha1.PDMemberType, 1)
- if test.podWithDeletionTimestamp {
- pod.DeletionTimestamp = &metav1.Time{Time: time.Now()}
- }
- podIndexer.Add(pod)
- }
- if test.delPodFailed {
- fakePodControl.SetDeletePodError(errors.NewInternalError(fmt.Errorf("delete pod: API server failed")), 0)
- }
- if test.delPVCFailed {
- fakePVCControl.SetDeletePVCError(errors.NewInternalError(fmt.Errorf("delete pvc: API server failed")), 0)
- }
- tc.Status.PD.Synced = !test.statusSyncFailed
-
- err := pdFailover.Failover(tc)
- test.errExpectFn(g, err)
- test.expectFn(tc, pdFailover)
- }
tests := []testcase{
{
name: "all members are ready",
update: allMembersReady,
+ maxFailoverCount: 3,
hasPVC: true,
hasPod: true,
podWithDeletionTimestamp: false,
@@ -118,6 +79,7 @@ func TestPDFailoverFailover(t *testing.T) {
{
name: "pd status sync failed",
update: allMembersReady,
+ maxFailoverCount: 3,
hasPVC: true,
hasPod: true,
podWithDeletionTimestamp: false,
@@ -135,6 +97,7 @@ func TestPDFailoverFailover(t *testing.T) {
{
name: "two members are not ready, not in quorum",
update: twoMembersNotReady,
+ maxFailoverCount: 3,
hasPVC: true,
hasPod: true,
podWithDeletionTimestamp: false,
@@ -159,6 +122,7 @@ func TestPDFailoverFailover(t *testing.T) {
{
name: "two members are ready and a failure member",
update: oneFailureMember,
+ maxFailoverCount: 3,
hasPVC: true,
hasPod: true,
podWithDeletionTimestamp: false,
@@ -187,6 +151,7 @@ func TestPDFailoverFailover(t *testing.T) {
pd1.LastTransitionTime = metav1.Time{Time: time.Now().Add(-2 * time.Minute)}
tc.Status.PD.Members[pd1Name] = pd1
},
+ maxFailoverCount: 3,
hasPVC: true,
hasPod: true,
podWithDeletionTimestamp: false,
@@ -212,6 +177,7 @@ func TestPDFailoverFailover(t *testing.T) {
pd1.LastTransitionTime = metav1.Time{}
tc.Status.PD.Members[pd1Name] = pd1
},
+ maxFailoverCount: 3,
hasPVC: true,
hasPod: true,
podWithDeletionTimestamp: false,
@@ -231,6 +197,7 @@ func TestPDFailoverFailover(t *testing.T) {
{
name: "has one not ready member, don't have pvc",
update: oneNotReadyMember,
+ maxFailoverCount: 3,
hasPVC: false,
hasPod: true,
podWithDeletionTimestamp: false,
@@ -253,6 +220,7 @@ func TestPDFailoverFailover(t *testing.T) {
{
name: "has one not ready member",
update: oneNotReadyMember,
+ maxFailoverCount: 3,
hasPVC: true,
hasPod: true,
podWithDeletionTimestamp: false,
@@ -278,9 +246,30 @@ func TestPDFailoverFailover(t *testing.T) {
g.Expect(events[1]).To(ContainSubstring("Unhealthy pd pod[test-pd-1] is unhealthy, msg:pd member[12891273174085095651] is unhealthy"))
},
},
+ {
+ name: "has one not ready member but maxFailoverCount is 0",
+ update: oneNotReadyMember,
+ maxFailoverCount: 0,
+ hasPVC: true,
+ hasPod: true,
+ podWithDeletionTimestamp: false,
+ delMemberFailed: false,
+ delPodFailed: false,
+ delPVCFailed: false,
+ statusSyncFailed: false,
+ errExpectFn: errExpectNil,
+ expectFn: func(tc *v1alpha1.TidbCluster, _ *pdFailover) {
+ g.Expect(int(tc.Spec.PD.Replicas)).To(Equal(3))
+ g.Expect(len(tc.Status.PD.FailureMembers)).To(Equal(0))
+ events := collectEvents(recorder.Events)
+ g.Expect(events).To(HaveLen(1))
+ g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy"))
+ },
+ },
{
name: "has one not ready member, and exceed deadline, don't have PVC, has Pod, delete pod success",
update: oneNotReadyMemberAndAFailureMember,
+ maxFailoverCount: 3,
hasPVC: false,
hasPod: true,
podWithDeletionTimestamp: false,
@@ -310,6 +299,7 @@ func TestPDFailoverFailover(t *testing.T) {
pd1.MemberID = "wrong-id"
tc.Status.PD.FailureMembers[pd1Name] = pd1
},
+ maxFailoverCount: 3,
hasPVC: false,
hasPod: true,
podWithDeletionTimestamp: false,
@@ -335,6 +325,7 @@ func TestPDFailoverFailover(t *testing.T) {
{
name: "has one not ready member, and exceed deadline, don't have PVC, has Pod, delete member failed",
update: oneNotReadyMemberAndAFailureMember,
+ maxFailoverCount: 3,
hasPVC: false,
hasPod: true,
podWithDeletionTimestamp: false,
@@ -360,6 +351,7 @@ func TestPDFailoverFailover(t *testing.T) {
{
name: "has one not ready member, and exceed deadline, don't have PVC, has Pod, delete pod failed",
update: oneNotReadyMemberAndAFailureMember,
+ maxFailoverCount: 3,
hasPVC: false,
hasPod: true,
podWithDeletionTimestamp: false,
@@ -386,6 +378,7 @@ func TestPDFailoverFailover(t *testing.T) {
{
name: "has one not ready member, and exceed deadline, has Pod, delete pvc failed",
update: oneNotReadyMemberAndAFailureMember,
+ maxFailoverCount: 3,
hasPVC: true,
hasPod: true,
podWithDeletionTimestamp: false,
@@ -412,6 +405,7 @@ func TestPDFailoverFailover(t *testing.T) {
{
name: "has one not ready member, and exceed deadline, has Pod with deletion timestamp",
update: oneNotReadyMemberAndAFailureMember,
+ maxFailoverCount: 3,
hasPVC: true,
hasPod: true,
podWithDeletionTimestamp: true,
@@ -441,6 +435,7 @@ func TestPDFailoverFailover(t *testing.T) {
{
name: "has one not ready member, and exceed deadline, has PVC with deletion timestamp",
update: oneNotReadyMemberAndAFailureMember,
+ maxFailoverCount: 3,
hasPVC: true,
hasPod: true,
podWithDeletionTimestamp: false,
@@ -470,8 +465,50 @@ func TestPDFailoverFailover(t *testing.T) {
},
}
- for i := range tests {
- testFn(&tests[i], t)
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ tc := newTidbClusterForPD()
+ tc.Spec.PD.MaxFailoverCount = pointer.Int32Ptr(test.maxFailoverCount)
+ test.update(tc)
+
+ pdFailover, pvcIndexer, podIndexer, fakePDControl, fakePodControl, fakePVCControl := newFakePDFailover()
+ pdClient := controller.NewFakePDClient(fakePDControl, tc)
+ pdFailover.recorder = recorder
+
+ pdClient.AddReaction(pdapi.DeleteMemberByIDActionType, func(action *pdapi.Action) (interface{}, error) {
+ if test.delMemberFailed {
+ return nil, fmt.Errorf("failed to delete member")
+ }
+ return nil, nil
+ })
+
+ if test.hasPVC {
+ pvc := newPVCForPDFailover(tc, v1alpha1.PDMemberType, 1)
+ if test.pvcWithDeletionTimestamp {
+ pvc.DeletionTimestamp = &metav1.Time{Time: time.Now()}
+ }
+ pvcIndexer.Add(pvc)
+ }
+ if test.hasPod {
+ pod := newPodForPDFailover(tc, v1alpha1.PDMemberType, 1)
+ if test.podWithDeletionTimestamp {
+ pod.DeletionTimestamp = &metav1.Time{Time: time.Now()}
+ }
+ podIndexer.Add(pod)
+ }
+ if test.delPodFailed {
+ fakePodControl.SetDeletePodError(errors.NewInternalError(fmt.Errorf("delete pod: API server failed")), 0)
+ }
+ if test.delPVCFailed {
+ fakePVCControl.SetDeletePVCError(errors.NewInternalError(fmt.Errorf("delete pvc: API server failed")), 0)
+ }
+
+ tc.Status.PD.Synced = !test.statusSyncFailed
+
+ err := pdFailover.Failover(tc)
+ test.errExpectFn(g, err)
+ test.expectFn(tc, pdFailover)
+ })
}
}
diff --git a/pkg/manager/member/pd_member_manager.go b/pkg/manager/member/pd_member_manager.go
index d45ccb1e1f..fad42640d9 100644
--- a/pkg/manager/member/pd_member_manager.go
+++ b/pkg/manager/member/pd_member_manager.go
@@ -489,6 +489,16 @@ func (pmm *pdMemberManager) pdStatefulSetIsUpgrading(set *apps.StatefulSet, tc *
return false, nil
}
+func getFailureReplicas(tc *v1alpha1.TidbCluster) int {
+ failureReplicas := 0
+ for _, failureMember := range tc.Status.PD.FailureMembers {
+ if failureMember.MemberDeleted {
+ failureReplicas++
+ }
+ }
+ return failureReplicas
+}
+
func getNewPDSetForTidbCluster(tc *v1alpha1.TidbCluster, cm *corev1.ConfigMap) (*apps.StatefulSet, error) {
ns := tc.Namespace
tcName := tc.Name
@@ -568,12 +578,7 @@ func getNewPDSetForTidbCluster(tc *v1alpha1.TidbCluster, cm *corev1.ConfigMap) (
setName := controller.PDMemberName(tcName)
podAnnotations := CombineAnnotations(controller.AnnProm(2379), basePDSpec.Annotations())
stsAnnotations := getStsAnnotations(tc, label.PDLabelVal)
- failureReplicas := 0
- for _, failureMember := range tc.Status.PD.FailureMembers {
- if failureMember.MemberDeleted {
- failureReplicas++
- }
- }
+ failureReplicas := getFailureReplicas(tc)
pdContainer := corev1.Container{
Name: v1alpha1.PDMemberType.String(),
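Extracting getFailureReplicas lets the failover guard and the StatefulSet builder count replaced members from a single definition. The relationship it encodes can be restated as a hypothetical helper (not part of the change), assuming the existing PDStsDesiredReplicas accessor referenced in pd_failover.go computes the same sum:

    // desiredPDReplicas sketches how the PD StatefulSet size is derived: the replicas
    // the user asked for plus one extra replica for every failure member that has
    // already been replaced. MaxFailoverCount caps that second term.
    func desiredPDReplicas(tc *v1alpha1.TidbCluster) int32 {
        return tc.Spec.PD.Replicas + int32(getFailureReplicas(tc))
    }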
diff --git a/tests/e2e/tidbcluster/stability.go b/tests/e2e/tidbcluster/stability.go
index d56659d85a..e2dc42c175 100644
--- a/tests/e2e/tidbcluster/stability.go
+++ b/tests/e2e/tidbcluster/stability.go
@@ -189,39 +189,6 @@ var _ = ginkgo.Describe("[tidb-operator][Stability]", func() {
framework.ExpectEqual(err, wait.ErrWaitTimeout, "TiDB cluster is not affected")
})
}
- })
-
- ginkgo.Context("operator with auto-failover disabled", func() {
- var ocfg *tests.OperatorConfig
- var oa tests.OperatorActions
- var genericCli client.Client
-
- ginkgo.BeforeEach(func() {
- ocfg = &tests.OperatorConfig{
- Namespace: ns,
- ReleaseName: "operator",
- Image: cfg.OperatorImage,
- Tag: cfg.OperatorTag,
- LogLevel: "4",
- TestMode: true,
- AutoFailover: pointer.BoolPtr(false),
- }
- oa = tests.NewOperatorActions(cli, c, asCli, aggrCli, apiExtCli, tests.DefaultPollInterval, ocfg, e2econfig.TestConfig, nil, fw, f)
- ginkgo.By("Installing CRDs")
- oa.CleanCRDOrDie()
- oa.InstallCRDOrDie(ocfg)
- ginkgo.By("Installing tidb-operator")
- oa.CleanOperatorOrDie(ocfg)
- oa.DeployOperatorOrDie(ocfg)
- var err error
- genericCli, err = client.New(config, client.Options{Scheme: scheme.Scheme})
- framework.ExpectNoError(err, "failed to create clientset")
- })
-
- ginkgo.AfterEach(func() {
- ginkgo.By("Uninstall tidb-operator")
- oa.CleanOperatorOrDie(ocfg)
- })
// In this test, we demonstrate and verify the recover process when a
// node (and local storage on it) is permanently gone.
@@ -309,8 +276,11 @@ var _ = ginkgo.Describe("[tidb-operator][Stability]", func() {
clusterName := "test"
tc := fixture.GetTidbCluster(ns, clusterName, utilimage.TiDBV3Version)
tc.Spec.PD.Replicas = 3
+ tc.Spec.PD.MaxFailoverCount = pointer.Int32Ptr(0)
tc.Spec.TiDB.Replicas = 1
+ tc.Spec.TiDB.MaxFailoverCount = pointer.Int32Ptr(0)
tc.Spec.TiKV.Replicas = 3
+ tc.Spec.TiKV.MaxFailoverCount = pointer.Int32Ptr(0)
err := genericCli.Create(context.TODO(), tc)
framework.ExpectNoError(err)
err = oa.WaitForTidbClusterReady(tc, 30*time.Minute, 15*time.Second)