From 9d0818a0a685dd01bc1b991d238d64780ef901d2 Mon Sep 17 00:00:00 2001 From: Yecheng Fu Date: Wed, 15 Apr 2020 13:20:29 +0800 Subject: [PATCH] Add spec.pd.maxFailoverCount to limit max failover replicas for PD (#2184) * Add spec.pd.maxFailoverCount to limit max failover replicas for PD * update api generated files --- docs/api-references/docs.md | 13 ++ manifests/crd.yaml | 5 + .../v1alpha1/defaulting/tidbcluster.go | 3 + .../pingcap/v1alpha1/openapi_generated.go | 7 + pkg/apis/pingcap/v1alpha1/types.go | 6 + .../pingcap/v1alpha1/zz_generated.deepcopy.go | 5 + pkg/manager/member/pd_failover.go | 7 +- pkg/manager/member/pd_failover_test.go | 125 ++++++++++++------ pkg/manager/member/pd_member_manager.go | 17 ++- tests/e2e/tidbcluster/stability.go | 36 +---- 10 files changed, 140 insertions(+), 84 deletions(-) diff --git a/docs/api-references/docs.md b/docs/api-references/docs.md index 189656a5fa..56df6fa653 100644 --- a/docs/api-references/docs.md +++ b/docs/api-references/docs.md @@ -5770,6 +5770,19 @@ Optional: Defaults to .spec.services in favor of backward compatibi +maxFailoverCount
+<em>
+int32
+</em>
+</td>
+<td>
+<em>(Optional)</em>
+<p>MaxFailoverCount limit the max replicas could be added in failover, 0 means no failover.
+Optional: Defaults to 3</p>
+</td>
+</tr>
+<tr>
+<td>
 storageClassName<br/>
string diff --git a/manifests/crd.yaml b/manifests/crd.yaml index 7c4605bae6..98742eb058 100644 --- a/manifests/crd.yaml +++ b/manifests/crd.yaml @@ -1818,6 +1818,11 @@ spec: description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' type: object + maxFailoverCount: + description: 'MaxFailoverCount limit the max replicas could be added + in failover, 0 means no failover. Optional: Defaults to 3' + format: int32 + type: integer nodeSelector: description: 'NodeSelector of the component. Merged into the cluster-level nodeSelector if non-empty Optional: Defaults to cluster-level diff --git a/pkg/apis/pingcap/v1alpha1/defaulting/tidbcluster.go b/pkg/apis/pingcap/v1alpha1/defaulting/tidbcluster.go index 772de9d63e..84533f80bc 100644 --- a/pkg/apis/pingcap/v1alpha1/defaulting/tidbcluster.go +++ b/pkg/apis/pingcap/v1alpha1/defaulting/tidbcluster.go @@ -107,6 +107,9 @@ func setPdSpecDefault(tc *v1alpha1.TidbCluster) { tc.Spec.PD.BaseImage = defaultPDImage } } + if tc.Spec.PD.MaxFailoverCount == nil { + tc.Spec.PD.MaxFailoverCount = pointer.Int32Ptr(3) + } } func setPumpSpecDefault(tc *v1alpha1.TidbCluster) { diff --git a/pkg/apis/pingcap/v1alpha1/openapi_generated.go b/pkg/apis/pingcap/v1alpha1/openapi_generated.go index 18c0663c61..9d01b379de 100644 --- a/pkg/apis/pingcap/v1alpha1/openapi_generated.go +++ b/pkg/apis/pingcap/v1alpha1/openapi_generated.go @@ -2639,6 +2639,13 @@ func schema_pkg_apis_pingcap_v1alpha1_PDSpec(ref common.ReferenceCallback) commo Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.ServiceSpec"), }, }, + "maxFailoverCount": { + SchemaProps: spec.SchemaProps{ + Description: "MaxFailoverCount limit the max replicas could be added in failover, 0 means no failover. Optional: Defaults to 3", + Type: []string{"integer"}, + Format: "int32", + }, + }, "storageClassName": { SchemaProps: spec.SchemaProps{ Description: "The storageClassName of the persistent volume for PD data storage. Defaults to Kubernetes default storage class.", diff --git a/pkg/apis/pingcap/v1alpha1/types.go b/pkg/apis/pingcap/v1alpha1/types.go index 5d9e4ce89c..3579491a89 100644 --- a/pkg/apis/pingcap/v1alpha1/types.go +++ b/pkg/apis/pingcap/v1alpha1/types.go @@ -234,6 +234,12 @@ type PDSpec struct { // +optional Service *ServiceSpec `json:"service,omitempty"` + // MaxFailoverCount limit the max replicas could be added in failover, 0 means no failover. + // Optional: Defaults to 3 + // +kubebuilder:validation:Minimum=0 + // +optional + MaxFailoverCount *int32 `json:"maxFailoverCount,omitempty"` + // The storageClassName of the persistent volume for PD data storage. // Defaults to Kubernetes default storage class. 
// +optional diff --git a/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go index 4d30fc65f6..0a17b5a0db 100644 --- a/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go @@ -2059,6 +2059,11 @@ func (in *PDSpec) DeepCopyInto(out *PDSpec) { *out = new(ServiceSpec) (*in).DeepCopyInto(*out) } + if in.MaxFailoverCount != nil { + in, out := &in.MaxFailoverCount, &out.MaxFailoverCount + *out = new(int32) + **out = **in + } if in.StorageClassName != nil { in, out := &in.StorageClassName, &out.StorageClassName *out = new(string) diff --git a/pkg/manager/member/pd_failover.go b/pkg/manager/member/pd_failover.go index 51526f1926..b5ba116218 100644 --- a/pkg/manager/member/pd_failover.go +++ b/pkg/manager/member/pd_failover.go @@ -31,7 +31,6 @@ import ( "k8s.io/klog" ) -// TODO add maxFailoverCount type pdFailover struct { cli versioned.Interface pdControl pdapi.PDControlInterface @@ -93,6 +92,12 @@ func (pf *pdFailover) Failover(tc *v1alpha1.TidbCluster) error { ns, tcName, healthCount, tc.PDStsDesiredReplicas(), tc.Spec.PD.Replicas, len(tc.Status.PD.FailureMembers)) } + failureReplicas := getFailureReplicas(tc) + if failureReplicas >= int(*tc.Spec.PD.MaxFailoverCount) { + klog.Errorf("PD failover replicas (%d) reaches the limit (%d), skip failover", failureReplicas, *tc.Spec.PD.MaxFailoverCount) + return nil + } + notDeletedCount := 0 for _, pdMember := range tc.Status.PD.FailureMembers { if !pdMember.MemberDeleted { diff --git a/pkg/manager/member/pd_failover_test.go b/pkg/manager/member/pd_failover_test.go index bbb4696a7d..953589830a 100644 --- a/pkg/manager/member/pd_failover_test.go +++ b/pkg/manager/member/pd_failover_test.go @@ -33,6 +33,7 @@ import ( kubefake "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" + "k8s.io/utils/pointer" ) func TestPDFailoverFailover(t *testing.T) { @@ -42,6 +43,7 @@ func TestPDFailoverFailover(t *testing.T) { type testcase struct { name string update func(*v1alpha1.TidbCluster) + maxFailoverCount int32 hasPVC bool hasPod bool podWithDeletionTimestamp bool @@ -53,53 +55,12 @@ func TestPDFailoverFailover(t *testing.T) { errExpectFn func(*GomegaWithT, error) expectFn func(*v1alpha1.TidbCluster, *pdFailover) } - testFn := func(test *testcase, t *testing.T) { - t.Log(test.name) - tc := newTidbClusterForPD() - test.update(tc) - - pdFailover, pvcIndexer, podIndexer, fakePDControl, fakePodControl, fakePVCControl := newFakePDFailover() - pdClient := controller.NewFakePDClient(fakePDControl, tc) - pdFailover.recorder = recorder - - pdClient.AddReaction(pdapi.DeleteMemberByIDActionType, func(action *pdapi.Action) (interface{}, error) { - if test.delMemberFailed { - return nil, fmt.Errorf("failed to delete member") - } - return nil, nil - }) - - if test.hasPVC { - pvc := newPVCForPDFailover(tc, v1alpha1.PDMemberType, 1) - if test.pvcWithDeletionTimestamp { - pvc.DeletionTimestamp = &metav1.Time{Time: time.Now()} - } - pvcIndexer.Add(pvc) - } - if test.hasPod { - pod := newPodForPDFailover(tc, v1alpha1.PDMemberType, 1) - if test.podWithDeletionTimestamp { - pod.DeletionTimestamp = &metav1.Time{Time: time.Now()} - } - podIndexer.Add(pod) - } - if test.delPodFailed { - fakePodControl.SetDeletePodError(errors.NewInternalError(fmt.Errorf("delete pod: API server failed")), 0) - } - if test.delPVCFailed { - fakePVCControl.SetDeletePVCError(errors.NewInternalError(fmt.Errorf("delete pvc: API server failed")), 0) - } - 
tc.Status.PD.Synced = !test.statusSyncFailed - - err := pdFailover.Failover(tc) - test.errExpectFn(g, err) - test.expectFn(tc, pdFailover) - } tests := []testcase{ { name: "all members are ready", update: allMembersReady, + maxFailoverCount: 3, hasPVC: true, hasPod: true, podWithDeletionTimestamp: false, @@ -118,6 +79,7 @@ func TestPDFailoverFailover(t *testing.T) { { name: "pd status sync failed", update: allMembersReady, + maxFailoverCount: 3, hasPVC: true, hasPod: true, podWithDeletionTimestamp: false, @@ -135,6 +97,7 @@ func TestPDFailoverFailover(t *testing.T) { { name: "two members are not ready, not in quorum", update: twoMembersNotReady, + maxFailoverCount: 3, hasPVC: true, hasPod: true, podWithDeletionTimestamp: false, @@ -159,6 +122,7 @@ func TestPDFailoverFailover(t *testing.T) { { name: "two members are ready and a failure member", update: oneFailureMember, + maxFailoverCount: 3, hasPVC: true, hasPod: true, podWithDeletionTimestamp: false, @@ -187,6 +151,7 @@ func TestPDFailoverFailover(t *testing.T) { pd1.LastTransitionTime = metav1.Time{Time: time.Now().Add(-2 * time.Minute)} tc.Status.PD.Members[pd1Name] = pd1 }, + maxFailoverCount: 3, hasPVC: true, hasPod: true, podWithDeletionTimestamp: false, @@ -212,6 +177,7 @@ func TestPDFailoverFailover(t *testing.T) { pd1.LastTransitionTime = metav1.Time{} tc.Status.PD.Members[pd1Name] = pd1 }, + maxFailoverCount: 3, hasPVC: true, hasPod: true, podWithDeletionTimestamp: false, @@ -231,6 +197,7 @@ func TestPDFailoverFailover(t *testing.T) { { name: "has one not ready member, don't have pvc", update: oneNotReadyMember, + maxFailoverCount: 3, hasPVC: false, hasPod: true, podWithDeletionTimestamp: false, @@ -253,6 +220,7 @@ func TestPDFailoverFailover(t *testing.T) { { name: "has one not ready member", update: oneNotReadyMember, + maxFailoverCount: 3, hasPVC: true, hasPod: true, podWithDeletionTimestamp: false, @@ -278,9 +246,30 @@ func TestPDFailoverFailover(t *testing.T) { g.Expect(events[1]).To(ContainSubstring("Unhealthy pd pod[test-pd-1] is unhealthy, msg:pd member[12891273174085095651] is unhealthy")) }, }, + { + name: "has one not ready member but maxFailoverCount is 0", + update: oneNotReadyMember, + maxFailoverCount: 0, + hasPVC: true, + hasPod: true, + podWithDeletionTimestamp: false, + delMemberFailed: false, + delPodFailed: false, + delPVCFailed: false, + statusSyncFailed: false, + errExpectFn: errExpectNil, + expectFn: func(tc *v1alpha1.TidbCluster, _ *pdFailover) { + g.Expect(int(tc.Spec.PD.Replicas)).To(Equal(3)) + g.Expect(len(tc.Status.PD.FailureMembers)).To(Equal(0)) + events := collectEvents(recorder.Events) + g.Expect(events).To(HaveLen(1)) + g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy")) + }, + }, { name: "has one not ready member, and exceed deadline, don't have PVC, has Pod, delete pod success", update: oneNotReadyMemberAndAFailureMember, + maxFailoverCount: 3, hasPVC: false, hasPod: true, podWithDeletionTimestamp: false, @@ -310,6 +299,7 @@ func TestPDFailoverFailover(t *testing.T) { pd1.MemberID = "wrong-id" tc.Status.PD.FailureMembers[pd1Name] = pd1 }, + maxFailoverCount: 3, hasPVC: false, hasPod: true, podWithDeletionTimestamp: false, @@ -335,6 +325,7 @@ func TestPDFailoverFailover(t *testing.T) { { name: "has one not ready member, and exceed deadline, don't have PVC, has Pod, delete member failed", update: oneNotReadyMemberAndAFailureMember, + maxFailoverCount: 3, hasPVC: false, hasPod: true, podWithDeletionTimestamp: false, @@ -360,6 +351,7 @@ func 
TestPDFailoverFailover(t *testing.T) { { name: "has one not ready member, and exceed deadline, don't have PVC, has Pod, delete pod failed", update: oneNotReadyMemberAndAFailureMember, + maxFailoverCount: 3, hasPVC: false, hasPod: true, podWithDeletionTimestamp: false, @@ -386,6 +378,7 @@ func TestPDFailoverFailover(t *testing.T) { { name: "has one not ready member, and exceed deadline, has Pod, delete pvc failed", update: oneNotReadyMemberAndAFailureMember, + maxFailoverCount: 3, hasPVC: true, hasPod: true, podWithDeletionTimestamp: false, @@ -412,6 +405,7 @@ func TestPDFailoverFailover(t *testing.T) { { name: "has one not ready member, and exceed deadline, has Pod with deletion timestamp", update: oneNotReadyMemberAndAFailureMember, + maxFailoverCount: 3, hasPVC: true, hasPod: true, podWithDeletionTimestamp: true, @@ -441,6 +435,7 @@ func TestPDFailoverFailover(t *testing.T) { { name: "has one not ready member, and exceed deadline, has PVC with deletion timestamp", update: oneNotReadyMemberAndAFailureMember, + maxFailoverCount: 3, hasPVC: true, hasPod: true, podWithDeletionTimestamp: false, @@ -470,8 +465,50 @@ func TestPDFailoverFailover(t *testing.T) { }, } - for i := range tests { - testFn(&tests[i], t) + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + tc := newTidbClusterForPD() + tc.Spec.PD.MaxFailoverCount = pointer.Int32Ptr(test.maxFailoverCount) + test.update(tc) + + pdFailover, pvcIndexer, podIndexer, fakePDControl, fakePodControl, fakePVCControl := newFakePDFailover() + pdClient := controller.NewFakePDClient(fakePDControl, tc) + pdFailover.recorder = recorder + + pdClient.AddReaction(pdapi.DeleteMemberByIDActionType, func(action *pdapi.Action) (interface{}, error) { + if test.delMemberFailed { + return nil, fmt.Errorf("failed to delete member") + } + return nil, nil + }) + + if test.hasPVC { + pvc := newPVCForPDFailover(tc, v1alpha1.PDMemberType, 1) + if test.pvcWithDeletionTimestamp { + pvc.DeletionTimestamp = &metav1.Time{Time: time.Now()} + } + pvcIndexer.Add(pvc) + } + if test.hasPod { + pod := newPodForPDFailover(tc, v1alpha1.PDMemberType, 1) + if test.podWithDeletionTimestamp { + pod.DeletionTimestamp = &metav1.Time{Time: time.Now()} + } + podIndexer.Add(pod) + } + if test.delPodFailed { + fakePodControl.SetDeletePodError(errors.NewInternalError(fmt.Errorf("delete pod: API server failed")), 0) + } + if test.delPVCFailed { + fakePVCControl.SetDeletePVCError(errors.NewInternalError(fmt.Errorf("delete pvc: API server failed")), 0) + } + + tc.Status.PD.Synced = !test.statusSyncFailed + + err := pdFailover.Failover(tc) + test.errExpectFn(g, err) + test.expectFn(tc, pdFailover) + }) } } diff --git a/pkg/manager/member/pd_member_manager.go b/pkg/manager/member/pd_member_manager.go index d45ccb1e1f..fad42640d9 100644 --- a/pkg/manager/member/pd_member_manager.go +++ b/pkg/manager/member/pd_member_manager.go @@ -489,6 +489,16 @@ func (pmm *pdMemberManager) pdStatefulSetIsUpgrading(set *apps.StatefulSet, tc * return false, nil } +func getFailureReplicas(tc *v1alpha1.TidbCluster) int { + failureReplicas := 0 + for _, failureMember := range tc.Status.PD.FailureMembers { + if failureMember.MemberDeleted { + failureReplicas++ + } + } + return failureReplicas +} + func getNewPDSetForTidbCluster(tc *v1alpha1.TidbCluster, cm *corev1.ConfigMap) (*apps.StatefulSet, error) { ns := tc.Namespace tcName := tc.Name @@ -568,12 +578,7 @@ func getNewPDSetForTidbCluster(tc *v1alpha1.TidbCluster, cm *corev1.ConfigMap) ( setName := controller.PDMemberName(tcName) podAnnotations 
:= CombineAnnotations(controller.AnnProm(2379), basePDSpec.Annotations()) stsAnnotations := getStsAnnotations(tc, label.PDLabelVal) - failureReplicas := 0 - for _, failureMember := range tc.Status.PD.FailureMembers { - if failureMember.MemberDeleted { - failureReplicas++ - } - } + failureReplicas := getFailureReplicas(tc) pdContainer := corev1.Container{ Name: v1alpha1.PDMemberType.String(), diff --git a/tests/e2e/tidbcluster/stability.go b/tests/e2e/tidbcluster/stability.go index d56659d85a..e2dc42c175 100644 --- a/tests/e2e/tidbcluster/stability.go +++ b/tests/e2e/tidbcluster/stability.go @@ -189,39 +189,6 @@ var _ = ginkgo.Describe("[tidb-operator][Stability]", func() { framework.ExpectEqual(err, wait.ErrWaitTimeout, "TiDB cluster is not affeteced") }) } - }) - - ginkgo.Context("operator with auto-failover disabled", func() { - var ocfg *tests.OperatorConfig - var oa tests.OperatorActions - var genericCli client.Client - - ginkgo.BeforeEach(func() { - ocfg = &tests.OperatorConfig{ - Namespace: ns, - ReleaseName: "operator", - Image: cfg.OperatorImage, - Tag: cfg.OperatorTag, - LogLevel: "4", - TestMode: true, - AutoFailover: pointer.BoolPtr(false), - } - oa = tests.NewOperatorActions(cli, c, asCli, aggrCli, apiExtCli, tests.DefaultPollInterval, ocfg, e2econfig.TestConfig, nil, fw, f) - ginkgo.By("Installing CRDs") - oa.CleanCRDOrDie() - oa.InstallCRDOrDie(ocfg) - ginkgo.By("Installing tidb-operator") - oa.CleanOperatorOrDie(ocfg) - oa.DeployOperatorOrDie(ocfg) - var err error - genericCli, err = client.New(config, client.Options{Scheme: scheme.Scheme}) - framework.ExpectNoError(err, "failed to create clientset") - }) - - ginkgo.AfterEach(func() { - ginkgo.By("Uninstall tidb-operator") - oa.CleanOperatorOrDie(ocfg) - }) // In this test, we demonstrate and verify the recover process when a // node (and local storage on it) is permanently gone. @@ -309,8 +276,11 @@ var _ = ginkgo.Describe("[tidb-operator][Stability]", func() { clusterName := "test" tc := fixture.GetTidbCluster(ns, clusterName, utilimage.TiDBV3Version) tc.Spec.PD.Replicas = 3 + tc.Spec.PD.MaxFailoverCount = pointer.Int32Ptr(0) tc.Spec.TiDB.Replicas = 1 + tc.Spec.TiDB.MaxFailoverCount = pointer.Int32Ptr(0) tc.Spec.TiKV.Replicas = 3 + tc.Spec.TiKV.MaxFailoverCount = pointer.Int32Ptr(0) err := genericCli.Create(context.TODO(), tc) framework.ExpectNoError(err) err = oa.WaitForTidbClusterReady(tc, 30*time.Minute, 15*time.Second)
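Usage note (not part of the patch): the new field is consumed in two places shown above — `setPdSpecDefault` defaults it to 3 when unset, and `pdFailover.Failover` skips marking any further failure members once the count of already-deleted failure members reaches the limit, so setting it to 0 disables PD failover entirely, which is exactly what the e2e stability test relies on. Below is a minimal, illustrative TidbCluster sketch showing how a user might set the new PD field alongside the pre-existing TiKV/TiDB equivalents referenced in the e2e test; the cluster name, version tag, replica counts, and the omission of storage/resource settings are assumptions for the sketch, not values taken from the patch.

```yaml
# Illustrative TidbCluster sketch (not part of the patch): name, version, and
# replica counts are examples; storage and resource settings are omitted for brevity.
apiVersion: pingcap.com/v1alpha1
kind: TidbCluster
metadata:
  name: basic
spec:
  version: v3.0.8
  pd:
    replicas: 3
    # New in this patch: at most 3 extra PD replicas may be created by failover.
    # 0 disables PD failover; leaving it unset defaults to 3 via setPdSpecDefault.
    maxFailoverCount: 3
  tikv:
    replicas: 3
    maxFailoverCount: 3   # pre-existing field, shown for comparison
  tidb:
    replicas: 2
    maxFailoverCount: 3   # pre-existing field, shown for comparison
```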