From b61fbbff4164406712c90cf1012534f8ed1f68de Mon Sep 17 00:00:00 2001 From: BornChanger Date: Tue, 19 Sep 2023 05:40:37 +0800 Subject: [PATCH 1/3] pkg: validate volume number for ebs snapshot restore Signed-off-by: BornChanger --- pkg/backup/restore/restore_manager.go | 87 +++++++++++----------- pkg/backup/restore/restore_manager_test.go | 76 ++++++++++++++++++- 2 files changed, 119 insertions(+), 44 deletions(-) diff --git a/pkg/backup/restore/restore_manager.go b/pkg/backup/restore/restore_manager.go index 7ac85ad6a4..d0956a751f 100644 --- a/pkg/backup/restore/restore_manager.go +++ b/pkg/backup/restore/restore_manager.go @@ -338,38 +338,12 @@ func (rm *restoreManager) readRestoreMetaFromExternalStorage(r *v1alpha1.Restore } func (rm *restoreManager) validateRestore(r *v1alpha1.Restore, tc *v1alpha1.TidbCluster) error { // check tiflash and tikv replicas - tiflashReplicas, tikvReplicas, reason, err := rm.readTiFlashAndTiKVReplicasFromBackupMeta(r) + err := rm.checkTiFlashAndTiKVReplicasFromBackupMeta(r, tc) if err != nil { - klog.Errorf("read tiflash replica failure with reason %s", reason) + klog.Errorf("check tikv/tiflash failure with reason %v", err) return err } - if tc.Spec.TiFlash == nil { - if tiflashReplicas != 0 { - klog.Errorf("tiflash is not configured, backupmeta has %d tiflash", tiflashReplicas) - return fmt.Errorf("tiflash replica missmatched") - } - - } else { - if tc.Spec.TiFlash.Replicas != tiflashReplicas { - klog.Errorf("cluster has %d tiflash configured, backupmeta has %d tiflash", tc.Spec.TiFlash.Replicas, tiflashReplicas) - return fmt.Errorf("tiflash replica missmatched") - } - } - - if tc.Spec.TiKV == nil { - if tikvReplicas != 0 { - klog.Errorf("tikv is not configured, backupmeta has %d tikv", tikvReplicas) - return fmt.Errorf("tikv replica missmatched") - } - - } else { - if tc.Spec.TiKV.Replicas != tikvReplicas { - klog.Errorf("cluster has %d tikv configured, backupmeta has %d tikv", tc.Spec.TiKV.Replicas, tikvReplicas) - return fmt.Errorf("tikv replica missmatched") - } - } - // Check recovery mode is on for EBS br across k8s if r.Spec.Mode == v1alpha1.RestoreModeVolumeSnapshot && r.Spec.FederalVolumeRestorePhase != v1alpha1.FederalVolumeRestoreFinish && !tc.Spec.RecoveryMode { klog.Errorf("recovery mode is not set for across k8s EBS snapshot restore") @@ -437,27 +411,54 @@ func (rm *restoreManager) checkTiKVEncryption(r *v1alpha1.Restore, tc *v1alpha1. return nil } -func (rm *restoreManager) readTiFlashAndTiKVReplicasFromBackupMeta(r *v1alpha1.Restore) (int32, int32, string, error) { +func (rm *restoreManager) checkTiFlashAndTiKVReplicasFromBackupMeta(r *v1alpha1.Restore, tc *v1alpha1.TidbCluster) error { metaInfo, err := backuputil.GetVolSnapBackupMetaData(r, rm.deps.SecretLister) if err != nil { - return 0, 0, "GetVolSnapBackupMetaData failed", err - } - - var tiflashReplicas, tikvReplicas int32 - - if metaInfo.KubernetesMeta.TiDBCluster.Spec.TiFlash == nil { - tiflashReplicas = 0 - } else { - tiflashReplicas = metaInfo.KubernetesMeta.TiDBCluster.Spec.TiFlash.Replicas + klog.Errorf("GetVolSnapBackupMetaData failed") + return err } - if metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV == nil { - tikvReplicas = 0 - } else { - tikvReplicas = metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV.Replicas + // Check mismatch of tiflash config + if (metaInfo.KubernetesMeta.TiDBCluster.Spec.TiFlash == nil || + metaInfo.KubernetesMeta.TiDBCluster.Spec.TiFlash.Replicas == 0) && + tc.Spec.TiFlash != nil && tc.Spec.TiFlash.Replicas > 0 { + klog.Errorf("tiflash is enabled in TC but disabled in backup metadata") + return fmt.Errorf("tiflash replica mismatched") + } else if (tc.Spec.TiFlash == nil || tc.Spec.TiFlash.Replicas == 0) && + metaInfo.KubernetesMeta.TiDBCluster.Spec.TiFlash != nil && + metaInfo.KubernetesMeta.TiDBCluster.Spec.TiFlash.Replicas > 0 { + klog.Errorf("tiflash is disabled in TC enabled in backup metadata") + return fmt.Errorf("tiflash replica mismatched") + } else if tc.Spec.TiFlash != nil && metaInfo.KubernetesMeta.TiDBCluster.Spec.TiFlash != nil && + tc.Spec.TiFlash.Replicas != metaInfo.KubernetesMeta.TiDBCluster.Spec.TiFlash.Replicas { + klog.Errorf("tiflash number in TC is %d but is %d in backup metadata", tc.Spec.TiFlash.Replicas, metaInfo.KubernetesMeta.TiDBCluster.Spec.TiFlash.Replicas) + return fmt.Errorf("tiflash replica mismatched") + } + + // TiKV node must be there + if tc.Spec.TiKV == nil || tc.Spec.TiKV.Replicas == 0 { + klog.Errorf("ebs snapshot restore doesn't support cluster without tikv nodes") + return fmt.Errorf("restore to no tikv cluster") + } else if metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV == nil || + metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV.Replicas == 0 { + klog.Errorf("backup source tc has no tikv nodes") + return fmt.Errorf("backup source tc has no tivk nodes") + } else if tc.Spec.TiKV.Replicas != metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV.Replicas { + klog.Errorf("mismatch tikv replicas, tc has %d, while backup has %d", tc.Spec.TiKV.Replicas, metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV) + return fmt.Errorf("tikv replica mismatch") + } + + // Check volume number + if (tc.Spec.TiKV.StorageVolumes == nil && metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV.StorageVolumes != nil) || + (tc.Spec.TiKV.StorageVolumes != nil && metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV.StorageVolumes == nil) { + klog.Errorf("additional volumes # not match. either tc or backup has no additional volumes, while the other has") + return fmt.Errorf("additional volumes mismatched") + } else if len(tc.Spec.TiKV.StorageVolumes) != len(metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV.StorageVolumes) { + klog.Errorf("additional volumes # not match. tc has %d, and backup has %d", len(tc.Spec.TiKV.StorageVolumes), len(metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV.StorageVolumes)) + return fmt.Errorf("additional volumes mismatched") } - return tiflashReplicas, tikvReplicas, "", nil + return nil } func (rm *restoreManager) readTiKVConfigFromBackupMeta(r *v1alpha1.Restore) (*v1alpha1.TiKVConfigWraper, string, error) { diff --git a/pkg/backup/restore/restore_manager_test.go b/pkg/backup/restore/restore_manager_test.go index a36f8c6602..51815a5bc3 100644 --- a/pkg/backup/restore/restore_manager_test.go +++ b/pkg/backup/restore/restore_manager_test.go @@ -521,7 +521,7 @@ func TestInvalidReplicasBRRestoreByEBS(t *testing.T) { helper.CreateRestore(cases[0].restore) m := NewRestoreManager(deps) err := m.Sync(cases[0].restore) - g.Expect(err).Should(MatchError("tikv replica missmatched")) + g.Expect(err).Should(MatchError("tikv replica mismatch")) }) } @@ -598,3 +598,77 @@ func TestInvalidModeBRRestoreByEBS(t *testing.T) { g.Expect(err).Should(MatchError("recovery mode is off")) }) } + +func TestVolumeNumMismatchBRRestoreByEBS(t *testing.T) { + g := NewGomegaWithT(t) + helper := newHelper(t) + defer helper.Close() + deps := helper.Deps + + cases := []struct { + name string + restore *v1alpha1.Restore + }{ + { + name: "restore-volume", + restore: &v1alpha1.Restore{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-1", + Namespace: "ns-1", + }, + Spec: v1alpha1.RestoreSpec{ + Type: v1alpha1.BackupTypeFull, + Mode: v1alpha1.RestoreModeVolumeSnapshot, + BR: &v1alpha1.BRConfig{ + ClusterNamespace: "ns-1", + Cluster: "cluster-1", + }, + StorageProvider: v1alpha1.StorageProvider{ + Local: &v1alpha1.LocalStorageProvider{ + // Prefix: "prefix", + Volume: corev1.Volume{ + Name: "nfs", + VolumeSource: corev1.VolumeSource{ + NFS: &corev1.NFSVolumeSource{ + Server: "fake-server", + Path: "/tmp", + ReadOnly: true, + }, + }, + }, + VolumeMount: corev1.VolumeMount{ + Name: "nfs", + MountPath: "/tmp", + }, + }, + }, + }, + Status: v1alpha1.RestoreStatus{}, + }, + }, + } + + // Verify invalid tc with mismatch tikv replicas + //generate the restore meta in local nfs, with only 2 tikv replicas + err := os.WriteFile("/tmp/restoremeta", []byte(testutils.ConstructRestore2TiKVMetaStr()), 0644) //nolint:gosec + g.Expect(err).To(Succeed()) + + //generate the backup meta in local nfs, tiflash check need backupmeta to validation + err = os.WriteFile("/tmp/backupmeta", []byte(testutils.ConstructRestore2TiKVMetaStr()), 0644) //nolint:gosec + g.Expect(err).To(Succeed()) + defer func() { + err = os.Remove("/tmp/restoremeta") + g.Expect(err).To(Succeed()) + + err = os.Remove("/tmp/backupmeta") + g.Expect(err).To(Succeed()) + }() + + t.Run(cases[0].name, func(t *testing.T) { + helper.CreateTC(cases[0].restore.Spec.BR.ClusterNamespace, cases[0].restore.Spec.BR.Cluster, true, true) + helper.CreateRestore(cases[0].restore) + m := NewRestoreManager(deps) + err := m.Sync(cases[0].restore) + g.Expect(err).Should(MatchError("tikv replica mismatch")) + }) +} From 0a120fdea41bcda4139e919da661ada3fb801c90 Mon Sep 17 00:00:00 2001 From: BornChanger Date: Mon, 25 Sep 2023 23:31:25 +0800 Subject: [PATCH 2/3] pkg: add unit test Signed-off-by: BornChanger --- pkg/backup/restore/restore_manager.go | 2 +- pkg/backup/restore/restore_manager_test.go | 8 +- pkg/backup/testutils/br.go | 199 ++++++++++++++++++++- pkg/backup/testutils/helpers.go | 4 + 4 files changed, 207 insertions(+), 6 deletions(-) diff --git a/pkg/backup/restore/restore_manager.go b/pkg/backup/restore/restore_manager.go index d0956a751f..46454fb4f3 100644 --- a/pkg/backup/restore/restore_manager.go +++ b/pkg/backup/restore/restore_manager.go @@ -451,7 +451,7 @@ func (rm *restoreManager) checkTiFlashAndTiKVReplicasFromBackupMeta(r *v1alpha1. // Check volume number if (tc.Spec.TiKV.StorageVolumes == nil && metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV.StorageVolumes != nil) || (tc.Spec.TiKV.StorageVolumes != nil && metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV.StorageVolumes == nil) { - klog.Errorf("additional volumes # not match. either tc or backup has no additional volumes, while the other has") + klog.Errorf("additional volumes # not match. either tc or backup metadata has no additional volumes, while the other has") return fmt.Errorf("additional volumes mismatched") } else if len(tc.Spec.TiKV.StorageVolumes) != len(metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV.StorageVolumes) { klog.Errorf("additional volumes # not match. tc has %d, and backup has %d", len(tc.Spec.TiKV.StorageVolumes), len(metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV.StorageVolumes)) diff --git a/pkg/backup/restore/restore_manager_test.go b/pkg/backup/restore/restore_manager_test.go index 51815a5bc3..28a0fd74a3 100644 --- a/pkg/backup/restore/restore_manager_test.go +++ b/pkg/backup/restore/restore_manager_test.go @@ -649,12 +649,12 @@ func TestVolumeNumMismatchBRRestoreByEBS(t *testing.T) { } // Verify invalid tc with mismatch tikv replicas - //generate the restore meta in local nfs, with only 2 tikv replicas - err := os.WriteFile("/tmp/restoremeta", []byte(testutils.ConstructRestore2TiKVMetaStr()), 0644) //nolint:gosec + //generate the restore meta in local nfs, with 3 volumes for each tikv + err := os.WriteFile("/tmp/restoremeta", []byte(testutils.ConstructRestoreTiKVVolumesMetaWithStr()), 0644) //nolint:gosec g.Expect(err).To(Succeed()) //generate the backup meta in local nfs, tiflash check need backupmeta to validation - err = os.WriteFile("/tmp/backupmeta", []byte(testutils.ConstructRestore2TiKVMetaStr()), 0644) //nolint:gosec + err = os.WriteFile("/tmp/backupmeta", []byte(testutils.ConstructRestoreTiKVVolumesMetaWithStr()), 0644) //nolint:gosec g.Expect(err).To(Succeed()) defer func() { err = os.Remove("/tmp/restoremeta") @@ -669,6 +669,6 @@ func TestVolumeNumMismatchBRRestoreByEBS(t *testing.T) { helper.CreateRestore(cases[0].restore) m := NewRestoreManager(deps) err := m.Sync(cases[0].restore) - g.Expect(err).Should(MatchError("tikv replica mismatch")) + g.Expect(err).Should(MatchError("additional volumes mismatched")) }) } diff --git a/pkg/backup/testutils/br.go b/pkg/backup/testutils/br.go index 1a106c2d63..3e78a7a7a8 100644 --- a/pkg/backup/testutils/br.go +++ b/pkg/backup/testutils/br.go @@ -287,7 +287,19 @@ func ConstructRestoreMetaStr() string { "version": "", "tikv": { "maxFailoverCount": 0, - "replicas": 3 + "replicas": 3, + "storageVolumes": [ + { + "mountPath": "/var/lib/raft", + "name": "raft", + "storageSize": "50Gi" + }, + { + "mountPath": "/var/lib/wal", + "name": "wal", + "storageSize": "50Gi" + } + ] } }, "status": { @@ -491,3 +503,188 @@ func ConstructRestore2TiKVMetaStr() string { "options": null }` } + +func ConstructRestoreTiKVVolumesMetaWithStr() string { + return `{ + "tikv": { + "replicas": 3, + "stores": [{ + "store_id": 1, + "volumes": [{ + "volume_id": "vol-0e65f40961a9f6244", + "type": "", + "mount_path": "", + "snapshot_id": "snap-1234567890abcdef0", + "restore_volume_id": "vol-0e65f40961a9f0001" + }] + }, { + "store_id": 2, + "volumes": [{ + "volume_id": "vol-0e65f40961a9f6245", + "type": "", + "mount_path": "", + "snapshot_id": "snap-1234567890abcdef1", + "restore_volume_id": "vol-0e65f40961a9f0002" + }] + }] + }, + "pd": { + "replicas": 0 + }, + "tidb": { + "replicas": 0 + }, + "kubernetes": { + "pvcs": [{ + "metadata": { + "name": "tikv-test-tikv-1", + "uid": "301b0e8b-3538-4f61-a0fd-a25abd9a3121", + "resourceVersion": "1957", + "creationTimestamp": null, + "labels": { + "test/label": "retained" + }, + "annotations": { + "pv.kubernetes.io/bind-completed": "yes", + "pv.kubernetes.io/bound-by-controller": "yes", + "test/annotation": "retained" + }, + "finalizers": ["kubernetes.io/pvc-protection"] + }, + "spec": { + "resources": {}, + "volumeName": "pv-1" + }, + "status": { + "phase": "Bound" + } + }, { + "metadata": { + "name": "tikv-test-tikv-2", + "uid": "301b0e8b-3538-4f61-a0fd-a25abd9a3123", + "resourceVersion": "1959", + "creationTimestamp": null, + "labels": { + "test/label": "retained" + }, + "annotations": { + "pv.kubernetes.io/bind-completed": "yes", + "pv.kubernetes.io/bound-by-controller": "yes", + "test/annotation": "retained" + }, + "finalizers": ["kubernetes.io/pvc-protection"] + }, + "spec": { + "resources": {}, + "volumeName": "pv-2" + }, + "status": { + "phase": "Bound" + } + }], + "pvs": [{ + "metadata": { + "name": "pv-1", + "uid": "301b0e8b-3538-4f61-a0fd-a25abd9a3122", + "resourceVersion": "1958", + "creationTimestamp": null, + "labels": { + "test/label": "retained" + }, + "annotations": { + "pv.kubernetes.io/provisioned-by": "ebs.csi.aws.com", + "temporary/volume-id": "vol-0e65f40961a9f6244", + "test/annotation": "retained" + }, + "finalizers": ["kubernetes.io/pv-protection"] + }, + "spec": { + "csi": { + "driver": "ebs.csi.aws.com", + "volumeHandle": "vol-0e65f40961a9f6244", + "fsType": "ext4" + }, + "claimRef": { + "name": "tikv-test-tikv-1", + "uid": "301b0e8b-3538-4f61-a0fd-a25abd9a3121", + "resourceVersion": "1957" + } + }, + "status": { + "phase": "Bound" + } + }, { + "metadata": { + "name": "pv-2", + "uid": "301b0e8b-3538-4f61-a0fd-a25abd9a3124", + "resourceVersion": "1960", + "creationTimestamp": null, + "labels": { + "test/label": "retained" + }, + "annotations": { + "pv.kubernetes.io/provisioned-by": "ebs.csi.aws.com", + "temporary/volume-id": "vol-0e65f40961a9f6245", + "test/annotation": "retained" + }, + "finalizers": ["kubernetes.io/pv-protection"] + }, + "spec": { + "csi": { + "driver": "ebs.csi.aws.com", + "volumeHandle": "vol-0e65f40961a9f6245", + "fsType": "ext4" + }, + "claimRef": { + "name": "tikv-test-tikv-2", + "uid": "301b0e8b-3538-4f61-a0fd-a25abd9a3123", + "resourceVersion": "1959" + } + }, + "status": { + "phase": "Bound" + } + }], + "crd_tidb_cluster": { + "metadata": { + "name": "test", + "creationTimestamp": null + }, + "spec": { + "discovery": {}, + "version": "", + "tikv": { + "maxFailoverCount": 0, + "replicas": 3, + "storageVolumes": [ + { + "mountPath": "/var/lib/raft", + "name": "raft", + "storageSize": "50Gi" + } + ] + } + }, + "status": { + "pd": { + "synced": false, + "leader": { + "name": "", + "id": "", + "clientURL": "", + "health": false, + "lastTransitionTime": null + } + }, + "tikv": {}, + "tidb": {}, + "pump": {}, + "tiflash": {}, + "ticdc": {} + } + }, + "options": null + }, + "options": null + }` +} diff --git a/pkg/backup/testutils/helpers.go b/pkg/backup/testutils/helpers.go index 7a6401c959..8e3c01deaa 100644 --- a/pkg/backup/testutils/helpers.go +++ b/pkg/backup/testutils/helpers.go @@ -137,6 +137,10 @@ func (h *Helper) CreateTC(namespace, clusterName string, acrossK8s, recoverMode TiKV: &v1alpha1.TiKVSpec{ BaseImage: "pingcap/tikv", Replicas: 3, + StorageVolumes: []v1alpha1.StorageVolume{ + {MountPath: "/var/lib/raft", Name: "raft", StorageSize: "50Gi"}, + {MountPath: "/var/lib/wal", Name: "wal", StorageSize: "50Gi"}, + }, }, TiDB: &v1alpha1.TiDBSpec{ TLSClient: &v1alpha1.TiDBTLSClient{Enabled: true}, From 0785eda282b98952d489277d2ddfbc6db3aec2c9 Mon Sep 17 00:00:00 2001 From: BornChanger Date: Wed, 27 Sep 2023 11:01:29 +0800 Subject: [PATCH 3/3] pkg: address comments Signed-off-by: BornChanger --- pkg/backup/restore/restore_manager.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pkg/backup/restore/restore_manager.go b/pkg/backup/restore/restore_manager.go index 46454fb4f3..9b6dd4d235 100644 --- a/pkg/backup/restore/restore_manager.go +++ b/pkg/backup/restore/restore_manager.go @@ -449,11 +449,7 @@ func (rm *restoreManager) checkTiFlashAndTiKVReplicasFromBackupMeta(r *v1alpha1. } // Check volume number - if (tc.Spec.TiKV.StorageVolumes == nil && metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV.StorageVolumes != nil) || - (tc.Spec.TiKV.StorageVolumes != nil && metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV.StorageVolumes == nil) { - klog.Errorf("additional volumes # not match. either tc or backup metadata has no additional volumes, while the other has") - return fmt.Errorf("additional volumes mismatched") - } else if len(tc.Spec.TiKV.StorageVolumes) != len(metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV.StorageVolumes) { + if len(tc.Spec.TiKV.StorageVolumes) != len(metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV.StorageVolumes) { klog.Errorf("additional volumes # not match. tc has %d, and backup has %d", len(tc.Spec.TiKV.StorageVolumes), len(metaInfo.KubernetesMeta.TiDBCluster.Spec.TiKV.StorageVolumes)) return fmt.Errorf("additional volumes mismatched") }