Skip to content

Commit

Permalink
br: modify vbk failed when all backups are complete or failed (#5735)
Browse files: browse the repository at this point in the history
Co-authored-by: Xuecheng Zhang <csuzhangxc@gmail.com>
  • Loading branch information
WangLe1321 and csuzhangxc authored Oct 9, 2024
1 parent 91d1065 commit 4af46b7
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 4 deletions.
25 changes: 22 additions & 3 deletions pkg/fedvolumebackup/backup/backup_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"fmt"
"math"
"strconv"
"strings"
"time"

corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -425,14 +426,18 @@ func (bm *backupManager) teardownVolumeBackup(ctx context.Context, volumeBackup

func (bm *backupManager) waitVolumeBackupComplete(ctx context.Context, volumeBackup *v1alpha1.VolumeBackup, backupMembers []*volumeBackupMember) error {
isBackupRunning := false
var failedBackups []*volumeBackupMember
for _, backupMember := range backupMembers {
if pingcapv1alpha1.IsVolumeBackupInitializeFailed(backupMember.backup) || pingcapv1alpha1.IsBackupFailed(backupMember.backup) {
if pingcapv1alpha1.IsVolumeBackupInitializeFailed(backupMember.backup) {
errMsg := fmt.Sprintf("backup member %s of cluster %s failed", backupMember.backup.Name, backupMember.k8sClusterName)
bm.setVolumeBackupFailed(&volumeBackup.Status, backupMembers, reasonVolumeBackupMemberFailed, errMsg)
klog.Errorf("VolumeBackup %s/%s failed, err: %s", volumeBackup.Namespace, volumeBackup.Name, errMsg)
return nil
}
if !pingcapv1alpha1.IsBackupComplete(backupMember.backup) {
} else if pingcapv1alpha1.IsBackupFailed(backupMember.backup) {
failedBackups = append(failedBackups, backupMember)
klog.Errorf("VolumeBackup %s/%s backup member %s of cluster %s is failed",
volumeBackup.Namespace, volumeBackup.Name, backupMember.backup.Name, backupMember.k8sClusterName)
} else if !pingcapv1alpha1.IsBackupComplete(backupMember.backup) {
isBackupRunning = true
klog.Infof(
"VolumeBackup %s/%s backup member %s of cluster %s is not complete",
Expand All @@ -442,6 +447,10 @@ func (bm *backupManager) waitVolumeBackupComplete(ctx context.Context, volumeBac

if isBackupRunning {
return controller.IgnoreErrorf("wait VolumeBackup complete")
} else if len(failedBackups) > 0 {
errMsg := genErrorMessageByFailedBackupMembers(failedBackups)
bm.setVolumeBackupFailed(&volumeBackup.Status, backupMembers, reasonVolumeBackupMemberFailed, errMsg)
return nil
} else {
klog.Infof("VolumeBackup %s/%s backup complete", volumeBackup.Namespace, volumeBackup.Name)
return bm.setVolumeBackupComplete(&volumeBackup.Status, backupMembers)
Expand Down Expand Up @@ -622,3 +631,13 @@ func (m *FakeBackupManager) IsStatusUpdated() bool {
}

var _ fedvolumebackup.BackupManager = &FakeBackupManager{}

// genErrorMessageByFailedBackupMembers builds a single human-readable error
// message for a set of failed backup members. The backup names and the
// cluster names are each joined with commas, in the order the members were
// passed in, so the i-th backup name corresponds to the i-th cluster name.
func genErrorMessageByFailedBackupMembers(failedBackupMembers []*volumeBackupMember) string {
	count := len(failedBackupMembers)
	backups := make([]string, count)
	clusters := make([]string, count)
	for i := range failedBackupMembers {
		backups[i] = failedBackupMembers[i].backup.Name
		clusters[i] = failedBackupMembers[i].k8sClusterName
	}

	joinedBackups := strings.Join(backups, ",")
	joinedClusters := strings.Join(clusters, ",")
	return fmt.Sprintf("backup members %s of clusters %s failed", joinedBackups, joinedClusters)
}
42 changes: 41 additions & 1 deletion pkg/fedvolumebackup/backup/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ func (h *helper) assertRunTeardown(ctx context.Context, volumeBackup *v1alpha1.V
h.g.Expect(backupMember3.Spec.FederalVolumeBackupPhase).To(gomega.Equal(pingcapv1alpha1.FederalVolumeBackupTeardown))
}

// assertRunning asserts that v1alpha1.IsVolumeBackupRunning reports true for
// the given VolumeBackup.
func (h *helper) assertRunning(volumeBackup *v1alpha1.VolumeBackup) {
	running := v1alpha1.IsVolumeBackupRunning(volumeBackup)
	h.g.Expect(running).To(gomega.BeTrue())
}

func (h *helper) assertComplete(volumeBackup *v1alpha1.VolumeBackup) {
h.g.Expect(v1alpha1.IsVolumeBackupComplete(volumeBackup)).To(gomega.BeTrue())
h.g.Expect(volumeBackup.Status.CommitTs).To(gomega.Equal("123"))
Expand Down Expand Up @@ -290,6 +294,35 @@ func (h *helper) setDataPlaneFailed(ctx context.Context) {
h.g.Expect(err).To(gomega.BeNil())
}

// setAllDataPlanesFailed marks the backup member on each of the three data
// planes as failed. For every data plane it fetches the backup member, sets a
// BackupFailed condition with status True on it, and writes the status back
// through UpdateStatus, asserting that each client call returns no error.
func (h *helper) setAllDataPlanesFailed(ctx context.Context) {
	// Build a fresh condition object for every member rather than sharing one.
	newFailedCondition := func() *pingcapv1alpha1.BackupCondition {
		return &pingcapv1alpha1.BackupCondition{
			Type:   pingcapv1alpha1.BackupFailed,
			Status: corev1.ConditionTrue,
		}
	}

	// Data plane 1.
	backups1 := h.dataPlaneClient1.PingcapV1alpha1().Backups(fakeTcNamespace1)
	member1, err := backups1.Get(ctx, h.backupMemberName1, metav1.GetOptions{})
	h.g.Expect(err).To(gomega.BeNil())
	pingcapv1alpha1.UpdateBackupCondition(&member1.Status, newFailedCondition())
	_, err = backups1.UpdateStatus(ctx, member1, metav1.UpdateOptions{})
	h.g.Expect(err).To(gomega.BeNil())

	// Data plane 2.
	backups2 := h.dataPlaneClient2.PingcapV1alpha1().Backups(fakeTcNamespace2)
	member2, err := backups2.Get(ctx, h.backupMemberName2, metav1.GetOptions{})
	h.g.Expect(err).To(gomega.BeNil())
	pingcapv1alpha1.UpdateBackupCondition(&member2.Status, newFailedCondition())
	_, err = backups2.UpdateStatus(ctx, member2, metav1.UpdateOptions{})
	h.g.Expect(err).To(gomega.BeNil())

	// Data plane 3.
	backups3 := h.dataPlaneClient3.PingcapV1alpha1().Backups(fakeTcNamespace3)
	member3, err := backups3.Get(ctx, h.backupMemberName3, metav1.GetOptions{})
	h.g.Expect(err).To(gomega.BeNil())
	pingcapv1alpha1.UpdateBackupCondition(&member3.Status, newFailedCondition())
	_, err = backups3.UpdateStatus(ctx, member3, metav1.UpdateOptions{})
	h.g.Expect(err).To(gomega.BeNil())
}

func TestVolumeBackup(t *testing.T) {
ctx := context.Background()
backupName := "backup-1"
Expand Down Expand Up @@ -403,9 +436,16 @@ func TestVolumeBackupVolumeFailed(t *testing.T) {
h.g.Expect(err).To(gomega.BeNil())
h.assertRunTeardown(ctx, volumeBackup, false)

// volume backup failed
// one data plane failed
h.setDataPlaneFailed(ctx)
err = h.bm.Sync(volumeBackup)
h.g.Expect(err).NotTo(gomega.BeNil())
h.g.Expect(err.Error()).To(gomega.ContainSubstring("wait VolumeBackup complete"))
h.assertRunning(volumeBackup)

// all data planes failed
h.setAllDataPlanesFailed(ctx)
err = h.bm.Sync(volumeBackup)
h.g.Expect(err).To(gomega.BeNil())
h.assertFailed(volumeBackup)
}
Expand Down

0 comments on commit 4af46b7

Please sign in to comment.