Skip to content

Commit

Permalink
fix backup failed when pod was auto restarted by k8s (pingcap#4883)
Browse files Browse the repository at this point in the history
* init code for test

* just clean before backup data

* delete test code

* import pingcap/errors

* add check version

* remove test code

* add running status check

* add restart condition to clarify logic

* fix status update

* fix ut
  • Loading branch information
WizardXiao committed Mar 10, 2023
1 parent 2eacebc commit c86b2db
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 8 deletions.
45 changes: 45 additions & 0 deletions cmd/backup-manager/app/backup/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ import (
"strconv"
"time"

"github.com/Masterminds/semver"
"github.com/dustin/go-humanize"
"github.com/pingcap/errors"
"github.com/pingcap/tidb-operator/cmd/backup-manager/app/clean"
"github.com/pingcap/tidb-operator/cmd/backup-manager/app/constants"
"github.com/pingcap/tidb-operator/cmd/backup-manager/app/util"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
Expand Down Expand Up @@ -92,6 +95,20 @@ func (bm *Manager) ProcessBackup() error {
return errorutils.NewAggregate(errs)
}

// we treat snapshot backup as restarted if its status is not scheduled when backup pod just start to run
// we will clean backup data before run br command
if backup.Spec.Mode == v1alpha1.BackupModeSnapshot && backup.Status.Phase != v1alpha1.BackupScheduled {
klog.Infof("snapshot backup %s was restarted, status is %s", bm, backup.Status.Phase)
uerr := bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{
Type: v1alpha1.BackupRestart,
Status: corev1.ConditionTrue,
}, nil)
if uerr != nil {
errs = append(errs, uerr)
return errorutils.NewAggregate(errs)
}
}

if backup.Spec.BR == nil {
return fmt.Errorf("no br config in %s", bm)
}
Expand Down Expand Up @@ -269,6 +286,15 @@ func (bm *Manager) performBackup(ctx context.Context, backup *v1alpha1.Backup, d
}
}

// clean snapshot backup data if it was restarted
if backup.Spec.Mode == v1alpha1.BackupModeSnapshot && v1alpha1.IsBackupRestart(backup) && !bm.isBRCanContinueRunByCheckpoint() {
klog.Infof("clean snapshot backup %s data before run br command, backup path is %s", bm, backup.Status.BackupPath)
err := bm.cleanSnapshotBackupEnv(ctx, backup)
if err != nil {
return errors.Annotatef(err, "clean snapshot backup %s failed", bm)
}
}

// change Prepare to Running before real backup process start
if err := bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{
Type: v1alpha1.BackupRunning,
Expand Down Expand Up @@ -537,3 +563,22 @@ func (bm *Manager) truncateLogBackup(ctx context.Context, backup *v1alpha1.Backu
}
return updateStatus, "", nil
}

func (bm *Manager) cleanSnapshotBackupEnv(ctx context.Context, backup *v1alpha1.Backup) error {
if backup.Spec.Mode != v1alpha1.BackupModeSnapshot {
return nil
}

cleanOpt := clean.Options{Namespace: bm.Namespace, BackupName: bm.ResourceName}
return cleanOpt.CleanBRRemoteBackupData(ctx, backup)
}

func (bm *Manager) isBRCanContinueRunByCheckpoint() bool {
v, err := semver.NewVersion(bm.TiKVVersion)
if err != nil {
klog.Errorf("Parse version %s failure, error: %v", bm.TiKVVersion, err)
return false
}
lessThanV651, _ := semver.NewConstraint("<v6.5.1-0")
return !lessThanV651.Check(v)
}
2 changes: 1 addition & 1 deletion cmd/backup-manager/app/clean/clean.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ func (bo *Options) deleteVolumeSnapshots(meta *bkutil.EBSBasedBRMeta) error {
}

// cleanBRRemoteBackupData clean the backup data from remote
func (bo *Options) cleanBRRemoteBackupData(ctx context.Context, backup *v1alpha1.Backup) error {
func (bo *Options) CleanBRRemoteBackupData(ctx context.Context, backup *v1alpha1.Backup) error {
opt := backup.GetCleanOption()

backend, err := bkutil.NewStorageBackend(backup.Spec.StorageProvider, &bkutil.StorageCredential{})
Expand Down
2 changes: 1 addition & 1 deletion cmd/backup-manager/app/clean/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func (bm *Manager) performCleanBackup(ctx context.Context, backup *v1alpha1.Back

} else {
if backup.Spec.BR != nil {
err = bm.cleanBRRemoteBackupData(ctx, backup)
err = bm.CleanBRRemoteBackupData(ctx, backup)
} else {
opts := util.GetOptions(backup.Spec.StorageProvider)
err = bm.cleanRemoteBackupData(ctx, backup.Status.BackupPath, opts)
Expand Down
20 changes: 19 additions & 1 deletion pkg/apis/pingcap/v1alpha1/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,25 @@ func UpdateBackupCondition(status *BackupStatus, condition *BackupCondition) boo
// Try to find this Backup condition.
conditionIndex, oldCondition := GetBackupCondition(status, condition.Type)

status.Phase = condition.Type
isDiffPhase := status.Phase != condition.Type

// restart condition no need to update to phase
if isDiffPhase && condition.Type != BackupRestart {
status.Phase = condition.Type
}

if oldCondition == nil {
// We are adding new Backup condition.
status.Conditions = append(status.Conditions, *condition)
return true
}

// if phase is diff, we need update condition
if isDiffPhase {
status.Conditions[conditionIndex] = *condition
return true
}

// We are updating an existing condition, so we need to check if it has changed.
if condition.Status == oldCondition.Status {
condition.LastTransitionTime = oldCondition.LastTransitionTime
Expand Down Expand Up @@ -193,6 +205,12 @@ func IsBackupRunning(backup *Backup) bool {
return condition != nil && condition.Status == corev1.ConditionTrue
}

// IsBackupRestart returns true if a Backup was restarted.
func IsBackupRestart(backup *Backup) bool {
_, hasRestartCondition := GetBackupCondition(&backup.Status, BackupRestart)
return hasRestartCondition != nil
}

// IsBackupPrepared returns true if a Backup is Prepare.
func IsBackupPrepared(backup *Backup) bool {
if backup.Spec.Mode == BackupModeLog {
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/pingcap/v1alpha1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -1981,6 +1981,8 @@ const (
BackupPrepare BackupConditionType = "Prepare"
// BackupStopped means the backup was stopped, just log backup has this condition
BackupStopped BackupConditionType = "Stopped"
// BackupRestart means the backup was restarted, now just support snapshot backup
BackupRestart BackupConditionType = "Restart"
)

// BackupCondition describes the observed state of a Backup at a certain point.
Expand Down
8 changes: 4 additions & 4 deletions pkg/backup/backup/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,15 +278,15 @@ func TestClean(t *testing.T) {
deps := helper.Deps

for _, backup := range genValidBRBackups() {
// make the backup need to be clean
backup.DeletionTimestamp = &metav1.Time{Time: time.Now()}
backup.Spec.CleanPolicy = v1alpha1.CleanPolicyTypeDelete

_, err := deps.Clientset.PingcapV1alpha1().Backups(backup.Namespace).Create(context.TODO(), backup, metav1.CreateOptions{})
g.Expect(err).Should(BeNil())
helper.CreateSecret(backup)
helper.CreateTC(backup.Spec.BR.ClusterNamespace, backup.Spec.BR.Cluster)

// make the backup need to be clean
backup.DeletionTimestamp = &metav1.Time{Time: time.Now()}
backup.Spec.CleanPolicy = v1alpha1.CleanPolicyTypeDelete

statusUpdater := controller.NewRealBackupConditionUpdater(deps.Clientset, deps.BackupLister, deps.Recorder)
bc := NewBackupCleaner(deps, statusUpdater)

Expand Down
2 changes: 1 addition & 1 deletion pkg/controller/backup_status_updater.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func (u *realBackupConditionUpdater) Update(backup *v1alpha1.Backup, condition *
// Always get the latest backup before update.
if updated, err := u.backupLister.Backups(ns).Get(backupName); err == nil {
// make a copy so we don't mutate the shared cache
backup = updated.DeepCopy()
*backup = *(updated.DeepCopy())
} else {
utilruntime.HandleError(fmt.Errorf("error getting updated backup %s/%s from lister: %v", ns, backupName, err))
return err
Expand Down

0 comments on commit c86b2db

Please sign in to comment.