Skip to content

Commit

Permalink
ebs br: make sure backup can be resumed even after being paused for a long time (#5464)
Browse files Browse the repository at this point in the history
  • Loading branch information
BornChanger committed Dec 18, 2023
1 parent 533439c commit 14f5a7e
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 18 deletions.
14 changes: 6 additions & 8 deletions pkg/backup/backupschedule/backup_schedule_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,20 +192,18 @@ func getLastScheduledTime(bs *v1alpha1.BackupSchedule, nowFn nowFn) (*time.Time,
// If there is a bug somewhere, or incorrect clock
// on controller's server or apiservers (for setting creationTimestamp)
// then there could be so many missed start times (it could be off
// by decades or more), that it would eat up all the CPU and memory
// of this controller. In that case, we want to not try to list
// all the missed start times.
//
// I've somewhat arbitrarily picked 100, as more than 80,
// but less than "lots".
if len(scheduledTimes) > 100 {
// by decades or more). So, we need to set LastBackupTime to now() in order to let
// next reconcile succeed.
if len(scheduledTimes) > 1000 {
// We can't get the last backup schedule time
if bs.Status.LastBackupTime == nil && bs.Status.AllBackupCleanTime != nil {
// Recovery backup schedule from pause status, should refresh AllBackupCleanTime to avoid unschedulable problem
bs.Status.AllBackupCleanTime = &metav1.Time{Time: nowFn()}
return nil, controller.RequeueErrorf("recovery backup schedule %s/%s from pause status, refresh AllBackupCleanTime.", ns, bsName)
}
klog.Error("Too many missed start backup schedule time (> 100). Check the clock.")
klog.Warning("Too many missed start backup schedule time (> 1000). Fail current one.")
offset := sched.Next(t).Sub(t)
bs.Status.LastBackupTime = &metav1.Time{Time: time.Now().Add(-offset)}
return nil, nil
}
}
Expand Down
6 changes: 5 additions & 1 deletion pkg/backup/backupschedule/backup_schedule_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,14 @@ func TestGetLastScheduledTime(t *testing.T) {
}

// test too many miss
bs.Status.LastBackupTime.Time = now.AddDate(-1000, 0, 0)
bs.Status.LastBackupTime.Time = now.AddDate(-10, 0, 0)
getTime, err = getLastScheduledTime(bs, time.Now)
g.Expect(err).Should(BeNil())
g.Expect(getTime).Should(BeNil())
// next reconcile should succeed
getTime, err = getLastScheduledTime(bs, time.Now)
g.Expect(err).Should(BeNil())
g.Expect(getTime).ShouldNot(BeNil())
}

func TestBuildBackup(t *testing.T) {
Expand Down
15 changes: 7 additions & 8 deletions pkg/fedvolumebackup/backupschedule/backup_schedule_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,20 +117,19 @@ func getLastScheduledTime(vbs *v1alpha1.VolumeBackupSchedule, nowFn nowFn) (*tim
// If there is a bug somewhere, or incorrect clock
// on controller's server or apiservers (for setting creationTimestamp)
// then there could be so many missed start times (it could be off
// by decades or more), that it would eat up all the CPU and memory
// of this controller. In that case, we want to not try to list
// all the missed start times.
//
// I've somewhat arbitrarily picked 100, as more than 80,
// but less than "lots".
if len(scheduledTimes) > 100 {
// by decades or more). So, we need to set LastBackupTime to now() in order to let
// next reconcile succeed.
if len(scheduledTimes) > 1000 {
// We can't get the last backup schedule time
if vbs.Status.LastBackupTime == nil && vbs.Status.AllBackupCleanTime != nil {
// Recovery backup schedule from pause status, should refresh AllBackupCleanTime to avoid unschedulable problem
vbs.Status.AllBackupCleanTime = &metav1.Time{Time: nowFn()}
return nil, controller.RequeueErrorf("recovery backup schedule %s/%s from pause status, refresh AllBackupCleanTime.", ns, bsName)
}
klog.Error("Too many missed start backup schedule time (> 100). Check the clock.")

klog.Warning("Too many missed start backup schedule time (> 1000). Fail current one.")
offset := sched.Next(t).Sub(t)
vbs.Status.LastBackupTime = &metav1.Time{Time: time.Now().Add(-offset)}
return nil, nil
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,14 @@ func TestGetLastScheduledTime(t *testing.T) {
}

// test too many miss
bs.Status.LastBackupTime.Time = now.AddDate(-1000, 0, 0)
bs.Status.LastBackupTime.Time = now.AddDate(-10, 0, 0)
getTime, err = getLastScheduledTime(bs, time.Now)
g.Expect(err).Should(BeNil())
g.Expect(getTime).Should(BeNil())
getTime, err = getLastScheduledTime(bs, time.Now)
// next reconcile should succeed
g.Expect(err).Should(BeNil())
g.Expect(getTime).ShouldNot(BeNil())
}

func TestBuildBackup(t *testing.T) {
Expand Down

0 comments on commit 14f5a7e

Please sign in to comment.