Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ebs br: make sure backup can be resumed even after being paused for a long time #5464

Merged
merged 4 commits into from
Dec 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 6 additions & 8 deletions pkg/backup/backupschedule/backup_schedule_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,20 +192,18 @@ func getLastScheduledTime(bs *v1alpha1.BackupSchedule, nowFn nowFn) (*time.Time,
// If there is a bug somewhere, or incorrect clock
// on controller's server or apiservers (for setting creationTimestamp)
// then there could be so many missed start times (it could be off
// by decades or more), that it would eat up all the CPU and memory
// of this controller. In that case, we want to not try to list
// all the missed start times.
//
// I've somewhat arbitrarily picked 100, as more than 80,
// but less than "lots".
if len(scheduledTimes) > 100 {
// by decades or more). So, we move LastBackupTime to now() minus offset (the gap between t and
// the next scheduled time after t) in order to let the next reconcile succeed.
if len(scheduledTimes) > 1000 {
// We can't get the last backup schedule time
if bs.Status.LastBackupTime == nil && bs.Status.AllBackupCleanTime != nil {
// The backup schedule is recovering from a paused state: refresh AllBackupCleanTime so that it does not become unschedulable.
bs.Status.AllBackupCleanTime = &metav1.Time{Time: nowFn()}
return nil, controller.RequeueErrorf("recovery backup schedule %s/%s from pause status, refresh AllBackupCleanTime.", ns, bsName)
}
klog.Error("Too many missed start backup schedule time (> 100). Check the clock.")
klog.Warning("Too many missed start backup schedule time (> 1000). Fail current one.")
offset := sched.Next(t).Sub(t)
bs.Status.LastBackupTime = &metav1.Time{Time: time.Now().Add(-offset)}
return nil, nil
}
}
Expand Down
6 changes: 5 additions & 1 deletion pkg/backup/backupschedule/backup_schedule_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,14 @@ func TestGetLastScheduledTime(t *testing.T) {
}

// test too many miss
bs.Status.LastBackupTime.Time = now.AddDate(-1000, 0, 0)
bs.Status.LastBackupTime.Time = now.AddDate(-10, 0, 0)
getTime, err = getLastScheduledTime(bs, time.Now)
g.Expect(err).Should(BeNil())
g.Expect(getTime).Should(BeNil())
// next reconcile should succeed
getTime, err = getLastScheduledTime(bs, time.Now)
g.Expect(err).Should(BeNil())
g.Expect(getTime).ShouldNot(BeNil())
}

func TestBuildBackup(t *testing.T) {
Expand Down
15 changes: 7 additions & 8 deletions pkg/fedvolumebackup/backupschedule/backup_schedule_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,20 +117,19 @@ func getLastScheduledTime(vbs *v1alpha1.VolumeBackupSchedule, nowFn nowFn) (*tim
// If there is a bug somewhere, or incorrect clock
// on controller's server or apiservers (for setting creationTimestamp)
// then there could be so many missed start times (it could be off
// by decades or more), that it would eat up all the CPU and memory
// of this controller. In that case, we want to not try to list
// all the missed start times.
//
// I've somewhat arbitrarily picked 100, as more than 80,
// but less than "lots".
if len(scheduledTimes) > 100 {
// by decades or more). So, we move LastBackupTime to now() minus offset (the gap between t and
// the next scheduled time after t) in order to let the next reconcile succeed.
if len(scheduledTimes) > 1000 {
// We can't get the last backup schedule time
if vbs.Status.LastBackupTime == nil && vbs.Status.AllBackupCleanTime != nil {
// The backup schedule is recovering from a paused state: refresh AllBackupCleanTime so that it does not become unschedulable.
vbs.Status.AllBackupCleanTime = &metav1.Time{Time: nowFn()}
return nil, controller.RequeueErrorf("recovery backup schedule %s/%s from pause status, refresh AllBackupCleanTime.", ns, bsName)
}
klog.Error("Too many missed start backup schedule time (> 100). Check the clock.")

klog.Warning("Too many missed start backup schedule time (> 1000). Fail current one.")
offset := sched.Next(t).Sub(t)
vbs.Status.LastBackupTime = &metav1.Time{Time: time.Now().Add(-offset)}
return nil, nil
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,14 @@ func TestGetLastScheduledTime(t *testing.T) {
}

// test too many miss
bs.Status.LastBackupTime.Time = now.AddDate(-1000, 0, 0)
bs.Status.LastBackupTime.Time = now.AddDate(-10, 0, 0)
getTime, err = getLastScheduledTime(bs, time.Now)
g.Expect(err).Should(BeNil())
g.Expect(getTime).Should(BeNil())
getTime, err = getLastScheduledTime(bs, time.Now)
// next reconcile should succeed
g.Expect(err).Should(BeNil())
g.Expect(getTime).ShouldNot(BeNil())
}

func TestBuildBackup(t *testing.T) {
Expand Down
Loading