diff --git a/pkg/backup/backupschedule/backup_schedule_manager.go b/pkg/backup/backupschedule/backup_schedule_manager.go index 434378a8df..1f4ff7a8c6 100644 --- a/pkg/backup/backupschedule/backup_schedule_manager.go +++ b/pkg/backup/backupschedule/backup_schedule_manager.go @@ -187,24 +187,24 @@ func getLastScheduledTime(bs *v1alpha1.BackupSchedule, nowFn nowFn) (*time.Time, } var scheduledTimes []time.Time - var warningPrinted = false for t := sched.Next(earliestTime); !t.After(now); t = sched.Next(t) { scheduledTimes = append(scheduledTimes, t) // If there is a bug somewhere, or incorrect clock // on controller's server or apiservers (for setting creationTimestamp) // then there could be so many missed start times (it could be off - // by decades or more). - if len(scheduledTimes) > 100 { + // by decades or more). So, we need to set LastBackupTime to now() in order to let + // next reconcile succeed. + if len(scheduledTimes) > 1000 { // We can't get the last backup schedule time if bs.Status.LastBackupTime == nil && bs.Status.AllBackupCleanTime != nil { // Recovery backup schedule from pause status, should refresh AllBackupCleanTime to avoid unschedulable problem bs.Status.AllBackupCleanTime = &metav1.Time{Time: nowFn()} return nil, controller.RequeueErrorf("recovery backup schedule %s/%s from pause status, refresh AllBackupCleanTime.", ns, bsName) } - if !warningPrinted { - klog.Warning("Too many missed start backup schedule time (> 100). Check the clock.") - warningPrinted = true - } + klog.Warning("Too many missed start backup schedule time (> 1000). Fail current one.") + offset := sched.Next(t).Sub(t) + bs.Status.LastBackupTime = &metav1.Time{Time: time.Now().Add(-offset)} + return nil, nil } } diff --git a/pkg/backup/backupschedule/backup_schedule_manager_test.go b/pkg/backup/backupschedule/backup_schedule_manager_test.go index b0a3774200..4573ccff1a 100644 --- a/pkg/backup/backupschedule/backup_schedule_manager_test.go +++ b/pkg/backup/backupschedule/backup_schedule_manager_test.go @@ -176,7 +176,11 @@ func TestGetLastScheduledTime(t *testing.T) { } // test too many miss - bs.Status.LastBackupTime.Time = now.AddDate(0, -12, 0) + bs.Status.LastBackupTime.Time = now.AddDate(-10, 0, 0) + getTime, err = getLastScheduledTime(bs, time.Now) + g.Expect(err).Should(BeNil()) + g.Expect(getTime).Should(BeNil()) + // next reconcile should succeed getTime, err = getLastScheduledTime(bs, time.Now) g.Expect(err).Should(BeNil()) g.Expect(getTime).ShouldNot(BeNil()) diff --git a/pkg/fedvolumebackup/backupschedule/backup_schedule_manager.go b/pkg/fedvolumebackup/backupschedule/backup_schedule_manager.go index f31b238ed3..8f4853a6fc 100644 --- a/pkg/fedvolumebackup/backupschedule/backup_schedule_manager.go +++ b/pkg/fedvolumebackup/backupschedule/backup_schedule_manager.go @@ -112,24 +112,25 @@ func getLastScheduledTime(vbs *v1alpha1.VolumeBackupSchedule, nowFn nowFn) (*tim } var scheduledTimes []time.Time - var warningPrinted = false for t := sched.Next(earliestTime); !t.After(now); t = sched.Next(t) { scheduledTimes = append(scheduledTimes, t) // If there is a bug somewhere, or incorrect clock // on controller's server or apiservers (for setting creationTimestamp) // then there could be so many missed start times (it could be off - // by decades or more). - if len(scheduledTimes) > 100 { + // by decades or more). So, we need to set LastBackupTime to now() in order to let + // next reconcile succeed. + if len(scheduledTimes) > 1000 { // We can't get the last backup schedule time if vbs.Status.LastBackupTime == nil && vbs.Status.AllBackupCleanTime != nil { // Recovery backup schedule from pause status, should refresh AllBackupCleanTime to avoid unschedulable problem vbs.Status.AllBackupCleanTime = &metav1.Time{Time: nowFn()} return nil, controller.RequeueErrorf("recovery backup schedule %s/%s from pause status, refresh AllBackupCleanTime.", ns, bsName) } - if !warningPrinted { - klog.Warning("Too many missed start backup schedule time (> 100). Check the clock.") - warningPrinted = true - } + + klog.Warning("Too many missed start backup schedule time (> 1000). Fail current one.") + offset := sched.Next(t).Sub(t) + vbs.Status.LastBackupTime = &metav1.Time{Time: time.Now().Add(-offset)} + return nil, nil } } diff --git a/pkg/fedvolumebackup/backupschedule/backup_schedule_manager_test.go b/pkg/fedvolumebackup/backupschedule/backup_schedule_manager_test.go index 1162aeab0b..90d6039559 100644 --- a/pkg/fedvolumebackup/backupschedule/backup_schedule_manager_test.go +++ b/pkg/fedvolumebackup/backupschedule/backup_schedule_manager_test.go @@ -170,9 +170,13 @@ func TestGetLastScheduledTime(t *testing.T) { } // test too many miss - bs.Status.LastBackupTime.Time = now.AddDate(0, -12, 0) + bs.Status.LastBackupTime.Time = now.AddDate(-10, 0, 0) getTime, err = getLastScheduledTime(bs, time.Now) g.Expect(err).Should(BeNil()) + g.Expect(getTime).Should(BeNil()) + getTime, err = getLastScheduledTime(bs, time.Now) + // next reconcile should succeed + g.Expect(err).Should(BeNil()) g.Expect(getTime).ShouldNot(BeNil()) }