From 0a1327a7ad888ba88b16a7a79492f0b4ec83b7c2 Mon Sep 17 00:00:00 2001 From: Blake Gentry Date: Sun, 26 Nov 2023 14:33:23 -0600 Subject: [PATCH] fix panic in PeriodicJobEnqueuer The previous version of this code could sometimes provide a non-positive interval for Ticker.Reset, which causes it to panic. This could happen if a job was already overdue to run when `timeUntilNextRun()` was called, or as time progressed throughout its execution. There are two fixes here: 1. In `timeUntilNextRun()`, only evaluate the current time once and use that throughout so we can be sure we're not skipping safety checks. 2. Switch the `PeriodicJobEnqueuer` loop to use a `time.Timer` instead of a `time.Ticker` since we intend to reset it on every iteration anyway. Step (2) was tricky as it always is when reusing `time.Timer` in Go [1] thanks to its poor design, but I believe I've done that in a safe way here. The reusable timer is now created with 0 duration, ensuring that it will fire quickly, and then the next line always blocks until it fires. This introduces a minimal delay, but it's done to ensure that the first part of the loop (resetting the timer) can always succeed without needing to first try to stop the timer. The only other way the loop can repeat is if we've just received from the timer, so we are guaranteed to be able to safely reset it here. [1]: https://blogtitle.github.io/go-advanced-concurrency-patterns-part-2-timers/ --- internal/maintenance/periodic_job_enqueuer.go | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/internal/maintenance/periodic_job_enqueuer.go b/internal/maintenance/periodic_job_enqueuer.go index 851d2493..a24b46e7 100644 --- a/internal/maintenance/periodic_job_enqueuer.go +++ b/internal/maintenance/periodic_job_enqueuer.go @@ -132,15 +132,19 @@ func (s *PeriodicJobEnqueuer) Start(ctx context.Context) error { s.TestSignals.EnteredLoop.Signal(struct{}{}) - tickerUntilNextRun := time.NewTicker(1 * time.Hour) // duration is Reset immediately below - defer tickerUntilNextRun.Stop() + timerUntilNextRun := time.NewTimer(0) // duration is Reset immediately below + if !timerUntilNextRun.Stop() { + <-timerUntilNextRun.C + } loop: for { - tickerUntilNextRun.Reset(s.timeUntilNextRun()) + // We know it is safe to directly call Reset because the only way we can + // get to this line is if the timer has already fired or been stopped: + timerUntilNextRun.Reset(s.timeUntilNextRun()) select { - case <-tickerUntilNextRun.C: + case <-timerUntilNextRun.C: var insertParamsMany []*dbadapter.JobInsertParams now := s.TimeNowUTC() @@ -213,9 +217,9 @@ func (s *PeriodicJobEnqueuer) timeUntilNextRun() time.Duration { ) for _, periodicJob := range s.periodicJobs { - // In case we detect a job that should've run before now, immediately - // short circuit with a zero. In addition to being marginally faster, it - // prevents us accidentally returning a negative number below. + // In case we detect a job that should've run before now, immediately short + // circuit with a 0 duration. This avoids needlessly iterating through the + // rest of the loop when we already know we're overdue for the next job. if periodicJob.nextRunAt.Before(now) { return 0 } @@ -225,5 +229,5 @@ func (s *PeriodicJobEnqueuer) timeUntilNextRun() time.Duration { } } - return firstNextRunAt.Sub(s.TimeNowUTC()) + return firstNextRunAt.Sub(now) }