From 6d06635b92ddcf2caf2dbf8285d06c95c2998118 Mon Sep 17 00:00:00 2001 From: Fred Lotter Date: Mon, 12 Aug 2024 11:09:48 +0200 Subject: [PATCH] fix(overlord): allow ensure when state is available --- internals/overlord/checkstate/manager.go | 10 +------ internals/overlord/overlord.go | 33 +++++++++++++++++++++--- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 45fcf54d3..81eed861d 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -20,7 +20,6 @@ import ( "reflect" "sort" "sync" - "sync/atomic" "gopkg.in/tomb.v2" @@ -38,8 +37,7 @@ const ( // CheckManager starts and manages the health checks. type CheckManager struct { - state *state.State - ensureDone atomic.Bool + state *state.State failureHandlers []FailureFunc @@ -87,7 +85,6 @@ func NewManager(s *state.State, runner *state.TaskRunner) *CheckManager { } func (m *CheckManager) Ensure() error { - m.ensureDone.Store(true) return nil } @@ -164,11 +161,6 @@ func (m *CheckManager) PlanChanged(newPlan *plan.Plan) { shouldEnsure = true } } - if !m.ensureDone.Load() { - // Can't call EnsureBefore before Overlord.Loop is running (which will - // call m.Ensure for the first time). - return - } if shouldEnsure { m.state.EnsureBefore(0) // start new tasks right away } diff --git a/internals/overlord/overlord.go b/internals/overlord/overlord.go index a0982f3ff..03d39dc61 100644 --- a/internals/overlord/overlord.go +++ b/internals/overlord/overlord.go @@ -337,22 +337,49 @@ func (o *Overlord) ensureBefore(d time.Duration) { o.ensureLock.Lock() defer o.ensureLock.Unlock() if o.ensureTimer == nil { - panic("cannot use EnsureBefore before Overlord.Loop") + // While the timer is not setup we have not yet entered the overlord loop. + // Since the overlord loop will unconditionally perform an ensure on entry, + // the ensure is already scheduled. + return } now := time.Now() next := now.Add(d) + + // If this requested ensure must take place before the currently scheduled + // ensure time, let's reschedule the pending ensure. if next.Before(o.ensureNext) { o.ensureTimer.Reset(d) o.ensureNext = next return } + // Go timers do not take sleep/suspend time into account (CLOCK_MONOTONIC, + // not CLOCK_BOOTTIME). This means that following a wakeup, the timer will + // only then continue to countdown, while the o.ensureNext wallclock time + // could point to a time that already expired. + // https://github.com/golang/go/issues/24595 + // 1. https://github.com/canonical/snapd/pull/1150 + // 2. https://github.com/canonical/snapd/pull/6472 + // + // If we detect a wake-up condition where the scheduled expiry time is in + // the past, let's reschedule the ensure to happen right now. if o.ensureNext.Before(now) { - // timer already expired, it will be reset in Loop() and - // next Ensure() will be called shortly. + // We have to know if the timer already expired. If this is true then + // it means a channel write has already taken place, and no further + // action is required. The overlord loop will ensure soon after this: + // + // https://go.dev/wiki/Go123Timer: + // < Go 1.23: buffered channel (overlord loop may not have observed yet) + // >= Go 1.23: unbuffered channel (overlord loop already observed) + // + // In both these cases, the overlord loop ensure will still take place. if !o.ensureTimer.Stop() { return } + // Since the scheduled time was reached, and the timer did not expire + // before we called stop, we know that due to a sleep/suspend activity + // the real timer will expire at some arbitrary point in the future. + // Instead, let's get that ensure completed right now. o.ensureTimer.Reset(0) o.ensureNext = now }