From fe04bb46bef49743cbe51c18dd233bde6e22a177 Mon Sep 17 00:00:00 2001 From: Kazuyoshi Kato Date: Mon, 2 Dec 2024 16:14:04 -0800 Subject: [PATCH] fix: make lease functions less chatty Without this change, the consule would be filled with "failed to release lease: lease not found" even if flyctl couldn't acquire one lease. ``` Updating existing machines in '...' with bluegreen strategy WARN failed to acquire lease: failed to get lease on VM e7843075ad2408: request returned non-2xx status: 408: WARN failed to release lease for machine 178122df9672d8: lease not found WARN failed to release lease: lease not found WARN failed to release lease for machine 7811772a5d7698: lease not found WARN failed to release lease: lease not found WARN failed to release lease for machine e286076f933508: lease not found WARN failed to release lease: lease not found ... ``` --- internal/machine/leasable_machine.go | 9 ++---- internal/machine/machine_set.go | 46 +++++++++++++--------------- 2 files changed, 24 insertions(+), 31 deletions(-) diff --git a/internal/machine/leasable_machine.go b/internal/machine/leasable_machine.go index 5653516906..244bb19613 100644 --- a/internal/machine/leasable_machine.go +++ b/internal/machine/leasable_machine.go @@ -516,6 +516,7 @@ func (lm *leasableMachine) refreshLeaseUntilCanceled(ctx context.Context, durati } } +// ReleaseLease releases the lease on this machine. func (lm *leasableMachine) ReleaseLease(ctx context.Context) error { lm.mu.Lock() defer lm.mu.Unlock() @@ -536,13 +537,7 @@ func (lm *leasableMachine) ReleaseLease(ctx context.Context) error { defer cancel() } - err := lm.flapsClient.ReleaseLease(ctx, lm.machine.ID, nonce) - contextTimedOutOrCanceled := errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) - if err != nil && (!contextWasAlreadyCanceled || !contextTimedOutOrCanceled) { - terminal.Warnf("failed to release lease for machine %s: %v\n", lm.machine.ID, err) - return err - } - return nil + return lm.flapsClient.ReleaseLease(ctx, lm.machine.ID, nonce) } func (lm *leasableMachine) resetLease() { diff --git a/internal/machine/machine_set.go b/internal/machine/machine_set.go index ae905e3cfb..01c4d38822 100644 --- a/internal/machine/machine_set.go +++ b/internal/machine/machine_set.go @@ -15,6 +15,7 @@ import ( "github.com/superfly/flyctl/internal/tracing" "github.com/superfly/flyctl/iostreams" "github.com/superfly/flyctl/terminal" + "golang.org/x/sync/errgroup" ) type MachineSet interface { @@ -49,36 +50,27 @@ func (ms *machineSet) GetMachines() []LeasableMachine { return ms.machines } +// AcquireLeases acquires leases on all machines in the set for the given duration. func (ms *machineSet) AcquireLeases(ctx context.Context, duration time.Duration) error { if len(ms.machines) == 0 { return nil } - results := make(chan error, len(ms.machines)) - var wg sync.WaitGroup + // Don't override ctx. Even leaseCtx is cancelled, we still want to release the leases. + eg, leaseCtx := errgroup.WithContext(ctx) for _, m := range ms.machines { - wg.Add(1) - go func(m LeasableMachine) { - defer wg.Done() - results <- m.AcquireLease(ctx, duration) - }(m) + eg.Go(func() error { + return m.AcquireLease(leaseCtx, duration) + }) } - go func() { - wg.Wait() - close(results) - }() - hadError := false - for err := range results { - if err != nil { - hadError = true - terminal.Warnf("failed to acquire lease: %v\n", err) - } - } - if hadError { + + waitErr := eg.Wait() + if waitErr != nil { + terminal.Warnf("failed to acquire lease: %v\n", waitErr) if err := ms.ReleaseLeases(ctx); err != nil { terminal.Warnf("error releasing machine leases: %v\n", err) } - return fmt.Errorf("error acquiring leases on all machines") + return waitErr } return nil } @@ -100,6 +92,7 @@ func (ms *machineSet) RemoveMachines(ctx context.Context, machines []LeasableMac return subset.ReleaseLeases(ctx) } +// ReleaseLeases releases leases on all machines in this set. func (ms *machineSet) ReleaseLeases(ctx context.Context) error { if len(ms.machines) == 0 { return nil @@ -130,10 +123,15 @@ func (ms *machineSet) ReleaseLeases(ctx context.Context) error { }() hadError := false for err := range results { - contextTimedOutOrCanceled := errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) - if err != nil && (!contextWasAlreadyCanceled || !contextTimedOutOrCanceled) { - hadError = true - terminal.Warnf("failed to release lease: %v\n", err) + if err != nil { + contextTimedOutOrCanceled := errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) + var ferr *flaps.FlapsError + if errors.As(err, &ferr) && ferr.ResponseStatusCode == http.StatusNotFound { + // Having StatusNotFound is expected, if acquiring this entire set is partially failing. + } else if !contextWasAlreadyCanceled || !contextTimedOutOrCanceled { + hadError = true + terminal.Warnf("failed to release lease: %v\n", err) + } } } if hadError {