From a79536a30039b030d2ab8148b251ea2434440105 Mon Sep 17 00:00:00 2001 From: Kazuyoshi Kato Date: Mon, 2 Dec 2024 16:14:04 -0800 Subject: [PATCH 1/2] fix: make lease functions less chatty Without this change, the console would be filled with "failed to release lease: lease not found" even if flyctl couldn't acquire one lease. ``` Updating existing machines in '...' with bluegreen strategy WARN failed to acquire lease: failed to get lease on VM e7843075ad2408: request returned non-2xx status: 408: WARN failed to release lease for machine 178122df9672d8: lease not found WARN failed to release lease: lease not found WARN failed to release lease for machine 7811772a5d7698: lease not found WARN failed to release lease: lease not found WARN failed to release lease for machine e286076f933508: lease not found WARN failed to release lease: lease not found ... ``` --- internal/machine/leasable_machine.go | 9 ++---- internal/machine/machine_set.go | 46 +++++++++++++--------------- 2 files changed, 24 insertions(+), 31 deletions(-) diff --git a/internal/machine/leasable_machine.go b/internal/machine/leasable_machine.go index 5653516906..244bb19613 100644 --- a/internal/machine/leasable_machine.go +++ b/internal/machine/leasable_machine.go @@ -516,6 +516,7 @@ func (lm *leasableMachine) refreshLeaseUntilCanceled(ctx context.Context, durati } } +// ReleaseLease releases the lease on this machine. func (lm *leasableMachine) ReleaseLease(ctx context.Context) error { lm.mu.Lock() defer lm.mu.Unlock() @@ -536,13 +537,7 @@ func (lm *leasableMachine) ReleaseLease(ctx context.Context) error { defer cancel() } - err := lm.flapsClient.ReleaseLease(ctx, lm.machine.ID, nonce) - contextTimedOutOrCanceled := errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) - if err != nil && (!contextWasAlreadyCanceled || !contextTimedOutOrCanceled) { - terminal.Warnf("failed to release lease for machine %s: %v\n", lm.machine.ID, err) - return err - } - return nil + return lm.flapsClient.ReleaseLease(ctx, lm.machine.ID, nonce) } func (lm *leasableMachine) resetLease() { diff --git a/internal/machine/machine_set.go b/internal/machine/machine_set.go index ae905e3cfb..01c4d38822 100644 --- a/internal/machine/machine_set.go +++ b/internal/machine/machine_set.go @@ -15,6 +15,7 @@ import ( "github.com/superfly/flyctl/internal/tracing" "github.com/superfly/flyctl/iostreams" "github.com/superfly/flyctl/terminal" + "golang.org/x/sync/errgroup" ) type MachineSet interface { @@ -49,36 +50,27 @@ func (ms *machineSet) GetMachines() []LeasableMachine { return ms.machines } +// AcquireLeases acquires leases on all machines in the set for the given duration. func (ms *machineSet) AcquireLeases(ctx context.Context, duration time.Duration) error { if len(ms.machines) == 0 { return nil } - results := make(chan error, len(ms.machines)) - var wg sync.WaitGroup + // Don't override ctx. Even leaseCtx is cancelled, we still want to release the leases. + eg, leaseCtx := errgroup.WithContext(ctx) for _, m := range ms.machines { - wg.Add(1) - go func(m LeasableMachine) { - defer wg.Done() - results <- m.AcquireLease(ctx, duration) - }(m) + eg.Go(func() error { + return m.AcquireLease(leaseCtx, duration) + }) } - go func() { - wg.Wait() - close(results) - }() - hadError := false - for err := range results { - if err != nil { - hadError = true - terminal.Warnf("failed to acquire lease: %v\n", err) - } - } - if hadError { + + waitErr := eg.Wait() + if waitErr != nil { + terminal.Warnf("failed to acquire lease: %v\n", waitErr) if err := ms.ReleaseLeases(ctx); err != nil { terminal.Warnf("error releasing machine leases: %v\n", err) } - return fmt.Errorf("error acquiring leases on all machines") + return waitErr } return nil } @@ -100,6 +92,7 @@ func (ms *machineSet) RemoveMachines(ctx context.Context, machines []LeasableMac return subset.ReleaseLeases(ctx) } +// ReleaseLeases releases leases on all machines in this set. func (ms *machineSet) ReleaseLeases(ctx context.Context) error { if len(ms.machines) == 0 { return nil @@ -130,10 +123,15 @@ func (ms *machineSet) ReleaseLeases(ctx context.Context) error { }() hadError := false for err := range results { - contextTimedOutOrCanceled := errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) - if err != nil && (!contextWasAlreadyCanceled || !contextTimedOutOrCanceled) { - hadError = true - terminal.Warnf("failed to release lease: %v\n", err) + if err != nil { + contextTimedOutOrCanceled := errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) + var ferr *flaps.FlapsError + if errors.As(err, &ferr) && ferr.ResponseStatusCode == http.StatusNotFound { + // Having StatusNotFound is expected, if acquiring this entire set is partially failing. + } else if !contextWasAlreadyCanceled || !contextTimedOutOrCanceled { + hadError = true + terminal.Warnf("failed to release lease: %v\n", err) + } } } if hadError { From b8c018fcf8e7c4ec35ed5179ff2e466db137ac7d Mon Sep 17 00:00:00 2001 From: Kazuyoshi Kato Date: Tue, 3 Dec 2024 09:32:33 -0800 Subject: [PATCH 2/2] fix: log the error from ReleaseLease --- .../command/deploy/machines_deploymachinesapp.go | 12 ++++++++++-- internal/command/deploy/strategy_bluegreen.go | 2 +- internal/machine/leasable_machine.go | 4 +++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/internal/command/deploy/machines_deploymachinesapp.go b/internal/command/deploy/machines_deploymachinesapp.go index 0dc1da2331..dc994c538a 100644 --- a/internal/command/deploy/machines_deploymachinesapp.go +++ b/internal/command/deploy/machines_deploymachinesapp.go @@ -960,6 +960,14 @@ func (md *machineDeployment) updateEntriesGroup(parentCtx context.Context, group return updatePool.Wait() } +// releaseLease releases the lease and log the error if any. +func releaseLease(ctx context.Context, m machine.LeasableMachine) { + err := m.ReleaseLease(ctx) + if err != nil { + terminal.Warnf("failed to release lease for machine %s: %s", m.FormattedMachineId(), err) + } +} + func (md *machineDeployment) updateMachineByReplace(ctx context.Context, e *machineUpdateEntry) error { ctx, span := tracing.GetTracer().Start(ctx, "update_by_replace", trace.WithAttributes(attribute.String("id", e.launchInput.ID))) defer span.End() @@ -981,7 +989,7 @@ func (md *machineDeployment) updateMachineByReplace(ctx context.Context, e *mach } lm = machine.NewLeasableMachine(md.flapsClient, md.io, newMachineRaw, false) - defer lm.ReleaseLease(ctx) + defer releaseLease(ctx, lm) e.leasableMachine = lm return nil } @@ -1059,7 +1067,7 @@ func (md *machineDeployment) spawnMachineInGroup(ctx context.Context, groupName lm := machine.NewLeasableMachine(md.flapsClient, md.io, newMachineRaw, false) statuslogger.Logf(ctx, "Machine %s was created", md.colorize.Bold(lm.FormattedMachineId())) - defer lm.ReleaseLease(ctx) + defer releaseLease(ctx, lm) // Don't wait for SkipLaunch machines, they are created but not started if launchInput.SkipLaunch { diff --git a/internal/command/deploy/strategy_bluegreen.go b/internal/command/deploy/strategy_bluegreen.go index cd75137870..5db6f245a4 100644 --- a/internal/command/deploy/strategy_bluegreen.go +++ b/internal/command/deploy/strategy_bluegreen.go @@ -175,7 +175,7 @@ func (bg *blueGreen) CreateGreenMachines(ctx context.Context) error { } greenMachine := machine.NewLeasableMachine(bg.flaps, bg.io, newMachineRaw, true) - defer greenMachine.ReleaseLease(ctx) + defer releaseLease(ctx, greenMachine) lock.Lock() defer lock.Unlock() diff --git a/internal/machine/leasable_machine.go b/internal/machine/leasable_machine.go index 244bb19613..dd5d495ec9 100644 --- a/internal/machine/leasable_machine.go +++ b/internal/machine/leasable_machine.go @@ -60,8 +60,10 @@ type leasableMachine struct { leaseNonce string } -// TODO: make sure the other functions handle showLogs correctly +// NewLeasableMachine creates a wrapper for the given machine. +// A lease must be held before calling this function. func NewLeasableMachine(flapsClient flapsutil.FlapsClient, io *iostreams.IOStreams, machine *fly.Machine, showLogs bool) LeasableMachine { + // TODO: make sure the other functions handle showLogs correctly return &leasableMachine{ flapsClient: flapsClient, io: io,