diff --git a/changelog/fragments/1757010848-agent-responds-with-clean-insufficient-disk-space-error-message.yaml b/changelog/fragments/1757010848-agent-responds-with-clean-insufficient-disk-space-error-message.yaml new file mode 100644 index 00000000000..02ba8344b84 --- /dev/null +++ b/changelog/fragments/1757010848-agent-responds-with-clean-insufficient-disk-space-error-message.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: enhancement + +# Change summary; a 80ish characters long description of the change. +summary: when there is a disk space error during an upgrade, agent responds with clean insufficient disk space error message + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +#description: + +# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc. +component: "elastic-agent" + +# PR URL; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +pr: https://github.com/elastic/elastic-agent/pull/9392 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +issue: https://github.com/elastic/elastic-agent/issues/5235 diff --git a/internal/pkg/agent/application/coordinator/coordinator.go b/internal/pkg/agent/application/coordinator/coordinator.go index 1a13c996cc1..f20b5a3e849 100644 --- a/internal/pkg/agent/application/coordinator/coordinator.go +++ b/internal/pkg/agent/application/coordinator/coordinator.go @@ -31,6 +31,7 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/application/reexec" "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade" + upgradeErrors "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact/download/errors" "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" "github.com/elastic/elastic-agent/internal/pkg/agent/configuration" "github.com/elastic/elastic-agent/internal/pkg/agent/protection" @@ -804,6 +805,15 @@ func (c *Coordinator) Upgrade(ctx context.Context, version string, sourceURI str det.SetState(details.StateCompleted) return c.upgradeMgr.AckAction(ctx, c.fleetAcker, action) } + + c.logger.Errorw("upgrade failed", "error", logp.Error(err)) + // If ErrInsufficientDiskSpace is in the error chain, we want to set the + // the error to ErrInsufficientDiskSpace so that the error message is + // more concise and clear. + if errors.Is(err, upgradeErrors.ErrInsufficientDiskSpace) { + err = upgradeErrors.ErrInsufficientDiskSpace + } + det.Fail(err) return err } diff --git a/internal/pkg/agent/application/coordinator/coordinator_unit_test.go b/internal/pkg/agent/application/coordinator/coordinator_unit_test.go index 57ce1b864f4..5c4146d098a 100644 --- a/internal/pkg/agent/application/coordinator/coordinator_unit_test.go +++ b/internal/pkg/agent/application/coordinator/coordinator_unit_test.go @@ -15,16 +15,19 @@ import ( "context" "encoding/json" "errors" + "fmt" "net" "net/http" "net/http/httptest" "os" "path/filepath" "strings" + "sync" "testing" "time" "github.com/elastic/elastic-agent-client/v7/pkg/proto" + "github.com/elastic/elastic-agent/internal/pkg/fleetapi/acker" "github.com/elastic/elastic-agent/internal/pkg/testutils" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/status" @@ -41,9 +44,11 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/application/info" "github.com/elastic/elastic-agent/internal/pkg/agent/application/monitoring/reload" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/reexec" "github.com/elastic/elastic-agent/internal/pkg/agent/application/secret" "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade" "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact" + upgradeErrors "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact/download/errors" "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" "github.com/elastic/elastic-agent/internal/pkg/agent/configuration" "github.com/elastic/elastic-agent/internal/pkg/agent/storage" @@ -1977,3 +1982,100 @@ func TestHasEndpoint(t *testing.T) { }) } } + +type mockUpgradeManager struct { + upgradeErr error +} + +func (m *mockUpgradeManager) Upgradeable() bool { + return true +} + +func (m *mockUpgradeManager) Reload(cfg *config.Config) error { + return nil +} + +func (m *mockUpgradeManager) Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, action *fleetapi.ActionUpgrade, details *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) { + return nil, m.upgradeErr +} + +func (m *mockUpgradeManager) Ack(ctx context.Context, acker acker.Acker) error { + return nil +} + +func (m *mockUpgradeManager) AckAction(ctx context.Context, acker acker.Acker, action fleetapi.Action) error { + return nil +} + +func (m *mockUpgradeManager) MarkerWatcher() upgrade.MarkerWatcher { + return nil +} + +func TestCoordinator_Upgrade_InsufficientDiskSpaceError(t *testing.T) { + log, _ := loggertest.New("coordinator-insufficient-disk-space-test") + + mockUpgradeManager := &mockUpgradeManager{ + upgradeErr: fmt.Errorf("wrapped: %w", upgradeErrors.ErrInsufficientDiskSpace), + } + + initialState := State{ + CoordinatorState: agentclient.Healthy, + CoordinatorMessage: "Running", + } + + coord := &Coordinator{ + state: initialState, + logger: log, + upgradeMgr: mockUpgradeManager, + stateBroadcaster: broadcaster.New(initialState, 64, 32), + overrideStateChan: make(chan *coordinatorOverrideState), + upgradeDetailsChan: make(chan *details.Details), + } + + wg := sync.WaitGroup{} + wg.Add(2) + + overrideStates := []agentclient.State{} + go func() { + state1 := <-coord.overrideStateChan + overrideStates = append(overrideStates, state1.state) + + state2 := <-coord.overrideStateChan + if state2 != nil { + overrideStates = append(overrideStates, state2.state) + } + + wg.Done() + }() + + upgradeDetails := []*details.Details{} + go func() { + upgradeDetails = append(upgradeDetails, <-coord.upgradeDetailsChan) + upgradeDetails = append(upgradeDetails, <-coord.upgradeDetailsChan) + wg.Done() + }() + + err := coord.Upgrade(t.Context(), "", "", nil) + require.Error(t, err) + require.Equal(t, err, upgradeErrors.ErrInsufficientDiskSpace) + + wg.Wait() + + require.Equal(t, []agentclient.State{agentclient.Upgrading}, overrideStates) + + require.Equal(t, []*details.Details{ + { + TargetVersion: "", + State: details.StateRequested, + ActionID: "", + }, + { + TargetVersion: "", + State: details.StateFailed, + Metadata: details.Metadata{ + FailedState: details.StateRequested, + ErrorMsg: upgradeErrors.ErrInsufficientDiskSpace.Error(), + }, + }, + }, upgradeDetails) +}