diff --git a/internal/pkg/agent/application/pipeline/actions/handlers/handler_action_upgrade.go b/internal/pkg/agent/application/pipeline/actions/handlers/handler_action_upgrade.go index cfc7ea83749..a0d78a91622 100644 --- a/internal/pkg/agent/application/pipeline/actions/handlers/handler_action_upgrade.go +++ b/internal/pkg/agent/application/pipeline/actions/handlers/handler_action_upgrade.go @@ -39,6 +39,14 @@ func (h *Upgrade) Handle(ctx context.Context, a fleetapi.Action, acker store.Fle } _, err := h.upgrader.Upgrade(ctx, &upgradeAction{action}, true) + if err != nil { + // Always log upgrade failures at the error level. Action errors are logged at debug level + // by default higher up the stack in ActionDispatcher.Dispatch() + h.log.Errorw("Upgrade action failed", "error.message", err, + "action.version", action.Version, "action.source_uri", action.SourceURI, "action.id", action.ActionID, + "action.start_time", action.StartTime, "action.expiration", action.ActionExpiration) + } + return err } diff --git a/internal/pkg/agent/application/upgrade/cleanup.go b/internal/pkg/agent/application/upgrade/cleanup.go index 5e0618dfe78..9f16144276f 100644 --- a/internal/pkg/agent/application/upgrade/cleanup.go +++ b/internal/pkg/agent/application/upgrade/cleanup.go @@ -13,11 +13,15 @@ import ( "github.com/hashicorp/go-multierror" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/pkg/core/logger" ) -// preUpgradeCleanup will remove files that do not have the passed version number from the downloads directory. -func preUpgradeCleanup(version string) error { - files, err := os.ReadDir(paths.Downloads()) +// cleanNonMatchingVersionsFromDownloads will remove files that do not have the passed version number from the downloads directory. +func cleanNonMatchingVersionsFromDownloads(log *logger.Logger, version string) error { + downloadsPath := paths.Downloads() + log.Infow("Cleaning up non-matching downloaded versions", "version", version, "downloads.path", downloadsPath) + + files, err := os.ReadDir(downloadsPath) if err != nil { return fmt.Errorf("unable to read directory %q: %w", paths.Downloads(), err) } diff --git a/internal/pkg/agent/application/upgrade/cleanup_test.go b/internal/pkg/agent/application/upgrade/cleanup_test.go index 736a9c42b3d..1170c26946d 100644 --- a/internal/pkg/agent/application/upgrade/cleanup_test.go +++ b/internal/pkg/agent/application/upgrade/cleanup_test.go @@ -9,7 +9,9 @@ import ( "path/filepath" "testing" + "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/pkg/core/logger" "github.com/stretchr/testify/require" ) @@ -31,7 +33,8 @@ func setupDir(t *testing.T) { func TestPreUpgradeCleanup(t *testing.T) { setupDir(t) - err := preUpgradeCleanup("8.4.0") + log := newErrorLogger(t) + err := cleanNonMatchingVersionsFromDownloads(log, "8.4.0") require.NoError(t, err) files, err := os.ReadDir(paths.Downloads()) @@ -42,3 +45,14 @@ func TestPreUpgradeCleanup(t *testing.T) { require.NoError(t, err) require.Equal(t, []byte("hello, world!"), p) } + +func newErrorLogger(t *testing.T) *logger.Logger { + t.Helper() + + loggerCfg := logger.DefaultLoggingConfig() + loggerCfg.Level = logp.ErrorLevel + + log, err := logger.NewFromConfig("", loggerCfg, false) + require.NoError(t, err) + return log +} diff --git a/internal/pkg/agent/application/upgrade/rollback.go b/internal/pkg/agent/application/upgrade/rollback.go index 8ce6958beae..319710aa1c7 100644 --- a/internal/pkg/agent/application/upgrade/rollback.go +++ b/internal/pkg/agent/application/upgrade/rollback.go @@ -31,14 +31,14 @@ const ( ) // Rollback rollbacks to previous version which was functioning before upgrade. -func Rollback(ctx context.Context, prevHash, currentHash string) error { +func Rollback(ctx context.Context, log *logger.Logger, prevHash string, currentHash string) error { // change symlink - if err := ChangeSymlink(ctx, prevHash); err != nil { + if err := ChangeSymlink(ctx, log, prevHash); err != nil { return err } // revert active commit - if err := UpdateActiveCommit(prevHash); err != nil { + if err := UpdateActiveCommit(log, prevHash); err != nil { return err } @@ -113,6 +113,7 @@ func InvokeWatcher(log *logger.Logger) error { } }() + log.Debugw("Starting upgrade watcher", "path", cmd.Path, "args", cmd.Args, "env", cmd.Env, "dir", cmd.Dir) return cmd.Start() } diff --git a/internal/pkg/agent/application/upgrade/step_download.go b/internal/pkg/agent/application/upgrade/step_download.go index 27e4b9c9e9c..906d52e2176 100644 --- a/internal/pkg/agent/application/upgrade/step_download.go +++ b/internal/pkg/agent/application/upgrade/step_download.go @@ -40,6 +40,10 @@ func (u *Upgrader) downloadArtifact(ctx context.Context, version, sourceURI stri } } + u.log.Infow("Downloading upgrade artifact", "version", version, + "source_uri", settings.SourceURI, "drop_path", settings.DropPath, + "target_path", settings.TargetDirectory, "install_path", settings.InstallPath) + verifier, err := newVerifier(version, u.log, &settings) if err != nil { return "", errors.New(err, "initiating verifier") diff --git a/internal/pkg/agent/application/upgrade/step_mark.go b/internal/pkg/agent/application/upgrade/step_mark.go index 66924337699..91eca75d2eb 100644 --- a/internal/pkg/agent/application/upgrade/step_mark.go +++ b/internal/pkg/agent/application/upgrade/step_mark.go @@ -17,6 +17,7 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/errors" "github.com/elastic/elastic-agent/internal/pkg/fleetapi" "github.com/elastic/elastic-agent/internal/pkg/release" + "github.com/elastic/elastic-agent/pkg/core/logger" ) const markerFilename = ".update-marker" @@ -91,7 +92,7 @@ func newMarkerSerializer(m *UpdateMarker) *updateMarkerSerializer { } // markUpgrade marks update happened so we can handle grace period -func (u *Upgrader) markUpgrade(_ context.Context, hash string, action Action) error { +func (u *Upgrader) markUpgrade(_ context.Context, log *logger.Logger, hash string, action Action) error { prevVersion := release.Version() prevHash := release.Commit() if len(prevHash) > hashLen { @@ -112,11 +113,12 @@ func (u *Upgrader) markUpgrade(_ context.Context, hash string, action Action) er } markerPath := markerFilePath() + log.Infow("Writing upgrade marker file", "file.path", markerPath, "hash", marker.Hash, "prev_hash", prevHash) if err := ioutil.WriteFile(markerPath, markerBytes, 0600); err != nil { return errors.New(err, errors.TypeFilesystem, "failed to create update marker file", errors.M(errors.MetaKeyPath, markerPath)) } - if err := UpdateActiveCommit(hash); err != nil { + if err := UpdateActiveCommit(log, hash); err != nil { return err } @@ -124,8 +126,9 @@ func (u *Upgrader) markUpgrade(_ context.Context, hash string, action Action) er } // UpdateActiveCommit updates active.commit file to point to active version. -func UpdateActiveCommit(hash string) error { +func UpdateActiveCommit(log *logger.Logger, hash string) error { activeCommitPath := filepath.Join(paths.Top(), agentCommitFile) + log.Infow("Updating active commit", "file.path", activeCommitPath, "hash", hash) if err := ioutil.WriteFile(activeCommitPath, []byte(hash), 0600); err != nil { return errors.New(err, errors.TypeFilesystem, "failed to update active commit", errors.M(errors.MetaKeyPath, activeCommitPath)) } diff --git a/internal/pkg/agent/application/upgrade/step_relink.go b/internal/pkg/agent/application/upgrade/step_relink.go index 9c998262ecd..e56b5a6642e 100644 --- a/internal/pkg/agent/application/upgrade/step_relink.go +++ b/internal/pkg/agent/application/upgrade/step_relink.go @@ -14,10 +14,11 @@ import ( "github.com/elastic/elastic-agent-libs/file" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/errors" + "github.com/elastic/elastic-agent/pkg/core/logger" ) // ChangeSymlink updates symlink paths to match current version. -func ChangeSymlink(ctx context.Context, targetHash string) error { +func ChangeSymlink(ctx context.Context, log *logger.Logger, targetHash string) error { // create symlink to elastic-agent-{hash} hashedDir := fmt.Sprintf("%s-%s", agentName, targetHash) @@ -31,6 +32,7 @@ func ChangeSymlink(ctx context.Context, targetHash string) error { } prevNewPath := prevSymlinkPath() + log.Infow("Changing symlink", "symlink_path", symlinkPath, "new_path", newPath, "prev_path", prevNewPath) // remove symlink to avoid upgrade failures if err := os.Remove(prevNewPath); !os.IsNotExist(err) { diff --git a/internal/pkg/agent/application/upgrade/step_unpack.go b/internal/pkg/agent/application/upgrade/step_unpack.go index 108593c5083..065a81d0011 100644 --- a/internal/pkg/agent/application/upgrade/step_unpack.go +++ b/internal/pkg/agent/application/upgrade/step_unpack.go @@ -21,6 +21,7 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/errors" + "github.com/elastic/elastic-agent/pkg/core/logger" ) // unpack unpacks archive correctly, skips root (symlink, config...) unpacks data/* @@ -30,18 +31,21 @@ func (u *Upgrader) unpack(ctx context.Context, version, archivePath string) (str var hash string var err error if runtime.GOOS == "windows" { - hash, err = unzip(version, archivePath) + hash, err = unzip(u.log, version, archivePath) } else { - hash, err = untar(version, archivePath) + hash, err = untar(u.log, version, archivePath) } + if err != nil { + u.log.Infow("Failed to unpack upgrade artifact", "error.message", err, "version", version, "file.path", archivePath, "hash", hash) return "", err } + u.log.Infow("Unpacked upgrade artifact", "version", version, "file.path", archivePath, "hash", hash) return hash, nil } -func unzip(version, archivePath string) (string, error) { +func unzip(log *logger.Logger, version string, archivePath string) (string, error) { var hash, rootDir string r, err := zip.OpenReader(archivePath) if err != nil { @@ -82,8 +86,10 @@ func unzip(version, archivePath string) (string, error) { path := filepath.Join(paths.Data(), strings.TrimPrefix(fileName, "data/")) if f.FileInfo().IsDir() { + log.Debugw("Unpacking directory", "archive", "zip", "file.path", path) os.MkdirAll(path, f.Mode()) } else { + log.Debugw("Unpacking file", "archive", "zip", "file.path", path) os.MkdirAll(filepath.Dir(path), f.Mode()) f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode()) if err != nil { @@ -119,7 +125,7 @@ func unzip(version, archivePath string) (string, error) { return hash, nil } -func untar(version, archivePath string) (string, error) { +func untar(log *logger.Logger, version string, archivePath string) (string, error) { r, err := os.Open(archivePath) if err != nil { return "", errors.New(fmt.Sprintf("artifact for 'elastic-agent' version '%s' could not be found at '%s'", version, archivePath), errors.TypeFilesystem, errors.M(errors.MetaKeyPath, archivePath)) @@ -183,6 +189,7 @@ func untar(version, archivePath string) (string, error) { mode := fi.Mode() switch { case mode.IsRegular(): + log.Debugw("Unpacking file", "archive", "tar", "file.path", abs) // just to be sure, it should already be created by Dir type if err := os.MkdirAll(filepath.Dir(abs), 0755); err != nil { return "", errors.New(err, "TarInstaller: creating directory for file "+abs, errors.TypeFilesystem, errors.M(errors.MetaKeyPath, abs)) @@ -201,6 +208,7 @@ func untar(version, archivePath string) (string, error) { return "", fmt.Errorf("TarInstaller: error writing to %s: %w", abs, err) } case mode.IsDir(): + log.Debugw("Unpacking directory", "archive", "tar", "file.path", abs) if err := os.MkdirAll(abs, 0755); err != nil { return "", errors.New(err, "TarInstaller: creating directory for file "+abs, errors.TypeFilesystem, errors.M(errors.MetaKeyPath, abs)) } diff --git a/internal/pkg/agent/application/upgrade/upgrade.go b/internal/pkg/agent/application/upgrade/upgrade.go index 1c6a85fa9d9..af359736535 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ b/internal/pkg/agent/application/upgrade/upgrade.go @@ -108,6 +108,7 @@ func (u *Upgrader) Upgradeable() bool { // Upgrade upgrades running agent, function returns shutdown callback if some needs to be executed for cases when // reexec is called by caller. func (u *Upgrader) Upgrade(ctx context.Context, a Action, reexecNow bool) (_ reexec.ShutdownCallbackFn, err error) { + u.log.Infow("Upgrading agent", "version", a.Version(), "source_uri", a.SourceURI()) span, ctx := apm.StartSpan(ctx, "upgrade", "app.internal") defer span.End() // report failed @@ -126,9 +127,9 @@ func (u *Upgrader) Upgrade(ctx context.Context, a Action, reexecNow bool) (_ ree "running under control of the systems supervisor") } - err = preUpgradeCleanup(u.agentInfo.Version()) + err = cleanNonMatchingVersionsFromDownloads(u.log, u.agentInfo.Version()) if err != nil { - u.log.Errorf("Unable to clean downloads dir %q before update: %v", paths.Downloads(), err) + u.log.Errorw("Unable to clean downloads before update", "error.message", err, "downloads.path", paths.Downloads()) } if u.caps != nil { @@ -142,10 +143,10 @@ func (u *Upgrader) Upgrade(ctx context.Context, a Action, reexecNow bool) (_ ree sourceURI := u.sourceURI(a.SourceURI()) archivePath, err := u.downloadArtifact(ctx, a.Version(), sourceURI) if err != nil { - // Run the same preUpgradeCleanup task to get rid of any newly downloaded files + // Run the same pre-upgrade cleanup task to get rid of any newly downloaded files // This may have an issue if users are upgrading to the same version number. - if dErr := preUpgradeCleanup(u.agentInfo.Version()); dErr != nil { - u.log.Errorf("Unable to remove file after verification failure: %v", dErr) + if dErr := cleanNonMatchingVersionsFromDownloads(u.log, u.agentInfo.Version()); dErr != nil { + u.log.Errorw("Unable to remove file after verification failure", "error.message", dErr) } return nil, err } @@ -169,39 +170,47 @@ func (u *Upgrader) Upgrade(ctx context.Context, a Action, reexecNow bool) (_ ree return nil, nil } - if err := copyActionStore(newHash); err != nil { + if err := copyActionStore(u.log, newHash); err != nil { return nil, errors.New(err, "failed to copy action store") } - if err := ChangeSymlink(ctx, newHash); err != nil { - rollbackInstall(ctx, newHash) + if err := ChangeSymlink(ctx, u.log, newHash); err != nil { + u.log.Errorw("Rolling back: changing symlink failed", "error.message", err) + rollbackInstall(ctx, u.log, newHash) return nil, err } - if err := u.markUpgrade(ctx, newHash, a); err != nil { - rollbackInstall(ctx, newHash) + if err := u.markUpgrade(ctx, u.log, newHash, a); err != nil { + u.log.Errorw("Rolling back: marking upgrade failed", "error.message", err) + rollbackInstall(ctx, u.log, newHash) return nil, err } if err := InvokeWatcher(u.log); err != nil { - rollbackInstall(ctx, newHash) + u.log.Errorw("Rolling back: starting watcher failed", "error.message", err) + rollbackInstall(ctx, u.log, newHash) return nil, errors.New("failed to invoke rollback watcher", err) } - cb := shutdownCallback(u.log, paths.Home(), release.Version(), a.Version(), release.TrimCommit(newHash)) + trimmedNewHash := release.TrimCommit(newHash) + cb := shutdownCallback(u.log, paths.Home(), release.Version(), a.Version(), trimmedNewHash) if reexecNow { + u.log.Infow("Removing downloads directory", "file.path", paths.Downloads(), "rexec", reexecNow) err = os.RemoveAll(paths.Downloads()) if err != nil { - u.log.Errorf("Unable to clean downloads dir %q after update: %v", paths.Downloads(), err) + u.log.Errorw("Unable to clean downloads after update", "error.message", err, "downloads.path", paths.Downloads()) } + u.log.Infow("Restarting after upgrade", "new_version", release.Version(), "prev_version", a.Version(), + "hash", trimmedNewHash, "home", paths.Home()) u.reexec.ReExec(cb) return nil, nil } // Clean everything from the downloads dir + u.log.Infow("Removing downloads directory", "file.path", paths.Downloads(), "rexec", reexecNow) err = os.RemoveAll(paths.Downloads()) if err != nil { - u.log.Errorf("Unable to clean downloads dir %q after update: %v", paths.Downloads(), err) + u.log.Errorw("Unable to clean downloads after update", "error.message", err, "file.path", paths.Downloads()) } return cb, nil @@ -283,19 +292,20 @@ func (u *Upgrader) reportUpdating(version string) { ) } -func rollbackInstall(ctx context.Context, hash string) { +func rollbackInstall(ctx context.Context, log *logger.Logger, hash string) { os.RemoveAll(filepath.Join(paths.Data(), fmt.Sprintf("%s-%s", agentName, hash))) - _ = ChangeSymlink(ctx, release.ShortCommit()) + _ = ChangeSymlink(ctx, log, release.ShortCommit()) } -func copyActionStore(newHash string) error { +func copyActionStore(log *logger.Logger, newHash string) error { // copies legacy action_store.yml, state.yml and state.enc encrypted file if exists storePaths := []string{paths.AgentActionStoreFile(), paths.AgentStateStoreYmlFile(), paths.AgentStateStoreFile()} + newHome := filepath.Join(filepath.Dir(paths.Home()), fmt.Sprintf("%s-%s", agentName, newHash)) + log.Infow("Copying action store", "new_home_path", newHome) for _, currentActionStorePath := range storePaths { - newHome := filepath.Join(filepath.Dir(paths.Home()), fmt.Sprintf("%s-%s", agentName, newHash)) newActionStorePath := filepath.Join(newHome, filepath.Base(currentActionStorePath)) - + log.Debugw("Copying action store path", "from", currentActionStorePath, "to", newActionStorePath) currentActionStore, err := ioutil.ReadFile(currentActionStorePath) if os.IsNotExist(err) { // nothing to copy diff --git a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index 64bd604cd85..bc9c8f2204c 100644 --- a/internal/pkg/agent/cmd/watch.go +++ b/internal/pkg/agent/cmd/watch.go @@ -15,6 +15,7 @@ import ( "github.com/spf13/cobra" + "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade" @@ -40,8 +41,13 @@ func newWatchCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command Short: "Watch watches Elastic Agent for failures and initiates rollback.", Long: `Watch watches Elastic Agent for failures and initiates rollback.`, Run: func(_ *cobra.Command, _ []string) { - if err := watchCmd(); err != nil { - fmt.Fprintf(streams.Err, "Error: %v\n%s\n", err, troubleshootMessage()) + log, err := configuredLogger() + if err != nil { + fmt.Fprintf(streams.Err, "Error configuring logger: %v\n%s\n", err, troubleshootMessage()) + } + if err := watchCmd(log); err != nil { + log.Errorw("Watch command failed", "error.message", err) + fmt.Fprintf(streams.Err, "Watch command failed: %v\n%s\n", err, troubleshootMessage()) os.Exit(1) } }, @@ -50,12 +56,7 @@ func newWatchCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command return cmd } -func watchCmd() error { - log, err := configuredLogger() - if err != nil { - return err - } - +func watchCmd(log *logp.Logger) error { marker, err := upgrade.LoadMarker() if err != nil { log.Error("failed to load marker", err) @@ -97,8 +98,8 @@ func watchCmd() error { ctx := context.Background() if err := watch(ctx, tilGrace, log); err != nil { - log.Debugf("Error detected proceeding to rollback: %v", err) - err = upgrade.Rollback(ctx, marker.PrevHash, marker.Hash) + log.Debug("Error detected proceeding to rollback: %v", err) + err = upgrade.Rollback(ctx, log, marker.PrevHash, marker.Hash) if err != nil { log.Error("rollback failed", err) }