Skip to content

Commit

Permalink
[8.4](backport #1287) Improve logging for agent upgrades. (#1309)
Browse files Browse the repository at this point in the history
* Improve logging for agent upgrades. (#1287)

(cherry picked from commit 5225e54)

# Conflicts:
#	internal/pkg/agent/control/server/server.go

* Fix merge conflicts.

Co-authored-by: Craig MacKenzie <craig.mackenzie@elastic.co>
  • Loading branch information
mergify[bot] and cmacknz authored Sep 26, 2022
1 parent 7f10625 commit 330ae74
Show file tree
Hide file tree
Showing 12 changed files with 115 additions and 51 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,4 @@
- Add liveness endpoint, allow fleet-gateway component to report degraded state, add update time and messages to status output. {issue}390[390] {pull}569[569]
- Redact sensitive information on diagnostics collect command. {issue}[241] {pull}[566]
- Fix incorrectly creating a filebeat redis input when a policy contains a packetbeat redis input. {issue}[427] {pull}[700]
- Improve logging during upgrades. {pull}[1287]
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ func (h *Upgrade) Handle(ctx context.Context, a fleetapi.Action, acker store.Fle
}

_, err := h.upgrader.Upgrade(ctx, &upgradeAction{action}, true)
if err != nil {
// Always log upgrade failures at the error level. Action errors are logged at debug level
// by default higher up the stack in ActionDispatcher.Dispatch()
h.log.Errorw("Upgrade action failed", "error.message", err,
"action.version", action.Version, "action.source_uri", action.SourceURI, "action.id", action.ActionID,
"action.start_time", action.StartTime, "action.expiration", action.ActionExpiration)
}

return err
}

Expand Down
10 changes: 7 additions & 3 deletions internal/pkg/agent/application/upgrade/cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,15 @@ import (
"github.com/hashicorp/go-multierror"

"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/pkg/core/logger"
)

// preUpgradeCleanup will remove files that do not have the passed version number from the downloads directory.
func preUpgradeCleanup(version string) error {
files, err := os.ReadDir(paths.Downloads())
// cleanNonMatchingVersionsFromDownloads will remove files that do not have the passed version number from the downloads directory.
func cleanNonMatchingVersionsFromDownloads(log *logger.Logger, version string) error {
downloadsPath := paths.Downloads()
log.Debugw("Cleaning up non-matching downloaded versions", "version", version, "downloads.path", downloadsPath)

files, err := os.ReadDir(downloadsPath)
if err != nil {
return fmt.Errorf("unable to read directory %q: %w", paths.Downloads(), err)
}
Expand Down
16 changes: 15 additions & 1 deletion internal/pkg/agent/application/upgrade/cleanup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ import (
"path/filepath"
"testing"

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/pkg/core/logger"

"github.com/stretchr/testify/require"
)
Expand All @@ -31,7 +33,8 @@ func setupDir(t *testing.T) {

func TestPreUpgradeCleanup(t *testing.T) {
setupDir(t)
err := preUpgradeCleanup("8.4.0")
log := newErrorLogger(t)
err := cleanNonMatchingVersionsFromDownloads(log, "8.4.0")
require.NoError(t, err)

files, err := os.ReadDir(paths.Downloads())
Expand All @@ -42,3 +45,14 @@ func TestPreUpgradeCleanup(t *testing.T) {
require.NoError(t, err)
require.Equal(t, []byte("hello, world!"), p)
}

func newErrorLogger(t *testing.T) *logger.Logger {
t.Helper()

loggerCfg := logger.DefaultLoggingConfig()
loggerCfg.Level = logp.ErrorLevel

log, err := logger.NewFromConfig("", loggerCfg, false)
require.NoError(t, err)
return log
}
20 changes: 13 additions & 7 deletions internal/pkg/agent/application/upgrade/rollback.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,33 +31,35 @@ const (
)

// Rollback rollbacks to previous version which was functioning before upgrade.
func Rollback(ctx context.Context, prevHash, currentHash string) error {
func Rollback(ctx context.Context, log *logger.Logger, prevHash string, currentHash string) error {
// change symlink
if err := ChangeSymlink(ctx, prevHash); err != nil {
if err := ChangeSymlink(ctx, log, prevHash); err != nil {
return err
}

// revert active commit
if err := UpdateActiveCommit(prevHash); err != nil {
if err := UpdateActiveCommit(log, prevHash); err != nil {
return err
}

// Restart
log.Info("Restarting the agent after rollback")
if err := restartAgent(ctx); err != nil {
return err
}

// cleanup everything except version we're rolling back into
return Cleanup(prevHash, true)
return Cleanup(log, prevHash, true)
}

// Cleanup removes all artifacts and files related to a specified version.
func Cleanup(currentHash string, removeMarker bool) error {
func Cleanup(log *logger.Logger, currentHash string, removeMarker bool) error {
log.Debugw("Cleaning up upgrade", "hash", currentHash, "remove_marker", removeMarker)
<-time.After(afterRestartDelay)

// remove upgrade marker
if removeMarker {
if err := CleanMarker(); err != nil {
if err := CleanMarker(log); err != nil {
return err
}
}
Expand All @@ -74,7 +76,9 @@ func Cleanup(currentHash string, removeMarker bool) error {
}

// remove symlink to avoid upgrade failures, ignore error
_ = os.Remove(prevSymlinkPath())
prevSymlink := prevSymlinkPath()
log.Debugw("Removing previous symlink path", "file.path", prevSymlinkPath())
_ = os.Remove(prevSymlink)

dirPrefix := fmt.Sprintf("%s-", agentName)
currentDir := fmt.Sprintf("%s-%s", agentName, currentHash)
Expand All @@ -88,6 +92,7 @@ func Cleanup(currentHash string, removeMarker bool) error {
}

hashedDir := filepath.Join(paths.Data(), dir)
log.Debugw("Removing hashed data directory", "file.path", hashedDir)
if cleanupErr := install.RemovePath(hashedDir); cleanupErr != nil {
err = multierror.Append(err, cleanupErr)
}
Expand All @@ -113,6 +118,7 @@ func InvokeWatcher(log *logger.Logger) error {
}
}()

log.Debugw("Starting upgrade watcher", "path", cmd.Path, "args", cmd.Args, "env", cmd.Env, "dir", cmd.Dir)
return cmd.Start()
}

Expand Down
4 changes: 4 additions & 0 deletions internal/pkg/agent/application/upgrade/step_download.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ func (u *Upgrader) downloadArtifact(ctx context.Context, version, sourceURI stri
}
}

u.log.Debugw("Downloading upgrade artifact", "version", version,
"source_uri", settings.SourceURI, "drop_path", settings.DropPath,
"target_path", settings.TargetDirectory, "install_path", settings.InstallPath)

verifier, err := newVerifier(version, u.log, &settings)
if err != nil {
return "", errors.New(err, "initiating verifier")
Expand Down
12 changes: 8 additions & 4 deletions internal/pkg/agent/application/upgrade/step_mark.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/elastic/elastic-agent/internal/pkg/agent/errors"
"github.com/elastic/elastic-agent/internal/pkg/fleetapi"
"github.com/elastic/elastic-agent/internal/pkg/release"
"github.com/elastic/elastic-agent/pkg/core/logger"
)

const markerFilename = ".update-marker"
Expand Down Expand Up @@ -91,7 +92,7 @@ func newMarkerSerializer(m *UpdateMarker) *updateMarkerSerializer {
}

// markUpgrade marks update happened so we can handle grace period
func (u *Upgrader) markUpgrade(_ context.Context, hash string, action Action) error {
func (u *Upgrader) markUpgrade(_ context.Context, log *logger.Logger, hash string, action Action) error {
prevVersion := release.Version()
prevHash := release.Commit()
if len(prevHash) > hashLen {
Expand All @@ -112,20 +113,22 @@ func (u *Upgrader) markUpgrade(_ context.Context, hash string, action Action) er
}

markerPath := markerFilePath()
log.Infow("Writing upgrade marker file", "file.path", markerPath, "hash", marker.Hash, "prev_hash", prevHash)
if err := ioutil.WriteFile(markerPath, markerBytes, 0600); err != nil {
return errors.New(err, errors.TypeFilesystem, "failed to create update marker file", errors.M(errors.MetaKeyPath, markerPath))
}

if err := UpdateActiveCommit(hash); err != nil {
if err := UpdateActiveCommit(log, hash); err != nil {
return err
}

return nil
}

// UpdateActiveCommit updates active.commit file to point to active version.
func UpdateActiveCommit(hash string) error {
func UpdateActiveCommit(log *logger.Logger, hash string) error {
activeCommitPath := filepath.Join(paths.Top(), agentCommitFile)
log.Infow("Updating active commit", "file.path", activeCommitPath, "hash", hash)
if err := ioutil.WriteFile(activeCommitPath, []byte(hash), 0600); err != nil {
return errors.New(err, errors.TypeFilesystem, "failed to update active commit", errors.M(errors.MetaKeyPath, activeCommitPath))
}
Expand All @@ -134,8 +137,9 @@ func UpdateActiveCommit(hash string) error {
}

// CleanMarker removes a marker from disk.
func CleanMarker() error {
func CleanMarker(log *logger.Logger) error {
markerFile := markerFilePath()
log.Debugw("Removing marker file", "file.path", markerFile)
if err := os.Remove(markerFile); !os.IsNotExist(err) {
return err
}
Expand Down
4 changes: 3 additions & 1 deletion internal/pkg/agent/application/upgrade/step_relink.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@ import (
"github.com/elastic/elastic-agent-libs/file"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/internal/pkg/agent/errors"
"github.com/elastic/elastic-agent/pkg/core/logger"
)

// ChangeSymlink updates symlink paths to match current version.
func ChangeSymlink(ctx context.Context, targetHash string) error {
func ChangeSymlink(ctx context.Context, log *logger.Logger, targetHash string) error {
// create symlink to elastic-agent-{hash}
hashedDir := fmt.Sprintf("%s-%s", agentName, targetHash)

Expand All @@ -31,6 +32,7 @@ func ChangeSymlink(ctx context.Context, targetHash string) error {
}

prevNewPath := prevSymlinkPath()
log.Infow("Changing symlink", "symlink_path", symlinkPath, "new_path", newPath, "prev_path", prevNewPath)

// remove symlink to avoid upgrade failures
if err := os.Remove(prevNewPath); !os.IsNotExist(err) {
Expand Down
16 changes: 12 additions & 4 deletions internal/pkg/agent/application/upgrade/step_unpack.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/internal/pkg/agent/errors"
"github.com/elastic/elastic-agent/pkg/core/logger"
)

// unpack unpacks archive correctly, skips root (symlink, config...) unpacks data/*
Expand All @@ -30,18 +31,21 @@ func (u *Upgrader) unpack(ctx context.Context, version, archivePath string) (str
var hash string
var err error
if runtime.GOOS == "windows" {
hash, err = unzip(version, archivePath)
hash, err = unzip(u.log, version, archivePath)
} else {
hash, err = untar(version, archivePath)
hash, err = untar(u.log, version, archivePath)
}

if err != nil {
u.log.Errorw("Failed to unpack upgrade artifact", "error.message", err, "version", version, "file.path", archivePath, "hash", hash)
return "", err
}

u.log.Infow("Unpacked upgrade artifact", "version", version, "file.path", archivePath, "hash", hash)
return hash, nil
}

func unzip(version, archivePath string) (string, error) {
func unzip(log *logger.Logger, version string, archivePath string) (string, error) {
var hash, rootDir string
r, err := zip.OpenReader(archivePath)
if err != nil {
Expand Down Expand Up @@ -82,8 +86,10 @@ func unzip(version, archivePath string) (string, error) {
path := filepath.Join(paths.Data(), strings.TrimPrefix(fileName, "data/"))

if f.FileInfo().IsDir() {
log.Debugw("Unpacking directory", "archive", "zip", "file.path", path)
os.MkdirAll(path, f.Mode())
} else {
log.Debugw("Unpacking file", "archive", "zip", "file.path", path)
os.MkdirAll(filepath.Dir(path), f.Mode())
f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
if err != nil {
Expand Down Expand Up @@ -119,7 +125,7 @@ func unzip(version, archivePath string) (string, error) {
return hash, nil
}

func untar(version, archivePath string) (string, error) {
func untar(log *logger.Logger, version string, archivePath string) (string, error) {
r, err := os.Open(archivePath)
if err != nil {
return "", errors.New(fmt.Sprintf("artifact for 'elastic-agent' version '%s' could not be found at '%s'", version, archivePath), errors.TypeFilesystem, errors.M(errors.MetaKeyPath, archivePath))
Expand Down Expand Up @@ -183,6 +189,7 @@ func untar(version, archivePath string) (string, error) {
mode := fi.Mode()
switch {
case mode.IsRegular():
log.Debugw("Unpacking file", "archive", "tar", "file.path", abs)
// just to be sure, it should already be created by Dir type
if err := os.MkdirAll(filepath.Dir(abs), 0755); err != nil {
return "", errors.New(err, "TarInstaller: creating directory for file "+abs, errors.TypeFilesystem, errors.M(errors.MetaKeyPath, abs))
Expand All @@ -201,6 +208,7 @@ func untar(version, archivePath string) (string, error) {
return "", fmt.Errorf("TarInstaller: error writing to %s: %w", abs, err)
}
case mode.IsDir():
log.Debugw("Unpacking directory", "archive", "tar", "file.path", abs)
if err := os.MkdirAll(abs, 0755); err != nil {
return "", errors.New(err, "TarInstaller: creating directory for file "+abs, errors.TypeFilesystem, errors.M(errors.MetaKeyPath, abs))
}
Expand Down
Loading

0 comments on commit 330ae74

Please sign in to comment.