From 3dfb893d0dbaa527d6c79c2bb7c7a1b01bbd959c Mon Sep 17 00:00:00 2001 From: Matt Cottingham Date: Tue, 28 May 2019 16:07:02 +0100 Subject: [PATCH] Propagate retriable/haltable errors from compactor Previously, a new error was created from all worker errors so the custom type was not propagated. If all errors returned are retriable, we assume it is safe to retry the compaction. Since the compactor may still return a single error this logic is preserved. --- cmd/thanos/compact.go | 6 +++--- pkg/compact/compact.go | 30 ++++++++++++++++++++++++------ pkg/compact/compact_test.go | 26 ++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 9 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index b65a389f83..a789b70842 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -285,9 +285,9 @@ func runCompact( if err == nil { return nil } + // The HaltError type signals that we hit a critical bug and should block - // for investigation. - // You should alert on this being halted. + // for investigation. You should alert on this being halted. if compact.IsHaltError(err) { if haltOnError { level.Error(logger).Log("msg", "critical error detected; halting", "err", err) @@ -299,7 +299,7 @@ func runCompact( } // The RetryError signals that we hit an retriable error (transient error, no connection). - // You should alert on this being triggered to frequently. + // You should alert on this being triggered too frequently. if compact.IsRetryError(err) { level.Error(logger).Log("msg", "retriable error", "err", err) retried.Inc() diff --git a/pkg/compact/compact.go b/pkg/compact/compact.go index c9c21cdeb0..b2177cb865 100644 --- a/pkg/compact/compact.go +++ b/pkg/compact/compact.go @@ -8,15 +8,13 @@ import ( "path" "path/filepath" "sort" - "strings" "sync" "time" - "github.com/improbable-eng/thanos/pkg/block/metadata" - "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/improbable-eng/thanos/pkg/block" + "github.com/improbable-eng/thanos/pkg/block/metadata" "github.com/improbable-eng/thanos/pkg/compact/downsample" "github.com/improbable-eng/thanos/pkg/objstore" "github.com/oklog/ulid" @@ -613,7 +611,17 @@ func (e HaltError) Error() string { } // IsHaltError returns true if the base error is a HaltError. +// If a multierror is passed, any halt error will return true. func IsHaltError(err error) bool { + if multiErr, ok := err.(tsdb.MultiError); ok { + for _, err := range multiErr { + if _, ok := errors.Cause(err).(HaltError); ok { + return true + } + } + return false + } + _, ok := errors.Cause(err).(HaltError) return ok } @@ -636,7 +644,17 @@ func (e RetryError) Error() string { } // IsRetryError returns true if the base error is a RetryError. +// If a multierror is passed, all errors must be retriable. func IsRetryError(err error) bool { + if multiErr, ok := err.(tsdb.MultiError); ok { + for _, err := range multiErr { + if _, ok := errors.Cause(err).(RetryError); !ok { + return false + } + } + return true + } + _, ok := errors.Cause(err).(RetryError) return ok } @@ -1037,12 +1055,12 @@ func (c *BucketCompactor) Compact(ctx context.Context) error { close(errChan) workCtxCancel() if err != nil { - errMsgs := []string{err.Error()} + errs := tsdb.MultiError{err} // Collect any other errors reported by the workers. for e := range errChan { - errMsgs = append(errMsgs, e.Error()) + errs.Add(e) } - return errors.New(strings.Join(errMsgs, "; ")) + return errs } if finishedAllGroups { diff --git a/pkg/compact/compact_test.go b/pkg/compact/compact_test.go index 47155161f9..29a0468b9a 100644 --- a/pkg/compact/compact_test.go +++ b/pkg/compact/compact_test.go @@ -11,6 +11,7 @@ import ( "github.com/improbable-eng/thanos/pkg/testutil" "github.com/oklog/ulid" "github.com/pkg/errors" + "github.com/prometheus/tsdb" ) func TestHaltError(t *testing.T) { @@ -27,6 +28,31 @@ func TestHaltError(t *testing.T) { testutil.Assert(t, IsHaltError(err), "not a halt error") } +func TestHaltMultiError(t *testing.T) { + haltErr := halt(errors.New("halt error")) + nonHaltErr := errors.New("not a halt error") + + errs := tsdb.MultiError{nonHaltErr} + testutil.Assert(t, !IsHaltError(errs), "should not be a halt error") + + errs.Add(haltErr) + testutil.Assert(t, IsHaltError(errs), "if any halt errors are present this should return true") +} + +func TestRetryMultiError(t *testing.T) { + retryErr := retry(errors.New("retry error")) + nonRetryErr := errors.New("not a retry error") + + errs := tsdb.MultiError{nonRetryErr} + testutil.Assert(t, !IsRetryError(errs), "should not be a retry error") + + errs = tsdb.MultiError{retryErr} + testutil.Assert(t, IsRetryError(errs), "if all errors are retriable this should return true") + + errs = tsdb.MultiError{nonRetryErr, retryErr} + testutil.Assert(t, !IsRetryError(errs), "mixed errors should return false") +} + func TestRetryError(t *testing.T) { err := errors.New("test") testutil.Assert(t, !IsRetryError(err), "retry error")