From 852d2bd2e60fb48d5ef5a3398f69f116fa25f73c Mon Sep 17 00:00:00 2001 From: Lukasz Mierzwa Date: Wed, 6 Apr 2022 10:51:30 +0100 Subject: [PATCH] Correctly handle 504s --- cmd/pint/tests/0067_relaxed.txt | 2 +- docs/changelog.md | 7 +++++++ internal/discovery/discovery.go | 21 +++++++++++++++------ internal/discovery/glob.go | 4 +++- internal/discovery/glob_test.go | 15 ++++++++++++++- internal/promapi/errors.go | 3 +++ internal/promapi/range_test.go | 21 ++++++++++++++++++++- 7 files changed, 63 insertions(+), 10 deletions(-) diff --git a/cmd/pint/tests/0067_relaxed.txt b/cmd/pint/tests/0067_relaxed.txt index dd2fa02a..05a22258 100644 --- a/cmd/pint/tests/0067_relaxed.txt +++ b/cmd/pint/tests/0067_relaxed.txt @@ -5,7 +5,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=info msg="Loading configuration file" path=.pint.hcl level=info msg="File parsed" path=rules/relaxed.yml rules=1 -level=error msg="Failed to parse file content" error="yaml: unmarshal errors:\n line 1: cannot unmarshal !!seq into rulefmt.RuleGroups" path=rules/strict.yml +level=error msg="Failed to unmarshal file content" error="yaml: unmarshal errors:\n line 1: cannot unmarshal !!seq into rulefmt.RuleGroups" path=rules/strict.yml rules/strict.yml:1: yaml: unmarshal errors: line 1: cannot unmarshal !!seq into rulefmt.RuleGroups (yaml/parse) - alert: No Owner diff --git a/docs/changelog.md b/docs/changelog.md index 33d15333..4cdccf4d 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,12 @@ # Changelog +## v0.17.1 + +### Fixed + +- Handle `504 Gateway Timeout` HTTP responses from Prometheus same as query + timeouts and retry with a shorter range query. + ## v0.17.0 ### Added diff --git a/internal/discovery/discovery.go b/internal/discovery/discovery.go index f7e2b407..183b6ed6 100644 --- a/internal/discovery/discovery.go +++ b/internal/discovery/discovery.go @@ -3,6 +3,7 @@ package discovery import ( "os" "regexp" + "strings" "github.com/prometheus/prometheus/model/rulefmt" "gopkg.in/yaml.v3" @@ -43,16 +44,22 @@ func readFile(path string, isStrict bool) (entries []Entry, err error) { return nil, err } + contentLines := []int{} + for i := 1; i <= strings.Count(string(content), "\n"); i++ { + contentLines = append(contentLines, i) + } + fileOwner, _ := parser.GetComment(string(content), FileOwnerComment) if isStrict { var r rulefmt.RuleGroups if err = yaml.Unmarshal(content, &r); err != nil { - log.Error().Str("path", path).Err(err).Msg("Failed to parse file content") + log.Error().Str("path", path).Err(err).Msg("Failed to unmarshal file content") entries = append(entries, Entry{ - Path: path, - PathError: err, - Owner: fileOwner, + Path: path, + PathError: err, + Owner: fileOwner, + ModifiedLines: contentLines, }) return entries, nil } @@ -62,8 +69,10 @@ func readFile(path string, isStrict bool) (entries []Entry, err error) { if err != nil { log.Error().Str("path", path).Err(err).Msg("Failed to parse file content") entries = append(entries, Entry{ - Path: path, - PathError: err, + Path: path, + PathError: err, + Owner: fileOwner, + ModifiedLines: contentLines, }) return entries, nil } diff --git a/internal/discovery/glob.go b/internal/discovery/glob.go index 1c441973..1186e3d1 100644 --- a/internal/discovery/glob.go +++ b/internal/discovery/glob.go @@ -55,7 +55,9 @@ func (f GlobFinder) Find() (entries []Entry, err error) { return nil, fmt.Errorf("invalid file syntax: %w", err) } for _, e := range el { - e.ModifiedLines = e.Rule.Lines() + if len(e.ModifiedLines) == 0 { + e.ModifiedLines = e.Rule.Lines() + } entries = append(entries, e) } } diff --git a/internal/discovery/glob_test.go b/internal/discovery/glob_test.go index 3bd766e8..b35752f8 100644 --- a/internal/discovery/glob_test.go +++ b/internal/discovery/glob_test.go @@ -1,6 +1,7 @@ package discovery_test import ( + "errors" "fmt" "io/ioutil" "os" @@ -92,7 +93,19 @@ func TestGlobPathFinder(t *testing.T) { { Path: "bar.yml", PathError: strictErr, - ModifiedLines: []int{0}, + ModifiedLines: []int{1, 2, 3, 4}, + Owner: "bob", + }, + }, + }, + { + files: map[string]string{"bar.yml": "record:::{}\n expr: sum(foo)\n\n# pint file/owner bob\n"}, + finder: discovery.NewGlobFinder([]string{"*"}, []*regexp.Regexp{regexp.MustCompile(".*")}), + entries: []discovery.Entry{ + { + Path: "bar.yml", + PathError: errors.New("yaml: line 2: mapping values are not allowed in this context"), + ModifiedLines: []int{1, 2, 3, 4}, Owner: "bob", }, }, diff --git a/internal/promapi/errors.go b/internal/promapi/errors.go index 9062b846..01a9b650 100644 --- a/internal/promapi/errors.go +++ b/internal/promapi/errors.go @@ -58,6 +58,9 @@ func CanRetryError(err error, delta time.Duration) (time.Duration, bool) { return delta / 2, true case v1.ErrBadResponse: case v1.ErrServer: + if apiErr.Msg == "server error: 504" { + return (delta / 4).Round(time.Minute), true + } case v1.ErrClient: return (delta / 2).Round(time.Minute), true } diff --git a/internal/promapi/range_test.go b/internal/promapi/range_test.go index 4449dd58..5fecbba0 100644 --- a/internal/promapi/range_test.go +++ b/internal/promapi/range_test.go @@ -37,7 +37,7 @@ func TestRange(t *testing.T) { query != "too_many_samples1" && query != "error1" && query != "error2" && query != "vector1" && query != "vector2" && - query != "slow1" && query != "timeout1" { + query != "slow1" && query != "timeout1" && query != "gateway_timeout1" { t.Errorf("%q already requested diff=%s", query, diff) t.FailNow() } @@ -96,6 +96,17 @@ func TestRange(t *testing.T) { w.Header().Set("Content-Type", "application/json") _, _ = w.Write([]byte(`unknown start/end`)) } + case "gateway_timeout1": + switch diff.String() { + case "168h0m0s", "42h0m0s", "10h30m0s", "2h38m0s", "40m0s", "10m0s": + w.WriteHeader(504) + _, _ = w.Write([]byte(`504 Gateway Time-out`)) + default: + t.Errorf("invalid timeout diff: %s", diff) + w.WriteHeader(500) + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`unknown start/end`)) + } case "timeout_until_success1": switch diff.String() { case "168h0m0s", "42h0m0s", "10h30m0s", "2h38m0s": @@ -343,6 +354,14 @@ func TestRange(t *testing.T) { }, runs: 5, }, + { + query: "gateway_timeout1", + lookback: time.Hour * 24 * 7, + step: time.Minute * 5, + timeout: time.Second, + err: "no more retries possible", + runs: 5, + }, // cache hit { query: "retry_until_success1",