From a06c5c3af73e73db63211a90870f2f15d0cc35be Mon Sep 17 00:00:00 2001 From: Mark Holt Date: Sat, 20 Apr 2024 20:51:17 +0100 Subject: [PATCH 1/3] added body close on retry --- erigon-lib/downloader/downloader.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/erigon-lib/downloader/downloader.go b/erigon-lib/downloader/downloader.go index 9de4b1ba101..8d8fdda61a9 100644 --- a/erigon-lib/downloader/downloader.go +++ b/erigon-lib/downloader/downloader.go @@ -183,6 +183,11 @@ func (r *requestHandler) RoundTrip(req *http.Request) (resp *http.Response, err case http.StatusInternalServerError, http.StatusBadGateway: r.downloader.stats.WebseedServerFails.Add(1) + if resp.Body != nil { + resp.Body.Close() + resp.Body = nil + } + attempts++ delayTimer := time.NewTimer(delay) From f38be45b285b87222ab2b3512f3ec63d21bff48a Mon Sep 17 00:00:00 2001 From: Mark Holt Date: Mon, 22 Apr 2024 14:59:21 +0100 Subject: [PATCH 2/3] additional error handling for http --- erigon-lib/downloader/downloader.go | 57 +++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/erigon-lib/downloader/downloader.go b/erigon-lib/downloader/downloader.go index 8d8fdda61a9..e3a268fb2ec 100644 --- a/erigon-lib/downloader/downloader.go +++ b/erigon-lib/downloader/downloader.go @@ -24,6 +24,7 @@ import ( "encoding/json" "errors" "fmt" + "math" "math/rand" "net/http" "net/url" @@ -33,6 +34,7 @@ import ( "reflect" "runtime" "sort" + "strconv" "strings" "sync" "sync/atomic" @@ -148,15 +150,52 @@ func insertCloudflareHeaders(req *http.Request) { } } +// calcBackoff performs exponential backoff based on the attempt number and limited +// by the provided minimum and maximum durations. +// +// It also tries to parse the Retry-After response header when a http.StatusTooManyRequests (429) +// or http.StatusServiceUnavailable (503) is found in the resp parameter. In that case it returns the +// number of seconds the server states it may be ready to process more requests from this client.
+func calcBackoff(min, max time.Duration, attemptNum int, resp *http.Response) time.Duration { + if resp != nil { + if resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode == http.StatusServiceUnavailable { + if s, ok := resp.Header["Retry-After"]; ok { + if sleep, err := strconv.ParseInt(s[0], 10, 64); err == nil { + return time.Second * time.Duration(sleep) + } + } + } + } + + mult := math.Pow(2, float64(attemptNum)) * float64(min) + sleep := time.Duration(mult) + if float64(sleep) != mult || sleep > max { + sleep = max + } + + return sleep +} + func (r *requestHandler) RoundTrip(req *http.Request) (resp *http.Response, err error) { + defer func() { + if r := recover(); r != nil { + if resp != nil && resp.Body != nil { + resp.Body.Close() + resp.Body = nil + } + + err = fmt.Errorf("http client panic: %s", r) + } + }() + insertCloudflareHeaders(req) resp, err = r.Transport.RoundTrip(req) - delay := 500 * time.Millisecond attempts := 1 retry := true + const minDelay = 500 * time.Millisecond const maxDelay = 5 * time.Second const maxAttempts = 10 @@ -180,7 +219,15 @@ func (r *requestHandler) RoundTrip(req *http.Request) (resp *http.Response, err r.downloader.stats.WebseedBytesDownload.Add(resp.ContentLength) retry = false - case http.StatusInternalServerError, http.StatusBadGateway: + // the first two statuses here have been observed from cloudflare + // during testing. 
The remainder are generally understood to be + // retriable http responses; calcBackoff will use the Retry-After + // header if it is available + case http.StatusInternalServerError, http.StatusBadGateway, + http.StatusRequestTimeout, http.StatusTooEarly, + http.StatusTooManyRequests, http.StatusServiceUnavailable, + http.StatusGatewayTimeout: + r.downloader.stats.WebseedServerFails.Add(1) if resp.Body != nil { @@ -189,7 +236,7 @@ func (r *requestHandler) RoundTrip(req *http.Request) (resp *http.Response, err } attempts++ - delayTimer := time.NewTimer(delay) + delayTimer := time.NewTimer(calcBackoff(minDelay, maxDelay, attempts, resp)) select { case <-delayTimer.C: @@ -197,10 +244,6 @@ func (r *requestHandler) RoundTrip(req *http.Request) (resp *http.Response, err resp, err = r.Transport.RoundTrip(req) r.downloader.stats.WebseedTripCount.Add(1) - if err == nil && delay < maxDelay { - delay = delay + (time.Duration(rand.Intn(200-75)+75)*delay)/100 - } - case <-req.Context().Done(): err = req.Context().Err() } From 40c1bc0b5c26ac63074c1251663ac227444fc03d Mon Sep 17 00:00:00 2001 From: Mark Holt Date: Mon, 22 Apr 2024 15:01:08 +0100 Subject: [PATCH 3/3] fix retry check --- erigon-lib/downloader/downloader.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/erigon-lib/downloader/downloader.go b/erigon-lib/downloader/downloader.go index e3a268fb2ec..b2c15f1d716 100644 --- a/erigon-lib/downloader/downloader.go +++ b/erigon-lib/downloader/downloader.go @@ -247,7 +247,7 @@ func (r *requestHandler) RoundTrip(req *http.Request) (resp *http.Response, err case <-req.Context().Done(): err = req.Context().Err() } - retry = attempts > maxAttempts + retry = attempts < maxAttempts default: r.downloader.stats.WebseedBytesDownload.Add(resp.ContentLength)