From 9ce610fa26986292a0bd78511431245ff562b2fa Mon Sep 17 00:00:00 2001 From: James Bardin Date: Sun, 8 Oct 2017 12:32:46 -0400 Subject: [PATCH 1/2] retry locks on network errors When communicating with a local agent and watching a lock, a dropped connection between the agent and server will show up as a server error and immediately be retried. However if the client is connected to a remote server, a dropped connection immediately aborts the lock. --- api/api.go | 10 +++++++--- api/api_test.go | 19 ++++++++++++------- api/lock.go | 2 +- api/semaphore.go | 2 +- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/api/api.go b/api/api.go index 3c28afe2896e..6e1cb222754e 100644 --- a/api/api.go +++ b/api/api.go @@ -555,13 +555,17 @@ func durToMsec(dur time.Duration) string { // serverError is a string we look for to detect 500 errors. const serverError = "Unexpected response code: 500" -// IsServerError returns true for 500 errors from the Consul servers, these are -// usually retryable at a later time. -func IsServerError(err error) bool { +// IsRetryableError returns true for 500 errors from the Consul servers, and +// network connection errors. These are usually retryable at a later time. +func IsRetryableError(err error) bool { if err == nil { return false } + if _, ok := err.(net.Error); ok { + return true + } + // TODO (slackpad) - Make a real error type here instead of using // a string check. return strings.Contains(err.Error(), serverError) diff --git a/api/api_test.go b/api/api_test.go index ba3a448ab167..f06bc1b304a7 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -4,6 +4,7 @@ import ( crand "crypto/rand" "crypto/tls" "fmt" + "net" "net/http" "os" "path/filepath" @@ -529,17 +530,21 @@ func TestAPI_durToMsec(t *testing.T) { } } -func TestAPI_IsServerError(t *testing.T) { +func TestAPI_IsRetryableError(t *testing.T) { t.Parallel() - if IsServerError(nil) { - t.Fatalf("should not be a server error") + if IsRetryableError(nil) { + t.Fatal("should not be a retryable error") } - if IsServerError(fmt.Errorf("not the error you are looking for")) { - t.Fatalf("should not be a server error") + if IsRetryableError(fmt.Errorf("not the error you are looking for")) { + t.Fatal("should not be a retryable error") } - if !IsServerError(fmt.Errorf(serverError)) { - t.Fatalf("should be a server error") + if !IsRetryableError(fmt.Errorf(serverError)) { + t.Fatal("should be a retryable error") + } + + if !IsRetryableError(&net.OpError{Err: fmt.Errorf("network conn error")}) { + t.Fatal("should be a retryable error") } } diff --git a/api/lock.go b/api/lock.go index 466ef5fdf192..d3187113f722 100644 --- a/api/lock.go +++ b/api/lock.go @@ -370,7 +370,7 @@ RETRY: // by doing retries. Note that we have to attempt the retry in a non- // blocking fashion so that we have a clean place to reset the retry // counter if service is restored. - if retries > 0 && IsServerError(err) { + if retries > 0 && IsRetryableError(err) { time.Sleep(l.opts.MonitorRetryTime) retries-- opts.WaitIndex = 0 diff --git a/api/semaphore.go b/api/semaphore.go index 9ddbdc49e7f1..d848dfd0b19a 100644 --- a/api/semaphore.go +++ b/api/semaphore.go @@ -492,7 +492,7 @@ RETRY: // by doing retries. Note that we have to attempt the retry in a non- // blocking fashion so that we have a clean place to reset the retry // counter if service is restored. - if retries > 0 && IsServerError(err) { + if retries > 0 && IsRetryableError(err) { time.Sleep(s.opts.MonitorRetryTime) retries-- opts.WaitIndex = 0 From 78db97554b5d06765a373375aa802466893d1859 Mon Sep 17 00:00:00 2001 From: James Phillips Date: Wed, 11 Oct 2017 07:41:39 -0700 Subject: [PATCH 2/2] Updates comment about it being unsafe for writes. --- api/api.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/api.go b/api/api.go index 6e1cb222754e..87cd3c5a377d 100644 --- a/api/api.go +++ b/api/api.go @@ -557,6 +557,9 @@ const serverError = "Unexpected response code: 500" // IsRetryableError returns true for 500 errors from the Consul servers, and // network connection errors. These are usually retryable at a later time. +// This applies to reads but NOT to writes. This may return true for errors +// on writes that may have still gone through, so do not use this to retry +// any write operations. func IsRetryableError(err error) bool { if err == nil { return false