From 9a808356bff4cab475be9216cf764c663a689517 Mon Sep 17 00:00:00 2001 From: Shawn Reuland Date: Mon, 29 Nov 2021 13:30:48 -0800 Subject: [PATCH] #3710: capture client disconnects for metrics reporting, include db layer distinction of timeout vs cancel --- services/horizon/internal/httpx/server.go | 3 +- support/db/main.go | 3 ++ support/db/session.go | 21 +++++++----- support/db/session_test.go | 39 +++++++++++++++++++++++ 4 files changed, 57 insertions(+), 9 deletions(-) diff --git a/services/horizon/internal/httpx/server.go b/services/horizon/internal/httpx/server.go index 85eaa5bc96..c3f6983c2c 100644 --- a/services/horizon/internal/httpx/server.go +++ b/services/horizon/internal/httpx/server.go @@ -56,7 +56,8 @@ func init() { problem.RegisterError(sse.ErrRateLimited, hProblem.RateLimitExceeded) problem.RegisterError(context.DeadlineExceeded, hProblem.Timeout) problem.RegisterError(context.Canceled, hProblem.ClientDisconnected) - problem.RegisterError(db.ErrCancelled, hProblem.ServiceUnavailable) + problem.RegisterError(db.ErrCancelled, hProblem.ClientDisconnected) + problem.RegisterError(db.ErrTimeout, hProblem.ServiceUnavailable) problem.RegisterError(db.ErrConflictWithRecovery, hProblem.ServiceUnavailable) problem.RegisterError(db.ErrBadConnection, hProblem.ServiceUnavailable) } diff --git a/support/db/main.go b/support/db/main.go index 2d0eaec21e..ed6e65285d 100644 --- a/support/db/main.go +++ b/support/db/main.go @@ -31,6 +31,9 @@ const ( ) var ( + // ErrTimeout is an error returned by Session methods when request has + // taken longer than context's deadline max duration + ErrTimeout = errors.New("canceling statement due to lack of response within timeout period") // ErrCancelled is an error returned by Session methods when request has // been cancelled (ex. context cancelled). ErrCancelled = errors.New("canceling statement due to user request") diff --git a/support/db/session.go b/support/db/session.go index 43e7121be0..3d72f87bfe 100644 --- a/support/db/session.go +++ b/support/db/session.go @@ -23,7 +23,7 @@ func (s *Session) Begin() error { tx, err := s.DB.BeginTxx(context.Background(), nil) if err != nil { - if knownErr := s.replaceWithKnownError(err); knownErr != nil { + if knownErr := s.replaceWithKnownError(err, context.Background()); knownErr != nil { return knownErr } @@ -44,7 +44,7 @@ func (s *Session) BeginTx(opts *sql.TxOptions) error { tx, err := s.DB.BeginTxx(context.Background(), opts) if err != nil { - if knownErr := s.replaceWithKnownError(err); knownErr != nil { + if knownErr := s.replaceWithKnownError(err, context.Background()); knownErr != nil { return knownErr } @@ -142,7 +142,7 @@ func (s *Session) GetRaw(ctx context.Context, dest interface{}, query string, ar return nil } - if knownErr := s.replaceWithKnownError(err); knownErr != nil { + if knownErr := s.replaceWithKnownError(err, ctx); knownErr != nil { return knownErr } @@ -211,7 +211,7 @@ func (s *Session) ExecRaw(ctx context.Context, query string, args ...interface{} return result, nil } - if knownErr := s.replaceWithKnownError(err); knownErr != nil { + if knownErr := s.replaceWithKnownError(err, ctx); knownErr != nil { return nil, knownErr } @@ -230,10 +230,15 @@ func (s *Session) NoRows(err error) bool { // replaceWithKnownError tries to replace Postgres error with package error. // Returns a new error if the err is known. -func (s *Session) replaceWithKnownError(err error) error { +func (s *Session) replaceWithKnownError(err error, ctx context.Context) error { switch { + case ctx.Err() == context.Canceled: + return ErrCancelled + case ctx.Err() == context.DeadlineExceeded: + // if libpq waits too long to obtain conn from pool, can get ctx timeout before server trip + return ErrTimeout case strings.Contains(err.Error(), "pq: canceling statement due to user request"): - return ErrCancelled + return ErrTimeout case strings.Contains(err.Error(), "pq: canceling statement due to conflict with recovery"): return ErrConflictWithRecovery case strings.Contains(err.Error(), "driver: bad connection"): @@ -267,7 +272,7 @@ func (s *Session) QueryRaw(ctx context.Context, query string, args ...interface{ return result, nil } - if knownErr := s.replaceWithKnownError(err); knownErr != nil { + if knownErr := s.replaceWithKnownError(err, ctx); knownErr != nil { return nil, knownErr } @@ -341,7 +346,7 @@ func (s *Session) SelectRaw( return nil } - if knownErr := s.replaceWithKnownError(err); knownErr != nil { + if knownErr := s.replaceWithKnownError(err, ctx); knownErr != nil { return knownErr } diff --git a/support/db/session_test.go b/support/db/session_test.go index 1cbccb9301..ea34db0fc8 100644 --- a/support/db/session_test.go +++ b/support/db/session_test.go @@ -3,12 +3,51 @@ package db import ( "context" "testing" + "time" "github.com/stellar/go/support/db/dbtest" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) + +func TestServerTimeout (t *testing.T) { + db := dbtest.Postgres(t).Load(testSchema) + defer db.Close() + + var cancel context.CancelFunc + ctx := context.Background() + ctx, cancel = context.WithTimeout(ctx, time.Duration(1)) + assert := assert.New(t) + + sess := &Session{DB: db.Open()} + defer sess.DB.Close() + defer cancel() + + var count int + err := sess.GetRaw(ctx, &count, "SELECT pg_sleep(2), COUNT(*) FROM people") + assert.ErrorIs(err, ErrTimeout, "long running db server operation past context timeout, should return timeout") +} + +func TestUserCancel (t *testing.T) { + db := dbtest.Postgres(t).Load(testSchema) + defer db.Close() + + var cancel context.CancelFunc + ctx := context.Background() + ctx, cancel = context.WithCancel(ctx) + assert := assert.New(t) + + sess := &Session{DB: db.Open()} + defer sess.DB.Close() + defer cancel() + + var count int + cancel() + err := sess.GetRaw(ctx, &count, "SELECT pg_sleep(2), COUNT(*) FROM people") + assert.ErrorIs(err, ErrCancelled, "any ongoing db server operation should return error immediately after user cancel") +} + func TestSession(t *testing.T) { db := dbtest.Postgres(t).Load(testSchema) defer db.Close()