Skip to content

Commit

Permalink
client: fix detection of whether IO was performed in NewStream (#4611)
Browse files Browse the repository at this point in the history
For transparent retry.

Also allow non-WFR RPCs to retry indefinitely on errors that resulted in no I/O; the spec used to forbid it at one point during development, but it no longer does.
  • Loading branch information
dfawley authored Jul 23, 2021
1 parent 582ef45 commit 1ddab33
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 44 deletions.
35 changes: 24 additions & 11 deletions internal/transport/http2_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -616,26 +616,39 @@ func (t *http2Client) getCallAuthData(ctx context.Context, audience string, call
return callAuthData, nil
}

// PerformedIOError wraps an error to indicate IO may have been performed
// before the error occurred.
type PerformedIOError struct {
// NewStreamError wraps an error and reports additional information.
type NewStreamError struct {
Err error

DoNotRetry bool
PerformedIO bool
}

// Error implements error.
func (p PerformedIOError) Error() string {
return p.Err.Error()
func (e NewStreamError) Error() string {
return e.Err.Error()
}

// NewStream creates a stream and registers it into the transport as "active"
// streams.
// streams. All non-nil errors returned will be *NewStreamError.
func (t *http2Client) NewStream(ctx context.Context, callHdr *CallHdr) (_ *Stream, err error) {
defer func() {
if err != nil {
nse, ok := err.(*NewStreamError)
if !ok {
nse = &NewStreamError{Err: err}
}
if len(t.perRPCCreds) > 0 || callHdr.Creds != nil {
// We may have performed I/O in the per-RPC creds callback, so do not
// allow transparent retry.
nse.PerformedIO = true
}
err = nse
}
}()
ctx = peer.NewContext(ctx, t.getPeer())
headerFields, err := t.createHeaderFields(ctx, callHdr)
if err != nil {
// We may have performed I/O in the per-RPC creds callback, so do not
// allow transparent retry.
return nil, PerformedIOError{err}
return nil, err
}
s := t.newStream(ctx, callHdr)
cleanup := func(err error) {
Expand Down Expand Up @@ -741,7 +754,7 @@ func (t *http2Client) NewStream(ctx context.Context, callHdr *CallHdr) (_ *Strea
break
}
if hdrListSizeErr != nil {
return nil, hdrListSizeErr
return nil, &NewStreamError{Err: hdrListSizeErr, DoNotRetry: true}
}
firstTry = false
select {
Expand Down
2 changes: 1 addition & 1 deletion internal/transport/transport_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -780,7 +780,7 @@ func (s) TestGracefulClose(t *testing.T) {
go func() {
defer wg.Done()
str, err := ct.NewStream(ctx, &CallHdr{})
if err == ErrConnClosing {
if err != nil && err.(*NewStreamError).Err == ErrConnClosing {
return
} else if err != nil {
t.Errorf("_.NewStream(_, _) = _, %v, want _, %v", err, ErrConnClosing)
Expand Down
28 changes: 15 additions & 13 deletions rpc_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -829,26 +829,28 @@ func Errorf(c codes.Code, format string, a ...interface{}) error {

// toRPCErr converts an error into an error from the status package.
func toRPCErr(err error) error {
if err == nil || err == io.EOF {
switch err {
case nil, io.EOF:
return err
}
if err == io.ErrUnexpectedEOF {
case context.DeadlineExceeded:
return status.Error(codes.DeadlineExceeded, err.Error())
case context.Canceled:
return status.Error(codes.Canceled, err.Error())
case io.ErrUnexpectedEOF:
return status.Error(codes.Internal, err.Error())
}
if _, ok := status.FromError(err); ok {
return err
}

switch e := err.(type) {
case transport.ConnectionError:
return status.Error(codes.Unavailable, e.Desc)
default:
switch err {
case context.DeadlineExceeded:
return status.Error(codes.DeadlineExceeded, err.Error())
case context.Canceled:
return status.Error(codes.Canceled, err.Error())
}
case *transport.NewStreamError:
return toRPCErr(e.Err)
}

if _, ok := status.FromError(err); ok {
return err
}

return status.Error(codes.Unknown, err.Error())
}

Expand Down
49 changes: 30 additions & 19 deletions stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -421,12 +421,9 @@ func (a *csAttempt) newStream() error {
cs.callHdr.PreviousAttempts = cs.numRetries
s, err := a.t.NewStream(cs.ctx, cs.callHdr)
if err != nil {
if _, ok := err.(transport.PerformedIOError); ok {
// Return without converting to an RPC error so retry code can
// inspect.
return err
}
return toRPCErr(err)
// Return without converting to an RPC error so retry code can
// inspect.
return err
}
cs.attempt.s = s
cs.attempt.p = &parser{r: s}
Expand Down Expand Up @@ -525,19 +522,28 @@ func (cs *clientStream) commitAttempt() {
// shouldRetry returns nil if the RPC should be retried; otherwise it returns
// the error that should be returned by the operation.
func (cs *clientStream) shouldRetry(err error) error {
unprocessed := false
if cs.attempt.s == nil {
pioErr, ok := err.(transport.PerformedIOError)
if ok {
// Unwrap error.
err = toRPCErr(pioErr.Err)
} else {
unprocessed = true
// Error from NewClientStream.
nse, ok := err.(*transport.NewStreamError)
if !ok {
// Unexpected, but assume no I/O was performed and the RPC is not
// fatal, so retry indefinitely.
return nil
}
if !ok && !cs.callInfo.failFast {
// In the event of a non-IO operation error from NewStream, we
// never attempted to write anything to the wire, so we can retry
// indefinitely for non-fail-fast RPCs.

// Unwrap and convert error.
err = toRPCErr(nse.Err)

// Never retry DoNotRetry errors, which indicate the RPC should not be
// retried due to max header list size violation, etc.
if nse.DoNotRetry {
return err
}

// In the event of a non-IO operation error from NewStream, we never
// attempted to write anything to the wire, so we can retry
// indefinitely.
if !nse.PerformedIO {
return nil
}
}
Expand All @@ -546,6 +552,7 @@ func (cs *clientStream) shouldRetry(err error) error {
return err
}
// Wait for the trailers.
unprocessed := false
if cs.attempt.s != nil {
<-cs.attempt.s.Done()
unprocessed = cs.attempt.s.Unprocessed()
Expand Down Expand Up @@ -634,7 +641,7 @@ func (cs *clientStream) shouldRetry(err error) error {
// Returns nil if a retry was performed and succeeded; error otherwise.
func (cs *clientStream) retryLocked(lastErr error) error {
for {
cs.attempt.finish(lastErr)
cs.attempt.finish(toRPCErr(lastErr))
if err := cs.shouldRetry(lastErr); err != nil {
cs.commitAttemptLocked()
return err
Expand All @@ -661,7 +668,11 @@ func (cs *clientStream) withRetry(op func(a *csAttempt) error, onSuccess func())
for {
if cs.committed {
cs.mu.Unlock()
return op(cs.attempt)
// toRPCErr is used in case the error from the attempt comes from
// NewClientStream, which intentionally doesn't return a status
// error to allow for further inspection; all other errors should
// already be status errors.
return toRPCErr(op(cs.attempt))
}
a := cs.attempt
cs.mu.Unlock()
Expand Down

0 comments on commit 1ddab33

Please sign in to comment.