Skip to content

Commit

Permalink
grpclb: propagate the most recent connection error when grpclb enters…
Browse files Browse the repository at this point in the history
… transient failure (#4605)
  • Loading branch information
apolcyn authored Jul 21, 2021
1 parent 8332d5b commit 0a8c637
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 1 deletion.
6 changes: 5 additions & 1 deletion balancer/grpclb/grpclb.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ package grpclb
import (
"context"
"errors"
"fmt"
"sync"
"time"

Expand Down Expand Up @@ -221,6 +222,7 @@ type lbBalancer struct {
// when resolved address updates are received, and read in the goroutine
// handling fallback.
resolvedBackendAddrs []resolver.Address
connErr error // the last connection error
}

// regeneratePicker takes a snapshot of the balancer, and generates a picker from
Expand All @@ -230,7 +232,7 @@ type lbBalancer struct {
// Caller must hold lb.mu.
func (lb *lbBalancer) regeneratePicker(resetDrop bool) {
if lb.state == connectivity.TransientFailure {
lb.picker = &errPicker{err: balancer.ErrTransientFailure}
lb.picker = &errPicker{err: fmt.Errorf("all SubConns are in TransientFailure, last connection error: %v", lb.connErr)}
return
}

Expand Down Expand Up @@ -336,6 +338,8 @@ func (lb *lbBalancer) UpdateSubConnState(sc balancer.SubConn, scs balancer.SubCo
// When an address was removed by resolver, b called RemoveSubConn but
// kept the sc's state in scStates. Remove state for this sc here.
delete(lb.scStates, sc)
case connectivity.TransientFailure:
lb.connErr = scs.ConnectionError
}
// Force regenerate picker if
// - this sc became ready from not-ready
Expand Down
59 changes: 59 additions & 0 deletions balancer/grpclb/grpclb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1232,6 +1232,65 @@ func (s) TestGRPCLBPickFirst(t *testing.T) {
}
}

func (s) TestGRPCLBBackendConnectionErrorPropagation(t *testing.T) {
r := manual.NewBuilderWithScheme("whatever")

// Start up an LB which will tell the client to fall back
// right away.
tss, cleanup, err := newLoadBalancer(0, "", nil)
if err != nil {
t.Fatalf("failed to create new load balancer: %v", err)
}
defer cleanup()

// Start a standalone backend, to be used during fallback. The creds
// are intentionally misconfigured in order to simulate failure of a
// security handshake.
beLis, err := net.Listen("tcp", "localhost:0")
if err != nil {
t.Fatalf("Failed to listen %v", err)
}
defer beLis.Close()
standaloneBEs := startBackends("arbitrary.invalid.name", true, beLis)
defer stopBackends(standaloneBEs)

creds := serverNameCheckCreds{}
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
cc, err := grpc.DialContext(ctx, r.Scheme()+":///"+beServerName, grpc.WithResolvers(r),
grpc.WithTransportCredentials(&creds), grpc.WithContextDialer(fakeNameDialer))
if err != nil {
t.Fatalf("Failed to dial to the backend %v", err)
}
defer cc.Close()
testC := testpb.NewTestServiceClient(cc)

r.UpdateState(resolver.State{Addresses: []resolver.Address{{
Addr: tss.lbAddr,
Type: resolver.GRPCLB,
ServerName: lbServerName,
}, {
Addr: beLis.Addr().String(),
Type: resolver.Backend,
}}})

// If https://github.com/grpc/grpc-go/blob/65cabd74d8e18d7347fecd414fa8d83a00035f5f/balancer/grpclb/grpclb_test.go#L103
// changes, then expectedErrMsg may need to be updated.
const expectedErrMsg = "received unexpected server name"
ctx, cancel = context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
var wg sync.WaitGroup
wg.Add(1)
go func() {
tss.ls.fallbackNow()
wg.Done()
}()
if _, err := testC.EmptyCall(ctx, &testpb.Empty{}); err == nil || !strings.Contains(err.Error(), expectedErrMsg) {
t.Fatalf("%v.EmptyCall(_, _) = _, %v, want _, rpc error containing substring: %q", testC, err, expectedErrMsg)
}
wg.Wait()
}

type failPreRPCCred struct{}

func (failPreRPCCred) GetRequestMetadata(ctx context.Context, uri ...string) (map[string]string, error) {
Expand Down

0 comments on commit 0a8c637

Please sign in to comment.