From cd247ebad60dfae745c70128d0c1e56a389f4759 Mon Sep 17 00:00:00 2001 From: Andrew Mason Date: Mon, 25 Oct 2021 14:30:22 -0400 Subject: [PATCH 1/3] Fix `DeleteCellInfo(force=true)` with downed local topo Signed-off-by: Andrew Mason --- go/vt/topo/cell_info.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/go/vt/topo/cell_info.go b/go/vt/topo/cell_info.go index 9c1ec731209..9703232d095 100644 --- a/go/vt/topo/cell_info.go +++ b/go/vt/topo/cell_info.go @@ -153,6 +153,21 @@ func (ts *Server) DeleteCellInfo(ctx context.Context, cell string, force bool) e if !force { return vterrors.Wrap(err, "can't list SrvKeyspace entries in the cell; use -force flag to continue anyway (e.g. if cell-local topo was already permanently shut down)") } + + select { + case <-ctx.Done(): + // If our context has expired and we got an error back from + // GetSrvKeyspaceNames, we assume that call failed because the + // local cell topo was down. If force=true, then we make a new + // background context to cleanup from the global topo. Otherwise a + // local-down-topo scenario would mean we never can delete it. + // (see https://github.com/vitessio/vitess/issues/8220). + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(context.Background(), *RemoteOperationTimeout) + defer cancel() + default: + // Context still has some time left, no need to make a new one. + } } filePath := pathForCellInfo(cell) From 0b0a23086989b9623e05043c54fcb5a03d53cff9 Mon Sep 17 00:00:00 2001 From: Andrew Mason Date: Fri, 29 Oct 2021 19:39:09 -0400 Subject: [PATCH 2/3] Extend memorytopo's Conn implementation to simulate an unreachable cell Signed-off-by: Andrew Mason --- go/vt/topo/memorytopo/directory.go | 4 ++++ go/vt/topo/memorytopo/file.go | 16 +++++++++++++ go/vt/topo/memorytopo/lock.go | 4 ++++ go/vt/topo/memorytopo/memorytopo.go | 35 ++++++++++++++++++++++++----- 4 files changed, 53 insertions(+), 6 deletions(-) diff --git a/go/vt/topo/memorytopo/directory.go b/go/vt/topo/memorytopo/directory.go index 8ac6c6251ad..f68c87a2166 100644 --- a/go/vt/topo/memorytopo/directory.go +++ b/go/vt/topo/memorytopo/directory.go @@ -27,6 +27,10 @@ import ( // ListDir is part of the topo.Conn interface. func (c *Conn) ListDir(ctx context.Context, dirPath string, full bool) ([]topo.DirEntry, error) { + if err := c.dial(ctx); err != nil { + return nil, err + } + c.factory.mu.Lock() defer c.factory.mu.Unlock() diff --git a/go/vt/topo/memorytopo/file.go b/go/vt/topo/memorytopo/file.go index b1126e11af9..36330cb374d 100644 --- a/go/vt/topo/memorytopo/file.go +++ b/go/vt/topo/memorytopo/file.go @@ -30,6 +30,10 @@ import ( // Create is part of topo.Conn interface. func (c *Conn) Create(ctx context.Context, filePath string, contents []byte) (topo.Version, error) { + if err := c.dial(ctx); err != nil { + return nil, err + } + if contents == nil { contents = []byte{} } @@ -61,6 +65,10 @@ func (c *Conn) Create(ctx context.Context, filePath string, contents []byte) (to // Update is part of topo.Conn interface. func (c *Conn) Update(ctx context.Context, filePath string, contents []byte, version topo.Version) (topo.Version, error) { + if err := c.dial(ctx); err != nil { + return nil, err + } + if contents == nil { contents = []byte{} } @@ -125,6 +133,10 @@ func (c *Conn) Update(ctx context.Context, filePath string, contents []byte, ver // Get is part of topo.Conn interface. func (c *Conn) Get(ctx context.Context, filePath string) ([]byte, topo.Version, error) { + if err := c.dial(ctx); err != nil { + return nil, nil, err + } + c.factory.mu.Lock() defer c.factory.mu.Unlock() @@ -146,6 +158,10 @@ func (c *Conn) Get(ctx context.Context, filePath string) ([]byte, topo.Version, // Delete is part of topo.Conn interface. func (c *Conn) Delete(ctx context.Context, filePath string, version topo.Version) error { + if err := c.dial(ctx); err != nil { + return err + } + c.factory.mu.Lock() defer c.factory.mu.Unlock() diff --git a/go/vt/topo/memorytopo/lock.go b/go/vt/topo/memorytopo/lock.go index b1fe2221dd4..ab9e8088873 100644 --- a/go/vt/topo/memorytopo/lock.go +++ b/go/vt/topo/memorytopo/lock.go @@ -44,6 +44,10 @@ type memoryTopoLockDescriptor struct { // Lock is part of the topo.Conn interface. func (c *Conn) Lock(ctx context.Context, dirPath, contents string) (topo.LockDescriptor, error) { for { + if err := c.dial(ctx); err != nil { + return nil, err + } + c.factory.mu.Lock() if c.factory.err != nil { diff --git a/go/vt/topo/memorytopo/memorytopo.go b/go/vt/topo/memorytopo/memorytopo.go index 155a1e66433..89c107636b5 100644 --- a/go/vt/topo/memorytopo/memorytopo.go +++ b/go/vt/topo/memorytopo/memorytopo.go @@ -38,6 +38,15 @@ const ( electionsPath = "elections" ) +const ( + // UnreachableServerAddr is a sentinel value for CellInfo.ServerAddr. + // If a memorytopo topo.Conn is created with this serverAddr then every + // method on that Conn which takes a context will simply block until the + // context finishes, and return ctx.Err(), in order to simulate an + // unreachable local cell for testing. + UnreachableServerAddr = "unreachable" +) + var ( nextWatchIndex = 0 ) @@ -79,8 +88,9 @@ func (f *Factory) Create(cell, serverAddr, root string) (topo.Conn, error) { return nil, topo.NewError(topo.NoNode, cell) } return &Conn{ - factory: f, - cell: cell, + factory: f, + cell: cell, + serverAddr: serverAddr, }, nil } @@ -110,11 +120,24 @@ func (f *Factory) Unlock() { f.mu.Unlock() } -// Conn implements the topo.Conn interface. It remembers the cell, and -// points at the Factory that has all the data. +// Conn implements the topo.Conn interface. It remembers the cell and serverAddr, +// and points at the Factory that has all the data. type Conn struct { - factory *Factory - cell string + factory *Factory + cell string + serverAddr string +} + +// dial returns immediately, unless the Conn points to the sentinel +// UnreachableServerAddr, in which case it will block until the context expires +// and return the context's error. +func (c *Conn) dial(ctx context.Context) error { + if c.serverAddr == UnreachableServerAddr { + <-ctx.Done() + return ctx.Err() + } + + return nil } // Close is part of the topo.Conn interface. From 5f6dd949d36762fc2e146f62c517ddfc5bfc060b Mon Sep 17 00:00:00 2001 From: Andrew Mason Date: Fri, 29 Oct 2021 19:39:40 -0400 Subject: [PATCH 3/3] Add unit tests for DeleteCellInfo(force={true,false}) with unreachable local cell Signed-off-by: Andrew Mason --- go/vt/topo/topotests/cell_info_test.go | 50 ++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/go/vt/topo/topotests/cell_info_test.go b/go/vt/topo/topotests/cell_info_test.go index eaae838efab..50ebe0fc09e 100644 --- a/go/vt/topo/topotests/cell_info_test.go +++ b/go/vt/topo/topotests/cell_info_test.go @@ -20,6 +20,7 @@ import ( "fmt" "strings" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -226,3 +227,52 @@ func TestExpandCells(t *testing.T) { } }) } + +func TestDeleteCellInfo(t *testing.T) { + ctx := context.Background() + ts := memorytopo.NewServer("zone1", "unreachable") + + err := ts.UpdateCellInfoFields(ctx, "unreachable", func(ci *topodatapb.CellInfo) error { + ci.ServerAddress = memorytopo.UnreachableServerAddr + return nil + }) + require.NoError(t, err, "failed to update cell to point at unreachable addr") + + tests := []struct { + force bool + shouldErr bool + shouldExist bool + }{ + { + force: false, + shouldErr: true, + shouldExist: true, + }, + { + force: true, + shouldErr: false, + shouldExist: false, + }, + } + for _, tt := range tests { + func() { + ctx, cancel := context.WithTimeout(ctx, 10*time.Millisecond) + defer cancel() + + err := ts.DeleteCellInfo(ctx, "unreachable", tt.force) + if tt.shouldErr { + assert.Error(t, err, "force=%t", tt.force) + } else { + assert.NoError(t, err, "force=%t", tt.force) + } + + ci, err := ts.GetCellInfo(ctx, "unreachable", true /* strongRead */) + if tt.shouldExist { + assert.NoError(t, err) + assert.NotNil(t, ci) + } else { + assert.True(t, topo.IsErrType(err, topo.NoNode), "expected cell %q to not exist", "unreachable") + } + }() + } +}