From 510b1fe379415d7a5a078c47dc80d8bc3233e7d1 Mon Sep 17 00:00:00 2001 From: deepthi Date: Thu, 2 Jan 2020 14:17:25 -0800 Subject: [PATCH 1/4] DeleteTablet: allow deletion of old master tablet without -allow_master flag Signed-off-by: deepthi --- go/vt/wrangler/tablet.go | 8 ++- go/vt/wrangler/tablet_test.go | 101 ++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 1 deletion(-) diff --git a/go/vt/wrangler/tablet.go b/go/vt/wrangler/tablet.go index a8b4cab9bc3..5e3fd0d3e33 100644 --- a/go/vt/wrangler/tablet.go +++ b/go/vt/wrangler/tablet.go @@ -109,7 +109,13 @@ func (wr *Wrangler) DeleteTablet(ctx context.Context, tabletAlias *topodatapb.Ta if err != nil { return err } - wasMaster := ti.Type == topodatapb.TabletType_MASTER + si, err := wr.ts.GetShard(ctx, ti.Keyspace, ti.Shard) + if err != nil { + return err + } + + // the true master tablet will have the same MasterTermStartTime as the shard + wasMaster := ti.Type == topodatapb.TabletType_MASTER && topoproto.TabletAliasEqual(si.MasterAlias, tabletAlias) && logutil.ProtoToTime(ti.MasterTermStartTime).Equal(logutil.ProtoToTime(si.MasterTermStartTime)) if wasMaster && !allowMaster { return fmt.Errorf("cannot delete tablet %v as it is a master, use allow_master flag", topoproto.TabletAliasString(tabletAlias)) } diff --git a/go/vt/wrangler/tablet_test.go b/go/vt/wrangler/tablet_test.go index 68542de8a82..792fcb90d89 100644 --- a/go/vt/wrangler/tablet_test.go +++ b/go/vt/wrangler/tablet_test.go @@ -17,11 +17,13 @@ limitations under the License. package wrangler import ( + "strings" "testing" "golang.org/x/net/context" "vitess.io/vitess/go/vt/logutil" topodatapb "vitess.io/vitess/go/vt/proto/topodata" + "vitess.io/vitess/go/vt/topo" "vitess.io/vitess/go/vt/topo/memorytopo" ) @@ -56,3 +58,102 @@ func TestInitTabletShardConversion(t *testing.T) { t.Errorf("Got wrong tablet.KeyRange, got %v expected 80-c0", ti.KeyRange) } } + +// TestDeleteTabletBasic tests delete of non-master tablet +func TestDeleteTabletBasic(t *testing.T) { + cell := "cell1" + ts := memorytopo.NewServer(cell) + wr := New(logutil.NewConsoleLogger(), ts, nil) + + tablet := &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: cell, + Uid: 1, + }, + Shard: "0", + } + + if err := wr.InitTablet(context.Background(), tablet, false /*allowMasterOverride*/, true /*createShardAndKeyspace*/, false /*allowUpdate*/); err != nil { + t.Fatalf("InitTablet failed: %v", err) + } + + if _, err := ts.GetTablet(context.Background(), tablet.Alias); err != nil { + t.Fatalf("GetTablet failed: %v", err) + } + + if err := wr.DeleteTablet(context.Background(), tablet.Alias, false); err != nil { + t.Fatalf("DeleteTablet failed: %v", err) + } +} + +// TestDeleteTabletTrueMaster tests that you can delete a true master tablet +// only if allowMaster is set to true +func TestDeleteTabletTrueMaster(t *testing.T) { + cell := "cell1" + ts := memorytopo.NewServer(cell) + wr := New(logutil.NewConsoleLogger(), ts, nil) + + _, err := ts.GetOrCreateShard(context.Background(), "test", "0") + tablet := &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: cell, + Uid: 1, + }, + Keyspace: "test", + Shard: "0", + Type: topodatapb.TabletType_MASTER, + } + + if err := wr.InitTablet(context.Background(), tablet, false /*allowMasterOverride*/, false /*createShardAndKeyspace*/, false /*allowUpdate*/); err != nil { + t.Fatalf("InitTablet failed: %v", err) + } + if _, err := ts.GetTablet(context.Background(), tablet.Alias); err != nil { + t.Fatalf("GetTablet failed: %v", err) + } + + // set MasterAlias and MasterTermStartTime on shard to match chosen master tablet + _, err = ts.UpdateShardFields(context.Background(), "test", "0", func(si *topo.ShardInfo) error { + si.MasterAlias = tablet.Alias + si.MasterTermStartTime = tablet.MasterTermStartTime + return nil + }) + + err = wr.DeleteTablet(context.Background(), tablet.Alias, false) + wantError := "as it is a master, use allow_master flag" + if err == nil || !strings.Contains(err.Error(), wantError) { + t.Fatalf("DeleteTablet on master: want error = %v, got error = %v", wantError, err) + } + + if err := wr.DeleteTablet(context.Background(), tablet.Alias, true); err != nil { + t.Fatalf("DeleteTablet failed: %v", err) + } +} + +// TestDeleteTabletFalseMaster tests that you can delete a false master tablet +// with allowMaster set to false +func TestDeleteTabletFalseMaster(t *testing.T) { + cell := "cell1" + ts := memorytopo.NewServer(cell) + wr := New(logutil.NewConsoleLogger(), ts, nil) + + tablet := &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: cell, + Uid: 1, + }, + Keyspace: "test", + Shard: "0", + Type: topodatapb.TabletType_MASTER, + } + + if err := wr.InitTablet(context.Background(), tablet, false /*allowMasterOverride*/, true /*createShardAndKeyspace*/, false /*allowUpdate*/); err != nil { + t.Fatalf("InitTablet failed: %v", err) + } + if _, err := ts.GetTablet(context.Background(), tablet.Alias); err != nil { + t.Fatalf("GetTablet failed: %v", err) + } + + if err := wr.DeleteTablet(context.Background(), tablet.Alias, false); err != nil { + t.Fatalf("DeleteTablet failed: %v", err) + } +} From f4baace68dabbbec1aa45ee25c64f668c6df2425 Mon Sep 17 00:00:00 2001 From: deepthi Date: Fri, 3 Jan 2020 14:14:32 -0800 Subject: [PATCH 2/4] DeleteTablet: detect whether tablet is master consistent with how we define shard master. Reduce dependence on global topo. Improve test case Signed-off-by: deepthi --- go/vt/wrangler/tablet.go | 36 +++++++++++++++++++++++------- go/vt/wrangler/tablet_test.go | 41 ++++++++++++++++++++++++++--------- 2 files changed, 59 insertions(+), 18 deletions(-) diff --git a/go/vt/wrangler/tablet.go b/go/vt/wrangler/tablet.go index 5e3fd0d3e33..300f02fba30 100644 --- a/go/vt/wrangler/tablet.go +++ b/go/vt/wrangler/tablet.go @@ -109,23 +109,18 @@ func (wr *Wrangler) DeleteTablet(ctx context.Context, tabletAlias *topodatapb.Ta if err != nil { return err } - si, err := wr.ts.GetShard(ctx, ti.Keyspace, ti.Shard) + + wasMaster, err := wr.isMasterTablet(ctx, ti) if err != nil { return err } - // the true master tablet will have the same MasterTermStartTime as the shard - wasMaster := ti.Type == topodatapb.TabletType_MASTER && topoproto.TabletAliasEqual(si.MasterAlias, tabletAlias) && logutil.ProtoToTime(ti.MasterTermStartTime).Equal(logutil.ProtoToTime(si.MasterTermStartTime)) if wasMaster && !allowMaster { return fmt.Errorf("cannot delete tablet %v as it is a master, use allow_master flag", topoproto.TabletAliasString(tabletAlias)) } - // remove the record and its replication graph entry - if err := topotools.DeleteTablet(ctx, wr.ts, ti.Tablet); err != nil { - return err - } - // update the Shard object if the master was scrapped. + // we do this before calling DeleteTablet so that the operation can be retried in case of failure. if wasMaster { // We lock the shard to not conflict with reparent operations. ctx, unlock, lockErr := wr.ts.LockShard(ctx, ti.Keyspace, ti.Shard, fmt.Sprintf("DeleteTablet(%v)", topoproto.TabletAliasString(tabletAlias))) @@ -146,6 +141,11 @@ func (wr *Wrangler) DeleteTablet(ctx context.Context, tabletAlias *topodatapb.Ta return err } + // remove the record and its replication graph entry + if err := topotools.DeleteTablet(ctx, wr.ts, ti.Tablet); err != nil { + return err + } + return nil } @@ -207,3 +207,23 @@ func (wr *Wrangler) VReplicationExec(ctx context.Context, tabletAlias *topodatap } return wr.tmc.VReplicationExec(ctx, ti.Tablet, query) } + +func (wr *Wrangler) isMasterTablet(ctx context.Context, ti *topo.TabletInfo) (bool, error) { + // Tablet record claims to be non-master, we believe it + if ti.Type != topodatapb.TabletType_MASTER { + return false, nil + } + si, err := wr.ts.GetShard(ctx, ti.Keyspace, ti.Shard) + if err != nil { + // strictly speaking it isn't correct to return false here, the tablet status is unknown + return false, err + } + // Tablet record claims to be master, and shard record matches + if topoproto.TabletAliasEqual(si.MasterAlias, ti.Tablet.Alias) { + return true, nil + } + // Shard record has another tablet as master, so check MasterTermStartTime + // If tablet record's MasterTermStartTime is equal to or later than the one in the shard record, then tablet is master + // !Before == Equal || After + return !logutil.ProtoToTime(ti.MasterTermStartTime).Before(logutil.ProtoToTime(si.MasterTermStartTime)), nil +} diff --git a/go/vt/wrangler/tablet_test.go b/go/vt/wrangler/tablet_test.go index 792fcb90d89..cf0454062be 100644 --- a/go/vt/wrangler/tablet_test.go +++ b/go/vt/wrangler/tablet_test.go @@ -93,7 +93,6 @@ func TestDeleteTabletTrueMaster(t *testing.T) { ts := memorytopo.NewServer(cell) wr := New(logutil.NewConsoleLogger(), ts, nil) - _, err := ts.GetOrCreateShard(context.Background(), "test", "0") tablet := &topodatapb.Tablet{ Alias: &topodatapb.TabletAlias{ Cell: cell, @@ -104,7 +103,7 @@ func TestDeleteTabletTrueMaster(t *testing.T) { Type: topodatapb.TabletType_MASTER, } - if err := wr.InitTablet(context.Background(), tablet, false /*allowMasterOverride*/, false /*createShardAndKeyspace*/, false /*allowUpdate*/); err != nil { + if err := wr.InitTablet(context.Background(), tablet, false /*allowMasterOverride*/, true /*createShardAndKeyspace*/, false /*allowUpdate*/); err != nil { t.Fatalf("InitTablet failed: %v", err) } if _, err := ts.GetTablet(context.Background(), tablet.Alias); err != nil { @@ -112,13 +111,15 @@ func TestDeleteTabletTrueMaster(t *testing.T) { } // set MasterAlias and MasterTermStartTime on shard to match chosen master tablet - _, err = ts.UpdateShardFields(context.Background(), "test", "0", func(si *topo.ShardInfo) error { + if _, err := ts.UpdateShardFields(context.Background(), "test", "0", func(si *topo.ShardInfo) error { si.MasterAlias = tablet.Alias si.MasterTermStartTime = tablet.MasterTermStartTime return nil - }) + }); err != nil { + t.Fatalf("UpdateShardFields failed: %v", err) + } - err = wr.DeleteTablet(context.Background(), tablet.Alias, false) + err := wr.DeleteTablet(context.Background(), tablet.Alias, false) wantError := "as it is a master, use allow_master flag" if err == nil || !strings.Contains(err.Error(), wantError) { t.Fatalf("DeleteTablet on master: want error = %v, got error = %v", wantError, err) @@ -136,7 +137,7 @@ func TestDeleteTabletFalseMaster(t *testing.T) { ts := memorytopo.NewServer(cell) wr := New(logutil.NewConsoleLogger(), ts, nil) - tablet := &topodatapb.Tablet{ + tablet1 := &topodatapb.Tablet{ Alias: &topodatapb.TabletAlias{ Cell: cell, Uid: 1, @@ -146,14 +147,34 @@ func TestDeleteTabletFalseMaster(t *testing.T) { Type: topodatapb.TabletType_MASTER, } - if err := wr.InitTablet(context.Background(), tablet, false /*allowMasterOverride*/, true /*createShardAndKeyspace*/, false /*allowUpdate*/); err != nil { + if err := wr.InitTablet(context.Background(), tablet1, false /*allowMasterOverride*/, true /*createShardAndKeyspace*/, false /*allowUpdate*/); err != nil { t.Fatalf("InitTablet failed: %v", err) } - if _, err := ts.GetTablet(context.Background(), tablet.Alias); err != nil { - t.Fatalf("GetTablet failed: %v", err) + + tablet2 := &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: cell, + Uid: 2, + }, + Keyspace: "test", + Shard: "0", + Type: topodatapb.TabletType_MASTER, + } + if err := wr.InitTablet(context.Background(), tablet2, true /*allowMasterOverride*/, false /*createShardAndKeyspace*/, false /*allowUpdate*/); err != nil { + t.Fatalf("InitTablet failed: %v", err) } - if err := wr.DeleteTablet(context.Background(), tablet.Alias, false); err != nil { + // set MasterAlias and MasterTermStartTime on shard to match chosen master tablet + if _, err := ts.UpdateShardFields(context.Background(), "test", "0", func(si *topo.ShardInfo) error { + si.MasterAlias = tablet2.Alias + si.MasterTermStartTime = tablet2.MasterTermStartTime + return nil + }); err != nil { + t.Fatalf("UpdateShardFields failed: %v", err) + } + + // Should be able to delete old (false) master with allowMaster = false + if err := wr.DeleteTablet(context.Background(), tablet1.Alias, false); err != nil { t.Fatalf("DeleteTablet failed: %v", err) } } From 71083daa7442382a60078ef9f17b5d17523805fb Mon Sep 17 00:00:00 2001 From: deepthi Date: Fri, 3 Jan 2020 15:01:23 -0800 Subject: [PATCH 3/4] fix return Signed-off-by: deepthi --- go/vt/wrangler/tablet.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/go/vt/wrangler/tablet.go b/go/vt/wrangler/tablet.go index 300f02fba30..981735b6ad4 100644 --- a/go/vt/wrangler/tablet.go +++ b/go/vt/wrangler/tablet.go @@ -130,15 +130,16 @@ func (wr *Wrangler) DeleteTablet(ctx context.Context, tabletAlias *topodatapb.Ta defer unlock(&err) // update the shard record's master - _, err = wr.ts.UpdateShardFields(ctx, ti.Keyspace, ti.Shard, func(si *topo.ShardInfo) error { + if _, err := wr.ts.UpdateShardFields(ctx, ti.Keyspace, ti.Shard, func(si *topo.ShardInfo) error { if !topoproto.TabletAliasEqual(si.MasterAlias, tabletAlias) { wr.Logger().Warningf("Deleting master %v from shard %v/%v but master in Shard object was %v", topoproto.TabletAliasString(tabletAlias), ti.Keyspace, ti.Shard, topoproto.TabletAliasString(si.MasterAlias)) return topo.NewError(topo.NoUpdateNeeded, si.Keyspace()+"/"+si.ShardName()) } si.MasterAlias = nil return nil - }) - return err + }); err != nil { + return err + } } // remove the record and its replication graph entry From 1f7fa6b16b6db0cdbcfcbc87cd72313d217f4ae5 Mon Sep 17 00:00:00 2001 From: deepthi Date: Mon, 6 Jan 2020 09:32:35 -0800 Subject: [PATCH 4/4] address reviews Signed-off-by: deepthi --- go/vt/wrangler/tablet.go | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/go/vt/wrangler/tablet.go b/go/vt/wrangler/tablet.go index 981735b6ad4..82075ecbb05 100644 --- a/go/vt/wrangler/tablet.go +++ b/go/vt/wrangler/tablet.go @@ -209,6 +209,16 @@ func (wr *Wrangler) VReplicationExec(ctx context.Context, tabletAlias *topodatap return wr.tmc.VReplicationExec(ctx, ti.Tablet, query) } +// isMasterTablet is a shortcut way to determine whether the current tablet +// is a master before we allow its tablet record to be deleted. The canonical +// way to determine the only true master in a shard is to list all the tablets +// and find the one with the highest MasterTermStartTime among the ones that +// claim to be master. +// We err on the side of caution here, i.e. we should never return false for +// a true master tablet, but it is ok to return true for a tablet that isn't +// the true master. This can occur if someone issues a DeleteTablet while +// the system is in transition (a reparenting event is in progress and parts of +// the topo have not yet been updated). func (wr *Wrangler) isMasterTablet(ctx context.Context, ti *topo.TabletInfo) (bool, error) { // Tablet record claims to be non-master, we believe it if ti.Type != topodatapb.TabletType_MASTER { @@ -224,7 +234,8 @@ func (wr *Wrangler) isMasterTablet(ctx context.Context, ti *topo.TabletInfo) (bo return true, nil } // Shard record has another tablet as master, so check MasterTermStartTime - // If tablet record's MasterTermStartTime is equal to or later than the one in the shard record, then tablet is master - // !Before == Equal || After - return !logutil.ProtoToTime(ti.MasterTermStartTime).Before(logutil.ProtoToTime(si.MasterTermStartTime)), nil + // If tablet record's MasterTermStartTime is later than the one in the shard record, then tablet is master + tabletMTST := logutil.ProtoToTime(ti.MasterTermStartTime) + shardMTST := logutil.ProtoToTime(si.MasterTermStartTime) + return tabletMTST.After(shardMTST), nil }