From 0fcc3dd06f4a96e68449f0dd28231a35b0e27d8d Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Mon, 21 Sep 2020 08:08:52 +0200 Subject: [PATCH 1/4] Logs for debugging CI flakiness Signed-off-by: Rohit Nayak --- go/vt/wrangler/traffic_switcher.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 7693ffd8e0e..fadfa1238c7 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -743,14 +743,19 @@ func (ts *trafficSwitcher) waitForCatchup(ctx context.Context, filteredReplicati return ts.forAllUids(func(target *tsTarget, uid uint32) error { ts.wr.Logger().Infof("uid: %d, target master %s, target position %s, shard %s", uid, target.master.AliasString(), target.position, target.si.String()) + log.Infof("uid: %d, target master %s, target position %s, shard %s", uid, + target.master.AliasString(), target.position, target.si.String()) bls := target.sources[uid] source := ts.sources[bls.Shard] ts.wr.Logger().Infof("waiting for keyspace:shard: %v:%v, source position %v, uid %d", ts.targetKeyspace, target.si.ShardName(), source.position, uid) + log.Infof("waiting for keyspace:shard: %v:%v, source position %v, uid %d", + ts.targetKeyspace, target.si.ShardName(), source.position, uid) if err := ts.wr.tmc.VReplicationWaitForPos(ctx, target.master.Tablet, int(uid), source.position); err != nil { return err } ts.wr.Logger().Infof("position for keyspace:shard: %v:%v reached, uid %d", ts.targetKeyspace, target.si.ShardName(), uid) + log.Infof("position for keyspace:shard: %v:%v reached, uid %d", ts.targetKeyspace, target.si.ShardName(), uid) if _, err := ts.wr.tmc.VReplicationExec(ctx, target.master.Tablet, binlogplayer.StopVReplication(uid, "stopped for cutover")); err != nil { log.Infof("error marking stopped for cutover on %s, uid %d", target.master.AliasString(), uid) return err @@ -765,6 +770,7 @@ func (ts *trafficSwitcher) waitForCatchup(ctx context.Context, filteredReplicati var err error target.position, err = ts.wr.tmc.MasterPosition(ctx, target.master.Tablet) ts.wr.Logger().Infof("Position for target master %s, uid %v: %v", target.master.AliasString(), uid, target.position) + log.Infof("Position for target master %s, uid %v: %v", target.master.AliasString(), uid, target.position) return err }) } From 8959771d30dff38faf7bbbf5b8c8bb7b814f0228 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Mon, 21 Sep 2020 08:19:13 +0200 Subject: [PATCH 2/4] Trigger rebuild Signed-off-by: Rohit Nayak From e2599b0c6d07b4d9d2de918624696c2f2b2e8592 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Mon, 21 Sep 2020 08:46:20 +0200 Subject: [PATCH 3/4] try reducing tests to retry faster Signed-off-by: Rohit Nayak --- test/config.json | 64 +----------------------------------------------- 1 file changed, 1 insertion(+), 63 deletions(-) diff --git a/test/config.json b/test/config.json index d10894edbcb..ae17929b30d 100644 --- a/test/config.json +++ b/test/config.json @@ -35,29 +35,7 @@ "RetryMax": 0, "Tags": [] }, - "unit": { - "File": "", - "Args": [], - "Command": [ - "tools/unit_test_runner.sh" - ], - "Manual": false, - "Shard": -1, - "RetryMax": 0, - "Tags": [] - }, - "unit_race": { - "File": "", - "Args": [], - "Command": [ - "make", - "unit_test_race" - ], - "Manual": false, - "Shard": 5, - "RetryMax": 0, - "Tags": [] - }, + "backup": { "File": "unused.go", "Args": ["vitess.io/vitess/go/test/endtoend/backup/vtctlbackup"], @@ -222,28 +200,6 @@ "site_test" ] }, - "merge_sharding": { - "File": "unused.go", - "Args": ["vitess.io/vitess/go/test/endtoend/sharding/mergesharding/int"], - "Command": [], - "Manual": false, - "Shard": 22, - "RetryMax": 0, - "Tags": [ - "worker_test" - ] - }, - "merge_sharding_bytes": { - "File": "unused.go", - "Args": ["vitess.io/vitess/go/test/endtoend/sharding/mergesharding/string"], - "Command": [], - "Manual": false, - "Shard": 22, - "RetryMax": 0, - "Tags": [ - "worker_test" - ] - }, "mysqlctl": { "File": "unused.go", "Args": ["vitess.io/vitess/go/test/endtoend/mysqlctl"], @@ -529,23 +485,5 @@ "RetryMax": 0, "Tags": [] }, - "vreplication_cellalias": { - "File": "unused.go", - "Args": ["vitess.io/vitess/go/test/endtoend/vreplication", "-run", "CellAlias"], - "Command": [], - "Manual": false, - "Shard": 23, - "RetryMax": 0, - "Tags": [] - }, - "vreplication_basic": { - "File": "unused.go", - "Args": ["vitess.io/vitess/go/test/endtoend/vreplication", "-run", "Basic"], - "Command": [], - "Manual": false, - "Shard": 24, - "RetryMax": 0, - "Tags": [] - } } } From d50d0fe294b0fe571321cc3f182e274aac1c0327 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Mon, 21 Sep 2020 09:28:19 +0200 Subject: [PATCH 4/4] more logs Signed-off-by: Rohit Nayak --- go/test/endtoend/vreplication/helper.go | 4 ++-- go/test/endtoend/vreplication/vreplication_test.go | 10 +++++++--- go/vt/discovery/tablet_picker.go | 2 +- go/vt/wrangler/traffic_switcher.go | 8 ++++---- test/config.json | 2 +- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/go/test/endtoend/vreplication/helper.go b/go/test/endtoend/vreplication/helper.go index 2dfc3467a93..1aee1dba6ee 100644 --- a/go/test/endtoend/vreplication/helper.go +++ b/go/test/endtoend/vreplication/helper.go @@ -242,13 +242,13 @@ func expectNumberOfStreams(t *testing.T, vtgateConn *mysql.Conn, name string, wo } } -func printShardPositions(vc *VitessCluster, ksShards []string) { +func printShardPositions(vc *VitessCluster, ksShards []string, msg string) { for _, ksShard := range ksShards { output, err := vc.VtctlClient.ExecuteCommandWithOutput("ShardReplicationPositions", ksShard) if err != nil { fmt.Printf("Error in ShardReplicationPositions: %v, output %v", err, output) } else { - fmt.Printf("Position of %s: %s", ksShard, output) + fmt.Printf("%s: Position of %s: %s", msg, ksShard, output) } } } diff --git a/go/test/endtoend/vreplication/vreplication_test.go b/go/test/endtoend/vreplication/vreplication_test.go index bbbe9e14766..612db640e82 100644 --- a/go/test/endtoend/vreplication/vreplication_test.go +++ b/go/test/endtoend/vreplication/vreplication_test.go @@ -192,6 +192,7 @@ func insertMoreCustomers(t *testing.T, numCustomers int) { // FIXME: if testReverse if false we don't dropsources and that creates a problem later on in the test due to existence of blacklisted tables func shardCustomer(t *testing.T, testReverse bool, cells []*Cell, sourceCellOrAlias string) { + ksShards := []string{"product/0", "customer/-80", "customer/80-"} if _, err := vc.AddKeyspace(t, cells, "customer", "-80,80-", customerVSchema, customerSchema, defaultReplicas, defaultRdonly, 200); err != nil { t.Fatal(err) } @@ -231,6 +232,7 @@ func shardCustomer(t *testing.T, testReverse bool, cells []*Cell, sourceCellOrAl var output string var err error + printShardPositions(vc, ksShards, "p2c: Before SwitchReads (rdonly)") if output, err = vc.VtctlClient.ExecuteCommandWithOutput("SwitchReads", "-cells="+allCellNames, "-tablet_type=rdonly", "customer.p2c"); err != nil { t.Fatalf("SwitchReads error: %s\n", output) } @@ -238,6 +240,7 @@ func shardCustomer(t *testing.T, testReverse bool, cells []*Cell, sourceCellOrAl if output, err = vc.VtctlClient.ExecuteCommandWithOutput("SwitchReads", "-cells="+allCellNames, "-tablet_type=replica", "-dry_run", "customer.p2c"); err != nil { t.Fatalf("SwitchReads Dry Run error: %s\n", output) } + printShardPositions(vc, ksShards, "p2c: Before SwitchReads (replica)") validateDryRunResults(t, output, want) if output, err = vc.VtctlClient.ExecuteCommandWithOutput("SwitchReads", "-cells="+allCellNames, "-tablet_type=replica", "customer.p2c"); err != nil { t.Fatalf("SwitchReads error: %s\n", output) @@ -249,12 +252,12 @@ func shardCustomer(t *testing.T, testReverse bool, cells []*Cell, sourceCellOrAl t.Fatalf("SwitchWrites error: %s\n", output) } validateDryRunResults(t, output, want) + printShardPositions(vc, ksShards, "p2c: Before SwitchWrites") if output, err := vc.VtctlClient.ExecuteCommandWithOutput("SwitchWrites", "customer.p2c"); err != nil { t.Fatalf("SwitchWrites error: %s\n", output) } - ksShards := []string{"product/0", "customer/-80", "customer/80-"} - printShardPositions(vc, ksShards) + printShardPositions(vc, ksShards, "p2c: After SwitchWrites") insertQuery2 := "insert into customer(name) values('tempCustomer2')" matchInsertQuery2 := "insert into customer(name, cid) values (:vtg1, :_cid0)" assert.False(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productTab, "customer", insertQuery2, matchInsertQuery2)) @@ -273,10 +276,11 @@ func shardCustomer(t *testing.T, testReverse bool, cells []*Cell, sourceCellOrAl if output, err := vc.VtctlClient.ExecuteCommandWithOutput("SwitchReads", "-cells="+allCellNames, "-tablet_type=replica", "product.p2c_reverse"); err != nil { t.Fatalf("SwitchReads error: %s\n", output) } - printShardPositions(vc, ksShards) + printShardPositions(vc, ksShards, "pc2_reverse: before SwitchWrites") if output, err := vc.VtctlClient.ExecuteCommandWithOutput("SwitchWrites", "product.p2c_reverse"); err != nil { t.Fatalf("SwitchWrites error: %s\n", output) } + printShardPositions(vc, ksShards, "pc2_reverse: after SwitchWrites") insertQuery1 = "insert into customer(cid, name) values(1002, 'tempCustomer5')" assert.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productTab, "product", insertQuery1, matchInsertQuery1)) // both inserts go into 80-, this tests the edge-case where a stream (-80) has no relevant new events after the previous switch diff --git a/go/vt/discovery/tablet_picker.go b/go/vt/discovery/tablet_picker.go index 2aded25401a..487ba99ff2c 100644 --- a/go/vt/discovery/tablet_picker.go +++ b/go/vt/discovery/tablet_picker.go @@ -38,7 +38,7 @@ import ( ) var ( - tabletPickerRetryDelay = 30 * time.Second + tabletPickerRetryDelay = 5 * time.Second muTabletPickerRetryDelay sync.Mutex ) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index fadfa1238c7..139a0107e2a 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -743,19 +743,19 @@ func (ts *trafficSwitcher) waitForCatchup(ctx context.Context, filteredReplicati return ts.forAllUids(func(target *tsTarget, uid uint32) error { ts.wr.Logger().Infof("uid: %d, target master %s, target position %s, shard %s", uid, target.master.AliasString(), target.position, target.si.String()) - log.Infof("uid: %d, target master %s, target position %s, shard %s", uid, + fmt.Printf("uid: %d, target master %s, target position %s, shard %s\n", uid, target.master.AliasString(), target.position, target.si.String()) bls := target.sources[uid] source := ts.sources[bls.Shard] ts.wr.Logger().Infof("waiting for keyspace:shard: %v:%v, source position %v, uid %d", ts.targetKeyspace, target.si.ShardName(), source.position, uid) - log.Infof("waiting for keyspace:shard: %v:%v, source position %v, uid %d", + fmt.Printf("waiting for keyspace:shard: %v:%v, source position %v, uid %d\n", ts.targetKeyspace, target.si.ShardName(), source.position, uid) if err := ts.wr.tmc.VReplicationWaitForPos(ctx, target.master.Tablet, int(uid), source.position); err != nil { return err } ts.wr.Logger().Infof("position for keyspace:shard: %v:%v reached, uid %d", ts.targetKeyspace, target.si.ShardName(), uid) - log.Infof("position for keyspace:shard: %v:%v reached, uid %d", ts.targetKeyspace, target.si.ShardName(), uid) + fmt.Printf("position for keyspace:shard: %v:%v reached, uid %d\n", ts.targetKeyspace, target.si.ShardName(), uid) if _, err := ts.wr.tmc.VReplicationExec(ctx, target.master.Tablet, binlogplayer.StopVReplication(uid, "stopped for cutover")); err != nil { log.Infof("error marking stopped for cutover on %s, uid %d", target.master.AliasString(), uid) return err @@ -770,7 +770,7 @@ func (ts *trafficSwitcher) waitForCatchup(ctx context.Context, filteredReplicati var err error target.position, err = ts.wr.tmc.MasterPosition(ctx, target.master.Tablet) ts.wr.Logger().Infof("Position for target master %s, uid %v: %v", target.master.AliasString(), uid, target.position) - log.Infof("Position for target master %s, uid %v: %v", target.master.AliasString(), uid, target.position) + fmt.Printf("Position for target master %s, uid %v: %v\n", target.master.AliasString(), uid, target.position) return err }) } diff --git a/test/config.json b/test/config.json index ae17929b30d..6194d06fd9f 100644 --- a/test/config.json +++ b/test/config.json @@ -484,6 +484,6 @@ "Shard": 22, "RetryMax": 0, "Tags": [] - }, + } } }