diff --git a/server/replication/replication_mode.go b/server/replication/replication_mode.go
index c03aa3856612..182128daa389 100644
--- a/server/replication/replication_mode.go
+++ b/server/replication/replication_mode.go
@@ -206,12 +206,12 @@ const (
 )
 
 type drAutoSyncStatus struct {
-	State            string    `json:"state,omitempty"`
-	StateID          uint64    `json:"state_id,omitempty"`
-	RecoverStartTime time.Time `json:"recover_start,omitempty"`
-	TotalRegions     int       `json:"total_regions,omitempty"`
-	SyncedRegions    int       `json:"synced_regions,omitempty"`
-	RecoverProgress  float32   `json:"recover_progress,omitempty"`
+	State            string     `json:"state,omitempty"`
+	StateID          uint64     `json:"state_id,omitempty"`
+	RecoverStartTime *time.Time `json:"recover_start,omitempty"`
+	TotalRegions     int        `json:"total_regions,omitempty"`
+	SyncedRegions    int        `json:"synced_regions,omitempty"`
+	RecoverProgress  float32    `json:"recover_progress,omitempty"`
 }
 
 func (m *ModeManager) loadDRAutoSync() error {
@@ -280,7 +280,8 @@ func (m *ModeManager) drSwitchToSyncRecoverWithLock() error {
 		log.Warn("failed to switch to sync_recover state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err))
 		return err
 	}
-	dr := drAutoSyncStatus{State: drStateSyncRecover, StateID: id, RecoverStartTime: time.Now()}
+	now := time.Now()
+	dr := drAutoSyncStatus{State: drStateSyncRecover, StateID: id, RecoverStartTime: &now}
 	if err := m.drPersistStatus(dr); err != nil {
 		return err
 	}
@@ -387,6 +388,16 @@ func (m *ModeManager) tickDR() {
 	}
 	hasMajority := upPeers*2 > totalPrimaryPeers+totalDrPeers
 
+	log.Debug("replication store status",
+		zap.Int("up-primary", upPrimayStores),
+		zap.Int("up-dr", upDrStores),
+		zap.Int("down-primary", downPrimaryStores),
+		zap.Int("down-dr", downDrStores),
+		zap.Bool("can-sync", canSync),
+		zap.Int("up-peers", upPeers),
+		zap.Bool("has-majority", hasMajority),
+	)
+
 	// If hasMajority is false, the cluster is always unavailable. Switch to async won't help.
 	if !canSync && hasMajority && m.drGetState() != drStateAsync && m.drCheckAsyncTimeout() {
 		m.drSwitchToAsync()
diff --git a/server/replication/replication_mode_test.go b/server/replication/replication_mode_test.go
index 02790e339672..4e087d447b55 100644
--- a/server/replication/replication_mode_test.go
+++ b/server/replication/replication_mode_test.go
@@ -17,6 +17,7 @@ package replication
 import (
 	"context"
 	"errors"
+	"fmt"
 	"testing"
 	"time"
 
@@ -141,10 +142,12 @@ func (s *testReplicationMode) TestStatus(c *C) {
 }
 
 type mockFileReplicator struct {
-	err error
+	lastData string
+	err      error
 }
 
-func (rep *mockFileReplicator) ReplicateFileToAllMembers(context.Context, string, []byte) error {
+func (rep *mockFileReplicator) ReplicateFileToAllMembers(ctx context.Context, name string, data []byte) error {
+	rep.lastData = string(data)
 	return rep.err
 }
 
@@ -172,6 +175,7 @@ func (s *testReplicationMode) TestStateSwitch(c *C) {
 	c.Assert(rep.drGetState(), Equals, drStateSync)
 	stateID := rep.drAutoSync.StateID
 	c.Assert(stateID, Not(Equals), uint64(0))
+	c.Assert(replicator.lastData, Equals, fmt.Sprintf(`{"state":"sync","state_id":%d}`, stateID))
 	assertStateIDUpdate := func() {
 		c.Assert(rep.drAutoSync.StateID, Not(Equals), stateID)
 		stateID = rep.drAutoSync.StateID
@@ -181,6 +185,7 @@ func (s *testReplicationMode) TestStateSwitch(c *C) {
 	rep.tickDR()
 	c.Assert(rep.drGetState(), Equals, drStateAsync)
 	assertStateIDUpdate()
+	c.Assert(replicator.lastData, Equals, fmt.Sprintf(`{"state":"async","state_id":%d}`, stateID))
 
 	// add new store in dr zone.
 	cluster.AddLabelsStore(4, 1, map[string]string{"zone": "zone2"})