Skip to content

Commit

Permalink
dr-autosync: improve state file content (tikv#4342)
Browse files Browse the repository at this point in the history
close tikv#4341

Signed-off-by: disksing <i@disksing.com>

Co-authored-by: Ti Chi Robot <ti-community-prow-bot@tidb.io>
  • Loading branch information
2 people authored and IcePigZDB committed Nov 29, 2021
1 parent b95080e commit cfe74f4
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 9 deletions.
25 changes: 18 additions & 7 deletions server/replication/replication_mode.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,12 +206,12 @@ const (
)

// drAutoSyncStatus is the JSON-encoded status of the dr-auto-sync
// replication mode. Every field is tagged omitempty so that only the
// fields that are set appear in the encoded output.
type drAutoSyncStatus struct {
	// State is the name of the current replication state.
	State string `json:"state,omitempty"`
	// StateID identifies the current state instance.
	StateID uint64 `json:"state_id,omitempty"`
	// RecoverStartTime is a pointer on purpose: encoding/json never treats
	// a time.Time value as empty, so a non-pointer field would always be
	// emitted (as the zero time) even when no recovery is in progress.
	// A nil pointer is omitted.
	RecoverStartTime *time.Time `json:"recover_start,omitempty"`
	// TotalRegions, SyncedRegions and RecoverProgress presumably describe
	// recovery progress; their producers are not visible here (TODO confirm).
	TotalRegions    int     `json:"total_regions,omitempty"`
	SyncedRegions   int     `json:"synced_regions,omitempty"`
	RecoverProgress float32 `json:"recover_progress,omitempty"`
}

func (m *ModeManager) loadDRAutoSync() error {
Expand Down Expand Up @@ -280,7 +280,8 @@ func (m *ModeManager) drSwitchToSyncRecoverWithLock() error {
log.Warn("failed to switch to sync_recover state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err))
return err
}
dr := drAutoSyncStatus{State: drStateSyncRecover, StateID: id, RecoverStartTime: time.Now()}
now := time.Now()
dr := drAutoSyncStatus{State: drStateSyncRecover, StateID: id, RecoverStartTime: &now}
if err := m.drPersistStatus(dr); err != nil {
return err
}
Expand Down Expand Up @@ -387,6 +388,16 @@ func (m *ModeManager) tickDR() {
}
hasMajority := upPeers*2 > totalPrimaryPeers+totalDrPeers

log.Debug("replication store status",
zap.Int("up-primary", upPrimayStores),
zap.Int("up-dr", upDrStores),
zap.Int("down-primary", downPrimaryStores),
zap.Int("down-dr", downDrStores),
zap.Bool("can-sync", canSync),
zap.Int("up-peers", upPeers),
zap.Bool("has-majority", hasMajority),
)

// If hasMajority is false, the cluster is unavailable regardless of mode, so switching to async won't help.
if !canSync && hasMajority && m.drGetState() != drStateAsync && m.drCheckAsyncTimeout() {
m.drSwitchToAsync()
Expand Down
9 changes: 7 additions & 2 deletions server/replication/replication_mode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package replication
import (
"context"
"errors"
"fmt"
"testing"
"time"

Expand Down Expand Up @@ -141,10 +142,12 @@ func (s *testReplicationMode) TestStatus(c *C) {
}

// mockFileReplicator is a test double for file replication. It records the
// most recent payload it was asked to replicate and returns a preset error.
type mockFileReplicator struct {
	// lastData holds the payload of the latest ReplicateFileToAllMembers call.
	lastData string
	// err is returned verbatim by ReplicateFileToAllMembers.
	err error
}

// ReplicateFileToAllMembers stores data (converted to a string) in
// rep.lastData and returns rep.err. The ctx and name arguments are ignored.
func (rep *mockFileReplicator) ReplicateFileToAllMembers(ctx context.Context, name string, data []byte) error {
	rep.lastData = string(data)
	return rep.err
}

Expand Down Expand Up @@ -172,6 +175,7 @@ func (s *testReplicationMode) TestStateSwitch(c *C) {
c.Assert(rep.drGetState(), Equals, drStateSync)
stateID := rep.drAutoSync.StateID
c.Assert(stateID, Not(Equals), uint64(0))
c.Assert(replicator.lastData, Equals, fmt.Sprintf(`{"state":"sync","state_id":%d}`, stateID))
assertStateIDUpdate := func() {
c.Assert(rep.drAutoSync.StateID, Not(Equals), stateID)
stateID = rep.drAutoSync.StateID
Expand All @@ -181,6 +185,7 @@ func (s *testReplicationMode) TestStateSwitch(c *C) {
rep.tickDR()
c.Assert(rep.drGetState(), Equals, drStateAsync)
assertStateIDUpdate()
c.Assert(replicator.lastData, Equals, fmt.Sprintf(`{"state":"async","state_id":%d}`, stateID))

// add new store in dr zone.
cluster.AddLabelsStore(4, 1, map[string]string{"zone": "zone2"})
Expand Down

0 comments on commit cfe74f4

Please sign in to comment.