diff --git a/go.mod b/go.mod index eedb32762..7fd8fd58d 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20220801062533-2eaa32854a6c github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 - github.com/pingcap/kvproto v0.0.0-20230720094213-a3b4a77b4333 + github.com/pingcap/kvproto v0.0.0-20230818065851-7b612d935bf9 github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.15.1 diff --git a/go.sum b/go.sum index 2df15720c..570bd7435 100644 --- a/go.sum +++ b/go.sum @@ -136,8 +136,8 @@ github.com/pingcap/failpoint v0.0.0-20220801062533-2eaa32854a6c h1:CgbKAHto5CQgW github.com/pingcap/failpoint v0.0.0-20220801062533-2eaa32854a6c/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 h1:surzm05a8C9dN8dIUmo4Be2+pMRb6f55i+UIYrluu2E= github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989/go.mod h1:O17XtbryoCJhkKGbT62+L2OlrniwqiGLSqrmdHCMzZw= -github.com/pingcap/kvproto v0.0.0-20230720094213-a3b4a77b4333 h1:A6Wqgq0uMw51UiRAH27TVN0QlzVR5CVtV6fTQSAmvKM= -github.com/pingcap/kvproto v0.0.0-20230720094213-a3b4a77b4333/go.mod h1:r0q/CFcwvyeRhKtoqzmWMBebrtpIziQQ9vR+JKh1knc= +github.com/pingcap/kvproto v0.0.0-20230818065851-7b612d935bf9 h1:VDoZ18CAXoTUNTCxfl4BjQSD5rJQri8QlH8nu0ZuHeg= +github.com/pingcap/kvproto v0.0.0-20230818065851-7b612d935bf9/go.mod h1:r0q/CFcwvyeRhKtoqzmWMBebrtpIziQQ9vR+JKh1knc= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8IDP+SZrdhV1Kibl9KrHxJ9eciw= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/integration_tests/go.mod b/integration_tests/go.mod index 97636a2f6..01e508d62 100644 --- a/integration_tests/go.mod +++ b/integration_tests/go.mod @@ -6,7 +6,7 @@ require ( github.com/ninedraft/israce v0.0.3 github.com/pingcap/errors v0.11.5-0.20221009092201-b66cddb77c32 github.com/pingcap/failpoint v0.0.0-20220801062533-2eaa32854a6c - github.com/pingcap/kvproto v0.0.0-20230720094213-a3b4a77b4333 + github.com/pingcap/kvproto v0.0.0-20230818065851-7b612d935bf9 github.com/pingcap/tidb v1.1.0-beta.0.20230619015310-8b1006f1af04 github.com/pkg/errors v0.9.1 github.com/stretchr/testify v1.8.4 diff --git a/integration_tests/go.sum b/integration_tests/go.sum index ac10fa09a..134ec393c 100644 --- a/integration_tests/go.sum +++ b/integration_tests/go.sum @@ -363,8 +363,8 @@ github.com/pingcap/fn v1.0.0 h1:CyA6AxcOZkQh52wIqYlAmaVmF6EvrcqFywP463pjA8g= github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 h1:surzm05a8C9dN8dIUmo4Be2+pMRb6f55i+UIYrluu2E= github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989/go.mod h1:O17XtbryoCJhkKGbT62+L2OlrniwqiGLSqrmdHCMzZw= github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= -github.com/pingcap/kvproto v0.0.0-20230720094213-a3b4a77b4333 h1:A6Wqgq0uMw51UiRAH27TVN0QlzVR5CVtV6fTQSAmvKM= -github.com/pingcap/kvproto v0.0.0-20230720094213-a3b4a77b4333/go.mod h1:r0q/CFcwvyeRhKtoqzmWMBebrtpIziQQ9vR+JKh1knc= +github.com/pingcap/kvproto v0.0.0-20230818065851-7b612d935bf9 h1:VDoZ18CAXoTUNTCxfl4BjQSD5rJQri8QlH8nu0ZuHeg= +github.com/pingcap/kvproto v0.0.0-20230818065851-7b612d935bf9/go.mod h1:r0q/CFcwvyeRhKtoqzmWMBebrtpIziQQ9vR+JKh1knc= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= github.com/pingcap/log v1.1.0/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/log v1.1.1-0.20230317032135-a0d097d16e22 h1:2SOzvGvE8beiC1Y4g9Onkvu6UmuBBOeWRGQEjJaT/JY= diff --git a/internal/client/client_interceptor.go b/internal/client/client_interceptor.go index 74cabbb8c..64ae333ed 100644 --- a/internal/client/client_interceptor.go +++ b/internal/client/client_interceptor.go @@ -107,10 +107,15 @@ func buildResourceControlInterceptor( // Build the interceptor. interceptFn := func(next interceptor.RPCInterceptorFunc) interceptor.RPCInterceptorFunc { return func(target string, req *tikvrpc.Request) (*tikvrpc.Response, error) { + // bypass some internal requests and it's may influence user experience. For example, the + // request of `alter user password`, totally bypasses the resource control. it's not cost + // many resources, but it's may influence the user experience. // If the resource group has background jobs, we should not record consumption and wait for it. - if resourceControlInterceptor.IsBackgroundRequest(ctx, resourceGroupName, req.RequestSource) { + // Background jobs will record and report in tikv side. + if reqInfo.Bypass() || resourceControlInterceptor.IsBackgroundRequest(ctx, resourceGroupName, req.RequestSource) { return next(target, req) } + consumption, penalty, err := resourceControlInterceptor.OnRequestWait(ctx, resourceGroupName, reqInfo) if err != nil { return nil, err diff --git a/internal/locate/region_cache.go b/internal/locate/region_cache.go index 87cd06704..56444ac09 100644 --- a/internal/locate/region_cache.go +++ b/internal/locate/region_cache.go @@ -627,16 +627,17 @@ func (c *RegionCache) SetPDClient(client pd.Client) { // RPCContext contains data that is needed to send RPC to a region. type RPCContext struct { - Region RegionVerID - Meta *metapb.Region - Peer *metapb.Peer - AccessIdx AccessIndex - Store *Store - Addr string - AccessMode accessMode - ProxyStore *Store // nil means proxy is not used - ProxyAddr string // valid when ProxyStore is not nil - TiKVNum int // Number of TiKV nodes among the region's peers. Assuming non-TiKV peers are all TiFlash peers. + Region RegionVerID + Meta *metapb.Region + Peer *metapb.Peer + AccessIdx AccessIndex + Store *Store + Addr string + AccessMode accessMode + ProxyStore *Store // nil means proxy is not used + ProxyAddr string // valid when ProxyStore is not nil + TiKVNum int // Number of TiKV nodes among the region's peers. Assuming non-TiKV peers are all TiFlash peers. + BucketVersion uint64 contextPatcher contextPatcher // kvrpcpb.Context fields that need to be overridden } @@ -1289,9 +1290,11 @@ func (c *RegionCache) reloadRegion(regionID uint64) { // ignore error and use old region info. logutil.Logger(bo.GetCtx()).Error("load region failure", zap.Uint64("regionID", regionID), zap.Error(err)) + c.mu.RLock() if oldRegion := c.getRegionByIDFromCache(regionID); oldRegion != nil { oldRegion.asyncReload.Store(false) } + c.mu.RUnlock() return } c.mu.Lock() @@ -1945,6 +1948,26 @@ func (c *RegionCache) getStoresByLabels(labels []*metapb.StoreLabel) []*Store { return s } +// OnBucketVersionNotMatch removes the old buckets meta if the version is stale. +func (c *RegionCache) OnBucketVersionNotMatch(ctx *RPCContext, version uint64, keys [][]byte) { + r := c.GetCachedRegionWithRLock(ctx.Region) + if r == nil { + return + } + + buckets := r.getStore().buckets + if buckets == nil || buckets.GetVersion() < version { + oldStore := r.getStore() + store := oldStore.clone() + store.buckets = &metapb.Buckets{ + Version: version, + Keys: keys, + RegionId: r.meta.GetId(), + } + r.compareAndSwapStore(oldStore, store) + } +} + // OnRegionEpochNotMatch removes the old region and inserts new regions into the cache. // It returns whether retries the request because it's possible the region epoch is ahead of TiKV's due to slow appling. func (c *RegionCache) OnRegionEpochNotMatch(bo *retry.Backoffer, ctx *RPCContext, currentRegions []*metapb.Region) (bool, error) { diff --git a/internal/locate/region_cache_test.go b/internal/locate/region_cache_test.go index 619da2d2e..6226a1c60 100644 --- a/internal/locate/region_cache_test.go +++ b/internal/locate/region_cache_test.go @@ -1646,6 +1646,15 @@ func (s *testRegionCacheSuite) TestShouldNotRetryFlashback() { shouldRetry, err = reqSend.onRegionError(s.bo, ctx, nil, &errorpb.Error{FlashbackNotPrepared: &errorpb.FlashbackNotPrepared{}}) s.Error(err) s.False(shouldRetry) + + shouldRetry, err = reqSend.onRegionError(s.bo, ctx, nil, &errorpb.Error{BucketVersionNotMatch: &errorpb.BucketVersionNotMatch{Keys: [][]byte{[]byte("a")}, Version: 1}}) + s.Nil(err) + s.False(shouldRetry) + ctx.Region.GetID() + key, err := s.cache.LocateKey(s.bo, []byte("a")) + s.Nil(err) + s.Equal(key.Buckets.Keys, [][]byte{[]byte("a")}) + s.Equal(key.Buckets.Version, uint64(1)) } func (s *testRegionCacheSuite) TestBackgroundCacheGC() { diff --git a/internal/locate/region_request.go b/internal/locate/region_request.go index e071d739a..3bf19cb98 100644 --- a/internal/locate/region_request.go +++ b/internal/locate/region_request.go @@ -347,7 +347,7 @@ func (state *accessKnownLeader) next(bo *retry.Backoffer, selector *replicaSelec // a request. So, before the new leader is elected, we should not send requests // to the unreachable old leader to avoid unnecessary timeout. if liveness != reachable || leader.isExhausted(maxReplicaAttempt) { - selector.state = &tryFollower{leaderIdx: state.leaderIdx, lastIdx: state.leaderIdx} + selector.state = &tryFollower{leaderIdx: state.leaderIdx, lastIdx: state.leaderIdx, fromAccessKnownLeader: true} return nil, stateChanged{} } if selector.busyThreshold > 0 { @@ -371,7 +371,7 @@ func (state *accessKnownLeader) onSendFailure(bo *retry.Backoffer, selector *rep return } if liveness != reachable || selector.targetReplica().isExhausted(maxReplicaAttempt) { - selector.state = &tryFollower{leaderIdx: state.leaderIdx, lastIdx: state.leaderIdx} + selector.state = &tryFollower{leaderIdx: state.leaderIdx, lastIdx: state.leaderIdx, fromAccessKnownLeader: true} } if liveness != reachable { selector.invalidateReplicaStore(selector.targetReplica(), cause) @@ -379,7 +379,7 @@ func (state *accessKnownLeader) onSendFailure(bo *retry.Backoffer, selector *rep } func (state *accessKnownLeader) onNoLeader(selector *replicaSelector) { - selector.state = &tryFollower{leaderIdx: state.leaderIdx, lastIdx: state.leaderIdx, fromOnNotLeader: true} + selector.state = &tryFollower{leaderIdx: state.leaderIdx, lastIdx: state.leaderIdx, fromAccessKnownLeader: true} } // tryFollower is the state where we cannot access the known leader @@ -393,28 +393,53 @@ type tryFollower struct { stateBase leaderIdx AccessIndex lastIdx AccessIndex - // fromOnNotLeader indicates whether the state is changed from onNotLeader. - fromOnNotLeader bool + // fromAccessKnownLeader indicates whether the state is changed from `accessKnownLeader`. + fromAccessKnownLeader bool + labels []*metapb.StoreLabel } func (state *tryFollower) next(bo *retry.Backoffer, selector *replicaSelector) (*RPCContext, error) { - var targetReplica *replica hasDeadlineExceededErr := false - // Search replica that is not attempted from the last accessed replica - for i := 1; i < len(selector.replicas); i++ { - idx := AccessIndex((int(state.lastIdx) + i) % len(selector.replicas)) - targetReplica = selector.replicas[idx] - hasDeadlineExceededErr = hasDeadlineExceededErr || targetReplica.deadlineErrUsingConfTimeout - if idx == state.leaderIdx { - continue + //hasDeadlineExceededErr || targetReplica.deadlineErrUsingConfTimeout + filterReplicas := func(fn func(*replica) bool) (AccessIndex, *replica) { + for i := 0; i < len(selector.replicas); i++ { + idx := AccessIndex((int(state.lastIdx) + i) % len(selector.replicas)) + if idx == state.leaderIdx { + continue + } + selectReplica := selector.replicas[idx] + hasDeadlineExceededErr = hasDeadlineExceededErr || selectReplica.deadlineErrUsingConfTimeout + if selectReplica.store.getLivenessState() != unreachable && !selectReplica.deadlineErrUsingConfTimeout && + fn(selectReplica) { + return idx, selectReplica + } } - // Each follower is only tried once - if !targetReplica.isExhausted(1) && targetReplica.store.getLivenessState() != unreachable && !targetReplica.deadlineErrUsingConfTimeout { + return -1, nil + } + + if len(state.labels) > 0 { + idx, selectReplica := filterReplicas(func(selectReplica *replica) bool { + return selectReplica.store.IsLabelsMatch(state.labels) + }) + if selectReplica != nil && idx >= 0 { state.lastIdx = idx selector.targetIdx = idx - break } + // labels only take effect for first try. + state.labels = nil } + + if selector.targetIdx < 0 { + // Search replica that is not attempted from the last accessed replica + idx, selectReplica := filterReplicas(func(selectReplica *replica) bool { + return !selectReplica.isExhausted(1) + }) + if selectReplica != nil && idx >= 0 { + state.lastIdx = idx + selector.targetIdx = idx + } + } + // If all followers are tried and fail, backoff and retry. if selector.targetIdx < 0 { if hasDeadlineExceededErr { @@ -427,22 +452,24 @@ func (state *tryFollower) next(bo *retry.Backoffer, selector *replicaSelector) ( } rpcCtx, err := selector.buildRPCContext(bo) if err != nil || rpcCtx == nil { - return nil, err + return rpcCtx, err } - // If the state is changed from onNotLeader, the `replicaRead` flag should not be set as leader read would still be used. - if !state.fromOnNotLeader { - replicaRead := selector.targetIdx != state.leaderIdx + if !state.fromAccessKnownLeader { + replicaRead := true rpcCtx.contextPatcher.replicaRead = &replicaRead } - disableStaleRead := false - rpcCtx.contextPatcher.staleRead = &disableStaleRead + staleRead := false + rpcCtx.contextPatcher.staleRead = &staleRead return rpcCtx, nil } func (state *tryFollower) onSendSuccess(selector *replicaSelector) { - if state.fromOnNotLeader { - if !selector.region.switchWorkLeaderToPeer(selector.targetReplica().peer) { - panic("the store must exist") + if state.fromAccessKnownLeader { + peer := selector.targetReplica().peer + if !selector.region.switchWorkLeaderToPeer(peer) { + logutil.BgLogger().Warn("the store must exist", + zap.Uint64("store", peer.StoreId), + zap.Uint64("peer", peer.Id)) } } } @@ -565,6 +592,10 @@ type accessFollower struct { learnerOnly bool } +// Follower read will try followers first, if no follower is available, it will fallback to leader. +// Specially, for stale read, it tries local peer(can be either leader or follower), then use snapshot read in the leader, +// if the leader read receive server-is-busy and connection errors, the region cache is still valid, +// and the state will be changed to tryFollower, which will read by replica read. func (state *accessFollower) next(bo *retry.Backoffer, selector *replicaSelector) (*RPCContext, error) { replicaSize := len(selector.replicas) resetStaleRead := false @@ -633,7 +664,8 @@ func (state *accessFollower) next(bo *retry.Backoffer, selector *replicaSelector // If there is no candidate, fallback to the leader. if selector.targetIdx < 0 { leader := selector.replicas[state.leaderIdx] - leaderInvalid := leader.isEpochStale() || state.IsLeaderExhausted(leader) + leaderEpochStale := leader.isEpochStale() + leaderInvalid := leaderEpochStale || state.IsLeaderExhausted(leader) if len(state.option.labels) > 0 { logutil.Logger(bo.GetCtx()).Warn("unable to find stores with given labels", zap.Uint64("region", selector.region.GetID()), @@ -645,6 +677,20 @@ func (state *accessFollower) next(bo *retry.Backoffer, selector *replicaSelector return nil, nil } if leaderInvalid { + // In stale-read, the request will fallback to leader after the local follower failure. + // If the leader is also unavailable, we can fallback to the follower and use replica-read flag again, + // The remote follower not tried yet, and the local follower can retry without stale-read flag. + if state.isStaleRead { + selector.state = &tryFollower{ + leaderIdx: state.leaderIdx, + lastIdx: state.leaderIdx, + labels: state.option.labels, + } + if leaderEpochStale { + selector.regionCache.scheduleReloadRegion(selector.region) + } + return nil, stateChanged{} + } metrics.TiKVReplicaSelectorFailureCounter.WithLabelValues("exhausted").Inc() selector.invalidateRegion() return nil, nil @@ -695,23 +741,25 @@ func (state *accessFollower) isCandidate(idx AccessIndex, replica *replica) bool if replica.isEpochStale() || replica.isExhausted(1) || replica.store.getLivenessState() == unreachable || replica.deadlineErrUsingConfTimeout { return false } - // The request can only be sent to the leader. - if state.option.leaderOnly && idx == state.leaderIdx { - return true + if state.option.leaderOnly { + // The request can only be sent to the leader. + return idx == state.leaderIdx } - // Choose a replica with matched labels. - followerCandidate := !state.option.leaderOnly && (state.tryLeader || idx != state.leaderIdx) && - replica.store.IsLabelsMatch(state.option.labels) && (!state.learnerOnly || replica.peer.Role == metapb.PeerRole_Learner) - if !followerCandidate { + if !state.tryLeader && idx == state.leaderIdx { + // The request cannot be sent to leader. return false } + if state.learnerOnly { + // The request can only be sent to the learner. + return replica.peer.Role == metapb.PeerRole_Learner + } // And If the leader store is abnormal to be accessed under `ReplicaReadPreferLeader` mode, we should choose other valid followers // as candidates to serve the Read request. if state.option.preferLeader && replica.store.isSlow() { return false } - // If the stores are limited, check if the store is in the list. - return replica.store.IsStoreMatch(state.option.stores) + // Choose a replica with matched labels. + return replica.store.IsStoreMatch(state.option.stores) && replica.store.IsLabelsMatch(state.option.labels) } // tryIdleReplica is the state where we find the leader is busy and retry the request using replica read. @@ -1101,6 +1149,9 @@ func (s *replicaSelector) onServerIsBusy( // Mark the server is busy (the next incoming READs could be redirect // to expected followers. ) ctx.Store.markAlreadySlow() + if s.canFallback2Follower() { + return true, nil + } } err = bo.Backoff(retry.BoTiKVServerBusy, errors.Errorf("server is busy, ctx: %v", ctx)) if err != nil { @@ -1109,6 +1160,23 @@ func (s *replicaSelector) onServerIsBusy( return true, nil } +// For some reasons, the leader is unreachable by now, try followers instead. +// the state is changed in accessFollower.next when leader is unavailable. +func (s *replicaSelector) canFallback2Follower() bool { + if s == nil || s.state == nil { + return false + } + state, ok := s.state.(*accessFollower) + if !ok { + return false + } + if !state.isStaleRead { + return false + } + // can fallback to follower only when the leader is exhausted. + return state.lastIdx == state.leaderIdx && state.IsLeaderExhausted(s.replicas[state.leaderIdx]) +} + func (s *replicaSelector) invalidateRegion() { if s.region != nil { s.region.invalidate(Other) @@ -1814,6 +1882,8 @@ func regionErrorToLabel(e *errorpb.Error) string { return "peer_is_witness" } else if isDeadlineExceeded(e) { return "deadline_exceeded" + } else if e.GetMismatchPeerId() != nil { + return "mismatch_peer_id" } return "unknown" } @@ -1968,6 +2038,17 @@ func (s *RegionRequestSender) onRegionError( return retry, err } + if bucketVersionNotMatch := regionErr.GetBucketVersionNotMatch(); bucketVersionNotMatch != nil { + logutil.Logger(bo.GetCtx()).Debug( + "tikv reports `BucketVersionNotMatch` retry later", + zap.Stringer("bucketVersionNotMatch", bucketVersionNotMatch), + zap.Stringer("ctx", ctx), + ) + // bucket version is not match, we should split this cop request again. + s.regionCache.OnBucketVersionNotMatch(ctx, bucketVersionNotMatch.Version, bucketVersionNotMatch.Keys) + return false, nil + } + if serverIsBusy := regionErr.GetServerIsBusy(); serverIsBusy != nil { if s.replicaSelector != nil && strings.Contains(serverIsBusy.GetReason(), "deadline is exceeded") { s.replicaSelector.onDeadlineExceeded() @@ -2105,6 +2186,18 @@ func (s *RegionRequestSender) onRegionError( s.replicaSelector.onDeadlineExceeded() } + if mismatch := regionErr.GetMismatchPeerId(); mismatch != nil { + logutil.Logger(bo.GetCtx()).Warn( + "tikv reports `MismatchPeerId`, invalidate region cache", + zap.Uint64("req peer id", mismatch.GetRequestPeerId()), + zap.Uint64("store peer id", mismatch.GetStorePeerId()), + ) + if s.replicaSelector != nil { + s.replicaSelector.invalidateRegion() + } + return false, nil + } + logutil.Logger(bo.GetCtx()).Debug( "tikv reports region failed", zap.Stringer("regionErr", regionErr), diff --git a/internal/locate/region_request3_test.go b/internal/locate/region_request3_test.go index f56b4022f..28bc43e87 100644 --- a/internal/locate/region_request3_test.go +++ b/internal/locate/region_request3_test.go @@ -348,7 +348,7 @@ func (s *testRegionRequestToThreeStoresSuite) TestLearnerReplicaSelector() { atomic.StoreInt64(®ion.lastAccess, time.Now().Unix()) rpcCtx, err := replicaSelector.next(s.bo) s.Nil(err) - // Should swith to the next follower. + // Should switch to the next follower. s.Equal(AccessIndex(tikvLearnerAccessIdx), accessLearner.lastIdx) AssertRPCCtxEqual(s, rpcCtx, replicaSelector.replicas[replicaSelector.targetIdx], nil) } @@ -590,7 +590,7 @@ func (s *testRegionRequestToThreeStoresSuite) TestReplicaSelector() { for i := 0; i < regionStore.accessStoreNum(tiKVOnly)-1; i++ { rpcCtx, err := replicaSelector.next(s.bo) s.Nil(err) - // Should swith to the next follower. + // Should switch to the next follower. s.NotEqual(lastIdx, state3.lastIdx) // Shouldn't access the leader if followers aren't exhausted. s.NotEqual(regionStore.workTiKVIdx, state3.lastIdx) @@ -1284,3 +1284,144 @@ func (s *testRegionRequestToThreeStoresSuite) TestSendReqFirstTimeout() { } } } + +func (s *testRegionRequestToThreeStoresSuite) TestStaleReadFallback2Follower() { + leaderStore, _ := s.loadAndGetLeaderStore() + leaderLabel := []*metapb.StoreLabel{ + { + Key: "id", + Value: strconv.FormatUint(leaderStore.StoreID(), 10), + }, + } + var followerID *uint64 + for _, storeID := range s.storeIDs { + if storeID != leaderStore.storeID { + id := storeID + followerID = &id + break + } + } + s.NotNil(followerID) + followerLabel := []*metapb.StoreLabel{ + { + Key: "id", + Value: strconv.FormatUint(*followerID, 10), + }, + } + + regionLoc, err := s.cache.LocateRegionByID(s.bo, s.regionID) + s.Nil(err) + s.NotNil(regionLoc) + + dataIsNotReady := false + s.regionRequestSender.client = &fnClient{fn: func(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (response *tikvrpc.Response, err error) { + select { + case <-ctx.Done(): + return nil, errors.New("timeout") + default: + } + if dataIsNotReady && req.StaleRead { + dataIsNotReady = false + return &tikvrpc.Response{Resp: &kvrpcpb.GetResponse{RegionError: &errorpb.Error{ + DataIsNotReady: &errorpb.DataIsNotReady{}, + }}}, nil + } + if addr == leaderStore.addr { + return &tikvrpc.Response{Resp: &kvrpcpb.GetResponse{RegionError: &errorpb.Error{ + ServerIsBusy: &errorpb.ServerIsBusy{}, + }}}, nil + } + if !req.ReplicaRead { + return &tikvrpc.Response{Resp: &kvrpcpb.GetResponse{RegionError: &errorpb.Error{ + NotLeader: &errorpb.NotLeader{}, + }}}, nil + } + return &tikvrpc.Response{Resp: &kvrpcpb.GetResponse{Value: []byte(addr)}}, nil + }} + + for _, localLeader := range []bool{true, false} { + dataIsNotReady = true + // data is not ready, then server is busy in the first round, + // directly server is busy in the second round. + for i := 0; i < 2; i++ { + req := tikvrpc.NewReplicaReadRequest(tikvrpc.CmdGet, &kvrpcpb.GetRequest{Key: []byte("key")}, kv.ReplicaReadLeader, nil) + req.ReadReplicaScope = oracle.GlobalTxnScope + req.TxnScope = oracle.GlobalTxnScope + req.EnableStaleRead() + req.ReplicaReadType = kv.ReplicaReadMixed + var ops []StoreSelectorOption + if localLeader { + ops = append(ops, WithMatchLabels(leaderLabel)) + } else { + ops = append(ops, WithMatchLabels(followerLabel)) + } + + ctx, cancel := context.WithTimeout(context.Background(), 10000*time.Second) + bo := retry.NewBackoffer(ctx, -1) + s.Nil(err) + resp, _, _, err := s.regionRequestSender.SendReqCtx(bo, req, regionLoc.Region, time.Second, tikvrpc.TiKV, ops...) + s.Nil(err) + + regionErr, err := resp.GetRegionError() + s.Nil(err) + s.Nil(regionErr) + getResp, ok := resp.Resp.(*kvrpcpb.GetResponse) + s.True(ok) + if localLeader { + s.NotEqual(getResp.Value, []byte("store"+leaderLabel[0].Value)) + } else { + s.Equal(getResp.Value, []byte("store"+followerLabel[0].Value)) + } + cancel() + } + } +} + +func (s *testRegionRequestToThreeStoresSuite) TestReplicaReadFallbackToLeaderRegionError() { + regionLoc, err := s.cache.LocateRegionByID(s.bo, s.regionID) + s.Nil(err) + s.NotNil(regionLoc) + + s.regionRequestSender.client = &fnClient{fn: func(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (response *tikvrpc.Response, err error) { + select { + case <-ctx.Done(): + return nil, errors.New("timeout") + default: + } + // Return `mismatch peer id` when accesses the leader. + if addr == s.cluster.GetStore(s.storeIDs[0]).Address { + return &tikvrpc.Response{Resp: &kvrpcpb.GetResponse{RegionError: &errorpb.Error{ + MismatchPeerId: &errorpb.MismatchPeerId{ + RequestPeerId: 1, + StorePeerId: 2, + }, + }}}, nil + } + return &tikvrpc.Response{Resp: &kvrpcpb.GetResponse{RegionError: &errorpb.Error{ + DataIsNotReady: &errorpb.DataIsNotReady{}, + }}}, nil + }} + + region := s.cache.getRegionByIDFromCache(regionLoc.Region.GetID()) + s.True(region.isValid()) + + req := tikvrpc.NewReplicaReadRequest(tikvrpc.CmdGet, &kvrpcpb.GetRequest{Key: []byte("key")}, kv.ReplicaReadLeader, nil) + req.ReadReplicaScope = oracle.GlobalTxnScope + req.TxnScope = oracle.GlobalTxnScope + req.EnableStaleRead() + req.ReplicaReadType = kv.ReplicaReadFollower + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + bo := retry.NewBackoffer(ctx, -1) + s.Nil(err) + resp, _, _, err := s.regionRequestSender.SendReqCtx(bo, req, regionLoc.Region, time.Second, tikvrpc.TiKV) + s.Nil(err) + regionErr, err := resp.GetRegionError() + s.Nil(err) + s.Equal(regionErrorToLabel(regionErr), "mismatch_peer_id") + // return non-epoch-not-match region error and the upper layer can auto retry. + s.Nil(regionErr.GetEpochNotMatch()) + // after region error returned, the region should be invalidated. + s.False(region.isValid()) +} diff --git a/internal/resourcecontrol/resource_control.go b/internal/resourcecontrol/resource_control.go index fb55d25fd..23a48b3c2 100644 --- a/internal/resourcecontrol/resource_control.go +++ b/internal/resourcecontrol/resource_control.go @@ -16,12 +16,14 @@ package resourcecontrol import ( "reflect" + "strings" "time" "github.com/pingcap/kvproto/pkg/coprocessor" "github.com/pingcap/kvproto/pkg/kvrpcpb" "github.com/tikv/client-go/v2/internal/logutil" "github.com/tikv/client-go/v2/tikvrpc" + "github.com/tikv/client-go/v2/util" "go.uber.org/zap" ) @@ -34,12 +36,23 @@ type RequestInfo struct { writeBytes int64 storeID uint64 replicaNumber int64 + // bypass indicates whether the request should be bypassed. + // some internal request should be bypassed, such as Privilege request. + bypass bool } // MakeRequestInfo extracts the relevant information from a BatchRequest. func MakeRequestInfo(req *tikvrpc.Request) *RequestInfo { + var bypass bool + requestSource := req.Context.GetRequestSource() + if len(requestSource) > 0 { + if strings.Contains(requestSource, util.InternalRequestPrefix+util.InternalTxnOthers) { + bypass = true + } + } + storeID := req.Context.GetPeer().GetStoreId() if !req.IsTxnWriteRequest() && !req.IsRawWriteRequest() { - return &RequestInfo{writeBytes: -1} + return &RequestInfo{writeBytes: -1, storeID: storeID, bypass: bypass} } var writeBytes int64 @@ -57,7 +70,7 @@ func MakeRequestInfo(req *tikvrpc.Request) *RequestInfo { writeBytes += int64(len(k)) } } - return &RequestInfo{writeBytes: writeBytes, storeID: req.Context.Peer.StoreId, replicaNumber: req.ReplicaNumber} + return &RequestInfo{writeBytes: writeBytes, storeID: storeID, replicaNumber: req.ReplicaNumber, bypass: bypass} } // IsWrite returns whether the request is a write request. @@ -68,13 +81,21 @@ func (req *RequestInfo) IsWrite() bool { // WriteBytes returns the actual write size of the request, // -1 will be returned if it's not a write request. func (req *RequestInfo) WriteBytes() uint64 { - return uint64(req.writeBytes) + if req.writeBytes > 0 { + return uint64(req.writeBytes) + } + return 0 } func (req *RequestInfo) ReplicaNumber() int64 { return req.replicaNumber } +// Bypass returns whether the request should be bypassed. +func (req *RequestInfo) Bypass() bool { + return req.bypass +} + func (req *RequestInfo) StoreID() uint64 { return req.storeID } diff --git a/internal/resourcecontrol/resource_control_test.go b/internal/resourcecontrol/resource_control_test.go new file mode 100644 index 000000000..25f6f72aa --- /dev/null +++ b/internal/resourcecontrol/resource_control_test.go @@ -0,0 +1,48 @@ +package resourcecontrol + +import ( + "testing" + + "github.com/pingcap/kvproto/pkg/kvrpcpb" + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/stretchr/testify/assert" + "github.com/tikv/client-go/v2/tikvrpc" +) + +func TestMakeRequestInfo(t *testing.T) { + // Test a non-write request. + req := &tikvrpc.Request{Req: &kvrpcpb.BatchGetRequest{}, Context: kvrpcpb.Context{Peer: &metapb.Peer{StoreId: 1}}} + info := MakeRequestInfo(req) + assert.False(t, info.IsWrite()) + assert.Equal(t, uint64(0), info.WriteBytes()) + assert.False(t, info.Bypass()) + assert.Equal(t, uint64(1), info.StoreID()) + + // Test a prewrite request. + mutation := &kvrpcpb.Mutation{Key: []byte("foo"), Value: []byte("bar")} + prewriteReq := &kvrpcpb.PrewriteRequest{Mutations: []*kvrpcpb.Mutation{mutation}, PrimaryLock: []byte("baz")} + req = &tikvrpc.Request{Type: tikvrpc.CmdPrewrite, Req: prewriteReq, ReplicaNumber: 1, Context: kvrpcpb.Context{Peer: &metapb.Peer{StoreId: 2}}} + requestSource := "xxx_internal_others" + req.Context.RequestSource = requestSource + info = MakeRequestInfo(req) + assert.True(t, info.IsWrite()) + assert.Equal(t, uint64(9), info.WriteBytes()) + assert.True(t, info.Bypass()) + assert.Equal(t, uint64(2), info.StoreID()) + // Test a commit request. + commitReq := &kvrpcpb.CommitRequest{Keys: [][]byte{[]byte("qux")}} + req = &tikvrpc.Request{Type: tikvrpc.CmdCommit, Req: commitReq, ReplicaNumber: 2, Context: kvrpcpb.Context{Peer: &metapb.Peer{StoreId: 3}}} + info = MakeRequestInfo(req) + assert.True(t, info.IsWrite()) + assert.Equal(t, uint64(3), info.WriteBytes()) + assert.False(t, info.Bypass()) + assert.Equal(t, uint64(3), info.StoreID()) + + // Test Nil Peer in Context + req = &tikvrpc.Request{Type: tikvrpc.CmdCommit, Req: commitReq, ReplicaNumber: 2, Context: kvrpcpb.Context{}} + info = MakeRequestInfo(req) + assert.True(t, info.IsWrite()) + assert.Equal(t, uint64(3), info.WriteBytes()) + assert.False(t, info.Bypass()) + assert.Equal(t, uint64(0), info.StoreID()) +} diff --git a/util/request_source.go b/util/request_source.go index 6ebadfc7e..97d3b83fd 100644 --- a/util/request_source.go +++ b/util/request_source.go @@ -57,6 +57,8 @@ var ExplicitTypeList = []string{ExplicitTypeEmpty, ExplicitTypeLightning, Explic const ( // InternalRequest is the scope of internal queries InternalRequest = "internal" + // InternalRequestPrefix is the prefix of internal queries + InternalRequestPrefix = "internal_" // ExternalRequest is the scope of external queries ExternalRequest = "external" // SourceUnknown keeps same with the default value(empty string)