pkg/kv/kv.go

// Copyright 2015 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package kv

import (
	"bytes"
	"context"
	"crypto/tls"
	"slices"
	"time"

	"github.com/pingcap/errors"
	deadlockpb "github.com/pingcap/kvproto/pkg/deadlock"
	"github.com/pingcap/kvproto/pkg/kvrpcpb"
	"github.com/pingcap/kvproto/pkg/metapb"
	"github.com/pingcap/tidb/pkg/config"
	"github.com/pingcap/tidb/pkg/meta/model"
	"github.com/pingcap/tidb/pkg/parser"
	"github.com/pingcap/tidb/pkg/resourcegroup"
	"github.com/pingcap/tidb/pkg/util/memory"
	"github.com/pingcap/tidb/pkg/util/resourcegrouptag"
	"github.com/pingcap/tidb/pkg/util/tiflash"
	"github.com/pingcap/tidb/pkg/util/trxevents"
	"github.com/pingcap/tipb/go-tipb"
	tikvstore "github.com/tikv/client-go/v2/kv"
	"github.com/tikv/client-go/v2/oracle"
	"github.com/tikv/client-go/v2/tikv"
	"github.com/tikv/client-go/v2/tikvrpc"
	"github.com/tikv/client-go/v2/util"
	pd "github.com/tikv/pd/client"
	pdhttp "github.com/tikv/pd/client/http"
	"go.uber.org/atomic"
)

// UnCommitIndexKVFlag is used to indicate that an index key/value does not need to be committed.
// This is used when the index key/value is unchanged by an update.
// Usage:
//  1. For a non-unique index: normally, the index value is '0'.
//     Changing the value to '1' indicates that the index key/value does not need to be committed.
//  2. For a unique index: normally, the index value is the record handle ID, 8 bytes.
//     Appending UnCommitIndexKVFlag to the value indicates that the index key/value does not need to be committed.
const UnCommitIndexKVFlag byte = '1'

// These limits are enforced to make sure the transaction can be well handled by TiKV.
var (
	// TxnEntrySizeLimit is the limit on the size of a single entry (len(key) + len(value)).
	// It is initialized to config.DefTxnEntrySizeLimit.
	TxnEntrySizeLimit = atomic.NewUint64(config.DefTxnEntrySizeLimit)
	// TxnTotalSizeLimit is the limit on the sum of all entry sizes.
	// It is initialized to config.DefTxnTotalSizeLimit.
	TxnTotalSizeLimit = atomic.NewUint64(config.DefTxnTotalSizeLimit)
)

// Getter is the interface that wraps the Get method.
type Getter interface {
	// Get gets the value for key k from the kv store.
	// If the corresponding kv pair does not exist, it returns nil and ErrNotExist.
	Get(ctx context.Context, k Key) ([]byte, error)
}

// Retriever is the interface that wraps the basic Get, Iter and IterReverse methods.
type Retriever interface {
	Getter
	// Iter creates an Iterator positioned on the first entry that k <= entry's key.
	// If such an entry is not found, it returns an invalid Iterator with no error.
	// It yields only keys that are < upperBound. If upperBound is nil, the upper bound is unbounded.
	// The Iterator must be Closed after use.
	Iter(k Key, upperBound Key) (Iterator, error)

	// IterReverse creates a reversed Iterator positioned on the first entry whose key is less than k.
	// The returned iterator will iterate from greater keys to smaller keys.
	// If k is nil, the returned iterator will be positioned at the last key.
	// It yields only keys that are >= lowerBound. If lowerBound is nil, the lower bound is unbounded.
	IterReverse(k, lowerBound Key) (Iterator, error)
}

// EmptyIterator is an iterator that never holds an entry.
type EmptyIterator struct{}

// Valid reports whether the iterator currently points at an entry.
// It is always false for this iterator.
func (*EmptyIterator) Valid() bool {
	return false
}

// Key returns the current key, which is always nil for this iterator.
func (*EmptyIterator) Key() Key {
	return nil
}

// Value returns the current value, which is always nil for this iterator.
func (*EmptyIterator) Value() []byte {
	return nil
}

// Next moves to the next position. Since there is nothing to move to,
// it always returns an error for this iterator.
func (*EmptyIterator) Next() error {
	return errors.New("iterator is invalid")
}

// Close closes the iterator. It does nothing for this iterator.
func (*EmptyIterator) Close() {}

// EmptyRetriever is a retriever that never holds an entry.
type EmptyRetriever struct{}

// Get looks up the value for a key. For this retriever no key exists,
// so it always returns nil together with ErrNotExist.
func (*EmptyRetriever) Get(_ context.Context, _ Key) ([]byte, error) {
	var noValue []byte
	return noValue, ErrNotExist
}

// Iter creates an Iterator. It always returns an EmptyIterator and a nil error for this retriever.
func (*EmptyRetriever) Iter(_ Key, _ Key) (Iterator, error) {
	return new(EmptyIterator), nil
}

// IterReverse creates a reversed Iterator. It always returns an EmptyIterator and a nil error for this retriever.
func (*EmptyRetriever) IterReverse(_ Key, _ Key) (Iterator, error) {
	return new(EmptyIterator), nil
}

// Mutator is the interface that wraps the basic Set and Delete methods.
type Mutator interface {
	// Set sets the value for key k as v in the kv store.
	// v must NOT be nil or empty, otherwise it returns ErrCannotSetNilValue.
	Set(k Key, v []byte) error
	// Delete removes the entry for key k from the kv store.
	Delete(k Key) error
}

// StagingHandle is the reference of a staging buffer.
type StagingHandle int

var (
	// InvalidStagingHandle is an invalid handle; MemBuffer will check the handle to ensure safety.
	InvalidStagingHandle StagingHandle = 0
	// LastActiveStagingHandle is a special handle which always points to the last active staging buffer.
	LastActiveStagingHandle StagingHandle = -1
)

// RetrieverMutator is the interface that groups the Retriever and Mutator interfaces,
// i.e. it offers both read access (Get, Iter, IterReverse) and write access (Set, Delete).
type RetrieverMutator interface {
	Retriever
	Mutator
}

// MemBuffer is an in-memory kv collection that can be used to buffer write operations.
type MemBuffer interface {
	RetrieverMutator

	// RLock locks the MemBuffer for shared read.
	// In most cases, MemBuffer is only used by a single goroutine,
	// but it is read by multiple goroutines when combined with executor.UnionScanExec.
	// To avoid the race introduced by executor.UnionScanExec, MemBuffer exposes a read lock for it.
	RLock()
	// RUnlock unlocks the MemBuffer.
	RUnlock()

	// GetFlags returns the latest flags associated with key.
	GetFlags(Key) (KeyFlags, error)
	// SetWithFlags puts the key-value into the last active staging buffer with the given KeyFlags.
	SetWithFlags(Key, []byte, ...FlagsOp) error
	// UpdateFlags updates the flags associated with key.
	UpdateFlags(Key, ...FlagsOp)
	// DeleteWithFlags deletes the key with the given KeyFlags.
	DeleteWithFlags(Key, ...FlagsOp) error

	// Staging creates a new staging buffer inside the MemBuffer.
	// Subsequent writes will be temporarily stored in this new staging buffer.
	// When all modifications look good, call `Release` to publish all of them to the upper level buffer.
	Staging() StagingHandle
	// Release publishes all modifications in the latest staging buffer to the upper level.
	Release(StagingHandle)
	// Cleanup cleans up the resources referenced by the StagingHandle.
	// If the changes are not published by `Release`, they will be discarded.
	Cleanup(StagingHandle)
	// InspectStage is used to inspect the value updates in the given stage.
	// The callback receives the key, its flags and its value.
	InspectStage(StagingHandle, func(Key, KeyFlags, []byte))

	// SnapshotGetter returns a Getter for a snapshot of MemBuffer.
	SnapshotGetter() Getter
	// SnapshotIter returns an Iterator for a snapshot of MemBuffer.
	SnapshotIter(k, upperbound Key) Iterator
	// SnapshotIterReverse returns a reverse Iterator for a snapshot of MemBuffer.
	SnapshotIterReverse(k, lowerBound Key) Iterator

	// Len returns the number of entries in the DB.
	Len() int

	// Size returns the sum of the lengths of keys and values.
	Size() int

	// RemoveFromBuffer removes the entry from the buffer. It's used for testing.
	RemoveFromBuffer(Key)

	// GetLocal checks if the key exists in the buffer in local memory.
	GetLocal(context.Context, []byte) ([]byte, error)

	// BatchGet gets values from the memory buffer.
	BatchGet(ctx context.Context, keys [][]byte) (map[string][]byte, error)
}

// FindKeysInStage collects every key in stage h of m for which predicate
// returns true. Keys are collected in the order in which m.InspectStage
// reports them. The result is never nil; it is empty when nothing matches.
func FindKeysInStage(m MemBuffer, h StagingHandle, predicate func(Key, KeyFlags, []byte) bool) []Key {
	matched := []Key{}
	collect := func(key Key, flags KeyFlags, value []byte) {
		if !predicate(key, flags, value) {
			return
		}
		matched = append(matched, key)
	}
	m.InspectStage(h, collect)
	return matched
}

// LockCtx contains information for the LockKeys method.
// It is an alias of tikvstore.LockCtx.
type LockCtx = tikvstore.LockCtx

// Transaction defines the interface for operations inside a Transaction.
// This is not thread safe.
type Transaction interface {
	RetrieverMutator
	AssertionProto
	FairLockingController
	// Size returns the sum of the lengths of keys and values.
	Size() int
	// Mem returns the memory consumption of the transaction.
	Mem() uint64
	// SetMemoryFootprintChangeHook sets the hook that will be called when the memory footprint changes.
	SetMemoryFootprintChangeHook(func(uint64))
	// MemHookSet returns whether the memory footprint change hook is set.
	MemHookSet() bool
	// Len returns the number of entries in the DB.
	Len() int
	// Commit commits the transaction operations to the KV store.
	Commit(context.Context) error
	// Rollback undoes the transaction operations to the KV store.
	Rollback() error
	// String implements the fmt.Stringer interface.
	String() string
	// LockKeys tries to lock the entries with the keys in the KV store.
	// It will block until all keys are locked successfully or an error occurs.
	LockKeys(ctx context.Context, lockCtx *LockCtx, keys ...Key) error
	// LockKeysFunc tries to lock the entries with the keys in the KV store.
	// It will block until all keys are locked successfully or an error occurs.
	// fn is called before LockKeys unlocks the keys.
	LockKeysFunc(ctx context.Context, lockCtx *LockCtx, fn func(), keys ...Key) error
	// SetOption sets an option with a value. When val is nil, it uses the default
	// value of this option.
	SetOption(opt int, val any)
	// GetOption returns the value of the option.
	GetOption(opt int) any
	// IsReadOnly checks if the transaction has only performed read operations.
	IsReadOnly() bool
	// StartTS returns the transaction start timestamp.
	StartTS() uint64
	// Valid returns whether the transaction is valid.
	// A transaction becomes invalid after commit or rollback.
	Valid() bool
	// GetMemBuffer returns the MemBuffer bound to this transaction.
	GetMemBuffer() MemBuffer
	// GetSnapshot returns the Snapshot bound to this transaction.
	GetSnapshot() Snapshot
	// SetVars sets variables to the transaction.
	SetVars(vars any)
	// GetVars gets variables from the transaction.
	GetVars() any
	// BatchGet gets kv from the memory buffer of statement and transaction, and the kv storage.
	// Do not use len(value) == 0 or value == nil to represent non-exist.
	// If a key doesn't exist, there shouldn't be any corresponding entry in the result map.
	BatchGet(ctx context.Context, keys []Key) (map[string][]byte, error)
	// IsPessimistic reports whether this is a pessimistic transaction.
	// NOTE(review): the exact criteria are defined by the implementation — confirm there.
	IsPessimistic() bool
	// CacheTableInfo caches the table info for the given id.
	// PresumeKeyNotExists will use this to help decode error message.
	CacheTableInfo(id int64, info *model.TableInfo)
	// GetTableInfo returns the table info for the given id, as cached through CacheTableInfo.
	// NOTE(review): what is returned for an id that was never cached is up to the
	// implementation (presumably nil) — confirm.
	GetTableInfo(id int64) *model.TableInfo

	// SetDiskFullOpt sets the allowed options of the current operation in each TiKV disk usage level.
	SetDiskFullOpt(level kvrpcpb.DiskFullOpt)
	// ClearDiskFullOpt clears the allowed flag.
	ClearDiskFullOpt()

	// GetMemDBCheckpoint gets the transaction's memDB checkpoint.
	GetMemDBCheckpoint() *tikv.MemDBCheckpoint

	// RollbackMemDBToCheckpoint rolls back the transaction's memDB to the specified checkpoint.
	RollbackMemDBToCheckpoint(*tikv.MemDBCheckpoint)

	// UpdateMemBufferFlags updates the flags of a node in the mem buffer.
	UpdateMemBufferFlags(key []byte, flags ...FlagsOp)
	// IsPipelined returns whether the transaction is used for pipelined DML.
	IsPipelined() bool
	// MayFlush flushes the pipelined memdb if the keys or size exceed the threshold; it has no effect for standard DML.
	MayFlush() error
}

// AssertionProto is an interface defined for the assertion protocol.
type AssertionProto interface {
	// SetAssertion sets an assertion for an operation on the key.
	// The assertion is expressed as FlagsOp values.
	// TODO: Use a special type instead of `FlagsOp`. Otherwise there's a risk that the assertion flag is incorrectly used
	// in other places like `MemBuffer.SetWithFlags`.
	SetAssertion(key []byte, assertion ...FlagsOp) error
}

// FairLockingController is the interface that defines fair locking related operations.
// It is embedded in Transaction.
// NOTE(review): the method names suggest a start / retry / cancel / done lifecycle plus a
// mode query; the exact semantics are defined by the implementation — confirm there.
type FairLockingController interface {
	StartFairLocking() error
	RetryFairLocking(ctx context.Context) error
	CancelFairLocking(ctx context.Context) error
	DoneFairLocking(ctx context.Context) error
	IsInFairLockingMode() bool
}

// Client is used to send requests to the KV layer.
type Client interface {
	// Send sends the request to the KV layer and returns a Response.
	// option carries the additional ClientSendOption settings for this call.
	Send(ctx context.Context, req *Request, vars any, option *ClientSendOption) Response

	// IsRequestTypeSupported checks if the combination of reqType and subType is supported.
	IsRequestTypeSupported(reqType, subType int64) bool
}

// ClientSendOption wraps the options that are passed to Client.Send
// in addition to the request itself.
type ClientSendOption struct {
	SessionMemTracker          *memory.Tracker
	EnabledRateLimitAction     bool
	EventCb                    trxevents.EventCallback
	EnableCollectExecutionInfo bool
	TiFlashReplicaRead         tiflash.ReplicaRead
	AppendWarning              func(warn error)
	TryCopLiteWorker           *atomic.Uint32
}

// ReqTypes and ReqSubTypes.
// All of them are untyped integer constants.
const (
	// Request types.
	ReqTypeSelect   = 101
	ReqTypeIndex    = 102
	ReqTypeDAG      = 103
	ReqTypeAnalyze  = 104
	ReqTypeChecksum = 105

	// Request sub types.
	ReqSubTypeBasic      = 0
	ReqSubTypeDesc       = 10000
	ReqSubTypeGroupBy    = 10001
	ReqSubTypeTopN       = 10002
	ReqSubTypeSignature  = 10003
	ReqSubTypeAnalyzeIdx = 10004
	ReqSubTypeAnalyzeCol = 10005
)

// StoreType represents the type of storage engine.
type StoreType uint8

const (
	// TiKV means the type of store engine is TiKV.
	TiKV StoreType = iota
	// TiFlash means the type of store engine is TiFlash.
	TiFlash
	// TiDB means the type of store engine is TiDB.
	// It is used to read memory data from other instances to have a global view of the
	// data, such as for information_schema.cluster_slow_query.
	TiDB
	// UnSpecified means the store engine type is unknown.
	// Unlike the values above it is declared as an untyped constant.
	UnSpecified = 255
)

// Name returns the lower-case name of the store type: "tikv", "tiflash" or
// "tidb". Every other value, including UnSpecified, yields "unspecified".
func (t StoreType) Name() string {
	switch t {
	case TiFlash:
		return "tiflash"
	case TiDB:
		return "tidb"
	case TiKV:
		return "tikv"
	default:
		return "unspecified"
	}
}

// KeyRanges wraps the key ranges of a request, grouped by partition.
// For a partitioned table, ranges from different partitions may be sent in one request.
type KeyRanges struct {
	// ranges holds one slice of ranges per partition. The non-partitioned
	// constructors store their ranges as a single partition.
	ranges [][]KeyRange
	// rowCountHints optionally holds row count hints; rowCountHints[i] is
	// handed out together with ranges[i]. It may be nil or shorter than ranges.
	rowCountHints [][]int

	isPartitioned bool
}

// NewPartitionedKeyRanges constructs a new KeyRanges for a partitioned table without row count hints.
func NewPartitionedKeyRanges(ranges [][]KeyRange) *KeyRanges {
	return NewPartitionedKeyRangesWithHints(ranges, nil)
}

// NewNonPartitionedKeyRanges constructs a new KeyRanges for a non-partitioned table without row count hints.
func NewNonPartitionedKeyRanges(ranges []KeyRange) *KeyRanges {
	return NewNonParitionedKeyRangesWithHint(ranges, nil)
}

// NewPartitionedKeyRangesWithHints constructs a new KeyRanges for a partitioned table with row count hints.
// The given slices are stored as-is, without copying.
func NewPartitionedKeyRangesWithHints(ranges [][]KeyRange, hints [][]int) *KeyRanges {
	return &KeyRanges{
		ranges:        ranges,
		rowCountHints: hints,
		isPartitioned: true,
	}
}

// NewNonParitionedKeyRangesWithHint constructs a new KeyRanges for a non-partitioned table with a row count hint.
// The ranges are stored as one single partition (even when ranges is nil); the hints are
// only recorded when they are non-nil.
// The misspelling in the function name ("Paritioned") is kept for compatibility with callers.
func NewNonParitionedKeyRangesWithHint(ranges []KeyRange, hints []int) *KeyRanges {
	rr := &KeyRanges{ranges: [][]KeyRange{ranges}}
	if hints != nil {
		rr.rowCountHints = [][]int{hints}
	}
	return rr
}

// FirstPartitionRange returns the ranges of the first partition, or an empty
// (non-nil) slice when there is no partition at all.
// We may use some func to generate ranges for both partitioned and non-partitioned tables.
// This method provides a way to fall back to non-partitioned ranges.
func (rr *KeyRanges) FirstPartitionRange() []KeyRange {
	if len(rr.ranges) == 0 {
		return []KeyRange{}
	}
	return rr.ranges[0]
}

// SetToNonPartitioned sets the status to non-partitioned.
// It returns an error and leaves the status untouched when more than one partition is held.
func (rr *KeyRanges) SetToNonPartitioned() error {
	if len(rr.ranges) > 1 {
		// The message is constant, so errors.New is used rather than Errorf.
		return errors.New("you want to change the partitioned ranges to non-partitioned ranges")
	}
	rr.isPartitioned = false
	return nil
}

// AppendSelfTo appends the ranges of all partitions, in partition order, to
// the given slice and returns the extended slice.
func (rr *KeyRanges) AppendSelfTo(ranges []KeyRange) []KeyRange {
	for _, r := range rr.ranges {
		ranges = append(ranges, r...)
	}
	return ranges
}

// SortByFunc sorts the partitions by their first range and then sorts the
// ranges inside each partition, both according to sortFunc.
// Since the ranges are sorted in most cases, each level is checked first and
// only sorted when the check fails.
//
// Empty partitions are moved in front of all non-empty ones. Note that only
// rr.ranges is reordered; rr.rowCountHints is left as it is.
func (rr *KeyRanges) SortByFunc(sortFunc func(i, j KeyRange) int) {
	partitionsSorted := slices.IsSortedFunc(rr.ranges, func(i, j []KeyRange) int {
		// Any pair that involves an empty partition is reported as "less", which is
		// expected to fail the check and fall through to the sort below. This is harmless.
		if len(i) == 0 || len(j) == 0 {
			return -1
		}
		return sortFunc(i[0], j[0])
	})
	if !partitionsSorted {
		slices.SortFunc(rr.ranges, func(i, j []KeyRange) int {
			// slices.SortFunc needs a strict weak ordering: two empty partitions
			// must compare as equal instead of each being "less" than the other.
			switch {
			case len(i) == 0 && len(j) == 0:
				return 0
			case len(i) == 0:
				return -1
			case len(j) == 0:
				return 1
			}
			return sortFunc(i[0], j[0])
		})
	}
	for i := range rr.ranges {
		if !slices.IsSortedFunc(rr.ranges[i], sortFunc) {
			slices.SortFunc(rr.ranges[i], sortFunc)
		}
	}
}

// ForEachPartitionWithErr runs theFunc for each partition, in order, passing
// the partition's ranges and its row count hints (nil when no hints are
// recorded for that partition). It stops at, and returns, the first error.
func (rr *KeyRanges) ForEachPartitionWithErr(theFunc func([]KeyRange, []int) error) (err error) {
	for i := range rr.ranges {
		var hints []int
		if i < len(rr.rowCountHints) {
			hints = rr.rowCountHints[i]
		}
		if err = theFunc(rr.ranges[i], hints); err != nil {
			return err
		}
	}
	return nil
}

// ForEachPartition runs theFunc for each partition, in order, without error check.
func (rr *KeyRanges) ForEachPartition(theFunc func([]KeyRange)) {
	for i := range rr.ranges {
		theFunc(rr.ranges[i])
	}
}

// PartitionNum returns how many partitions are involved in the ranges.
func (rr *KeyRanges) PartitionNum() int {
	return len(rr.ranges)
}

// IsFullySorted checks whether the ranges are sorted by StartKey inside each
// partition and the partitions themselves are sorted by the StartKey of their
// first range.
//
// NOTE(review): a pair of adjacent partitions of which one is empty compares
// as "less" in either order, so with two or more partitions an empty one most
// likely makes this method report false — confirm before relying on it.
func (rr *KeyRanges) IsFullySorted() bool {
	byStartKey := func(i, j KeyRange) int {
		return bytes.Compare(i.StartKey, j.StartKey)
	}
	sortedByPartition := slices.IsSortedFunc(rr.ranges, func(i, j []KeyRange) int {
		if len(i) == 0 || len(j) == 0 {
			return -1
		}
		return byStartKey(i[0], j[0])
	})
	if !sortedByPartition {
		return false
	}
	for _, ranges := range rr.ranges {
		if !slices.IsSortedFunc(ranges, byStartKey) {
			return false
		}
	}
	return true
}

// TotalRangeNum returns how many ranges there are, summed over all partitions.
func (rr *KeyRanges) TotalRangeNum() int {
	total := 0
	for _, r := range rr.ranges {
		total += len(r)
	}
	return total
}

// Request represents a kv request.
type Request struct {
	// Tp is the request type.
	// StartTs and Data carry no description in this file.
	// NOTE(review): presumably the start timestamp and the encoded payload of the request — confirm.
	Tp      int64
	StartTs uint64
	Data    []byte

	// KeyRanges makes sure that the request is sent first by partition then by region.
	// When the table is small, it's possible that multiple partitions are in the same region.
	KeyRanges *KeyRanges

	// PartitionIDAndRanges is for PartitionTableScan, used by tiflash.
	PartitionIDAndRanges []PartitionIDAndRanges

	// Concurrency is 1 if the request is only sent to a single storage unit when
	// ResponseIterator.Next is called. If concurrency is greater than 1, the request will be
	// sent to multiple storage units concurrently.
	Concurrency int
	// IsolationLevel is the isolation level, default is SI.
	IsolationLevel IsoLevel
	// Priority is the priority of this KV request, its value may be PriorityNormal/PriorityLow/PriorityHigh.
	Priority int
	// MemTracker is used to trace and control memory usage in the co-processor layer.
	MemTracker *memory.Tracker
	// KeepOrder is true if the response should be returned in order.
	KeepOrder bool
	// Desc is true if the request is sent in descending order.
	Desc bool
	// NotFillCache makes this request not touch the LRU cache of the underlying storage.
	NotFillCache bool
	// ReplicaRead is used for reading data from replicas, only follower is supported at this time.
	ReplicaRead ReplicaReadType
	// StoreType represents which type of store this request is sent to.
	StoreType StoreType
	// Cacheable is true if the request can be cached. Currently only deterministic DAG requests can be cached.
	Cacheable bool
	// SchemaVar is for any schema-ful storage to validate schema correctness if necessary.
	// NOTE(review): presumably the schema version (the comment used to call it SchemaVer) — confirm.
	SchemaVar int64
	// BatchCop indicates whether to send a batch coprocessor request to tiflash.
	BatchCop bool
	// TaskID is a unique ID for an execution of a statement.
	TaskID uint64
	// TiDBServerID is the specified TiDB serverID to execute the request. `0` means all TiDB instances.
	TiDBServerID uint64
	// TxnScope is the scope of the txn.
	TxnScope string
	// ReadReplicaScope is the scope of the read replica.
	ReadReplicaScope string
	// IsStaleness indicates whether the request reads staleness data.
	IsStaleness bool
	// ClosestReplicaReadAdjuster is used to adjust a copr request.
	ClosestReplicaReadAdjuster CoprRequestAdjuster
	// MatchStoreLabels indicates the labels the store should be matched against.
	MatchStoreLabels []*metapb.StoreLabel
	// ResourceGroupTagger indicates the kv request task group tagger.
	ResourceGroupTagger *ResourceGroupTagBuilder
	// Paging holds the paging settings of the request; Enable indicates whether
	// the request is a paging request.
	Paging struct {
		Enable bool
		// MinPagingSize is used when paging is enabled.
		MinPagingSize uint64
		// MaxPagingSize is used when paging is enabled.
		MaxPagingSize uint64
	}
	// RequestSource indicates whether the request is an internal request.
	RequestSource util.RequestSource
	// StoreBatchSize indicates the batch size of coprocessor in the same store.
	StoreBatchSize int
	// ResourceGroupName is the name of the bound resource group.
	ResourceGroupName string
	// LimitSize indicates whether the request is scan and limit.
	// NOTE(review): the field is a uint64, so presumably it also carries the limit value — confirm.
	LimitSize uint64
	// StoreBusyThreshold is the threshold for the store to return ServerIsBusy.
	StoreBusyThreshold time.Duration
	// TiKVClientReadTimeout is the timeout of the kv read request.
	TiKVClientReadTimeout uint64
	// MaxExecutionTime is the timeout of the whole query execution.
	MaxExecutionTime uint64

	// RunawayChecker is the resourcegroup.RunawayChecker attached to this request.
	RunawayChecker resourcegroup.RunawayChecker

	// ConnID stores the session connection id.
	ConnID uint64
	// ConnAlias stores the session connection alias.
	ConnAlias string
}

// CoprRequestAdjuster is used to check and adjust a copr request according to specific rules.
// It returns true if the request is changed.
// NOTE(review): the meaning of the int argument is not visible in this file — confirm against the callers.
type CoprRequestAdjuster func(*Request, int) bool

// PartitionIDAndRanges pairs an ID with its key ranges.
// It is used by PartitionTableScan in tiflash.
type PartitionIDAndRanges struct {
	ID        int64
	KeyRanges []KeyRange
}

const (
	// GlobalReplicaScope indicates the default replica scope for tidb to request.
	// It has the same value as oracle.GlobalTxnScope.
	GlobalReplicaScope = oracle.GlobalTxnScope
)

// ResultSubset represents a result subset from a single storage unit.
// TODO: Find a better interface for ResultSubset that can reuse bytes.
type ResultSubset interface {
	// GetData gets the data.
	GetData() []byte
	// GetStartKey gets the start key.
	GetStartKey() Key
	// MemSize returns how many bytes of memory this result uses; it is meant for tracing memory usage.
	MemSize() int64
	// RespTime returns the response time for the request.
	RespTime() time.Duration
}

// Response represents the response returned from the KV layer.
type Response interface {
	// Next returns a resultSubset from a single storage unit.
	// Once the full result set has been returned, it returns a nil resultSubset.
	Next(ctx context.Context) (resultSubset ResultSubset, err error)
	// Close closes the response.
	Close() error
}

// Snapshot defines the interface for the snapshot fetched from the KV store.
type Snapshot interface {
	Retriever
	// BatchGet gets a batch of values from the snapshot.
	BatchGet(ctx context.Context, keys []Key) (map[string][]byte, error)
	// SetOption sets an option with a value. When val is nil, it uses the default
	// value of this option. Only ReplicaRead is supported for snapshot.
	SetOption(opt int, val any)
}

// SnapshotInterceptor is used to intercept a snapshot's read operations.
// Every hook receives the Snapshot being read along with the arguments of the intercepted call.
type SnapshotInterceptor interface {
	// OnGet intercepts the Get operation for Snapshot.
	OnGet(ctx context.Context, snap Snapshot, k Key) ([]byte, error)
	// OnBatchGet intercepts the BatchGet operation for Snapshot.
	OnBatchGet(ctx context.Context, snap Snapshot, keys []Key) (map[string][]byte, error)
	// OnIter intercepts the Iter operation for Snapshot.
	OnIter(snap Snapshot, k Key, upperBound Key) (Iterator, error)
	// OnIterReverse intercepts the IterReverse operation for Snapshot.
	OnIterReverse(snap Snapshot, k Key, lowerBound Key) (Iterator, error)
}

// BatchGetter is the interface that wraps the BatchGet method.
type BatchGetter interface {
	// BatchGet gets a batch of values for the given keys.
	// The result maps string keys to the values.
	BatchGet(ctx context.Context, keys []Key) (map[string][]byte, error)
}

// Driver is the interface that must be implemented by a KV storage.
type Driver interface {
	// Open returns a new Storage.
	// The path is a string in a storage specific format.
	Open(path string) (Storage, error)
}

// Storage defines the interface for storage.
// Isolation should be at least SI (SNAPSHOT ISOLATION).
type Storage interface {
	// Begin begins a global transaction.
	Begin(opts ...tikv.TxnOption) (Transaction, error)
	// GetSnapshot gets a snapshot that is able to read any data whose version is <= ver.
	// If ver is MaxVersion or > current max committed version, the current version is used for this snapshot.
	GetSnapshot(ver Version) Snapshot
	// GetClient gets a client instance.
	GetClient() Client
	// GetMPPClient gets a mpp client instance.
	GetMPPClient() MPPClient
	// Close closes the store.
	Close() error
	// UUID returns a unique ID which represents a Storage.
	UUID() string
	// CurrentVersion returns the current max committed version with the given txnScope (local or global).
	CurrentVersion(txnScope string) (Version, error)
	// GetOracle gets a timestamp oracle client.
	GetOracle() oracle.Oracle
	// SupportDeleteRange reports whether the storage supports delete range.
	SupportDeleteRange() (supported bool)
	// Name gets the name of the storage engine.
	Name() string
	// Describe returns a brief introduction of the storage.
	Describe() string
	// ShowStatus returns the specified status of the storage.
	ShowStatus(ctx context.Context, key string) (any, error)
	// GetMemCache returns the memory manager of the storage.
	GetMemCache() MemManager
	// GetMinSafeTS returns the minimal SafeTS of the storage with the given txnScope.
	GetMinSafeTS(txnScope string) uint64
	// GetLockWaits returns all lock wait information.
	GetLockWaits() ([]*deadlockpb.WaitForEntry, error)
	// GetCodec gets the codec of the storage.
	GetCodec() tikv.Codec
	// SetOption is a thin wrapper around sync.Map.
	SetOption(k any, v any)
	// GetOption is a thin wrapper around sync.Map.
	GetOption(k any) (any, bool)
}

// EtcdBackend is used to judge whether a storage is a real TiKV.
// NOTE(review): the method names suggest it exposes the etcd addresses, the TLS
// configuration and a way to start the GC worker; the semantics are defined by
// the implementation — confirm there.
type EtcdBackend interface {
	EtcdAddrs() ([]string, error)
	TLSConfig() *tls.Config
	StartGCWorker() error
}

// StorageWithPD is used to get the pd clients: it provides a pd.Client
// through GetPDClient and a pdhttp.Client through GetPDHTTPClient.
type StorageWithPD interface {
	GetPDClient() pd.Client
	GetPDHTTPClient() pdhttp.Client
}

// FnKeyCmp is a function that tests a key and returns a bool; it is meant to be used when iterating over keys.
// NOTE(review): no user of this type is visible in this file, so what a true result means is up to the caller — confirm.
type FnKeyCmp func(key Key) bool

// Iterator is the interface for an iterator on the KV store.
type Iterator interface {
	// Valid returns true if the current iterator is valid.
	Valid() bool
	// Key returns the current key.
	Key() Key
	// Value returns the current value.
	Value() []byte
	// Next goes to the next position.
	Next() error
	// Close closes the iterator.
	Close()
}

// SplittableStore is the kv store which supports splitting regions.
// NOTE(review): judging by the signatures, SplitRegions splits at the given keys (optionally
// scattering) and returns region IDs, WaitScatterRegionFinish waits for a region's scatter
// to finish and CheckRegionInScattering reports whether a region is being scattered; the
// exact semantics are defined by the implementation — confirm there.
type SplittableStore interface {
	SplitRegions(ctx context.Context, splitKey [][]byte, scatter bool, tableID *int64) (regionID []uint64, err error)
	WaitScatterRegionFinish(ctx context.Context, regionID uint64, backOff int) error
	CheckRegionInScattering(regionID uint64) (bool, error)
}

// Priority values for transaction priority.
// They are untyped integer constants: PriorityNormal is 0, PriorityLow is 1
// and PriorityHigh is 2.
const (
	PriorityNormal = iota
	PriorityLow
	PriorityHigh
)

// IsoLevel is the transaction's isolation level.
type IsoLevel int

const (
	// SI stands for 'snapshot isolation'. It is the zero value of IsoLevel.
	SI IsoLevel = iota
	// RC stands for 'read committed'.
	RC
	// RCCheckTS stands for 'read consistency read with ts check'.
	RCCheckTS
)

// ResourceGroupTagBuilder is used to build the resource group tag for a kv request.
// The digests are optional; the ones that are set end up in the encoded tag.
type ResourceGroupTagBuilder struct {
	sqlDigest  *parser.Digest
	planDigest *parser.Digest
	accessKey  []byte
}

// NewResourceGroupTagBuilder creates a new, empty ResourceGroupTagBuilder.
func NewResourceGroupTagBuilder() *ResourceGroupTagBuilder {
	return new(ResourceGroupTagBuilder)
}

// SetSQLDigest sets the sql digest for the request and returns the builder
// itself so that calls can be chained.
func (b *ResourceGroupTagBuilder) SetSQLDigest(digest *parser.Digest) *ResourceGroupTagBuilder {
	b.sqlDigest = digest
	return b
}

// SetPlanDigest sets the plan digest for the request and returns the builder
// itself so that calls can be chained.
func (b *ResourceGroupTagBuilder) SetPlanDigest(digest *parser.Digest) *ResourceGroupTagBuilder {
	b.planDigest = digest
	return b
}

// BuildProtoTagger returns a tikvrpc.ResourceGroupTagger that tags every
// request it is given by calling Build on this builder.
func (b *ResourceGroupTagBuilder) BuildProtoTagger() tikvrpc.ResourceGroupTagger {
	return b.Build
}

// EncodeTagWithKey encodes the resource group tag and returns the encoded bytes.
// The sql and plan digests are included when they are set. When key is not
// empty, the table id decoded from it and its resource group label are
// included as well. It returns nil if the tag cannot be marshaled.
func (b *ResourceGroupTagBuilder) EncodeTagWithKey(key []byte) []byte {
	tag := new(tipb.ResourceGroupTag)
	if digest := b.sqlDigest; digest != nil {
		tag.SqlDigest = digest.Bytes()
	}
	if digest := b.planDigest; digest != nil {
		tag.PlanDigest = digest.Bytes()
	}
	if len(key) != 0 {
		tag.TableId = decodeTableID(key)
		label := resourcegrouptag.GetResourceGroupLabelByKey(key)
		tag.Label = &label
	}
	encoded, err := tag.Marshal()
	if err != nil {
		return nil
	}
	return encoded
}

// Build builds the resource group tag for the request, using the first key of
// the request, and stores it in req.ResourceGroupTag. A nil request is ignored,
// and the request is left untouched when the encoded tag is empty.
func (b *ResourceGroupTagBuilder) Build(req *tikvrpc.Request) {
	if req == nil {
		return
	}
	firstKey := resourcegrouptag.GetFirstKeyFromRequest(req)
	encoded := b.EncodeTagWithKey(firstKey)
	if len(encoded) == 0 {
		return
	}
	req.ResourceGroupTag = encoded
}

// DecodeTableIDFunc is used to decode the table id from a key.
// It is nil until it is assigned; while it is nil, decodeTableID yields 0.
var DecodeTableIDFunc func(Key) int64

// decodeTableID decodes the table id from key through DecodeTableIDFunc and
// returns 0 when no decoder has been set. The indirection exists to avoid an
// import cycle: the kv package must not import tablecodec.
func decodeTableID(key Key) int64 {
	if DecodeTableIDFunc == nil {
		return 0
	}
	return DecodeTableIDFunc(key)
}