From 35782278d6ae49f10b20f48a3f056e78d1533af3 Mon Sep 17 00:00:00 2001 From: you06 Date: Wed, 18 Jan 2023 12:07:30 +0800 Subject: [PATCH 01/13] add copr concurrency status Signed-off-by: you06 --- distsql/select_result.go | 92 ++++++++--- sessionctx/variable/tidb_vars.go | 2 +- store/copr/coprocessor.go | 263 +++++++++++++++++++------------ 3 files changed, 239 insertions(+), 118 deletions(-) diff --git a/distsql/select_result.go b/distsql/select_result.go index 6d1f6308e4120..28670f359359b 100644 --- a/distsql/select_result.go +++ b/distsql/select_result.go @@ -157,7 +157,7 @@ func (r *selectResult) fetchResp(ctx context.Context) error { if r.stats != nil { // Ignore internal sql. if !r.ctx.GetSessionVars().InRestrictedSQL && len(r.stats.copRespTime) > 0 { - ratio := float64(r.stats.CoprCacheHitNum) / float64(len(r.stats.copRespTime)) + ratio := r.stats.calcCacheHit() if ratio >= 1 { telemetry.CurrentCoprCacheHitRatioGTE100Count.Inc() } @@ -364,6 +364,11 @@ func (r *selectResult) updateCopRuntimeStats(ctx context.Context, copStats *copr rpcStat: tikv.NewRegionRequestRuntimeStats(), distSQLConcurrency: r.distSQLConcurrency, } + if ci, ok := r.resp.(copr.CopInfo); ok { + conc, extraConc := ci.GetConcurrency() + r.stats.distSQLConcurrency = conc + r.stats.extraConcurrency = extraConc + } } r.stats.mergeCopRuntimeStats(copStats, respTime) @@ -455,26 +460,36 @@ func (r *selectResult) Close() error { r.memConsume(-respSize) } if r.stats != nil { - defer r.ctx.GetSessionVars().StmtCtx.RuntimeStatsColl.RegisterStats(r.rootPlanID, r.stats) + defer func() { + if ci, ok := r.resp.(copr.CopInfo); ok { + r.stats.storeBatchedNum, r.stats.storeBatchedFallbackNum = ci.GetStoreBatchInfo() + r.stats.buildTaskDuration = ci.GetBuildTaskElapsed() + } + r.ctx.GetSessionVars().StmtCtx.RuntimeStatsColl.RegisterStats(r.rootPlanID, r.stats) + }() } return r.resp.Close() } -// CopRuntimeStats is a interface uses to check whether the result has cop runtime stats. +// CopRuntimeStats is an interface uses to check whether the result has cop runtime stats. type CopRuntimeStats interface { // GetCopRuntimeStats gets the cop runtime stats information. 
GetCopRuntimeStats() *copr.CopRuntimeStats } type selectResultRuntimeStats struct { - copRespTime []time.Duration - procKeys []int64 - backoffSleep map[string]time.Duration - totalProcessTime time.Duration - totalWaitTime time.Duration - rpcStat tikv.RegionRequestRuntimeStats - distSQLConcurrency int - CoprCacheHitNum int64 + copRespTime []time.Duration + procKeys []int64 + backoffSleep map[string]time.Duration + totalProcessTime time.Duration + totalWaitTime time.Duration + rpcStat tikv.RegionRequestRuntimeStats + distSQLConcurrency int + extraConcurrency int + CoprCacheHitNum int64 + storeBatchedNum uint64 + storeBatchedFallbackNum uint64 + buildTaskDuration time.Duration } func (s *selectResultRuntimeStats) mergeCopRuntimeStats(copStats *copr.CopRuntimeStats, respTime time.Duration) { @@ -495,12 +510,16 @@ func (s *selectResultRuntimeStats) mergeCopRuntimeStats(copStats *copr.CopRuntim func (s *selectResultRuntimeStats) Clone() execdetails.RuntimeStats { newRs := selectResultRuntimeStats{ - copRespTime: make([]time.Duration, 0, len(s.copRespTime)), - procKeys: make([]int64, 0, len(s.procKeys)), - backoffSleep: make(map[string]time.Duration, len(s.backoffSleep)), - rpcStat: tikv.NewRegionRequestRuntimeStats(), - distSQLConcurrency: s.distSQLConcurrency, - CoprCacheHitNum: s.CoprCacheHitNum, + copRespTime: make([]time.Duration, 0, len(s.copRespTime)), + procKeys: make([]int64, 0, len(s.procKeys)), + backoffSleep: make(map[string]time.Duration, len(s.backoffSleep)), + rpcStat: tikv.NewRegionRequestRuntimeStats(), + distSQLConcurrency: s.distSQLConcurrency, + extraConcurrency: s.extraConcurrency, + CoprCacheHitNum: s.CoprCacheHitNum, + storeBatchedNum: s.storeBatchedNum, + storeBatchedFallbackNum: s.storeBatchedFallbackNum, + buildTaskDuration: s.buildTaskDuration, } newRs.copRespTime = append(newRs.copRespTime, s.copRespTime...) newRs.procKeys = append(newRs.procKeys, s.procKeys...) 
@@ -528,6 +547,15 @@ func (s *selectResultRuntimeStats) Merge(rs execdetails.RuntimeStats) { s.totalWaitTime += other.totalWaitTime s.rpcStat.Merge(other.rpcStat) s.CoprCacheHitNum += other.CoprCacheHitNum + if other.distSQLConcurrency > s.distSQLConcurrency { + s.distSQLConcurrency = other.distSQLConcurrency + } + if other.extraConcurrency > s.extraConcurrency { + s.extraConcurrency = other.extraConcurrency + } + s.storeBatchedNum += other.storeBatchedNum + s.storeBatchedFallbackNum += other.storeBatchedFallbackNum + s.buildTaskDuration += other.buildTaskDuration } func (s *selectResultRuntimeStats) String() string { @@ -579,14 +607,30 @@ func (s *selectResultRuntimeStats) String() string { } if config.GetGlobalConfig().TiKVClient.CoprCache.CapacityMB > 0 { buf.WriteString(fmt.Sprintf(", copr_cache_hit_ratio: %v", - strconv.FormatFloat(float64(s.CoprCacheHitNum)/float64(len(s.copRespTime)), 'f', 2, 64))) + strconv.FormatFloat(s.calcCacheHit(), 'f', 2, 64))) } else { buf.WriteString(", copr_cache: disabled") } + if s.buildTaskDuration > 0 { + buf.WriteString(", build_task_duration: ") + buf.WriteString(execdetails.FormatDuration(s.buildTaskDuration)) + } if s.distSQLConcurrency > 0 { buf.WriteString(", distsql_concurrency: ") buf.WriteString(strconv.FormatInt(int64(s.distSQLConcurrency), 10)) } + if s.extraConcurrency > 0 { + buf.WriteString(", extra_concurrency: ") + buf.WriteString(strconv.FormatInt(int64(s.extraConcurrency), 10)) + } + if s.storeBatchedNum > 0 { + buf.WriteString(", store_batch_num: ") + buf.WriteString(strconv.FormatInt(int64(s.storeBatchedNum), 10)) + } + if s.storeBatchedFallbackNum > 0 { + buf.WriteString(", store_batch_fallback_num: ") + buf.WriteString(strconv.FormatInt(int64(s.storeBatchedFallbackNum), 10)) + } buf.WriteString("}") } @@ -615,3 +659,15 @@ func (s *selectResultRuntimeStats) String() string { func (*selectResultRuntimeStats) Tp() int { return execdetails.TpSelectResultRuntimeStats } + +func (s *selectResultRuntimeStats) calcCacheHit() float64 { + hit := s.CoprCacheHitNum + tot := len(s.copRespTime) + if s.storeBatchedNum > 0 { + tot += int(s.storeBatchedNum) + } + if tot == 0 { + return 0 + } + return float64(hit) / float64(tot) +} diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index 2149e2387f236..3090ab630f3b7 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -1168,7 +1168,7 @@ const ( DefTiDBTTLDeleteRateLimit = 0 DefPasswordReuseHistory = 0 DefPasswordReuseTime = 0 - DefTiDBStoreBatchSize = 0 + DefTiDBStoreBatchSize = 4 DefTiDBHistoricalStatsDuration = 7 * 24 * time.Hour DefTiDBEnableHistoricalStatsForCapture = false DefTiDBTTLJobScheduleWindowStartTime = "00:00 +0000" diff --git a/store/copr/coprocessor.go b/store/copr/coprocessor.go index eca0b8037daa6..542ac89110992 100644 --- a/store/copr/coprocessor.go +++ b/store/copr/coprocessor.go @@ -145,11 +145,13 @@ func (c *CopClient) BuildCopIterator(ctx context.Context, req *kv.Request, vars err error ) tryRowHint := optRowHint(req) + elapsed := time.Duration(0) buildOpt := &buildCopTaskOpt{ req: req, cache: c.store.GetRegionCache(), eventCb: eventCb, respChan: req.KeepOrder, + elapsed: &elapsed, } buildTaskFunc := func(ranges []kv.KeyRange, hints []int) error { keyRanges := NewKeyRanges(ranges) @@ -185,14 +187,15 @@ func (c *CopClient) BuildCopIterator(ctx context.Context, req *kv.Request, vars return nil, copErrorResponse{err} } it := &copIterator{ - store: c.store, - req: req, - concurrency: req.Concurrency, - finishCh: 
make(chan struct{}), - vars: vars, - memTracker: req.MemTracker, - replicaReadSeed: c.replicaReadSeed, - rpcCancel: tikv.NewRPCanceller(), + store: c.store, + req: req, + concurrency: req.Concurrency, + finishCh: make(chan struct{}), + vars: vars, + memTracker: req.MemTracker, + replicaReadSeed: c.replicaReadSeed, + rpcCancel: tikv.NewRPCanceller(), + buildTaskElapsed: *buildOpt.elapsed, } it.tasks = tasks if it.concurrency > len(tasks) { @@ -283,13 +286,14 @@ func (r *copTask) ToPBBatchTasks() []*coprocessor.StoreBatchTask { } pbTasks := make([]*coprocessor.StoreBatchTask, 0, len(r.batchTaskList)) for _, task := range r.batchTaskList { - pbTasks = append(pbTasks, &coprocessor.StoreBatchTask{ + storeBatchTask := &coprocessor.StoreBatchTask{ RegionId: task.region.GetRegionId(), RegionEpoch: task.region.GetRegionEpoch(), Peer: task.peer, Ranges: task.region.GetRanges(), TaskId: task.task.taskID, - }) + } + pbTasks = append(pbTasks, storeBatchTask) } return pbTasks } @@ -303,6 +307,7 @@ type buildCopTaskOpt struct { eventCb trxevents.EventCallback respChan bool rowHints []int + elapsed *time.Duration } func buildCopTasks(bo *Backoffer, ranges *KeyRanges, opt *buildCopTaskOpt) ([]*copTask, error) { @@ -425,6 +430,9 @@ func buildCopTasks(bo *Backoffer, ranges *KeyRanges, opt *buildCopTaskOpt) ([]*c defer span1.Finish() } } + if opt.elapsed != nil { + *opt.elapsed = *opt.elapsed + elapsed + } metrics.TxnRegionsNumHistogramWithCoprocessor.Observe(float64(builder.regionNum())) return tasks, nil } @@ -573,7 +581,9 @@ func isSmallTask(task *copTask) bool { // strictly, only RowCountHint == -1 stands for unknown task rows, // but when RowCountHint == 0, it may be caused by initialized value, // to avoid the future bugs, let the tasks with RowCountHint == 0 be non-small tasks. - return task.RowCountHint > 0 && task.RowCountHint <= CopSmallTaskRow + return task.RowCountHint > 0 && + (len(task.batchTaskList) == 0 && task.RowCountHint <= CopSmallTaskRow) || + (len(task.batchTaskList) > 0 && task.RowCountHint <= 2*CopSmallTaskRow) } // smallTaskConcurrency counts the small tasks of tasks, @@ -594,6 +604,16 @@ func smallTaskConcurrency(tasks []*copTask) (int, int) { return res, int(extraConc) } +// CopInfo is used to expose functions of copIterator. +type CopInfo interface { + // GetConcurrency returns the concurrency and small task concurrency. + GetConcurrency() (int, int) + // GetStoreBatchInfo returns the batched and fallback num. + GetStoreBatchInfo() (uint64, uint64) + // GetBuildTaskElapsed returns the duration of building task. + GetBuildTaskElapsed() time.Duration +} + type copIterator struct { store *Store req *kv.Request @@ -631,6 +651,10 @@ type copIterator struct { actionOnExceed *rateLimitAction pagingTaskIdx uint32 + + buildTaskElapsed time.Duration + storeBatchedNum atomic.Uint64 + storeBatchedFallbackNum atomic.Uint64 } // copIteratorWorker receives tasks from copIteratorTaskSender, handles tasks and sends the copResponse to respChan. @@ -650,6 +674,9 @@ type copIteratorWorker struct { enableCollectExecutionInfo bool pagingTaskIdx *uint32 + + storeBatchedNum *atomic.Uint64 + storeBatchedFallbackNum *atomic.Uint64 } // copIteratorTaskSender sends tasks to taskCh then wait for the workers to exit. 
@@ -782,6 +809,8 @@ func (it *copIterator) open(ctx context.Context, enabledRateLimitAction, enableC replicaReadSeed: it.replicaReadSeed, enableCollectExecutionInfo: enableCollectExecutionInfo, pagingTaskIdx: &it.pagingTaskIdx, + storeBatchedNum: &it.storeBatchedNum, + storeBatchedFallbackNum: &it.storeBatchedFallbackNum, } go worker.run(ctx) } @@ -880,6 +909,16 @@ func (it *copIterator) GetConcurrency() (int, int) { return it.concurrency, it.smallTaskConcurrency } +// GetStoreBatchInfo returns the batched and fallback num. +func (it *copIterator) GetStoreBatchInfo() (uint64, uint64) { + return it.storeBatchedNum.Load(), it.storeBatchedFallbackNum.Load() +} + +// GetBuildTaskElapsed returns the duration of building task. +func (it *copIterator) GetBuildTaskElapsed() time.Duration { + return it.buildTaskElapsed +} + // GetSendRate returns the rate-limit object. func (it *copIterator) GetSendRate() *util.RateLimit { return it.sendRate @@ -1076,30 +1115,7 @@ func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask, ch Tasks: task.ToPBBatchTasks(), } - var cacheKey []byte - var cacheValue *coprCacheValue - - // If there are many ranges, it is very likely to be a TableLookupRequest. They are not worth to cache since - // computing is not the main cost. Ignore such requests directly to avoid slowly building the cache key. - if task.cmdType == tikvrpc.CmdCop && worker.store.coprCache != nil && worker.req.Cacheable && worker.store.coprCache.CheckRequestAdmission(len(copReq.Ranges)) { - cKey, err := coprCacheBuildKey(&copReq) - if err == nil { - cacheKey = cKey - cValue := worker.store.coprCache.Get(cKey) - copReq.IsCacheEnabled = true - - if cValue != nil && cValue.RegionID == task.region.GetID() && cValue.TimeStamp <= worker.req.StartTs { - // Append cache version to the request to skip Coprocessor computation if possible - // when request result is cached - copReq.CacheIfMatchVersion = cValue.RegionDataVersion - cacheValue = cValue - } else { - copReq.CacheIfMatchVersion = 0 - } - } else { - logutil.BgLogger().Warn("Failed to build copr cache key", zap.Error(err)) - } - } + cacheKey, cacheValue := worker.buildCacheKey(task, &copReq, false) req := tikvrpc.NewReplicaReadRequest(task.cmdType, &copReq, options.GetTiKVReplicaReadType(worker.req.ReplicaRead), &worker.replicaReadSeed, kvrpcpb.Context{ IsolationLevel: isolationLevelToPB(worker.req.IsolationLevel), @@ -1269,13 +1285,13 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R if err != nil { return remains, err } - return worker.handleBatchRemainsOnErr(bo, remains, resp.pbResp.GetBatchResponses(), task, ch) + return worker.handleBatchRemainsOnErr(bo, rpcCtx, remains, resp.pbResp.GetBatchResponses(), task, ch) } if lockErr := resp.pbResp.GetLocked(); lockErr != nil { if err := worker.handleLockErr(bo, lockErr, task); err != nil { return nil, err } - return worker.handleBatchRemainsOnErr(bo, []*copTask{task}, resp.pbResp.GetBatchResponses(), task, ch) + return worker.handleBatchRemainsOnErr(bo, rpcCtx, []*copTask{task}, resp.pbResp.GetBatchResponses(), task, ch) } if otherErr := resp.pbResp.GetOtherError(); otherErr != "" { err := errors.Errorf("other error: %s", otherErr) @@ -1306,74 +1322,23 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R } worker.handleCollectExecutionInfo(bo, rpcCtx, resp) resp.respTime = costTime - if resp.pbResp.IsCacheHit { - coprCacheCounterHit.Add(1) - if cacheValue == nil { - return nil, errors.New("Internal error: received illegal 
TiKV response") - } - // Cache hit and is valid: use cached data as response data and we don't update the cache. - data := make([]byte, len(cacheValue.Data)) - copy(data, cacheValue.Data) - resp.pbResp.Data = data - if worker.req.Paging.Enable { - var start, end []byte - if cacheValue.PageStart != nil { - start = make([]byte, len(cacheValue.PageStart)) - copy(start, cacheValue.PageStart) - } - if cacheValue.PageEnd != nil { - end = make([]byte, len(cacheValue.PageEnd)) - copy(end, cacheValue.PageEnd) - } - // When paging protocol is used, the response key range is part of the cache data. - if start != nil || end != nil { - resp.pbResp.Range = &coprocessor.KeyRange{ - Start: start, - End: end, - } - } else { - resp.pbResp.Range = nil - } - } - resp.detail.CoprCacheHit = true - } else { - coprCacheCounterMiss.Add(1) - // Cache not hit or cache hit but not valid: update the cache if the response can be cached. - if cacheKey != nil && resp.pbResp.CanBeCached && resp.pbResp.CacheLastVersion > 0 { - if resp.detail != nil { - if worker.store.coprCache.CheckResponseAdmission(resp.pbResp.Data.Size(), resp.detail.TimeDetail.ProcessTime, task.pagingTaskIdx) { - data := make([]byte, len(resp.pbResp.Data)) - copy(data, resp.pbResp.Data) - - newCacheValue := coprCacheValue{ - Data: data, - TimeStamp: worker.req.StartTs, - RegionID: task.region.GetID(), - RegionDataVersion: resp.pbResp.CacheLastVersion, - } - // When paging protocol is used, the response key range is part of the cache data. - if r := resp.pbResp.GetRange(); r != nil { - newCacheValue.PageStart = append([]byte{}, r.GetStart()...) - newCacheValue.PageEnd = append([]byte{}, r.GetEnd()...) - } - worker.store.coprCache.Set(cacheKey, &newCacheValue) - } - } - } + if err := worker.handleCopCache(task, resp, cacheKey, cacheValue); err != nil { + return nil, err } + batchResps := resp.pbResp.BatchResponses worker.sendToRespCh(resp, ch, true) - return worker.handleBatchCopResponse(bo, batchResps, task.batchTaskList, ch) + return worker.handleBatchCopResponse(bo, rpcCtx, batchResps, task.batchTaskList, ch) } -func (worker *copIteratorWorker) handleBatchRemainsOnErr(bo *Backoffer, remains []*copTask, batchResp []*coprocessor.StoreBatchTaskResponse, task *copTask, ch chan<- *copResponse) ([]*copTask, error) { +func (worker *copIteratorWorker) handleBatchRemainsOnErr(bo *Backoffer, rpcCtx *tikv.RPCContext, remains []*copTask, batchResp []*coprocessor.StoreBatchTaskResponse, task *copTask, ch chan<- *copResponse) ([]*copTask, error) { if len(task.batchTaskList) == 0 { return remains, nil } batchedTasks := task.batchTaskList task.batchTaskList = nil - batchedRemains, err := worker.handleBatchCopResponse(bo, batchResp, batchedTasks, ch) + batchedRemains, err := worker.handleBatchCopResponse(bo, rpcCtx, batchResp, batchedTasks, ch) if err != nil { return nil, err } @@ -1382,11 +1347,19 @@ func (worker *copIteratorWorker) handleBatchRemainsOnErr(bo *Backoffer, remains // handle the batched cop response. // tasks will be changed, so the input tasks should not be used after calling this function. 
-func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, batchResps []*coprocessor.StoreBatchTaskResponse, tasks map[uint64]*batchedCopTask, ch chan<- *copResponse) ([]*copTask, error) { +func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *tikv.RPCContext, batchResps []*coprocessor.StoreBatchTaskResponse, + tasks map[uint64]*batchedCopTask, ch chan<- *copResponse) (remainTasks []*copTask, err error) { if len(tasks) == 0 { return nil, nil } - var remainTasks []*copTask + batchedNum := len(tasks) + defer func() { + if err != nil { + return + } + worker.storeBatchedNum.Add(uint64(batchedNum - len(remainTasks))) + worker.storeBatchedFallbackNum.Add(uint64(len(remainTasks))) + }() appendRemainTasks := func(tasks ...*copTask) { if remainTasks == nil { // allocate size fo remain length @@ -1394,6 +1367,13 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, batchResp } remainTasks = append(remainTasks, tasks...) } + // need Addr for recording details. + var dummyRPCCtx *tikv.RPCContext + if rpcCtx != nil { + dummyRPCCtx = &tikv.RPCContext{ + Addr: rpcCtx.Addr, + } + } for _, batchResp := range batchResps { taskID := batchResp.GetTaskId() batchedTask, ok := tasks[taskID] @@ -1403,7 +1383,8 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, batchResp delete(tasks, taskID) resp := &copResponse{ pbResp: &coprocessor.Response{ - Data: batchResp.Data, + Data: batchResp.Data, + ExecDetailsV2: batchResp.ExecDetailsV2, }, } task := batchedTask.task @@ -1458,8 +1439,8 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, batchResp } return nil, errors.Trace(err) } - // TODO: check OOM - worker.sendToRespCh(resp, ch, false) + worker.handleCollectExecutionInfo(bo, dummyRPCCtx, resp) + worker.sendToRespCh(resp, ch, true) } for _, t := range tasks { task := t.task @@ -1515,6 +1496,90 @@ func (worker *copIteratorWorker) handleLockErr(bo *Backoffer, lockErr *kvrpcpb.L return nil } +func (worker *copIteratorWorker) buildCacheKey(task *copTask, copReq *coprocessor.Request, force bool) (cacheKey []byte, cacheValue *coprCacheValue) { + // If there are many ranges, it is very likely to be a TableLookupRequest. They are not worth to cache since + // computing is not the main cost. Ignore requests with many ranges directly to avoid slowly building the cache key. + if force || task.cmdType == tikvrpc.CmdCop && worker.store.coprCache != nil && worker.req.Cacheable && worker.store.coprCache.CheckRequestAdmission(len(copReq.Ranges)) { + cKey, err := coprCacheBuildKey(copReq) + if err == nil { + cacheKey = cKey + cValue := worker.store.coprCache.Get(cKey) + copReq.IsCacheEnabled = true + + if cValue != nil && cValue.RegionID == task.region.GetID() && cValue.TimeStamp <= worker.req.StartTs { + // Append cache version to the request to skip Coprocessor computation if possible + // when request result is cached + copReq.CacheIfMatchVersion = cValue.RegionDataVersion + cacheValue = cValue + } else { + copReq.CacheIfMatchVersion = 0 + } + } else { + logutil.BgLogger().Warn("Failed to build copr cache key", zap.Error(err)) + } + } + return +} + +func (worker *copIteratorWorker) handleCopCache(task *copTask, resp *copResponse, cacheKey []byte, cacheValue *coprCacheValue) error { + if resp.pbResp.IsCacheHit { + if cacheValue == nil { + return errors.New("Internal error: received illegal TiKV response") + } + coprCacheCounterHit.Add(1) + // Cache hit and is valid: use cached data as response data and we don't update the cache. 
+ data := make([]byte, len(cacheValue.Data)) + copy(data, cacheValue.Data) + resp.pbResp.Data = data + if worker.req.Paging.Enable { + var start, end []byte + if cacheValue.PageStart != nil { + start = make([]byte, len(cacheValue.PageStart)) + copy(start, cacheValue.PageStart) + } + if cacheValue.PageEnd != nil { + end = make([]byte, len(cacheValue.PageEnd)) + copy(end, cacheValue.PageEnd) + } + // When paging protocol is used, the response key range is part of the cache data. + if start != nil || end != nil { + resp.pbResp.Range = &coprocessor.KeyRange{ + Start: start, + End: end, + } + } else { + resp.pbResp.Range = nil + } + } + resp.detail.CoprCacheHit = true + return nil + } + coprCacheCounterMiss.Add(1) + // Cache not hit or cache hit but not valid: update the cache if the response can be cached. + if cacheKey != nil && resp.pbResp.CanBeCached && resp.pbResp.CacheLastVersion > 0 { + if resp.detail != nil { + if worker.store.coprCache.CheckResponseAdmission(resp.pbResp.Data.Size(), resp.detail.TimeDetail.ProcessTime, task.pagingTaskIdx) { + data := make([]byte, len(resp.pbResp.Data)) + copy(data, resp.pbResp.Data) + + newCacheValue := coprCacheValue{ + Data: data, + TimeStamp: worker.req.StartTs, + RegionID: task.region.GetID(), + RegionDataVersion: resp.pbResp.CacheLastVersion, + } + // When paging protocol is used, the response key range is part of the cache data. + if r := resp.pbResp.GetRange(); r != nil { + newCacheValue.PageStart = append([]byte{}, r.GetStart()...) + newCacheValue.PageEnd = append([]byte{}, r.GetEnd()...) + } + worker.store.coprCache.Set(cacheKey, &newCacheValue) + } + } + } + return nil +} + func (worker *copIteratorWorker) getLockResolverDetails() *util.ResolveLockDetail { if !worker.enableCollectExecutionInfo { return nil From d6399ea7f93696e98baba1416cb4ad03f855cd87 Mon Sep 17 00:00:00 2001 From: you06 Date: Mon, 6 Feb 2023 12:53:28 +0800 Subject: [PATCH 02/13] record next wait time for IndexLookUp Signed-off-by: you06 --- executor/distsql.go | 29 ++++++++++++++++++++++++++++- executor/distsql_test.go | 23 +++++++++++++++-------- store/copr/coprocessor.go | 8 ++++---- 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/executor/distsql.go b/executor/distsql.go index 3b9a6a7d4b288..087e20cb7ed28 100644 --- a/executor/distsql.go +++ b/executor/distsql.go @@ -76,7 +76,9 @@ type lookupTableTask struct { idxRows *chunk.Chunk cursor int - doneCh chan error + // after the cop task is built, buildDone will be set to the current instant, for Next wait duration statistic. + buildDone time.Time + doneCh chan error // indexOrder map is used to save the original index order for the handles. // Without this map, the original index order might be lost. @@ -790,13 +792,21 @@ func (e *IndexLookUpExecutor) getResultTask() (*lookupTableTask, error) { if e.resultCurr != nil && e.resultCurr.cursor < len(e.resultCurr.rows) { return e.resultCurr, nil } + start := time.Now() task, ok := <-e.resultCh if !ok { return nil, nil } + indexFetchedInstant := time.Now() if err := <-task.doneCh; err != nil { return nil, err } + e.stats.NextWaitIndexScan += indexFetchedInstant.Sub(start) + if task.buildDone.After(indexFetchedInstant) { + e.stats.NextWaitTableLookUpBuild += task.buildDone.Sub(indexFetchedInstant) + indexFetchedInstant = task.buildDone + } + e.stats.NextWaitTableLookUpResp += time.Since(indexFetchedInstant) // Release the memory usage of last task before we handle a new task. 
if e.resultCurr != nil { @@ -1119,6 +1129,10 @@ type IndexLookUpRunTimeStats struct { TableRowScan int64 TableTaskNum int64 Concurrency int + // record the next wait details. + NextWaitIndexScan time.Duration + NextWaitTableLookUpBuild time.Duration + NextWaitTableLookUpResp time.Duration } func (e *IndexLookUpRunTimeStats) String() string { @@ -1142,6 +1156,15 @@ func (e *IndexLookUpRunTimeStats) String() string { } buf.WriteString(fmt.Sprintf(" table_task: {total_time: %v, num: %d, concurrency: %d}", execdetails.FormatDuration(time.Duration(tableScan)), tableTaskNum, concurrency)) } + if e.NextWaitIndexScan > 0 || e.NextWaitTableLookUpBuild > 0 || e.NextWaitTableLookUpResp > 0 { + if buf.Len() > 0 { + buf.WriteByte(',') + fmt.Fprintf(&buf, " next: {wait_index: %s, wait_table_lookup_build: %s, wait_table_lookup_resp: %s}", + execdetails.FormatDuration(e.NextWaitIndexScan), + execdetails.FormatDuration(e.NextWaitTableLookUpBuild), + execdetails.FormatDuration(e.NextWaitTableLookUpResp)) + } + } return buf.String() } @@ -1162,6 +1185,9 @@ func (e *IndexLookUpRunTimeStats) Merge(other execdetails.RuntimeStats) { e.TaskWait += tmp.TaskWait e.TableRowScan += tmp.TableRowScan e.TableTaskNum += tmp.TableTaskNum + e.NextWaitIndexScan += tmp.NextWaitIndexScan + e.NextWaitTableLookUpBuild += tmp.NextWaitTableLookUpBuild + e.NextWaitTableLookUpResp += tmp.NextWaitTableLookUpResp } // Tp implements the RuntimeStats interface. @@ -1300,6 +1326,7 @@ func getDatumRow(r *chunk.Row, fields []*types.FieldType) []types.Datum { // Then we hold the returning rows and finish this task. func (w *tableWorker) executeTask(ctx context.Context, task *lookupTableTask) error { tableReader, err := w.idxLookup.buildTableReader(ctx, task) + task.buildDone = time.Now() if err != nil { logutil.Logger(ctx).Error("build table reader failed", zap.Error(err)) return err diff --git a/executor/distsql_test.go b/executor/distsql_test.go index 65889a10d0377..50c4a311a1eb9 100644 --- a/executor/distsql_test.go +++ b/executor/distsql_test.go @@ -358,17 +358,24 @@ func TestPartitionTableRandomlyIndexLookUpReader(t *testing.T) { func TestIndexLookUpStats(t *testing.T) { stats := &executor.IndexLookUpRunTimeStats{ - FetchHandleTotal: int64(5 * time.Second), - FetchHandle: int64(2 * time.Second), - TaskWait: int64(2 * time.Second), - TableRowScan: int64(2 * time.Second), - TableTaskNum: 2, - Concurrency: 1, + FetchHandleTotal: int64(5 * time.Second), + FetchHandle: int64(2 * time.Second), + TaskWait: int64(2 * time.Second), + TableRowScan: int64(2 * time.Second), + TableTaskNum: 2, + Concurrency: 1, + NextWaitIndexScan: time.Second, + NextWaitTableLookUpBuild: 2 * time.Second, + NextWaitTableLookUpResp: 3 * time.Second, } - require.Equal(t, "index_task: {total_time: 5s, fetch_handle: 2s, build: 1s, wait: 2s}, table_task: {total_time: 2s, num: 2, concurrency: 1}", stats.String()) + require.Equal(t, "index_task: {total_time: 5s, fetch_handle: 2s, build: 1s, wait: 2s}"+ + ", table_task: {total_time: 2s, num: 2, concurrency: 1}"+ + ", next: {wait_index: 1s, wait_table_lookup_build: 2s, wait_table_lookup_resp: 3s}", stats.String()) require.Equal(t, stats.Clone().String(), stats.String()) stats.Merge(stats.Clone()) - require.Equal(t, "index_task: {total_time: 10s, fetch_handle: 4s, build: 2s, wait: 4s}, table_task: {total_time: 4s, num: 4, concurrency: 1}", stats.String()) + require.Equal(t, "index_task: {total_time: 10s, fetch_handle: 4s, build: 2s, wait: 4s}"+ + ", table_task: {total_time: 4s, num: 4, concurrency: 1}"+ + ", next: 
{wait_index: 2s, wait_table_lookup_build: 4s, wait_table_lookup_resp: 6s}", stats.String()) } func TestIndexLookUpGetResultChunk(t *testing.T) { diff --git a/store/copr/coprocessor.go b/store/copr/coprocessor.go index 542ac89110992..cbcc4b65894ab 100644 --- a/store/copr/coprocessor.go +++ b/store/copr/coprocessor.go @@ -345,7 +345,7 @@ func buildCopTasks(bo *Backoffer, ranges *KeyRanges, opt *buildCopTaskOpt) ([]*c } var builder taskBuilder - if req.StoreBatchSize > 0 { + if req.StoreBatchSize > 0 && hints != nil { builder = newBatchTaskBuilder(bo, req, cache) } else { builder = newLegacyTaskBuilder(len(locs)) @@ -1115,7 +1115,7 @@ func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask, ch Tasks: task.ToPBBatchTasks(), } - cacheKey, cacheValue := worker.buildCacheKey(task, &copReq, false) + cacheKey, cacheValue := worker.buildCacheKey(task, &copReq) req := tikvrpc.NewReplicaReadRequest(task.cmdType, &copReq, options.GetTiKVReplicaReadType(worker.req.ReplicaRead), &worker.replicaReadSeed, kvrpcpb.Context{ IsolationLevel: isolationLevelToPB(worker.req.IsolationLevel), @@ -1496,10 +1496,10 @@ func (worker *copIteratorWorker) handleLockErr(bo *Backoffer, lockErr *kvrpcpb.L return nil } -func (worker *copIteratorWorker) buildCacheKey(task *copTask, copReq *coprocessor.Request, force bool) (cacheKey []byte, cacheValue *coprCacheValue) { +func (worker *copIteratorWorker) buildCacheKey(task *copTask, copReq *coprocessor.Request) (cacheKey []byte, cacheValue *coprCacheValue) { // If there are many ranges, it is very likely to be a TableLookupRequest. They are not worth to cache since // computing is not the main cost. Ignore requests with many ranges directly to avoid slowly building the cache key. - if force || task.cmdType == tikvrpc.CmdCop && worker.store.coprCache != nil && worker.req.Cacheable && worker.store.coprCache.CheckRequestAdmission(len(copReq.Ranges)) { + if task.cmdType == tikvrpc.CmdCop && worker.store.coprCache != nil && worker.req.Cacheable && worker.store.coprCache.CheckRequestAdmission(len(copReq.Ranges)) { cKey, err := coprCacheBuildKey(copReq) if err == nil { cacheKey = cKey From 733b551ad158e06e6cb738330d884035cb4e5c47 Mon Sep 17 00:00:00 2001 From: you06 Date: Mon, 6 Feb 2023 17:04:18 +0800 Subject: [PATCH 03/13] record duration when stats exist Signed-off-by: you06 --- executor/distsql.go | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/executor/distsql.go b/executor/distsql.go index 087e20cb7ed28..5b9c923f551cf 100644 --- a/executor/distsql.go +++ b/executor/distsql.go @@ -792,21 +792,32 @@ func (e *IndexLookUpExecutor) getResultTask() (*lookupTableTask, error) { if e.resultCurr != nil && e.resultCurr.cursor < len(e.resultCurr.rows) { return e.resultCurr, nil } - start := time.Now() + var ( + enableStats = e.stats != nil + start time.Time + indexFetchedInstant time.Time + ) + if enableStats { + start = time.Now() + } task, ok := <-e.resultCh if !ok { return nil, nil } - indexFetchedInstant := time.Now() + if enableStats { + indexFetchedInstant = time.Now() + } if err := <-task.doneCh; err != nil { return nil, err } - e.stats.NextWaitIndexScan += indexFetchedInstant.Sub(start) - if task.buildDone.After(indexFetchedInstant) { - e.stats.NextWaitTableLookUpBuild += task.buildDone.Sub(indexFetchedInstant) - indexFetchedInstant = task.buildDone + if enableStats { + e.stats.NextWaitIndexScan += indexFetchedInstant.Sub(start) + if task.buildDone.After(indexFetchedInstant) { + e.stats.NextWaitTableLookUpBuild += 
task.buildDone.Sub(indexFetchedInstant) + indexFetchedInstant = task.buildDone + } + e.stats.NextWaitTableLookUpResp += time.Since(indexFetchedInstant) } - e.stats.NextWaitTableLookUpResp += time.Since(indexFetchedInstant) // Release the memory usage of last task before we handle a new task. if e.resultCurr != nil { From 91c283cf7ab1615f2737493f9e7166820e5138db Mon Sep 17 00:00:00 2001 From: you06 Date: Mon, 6 Feb 2023 22:00:42 +0800 Subject: [PATCH 04/13] update Signed-off-by: you06 --- store/copr/coprocessor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/store/copr/coprocessor.go b/store/copr/coprocessor.go index cbcc4b65894ab..82a8b9bd08552 100644 --- a/store/copr/coprocessor.go +++ b/store/copr/coprocessor.go @@ -319,7 +319,7 @@ func buildCopTasks(bo *Backoffer, ranges *KeyRanges, opt *buildCopTaskOpt) ([]*c } rangesLen := ranges.Len() // something went wrong, disable hints to avoid out of range index. - if hints != nil && len(hints) != rangesLen { + if len(hints) != rangesLen { hints = nil } From 1a530e63ee5f9e34f7763d1d4734c72a9b8575bf Mon Sep 17 00:00:00 2001 From: you06 Date: Tue, 7 Feb 2023 17:24:30 +0800 Subject: [PATCH 05/13] Update executor/distsql.go Co-authored-by: cfzjywxk --- executor/distsql.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/executor/distsql.go b/executor/distsql.go index 5b9c923f551cf..3316b8bcb1e08 100644 --- a/executor/distsql.go +++ b/executor/distsql.go @@ -77,7 +77,7 @@ type lookupTableTask struct { cursor int // after the cop task is built, buildDone will be set to the current instant, for Next wait duration statistic. - buildDone time.Time + buildDoneTime time.Time doneCh chan error // indexOrder map is used to save the original index order for the handles. From 0eb90b3d92f178b7725397a53079855b1a049220 Mon Sep 17 00:00:00 2001 From: you06 Date: Tue, 7 Feb 2023 17:24:47 +0800 Subject: [PATCH 06/13] Update executor/distsql.go Co-authored-by: cfzjywxk --- executor/distsql.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/executor/distsql.go b/executor/distsql.go index 3316b8bcb1e08..b2fdc709e643c 100644 --- a/executor/distsql.go +++ b/executor/distsql.go @@ -1140,7 +1140,7 @@ type IndexLookUpRunTimeStats struct { TableRowScan int64 TableTaskNum int64 Concurrency int - // record the next wait details. + // Record the `Next` call affected wait duration details. 
NextWaitIndexScan time.Duration NextWaitTableLookUpBuild time.Duration NextWaitTableLookUpResp time.Duration From 03265ae8aa725a58542408ed0178d814074c444a Mon Sep 17 00:00:00 2001 From: you06 Date: Tue, 7 Feb 2023 17:29:00 +0800 Subject: [PATCH 07/13] update field name Signed-off-by: you06 --- distsql/select_result.go | 4 ++-- executor/distsql.go | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/distsql/select_result.go b/distsql/select_result.go index 28670f359359b..9f962bb8fd720 100644 --- a/distsql/select_result.go +++ b/distsql/select_result.go @@ -616,11 +616,11 @@ func (s *selectResultRuntimeStats) String() string { buf.WriteString(execdetails.FormatDuration(s.buildTaskDuration)) } if s.distSQLConcurrency > 0 { - buf.WriteString(", distsql_concurrency: ") + buf.WriteString(", max_distsql_concurrency: ") buf.WriteString(strconv.FormatInt(int64(s.distSQLConcurrency), 10)) } if s.extraConcurrency > 0 { - buf.WriteString(", extra_concurrency: ") + buf.WriteString(", max_extra_concurrency: ") buf.WriteString(strconv.FormatInt(int64(s.extraConcurrency), 10)) } if s.storeBatchedNum > 0 { diff --git a/executor/distsql.go b/executor/distsql.go index b2fdc709e643c..a96954b9fba6f 100644 --- a/executor/distsql.go +++ b/executor/distsql.go @@ -78,7 +78,7 @@ type lookupTableTask struct { // after the cop task is built, buildDone will be set to the current instant, for Next wait duration statistic. buildDoneTime time.Time - doneCh chan error + doneCh chan error // indexOrder map is used to save the original index order for the handles. // Without this map, the original index order might be lost. @@ -812,9 +812,9 @@ func (e *IndexLookUpExecutor) getResultTask() (*lookupTableTask, error) { } if enableStats { e.stats.NextWaitIndexScan += indexFetchedInstant.Sub(start) - if task.buildDone.After(indexFetchedInstant) { - e.stats.NextWaitTableLookUpBuild += task.buildDone.Sub(indexFetchedInstant) - indexFetchedInstant = task.buildDone + if task.buildDoneTime.After(indexFetchedInstant) { + e.stats.NextWaitTableLookUpBuild += task.buildDoneTime.Sub(indexFetchedInstant) + indexFetchedInstant = task.buildDoneTime } e.stats.NextWaitTableLookUpResp += time.Since(indexFetchedInstant) } @@ -1337,7 +1337,7 @@ func getDatumRow(r *chunk.Row, fields []*types.FieldType) []types.Datum { // Then we hold the returning rows and finish this task. 
func (w *tableWorker) executeTask(ctx context.Context, task *lookupTableTask) error { tableReader, err := w.idxLookup.buildTableReader(ctx, task) - task.buildDone = time.Now() + task.buildDoneTime = time.Now() if err != nil { logutil.Logger(ctx).Error("build table reader failed", zap.Error(err)) return err From a9cd7dece8bb3cf69fd5b75410e3040316c6f44a Mon Sep 17 00:00:00 2001 From: you06 Date: Tue, 7 Feb 2023 23:06:18 +0800 Subject: [PATCH 08/13] add telemetry Signed-off-by: you06 --- distsql/select_result.go | 9 ++++- executor/adapter.go | 1 + executor/builder.go | 7 ++++ executor/distsql.go | 3 ++ metrics/telemetry.go | 47 +++++++++++++++++++++++ session/session.go | 7 +++- telemetry/data.go | 2 + telemetry/data_feature_usage.go | 20 ++++++++++ telemetry/data_feature_usage_test.go | 57 ++++++++++++++++++++++++++++ 9 files changed, 151 insertions(+), 2 deletions(-) diff --git a/distsql/select_result.go b/distsql/select_result.go index 9f962bb8fd720..ee8cbc638d308 100644 --- a/distsql/select_result.go +++ b/distsql/select_result.go @@ -53,6 +53,11 @@ var ( errQueryInterrupted = dbterror.ClassExecutor.NewStd(errno.ErrQueryInterrupted) ) +var ( + telemetryStoreBatchedCnt = metrics.TelemetryStoreBatchedCnt + telemetryStoreBatchedFallbackCnt = metrics.TelemetryStoreBatchedFallbackCnt +) + var ( _ SelectResult = (*selectResult)(nil) _ SelectResult = (*serialSelectResults)(nil) @@ -462,8 +467,10 @@ func (r *selectResult) Close() error { if r.stats != nil { defer func() { if ci, ok := r.resp.(copr.CopInfo); ok { - r.stats.storeBatchedNum, r.stats.storeBatchedFallbackNum = ci.GetStoreBatchInfo() r.stats.buildTaskDuration = ci.GetBuildTaskElapsed() + r.stats.storeBatchedNum, r.stats.storeBatchedFallbackNum = ci.GetStoreBatchInfo() + telemetryStoreBatchedCnt.Add(float64(r.stats.storeBatchedNum)) + telemetryStoreBatchedFallbackCnt.Add(float64(r.stats.storeBatchedFallbackNum)) } r.ctx.GetSessionVars().StmtCtx.RuntimeStatsColl.RegisterStats(r.rootPlanID, r.stats) }() diff --git a/executor/adapter.go b/executor/adapter.go index 33b321f60382e..f9fba0fea5d25 100644 --- a/executor/adapter.go +++ b/executor/adapter.go @@ -207,6 +207,7 @@ type TelemetryInfo struct { PartitionTelemetry *PartitionTelemetryInfo AccountLockTelemetry *AccountLockTelemetryInfo UseIndexMerge bool + UseTableLookUp bool } // PartitionTelemetryInfo records table partition telemetry information during execution. 
diff --git a/executor/builder.go b/executor/builder.go index 12cdeeaaa44ef..1005b51a764c6 100644 --- a/executor/builder.go +++ b/executor/builder.go @@ -3863,6 +3863,9 @@ func buildNoRangeIndexLookUpReader(b *executorBuilder, v *plannercore.PhysicalIn } func (b *executorBuilder) buildIndexLookUpReader(v *plannercore.PhysicalIndexLookUpReader) Executor { + if b.Ti != nil { + b.Ti.UseTableLookUp = true + } is := v.IndexPlans[0].(*plannercore.PhysicalIndexScan) if err := b.validCanReadTemporaryOrCacheTable(is.Table); err != nil { b.err = err @@ -4000,6 +4003,7 @@ func buildNoRangeIndexMergeReader(b *executorBuilder, v *plannercore.PhysicalInd func (b *executorBuilder) buildIndexMergeReader(v *plannercore.PhysicalIndexMergeReader) Executor { if b.Ti != nil { b.Ti.UseIndexMerge = true + b.Ti.UseTableLookUp = true } ts := v.TablePlans[0].(*plannercore.PhysicalTableScan) if err := b.validCanReadTemporaryOrCacheTable(ts.Table); err != nil { @@ -4445,6 +4449,9 @@ func (builder *dataReaderBuilder) buildIndexReaderForIndexJoin(ctx context.Conte func (builder *dataReaderBuilder) buildIndexLookUpReaderForIndexJoin(ctx context.Context, v *plannercore.PhysicalIndexLookUpReader, lookUpContents []*indexJoinLookUpContent, indexRanges []*ranger.Range, keyOff2IdxOff []int, cwc *plannercore.ColWithCmpFuncManager, memTracker *memory.Tracker, interruptSignal *atomic.Value) (Executor, error) { + if builder.Ti != nil { + builder.Ti.UseTableLookUp = true + } e, err := buildNoRangeIndexLookUpReader(builder.executorBuilder, v) if err != nil { return nil, err diff --git a/executor/distsql.go b/executor/distsql.go index a96954b9fba6f..8edb0378b7ad5 100644 --- a/executor/distsql.go +++ b/executor/distsql.go @@ -30,6 +30,7 @@ import ( "github.com/pingcap/tidb/distsql" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/kv" + "github.com/pingcap/tidb/metrics" "github.com/pingcap/tidb/parser/charset" "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" @@ -67,6 +68,8 @@ var ( // LookupTableTaskChannelSize represents the channel size of the index double read taskChan. var LookupTableTaskChannelSize int32 = 50 +var telemetryStoreBatchedQueryCnt = metrics.TelemetryStoreBatchedQueryCnt + // lookupTableTask is created from a partial result of an index request which // contains the handles in those index keys. type lookupTableTask struct { diff --git a/metrics/telemetry.go b/metrics/telemetry.go index 591823f9952d9..1a9b7ad7eebbb 100644 --- a/metrics/telemetry.go +++ b/metrics/telemetry.go @@ -169,6 +169,27 @@ var ( Name: "compact_partition_usage", Help: "Counter of compact table partition", }) + TelemetryStoreBatchedQueryCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: "tidb", + Subsystem: "telemetry", + Name: "store_batched_query", + Help: "Counter of queries which use store batched coprocessor tasks", + }) + TelemetryStoreBatchedCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: "tidb", + Subsystem: "telemetry", + Name: "store_batched", + Help: "Counter of store batched coprocessor tasks", + }) + TelemetryStoreBatchedFallbackCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: "tidb", + Subsystem: "telemetry", + Name: "store_batched_fallback", + Help: "Counter of store batched fallback coprocessor tasks", + }) ) // readCounter reads the value of a prometheus.Counter. 
@@ -422,3 +443,29 @@ func GetIndexMergeCounter() IndexMergeUsageCounter { IndexMergeUsed: readCounter(TelemetryIndexMergeUsage), } } + +// StoreBatchCoprCounter records the usages of batch copr statements. +type StoreBatchCoprCounter struct { + BatchSize int `json:"batch_size"` + BatchedQuery int64 `json:"query"` + BatchedCount int64 `json:"batched"` + BatchedFallbackCount int64 `json:"batched_fallback"` +} + +// Sub returns the difference of two counters. +func (n StoreBatchCoprCounter) Sub(rhs StoreBatchCoprCounter) StoreBatchCoprCounter { + return StoreBatchCoprCounter{ + BatchedQuery: n.BatchedQuery - rhs.BatchedQuery, + BatchedCount: n.BatchedCount - rhs.BatchedCount, + BatchedFallbackCount: n.BatchedFallbackCount - rhs.BatchedFallbackCount, + } +} + +// GetStoreBatchCoprCounter gets the IndexMerge usage counter. +func GetStoreBatchCoprCounter() StoreBatchCoprCounter { + return StoreBatchCoprCounter{ + BatchedQuery: readCounter(TelemetryStoreBatchedQueryCnt), + BatchedCount: readCounter(TelemetryStoreBatchedCnt), + BatchedFallbackCount: readCounter(TelemetryStoreBatchedFallbackCnt), + } +} diff --git a/session/session.go b/session/session.go index 2d84f1c0fa858..95822c17811a0 100644 --- a/session/session.go +++ b/session/session.go @@ -153,7 +153,8 @@ var ( telemetryUnlockUserUsage = metrics.TelemetryAccountLockCnt.WithLabelValues("unlockUser") telemetryCreateOrAlterUserUsage = metrics.TelemetryAccountLockCnt.WithLabelValues("createOrAlterUser") - telemetryIndexMerge = metrics.TelemetryIndexMergeUsage + telemetryIndexMerge = metrics.TelemetryIndexMergeUsage + telemetryStoreBatchedUsage = metrics.TelemetryStoreBatchedQueryCnt ) // Session context, it is consistent with the lifecycle of a client connection. @@ -4047,6 +4048,10 @@ func (s *session) updateTelemetryMetric(es *executor.ExecStmt) { telemetryUnlockUserUsage.Add(float64(ti.AccountLockTelemetry.UnlockUser)) telemetryCreateOrAlterUserUsage.Add(float64(ti.AccountLockTelemetry.CreateOrAlterUser)) } + + if ti.UseTableLookUp && s.sessionVars.StoreBatchSize > 0 { + telemetryStoreBatchedUsage.Inc() + } } // GetBuiltinFunctionUsage returns the replica of counting of builtin function usage diff --git a/telemetry/data.go b/telemetry/data.go index 0008b7fc8b88b..2a3ecffddaa1e 100644 --- a/telemetry/data.go +++ b/telemetry/data.go @@ -69,6 +69,8 @@ func postReportTelemetryData() { PostSavepointCount() postReportLazyPessimisticUniqueCheckSetCount() postReportDDLUsage() + postReportIndexMergeUsage() + postStoreBatchUsage() } // PostReportTelemetryDataForTest is for test. 
diff --git a/telemetry/data_feature_usage.go b/telemetry/data_feature_usage.go index 1fe696870c291..1a186cc3451d2 100644 --- a/telemetry/data_feature_usage.go +++ b/telemetry/data_feature_usage.go @@ -17,6 +17,7 @@ package telemetry import ( "context" "errors" + "strconv" "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/config" @@ -62,6 +63,7 @@ type featureUsage struct { IndexMergeUsageCounter *m.IndexMergeUsageCounter `json:"indexMergeUsageCounter"` ResourceControlUsage *resourceControlUsage `json:"resourceControl"` TTLUsage *ttlUsageCounter `json:"ttlUsage"` + StoreBatchCoprUsage *m.StoreBatchCoprCounter `json:"storeBatchCopr"` } type placementPolicyUsage struct { @@ -121,6 +123,8 @@ func getFeatureUsage(ctx context.Context, sctx sessionctx.Context) (*featureUsag usage.TTLUsage = getTTLUsageInfo(ctx, sctx) + usage.StoreBatchCoprUsage = getStoreBatchUsage(sctx) + return &usage, nil } @@ -264,6 +268,7 @@ var initialSavepointStmtCounter int64 var initialLazyPessimisticUniqueCheckSetCount int64 var initialDDLUsageCounter m.DDLUsageCounter var initialIndexMergeCounter m.IndexMergeUsageCounter +var initialStoreBatchCoprCounter m.StoreBatchCoprCounter // getTxnUsageInfo gets the usage info of transaction related features. It's exported for tests. func getTxnUsageInfo(ctx sessionctx.Context) *TxnUsage { @@ -432,3 +437,18 @@ func getIndexMergeUsageInfo() *m.IndexMergeUsageCounter { diff := curr.Sub(initialIndexMergeCounter) return &diff } + +func getStoreBatchUsage(ctx sessionctx.Context) *m.StoreBatchCoprCounter { + curr := m.GetStoreBatchCoprCounter() + diff := curr.Sub(initialStoreBatchCoprCounter) + if val, err := ctx.GetSessionVars().GetGlobalSystemVar(context.Background(), variable.TiDBStoreBatchSize); err == nil { + if batchSize, err := strconv.Atoi(val); err == nil { + diff.BatchSize = batchSize + } + } + return &diff +} + +func postStoreBatchUsage() { + initialStoreBatchCoprCounter = m.GetStoreBatchCoprCounter() +} diff --git a/telemetry/data_feature_usage_test.go b/telemetry/data_feature_usage_test.go index c8932cd27e35b..02fdd7b40cec9 100644 --- a/telemetry/data_feature_usage_test.go +++ b/telemetry/data_feature_usage_test.go @@ -21,6 +21,7 @@ import ( "testing" "time" + "github.com/pingcap/failpoint" _ "github.com/pingcap/tidb/autoid_service" "github.com/pingcap/tidb/config" "github.com/pingcap/tidb/ddl" @@ -777,3 +778,59 @@ func TestTTLTelemetry(t *testing.T) { checkTableHistWithDeleteRows(1, 1, 0, 0, 0) checkTableHistWithDelay(0, 1, 1, 0, 1) } + +func TestStoreBatchCopr(t *testing.T) { + store := testkit.CreateMockStore(t) + + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + + usage, err := telemetry.GetFeatureUsage(tk.Session()) + require.NoError(t, err) + require.Equal(t, usage.StoreBatchCoprUsage.BatchSize, 4) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedQuery, int64(0)) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedCount, int64(0)) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedFallbackCount, int64(0)) + + tk.MustExec("drop table if exists tele_batch_t") + tk.MustExec("create table tele_batch_t (id int primary key, c int, k int, index i(k))") + tk.MustExec("select * from tele_batch_t force index(i) where k between 1 and 10 and k % 2 != 0") + usage, err = telemetry.GetFeatureUsage(tk.Session()) + require.NoError(t, err) + require.Equal(t, usage.StoreBatchCoprUsage.BatchSize, 4) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedQuery, int64(1)) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedCount, int64(0)) + require.Equal(t, 
usage.StoreBatchCoprUsage.BatchedFallbackCount, int64(0)) + + tk.MustExec("insert into tele_batch_t values(1, 1, 1), (2, 2, 2), (3, 3, 3), (5, 5, 5), (7, 7, 7)") + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/store/copr/setRangesPerTask", "return(1)")) + defer func() { + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/store/copr/setRangesPerTask")) + }() + tk.MustQuery("select * from tele_batch_t force index(i) where k between 1 and 3 and k % 2 != 0").Sort(). + Check(testkit.Rows("1 1 1", "3 3 3")) + usage, err = telemetry.GetFeatureUsage(tk.Session()) + require.NoError(t, err) + require.Equal(t, usage.StoreBatchCoprUsage.BatchSize, 4) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedQuery, int64(2)) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedCount, int64(1)) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedFallbackCount, int64(0)) + + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/store/copr/batchCopRegionError", "return")) + defer func() { + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/store/copr/batchCopRegionError")) + }() + tk.MustQuery("select * from tele_batch_t force index(i) where k between 1 and 3 and k % 2 != 0").Sort(). + Check(testkit.Rows("1 1 1", "3 3 3")) + usage, err = telemetry.GetFeatureUsage(tk.Session()) + require.NoError(t, err) + require.Equal(t, usage.StoreBatchCoprUsage.BatchSize, 4) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedQuery, int64(3)) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedCount, int64(1)) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedFallbackCount, int64(1)) + + tk.MustExec("set global tidb_store_batch_size = 8") + usage, err = telemetry.GetFeatureUsage(tk.Session()) + require.NoError(t, err) + require.Equal(t, usage.StoreBatchCoprUsage.BatchSize, 8) +} From 8591a8bf27fcf19522ad801d474858884988d476 Mon Sep 17 00:00:00 2001 From: you06 Date: Wed, 8 Feb 2023 14:11:07 +0800 Subject: [PATCH 09/13] add telemetry of batched tasks Signed-off-by: you06 --- distsql/select_result.go | 11 ++++++++--- executor/distsql.go | 3 --- metrics/telemetry.go | 10 ++++++++++ telemetry/data_feature_usage_test.go | 15 +++++++++++++-- 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/distsql/select_result.go b/distsql/select_result.go index ee8cbc638d308..394298e8fa3b0 100644 --- a/distsql/select_result.go +++ b/distsql/select_result.go @@ -54,6 +54,7 @@ var ( ) var ( + telemetryBatchedQueryTaskCnt = metrics.TelemetryBatchedQueryTaskCnt telemetryStoreBatchedCnt = metrics.TelemetryStoreBatchedCnt telemetryStoreBatchedFallbackCnt = metrics.TelemetryStoreBatchedFallbackCnt ) @@ -468,9 +469,13 @@ func (r *selectResult) Close() error { defer func() { if ci, ok := r.resp.(copr.CopInfo); ok { r.stats.buildTaskDuration = ci.GetBuildTaskElapsed() - r.stats.storeBatchedNum, r.stats.storeBatchedFallbackNum = ci.GetStoreBatchInfo() - telemetryStoreBatchedCnt.Add(float64(r.stats.storeBatchedNum)) - telemetryStoreBatchedFallbackCnt.Add(float64(r.stats.storeBatchedFallbackNum)) + batched, fallback := ci.GetStoreBatchInfo() + if batched != 0 || fallback != 0 { + r.stats.storeBatchedNum, r.stats.storeBatchedFallbackNum = batched, fallback + telemetryStoreBatchedCnt.Add(float64(r.stats.storeBatchedNum)) + telemetryStoreBatchedFallbackCnt.Add(float64(r.stats.storeBatchedFallbackNum)) + telemetryBatchedQueryTaskCnt.Add(float64(len(r.stats.copRespTime))) + } } r.ctx.GetSessionVars().StmtCtx.RuntimeStatsColl.RegisterStats(r.rootPlanID, r.stats) }() diff --git a/executor/distsql.go 
b/executor/distsql.go index 8edb0378b7ad5..a96954b9fba6f 100644 --- a/executor/distsql.go +++ b/executor/distsql.go @@ -30,7 +30,6 @@ import ( "github.com/pingcap/tidb/distsql" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/kv" - "github.com/pingcap/tidb/metrics" "github.com/pingcap/tidb/parser/charset" "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" @@ -68,8 +67,6 @@ var ( // LookupTableTaskChannelSize represents the channel size of the index double read taskChan. var LookupTableTaskChannelSize int32 = 50 -var telemetryStoreBatchedQueryCnt = metrics.TelemetryStoreBatchedQueryCnt - // lookupTableTask is created from a partial result of an index request which // contains the handles in those index keys. type lookupTableTask struct { diff --git a/metrics/telemetry.go b/metrics/telemetry.go index 1a9b7ad7eebbb..dfd1eec466de4 100644 --- a/metrics/telemetry.go +++ b/metrics/telemetry.go @@ -176,6 +176,13 @@ var ( Name: "store_batched_query", Help: "Counter of queries which use store batched coprocessor tasks", }) + TelemetryBatchedQueryTaskCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: "tidb", + Subsystem: "telemetry", + Name: "batched_query_task", + Help: "Counter of coprocessor tasks in batched queries", + }) TelemetryStoreBatchedCnt = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: "tidb", @@ -448,6 +455,7 @@ func GetIndexMergeCounter() IndexMergeUsageCounter { type StoreBatchCoprCounter struct { BatchSize int `json:"batch_size"` BatchedQuery int64 `json:"query"` + BatchedQueryTask int64 `json:"tasks"` BatchedCount int64 `json:"batched"` BatchedFallbackCount int64 `json:"batched_fallback"` } @@ -456,6 +464,7 @@ type StoreBatchCoprCounter struct { func (n StoreBatchCoprCounter) Sub(rhs StoreBatchCoprCounter) StoreBatchCoprCounter { return StoreBatchCoprCounter{ BatchedQuery: n.BatchedQuery - rhs.BatchedQuery, + BatchedQueryTask: n.BatchedQueryTask - rhs.BatchedQueryTask, BatchedCount: n.BatchedCount - rhs.BatchedCount, BatchedFallbackCount: n.BatchedFallbackCount - rhs.BatchedFallbackCount, } @@ -465,6 +474,7 @@ func (n StoreBatchCoprCounter) Sub(rhs StoreBatchCoprCounter) StoreBatchCoprCoun func GetStoreBatchCoprCounter() StoreBatchCoprCounter { return StoreBatchCoprCounter{ BatchedQuery: readCounter(TelemetryStoreBatchedQueryCnt), + BatchedQueryTask: readCounter(TelemetryBatchedQueryTaskCnt), BatchedCount: readCounter(TelemetryStoreBatchedCnt), BatchedFallbackCount: readCounter(TelemetryStoreBatchedFallbackCnt), } diff --git a/telemetry/data_feature_usage_test.go b/telemetry/data_feature_usage_test.go index 02fdd7b40cec9..faa43e3587e8b 100644 --- a/telemetry/data_feature_usage_test.go +++ b/telemetry/data_feature_usage_test.go @@ -789,6 +789,7 @@ func TestStoreBatchCopr(t *testing.T) { require.NoError(t, err) require.Equal(t, usage.StoreBatchCoprUsage.BatchSize, 4) require.Equal(t, usage.StoreBatchCoprUsage.BatchedQuery, int64(0)) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedQueryTask, int64(0)) require.Equal(t, usage.StoreBatchCoprUsage.BatchedCount, int64(0)) require.Equal(t, usage.StoreBatchCoprUsage.BatchedFallbackCount, int64(0)) @@ -799,6 +800,7 @@ func TestStoreBatchCopr(t *testing.T) { require.NoError(t, err) require.Equal(t, usage.StoreBatchCoprUsage.BatchSize, 4) require.Equal(t, usage.StoreBatchCoprUsage.BatchedQuery, int64(1)) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedQueryTask, int64(0)) require.Equal(t, usage.StoreBatchCoprUsage.BatchedCount, int64(0)) require.Equal(t, 
usage.StoreBatchCoprUsage.BatchedFallbackCount, int64(0)) @@ -813,6 +815,7 @@ func TestStoreBatchCopr(t *testing.T) { require.NoError(t, err) require.Equal(t, usage.StoreBatchCoprUsage.BatchSize, 4) require.Equal(t, usage.StoreBatchCoprUsage.BatchedQuery, int64(2)) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedQueryTask, int64(2)) require.Equal(t, usage.StoreBatchCoprUsage.BatchedCount, int64(1)) require.Equal(t, usage.StoreBatchCoprUsage.BatchedFallbackCount, int64(0)) @@ -826,11 +829,19 @@ func TestStoreBatchCopr(t *testing.T) { require.NoError(t, err) require.Equal(t, usage.StoreBatchCoprUsage.BatchSize, 4) require.Equal(t, usage.StoreBatchCoprUsage.BatchedQuery, int64(3)) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedQueryTask, int64(4)) require.Equal(t, usage.StoreBatchCoprUsage.BatchedCount, int64(1)) require.Equal(t, usage.StoreBatchCoprUsage.BatchedFallbackCount, int64(1)) - tk.MustExec("set global tidb_store_batch_size = 8") + tk.MustExec("set global tidb_store_batch_size = 0") + tk.MustExec("set session tidb_store_batch_size = 0") + tk.MustQuery("select * from tele_batch_t force index(i) where k between 1 and 3 and k % 2 != 0").Sort(). + Check(testkit.Rows("1 1 1", "3 3 3")) usage, err = telemetry.GetFeatureUsage(tk.Session()) require.NoError(t, err) - require.Equal(t, usage.StoreBatchCoprUsage.BatchSize, 8) + require.Equal(t, usage.StoreBatchCoprUsage.BatchSize, 0) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedQuery, int64(3)) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedQueryTask, int64(4)) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedCount, int64(1)) + require.Equal(t, usage.StoreBatchCoprUsage.BatchedFallbackCount, int64(1)) } From b1e091480a49d12e2b93a685542dbd7d56f54e8d Mon Sep 17 00:00:00 2001 From: you06 Date: Wed, 8 Feb 2023 14:23:40 +0800 Subject: [PATCH 10/13] update bazel Signed-off-by: you06 --- telemetry/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/telemetry/BUILD.bazel b/telemetry/BUILD.bazel index a6c79f7de596f..56376f0031109 100644 --- a/telemetry/BUILD.bazel +++ b/telemetry/BUILD.bazel @@ -77,6 +77,7 @@ go_test( "//testkit", "//testkit/testsetup", "@com_github_jeffail_gabs_v2//:gabs", + "@com_github_pingcap_failpoint//:failpoint", "@com_github_pingcap_kvproto//pkg/metapb", "@com_github_stretchr_testify//require", "@com_github_tikv_client_go_v2//testutils", From 0eed285ec6f974a04486944b7703108a9b7c86b0 Mon Sep 17 00:00:00 2001 From: you06 Date: Wed, 8 Feb 2023 15:39:45 +0800 Subject: [PATCH 11/13] add comment & only batch small tasks Signed-off-by: you06 --- metrics/telemetry.go | 13 +++++++++---- store/copr/copr_test/coprocessor_test.go | 15 +++++++++++++++ store/copr/coprocessor.go | 3 ++- telemetry/data_feature_usage_test.go | 1 - 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/metrics/telemetry.go b/metrics/telemetry.go index dfd1eec466de4..db4c80714c0ea 100644 --- a/metrics/telemetry.go +++ b/metrics/telemetry.go @@ -453,10 +453,15 @@ func GetIndexMergeCounter() IndexMergeUsageCounter { // StoreBatchCoprCounter records the usages of batch copr statements. type StoreBatchCoprCounter struct { - BatchSize int `json:"batch_size"` - BatchedQuery int64 `json:"query"` - BatchedQueryTask int64 `json:"tasks"` - BatchedCount int64 `json:"batched"` + // BatchSize is the global value of `tidb_store_batch_size` + BatchSize int `json:"batch_size"` + // BatchedQuery is the counter of queries that use this feature. 
+ BatchedQuery int64 `json:"query"` + // BatchedQueryTask is the counter of total tasks in queries above. + BatchedQueryTask int64 `json:"tasks"` + // BatchedCount is the counter of successfully batched tasks. + BatchedCount int64 `json:"batched"` + // BatchedFallbackCount is the counter of fallback batched tasks by region miss. BatchedFallbackCount int64 `json:"batched_fallback"` } diff --git a/store/copr/copr_test/coprocessor_test.go b/store/copr/copr_test/coprocessor_test.go index 7931fb8432675..30247b8694a72 100644 --- a/store/copr/copr_test/coprocessor_test.go +++ b/store/copr/copr_test/coprocessor_test.go @@ -166,4 +166,19 @@ func TestBuildCopIteratorWithBatchStoreCopr(t *testing.T) { require.Nil(t, errRes) tasks = it.GetTasks() require.Equal(t, len(tasks), 4) + + // only small tasks will be batched. + ranges = copr.BuildKeyRanges("a", "b", "h", "i", "o", "p") + req = &kv.Request{ + Tp: kv.ReqTypeDAG, + KeyRanges: kv.NewNonParitionedKeyRangesWithHint(ranges, []int{1, 33, 32}), + Concurrency: 15, + StoreBatchSize: 3, + } + it, errRes = copClient.BuildCopIterator(ctx, req, vars, opt) + require.Nil(t, errRes) + tasks = it.GetTasks() + require.Equal(t, len(tasks), 2) + require.Equal(t, len(tasks[0].ToPBBatchTasks()), 1) + require.Equal(t, len(tasks[1].ToPBBatchTasks()), 0) } diff --git a/store/copr/coprocessor.go b/store/copr/coprocessor.go index 5ed44d18928b1..06ab6ab61efd1 100644 --- a/store/copr/coprocessor.go +++ b/store/copr/coprocessor.go @@ -506,7 +506,8 @@ func (b *batchStoreTaskBuilder) handle(task *copTask) (err error) { b.tasks = append(b.tasks, task) } }() - if b.limit <= 0 { + // only batch small tasks for memory control. + if b.limit <= 0 || !isSmallTask(task) { return nil } batchedTask, err := b.cache.BuildBatchTask(b.bo, task, b.req.ReplicaRead) diff --git a/telemetry/data_feature_usage_test.go b/telemetry/data_feature_usage_test.go index faa43e3587e8b..a42c35927abfe 100644 --- a/telemetry/data_feature_usage_test.go +++ b/telemetry/data_feature_usage_test.go @@ -781,7 +781,6 @@ func TestTTLTelemetry(t *testing.T) { func TestStoreBatchCopr(t *testing.T) { store := testkit.CreateMockStore(t) - tk := testkit.NewTestKit(t, store) tk.MustExec("use test") From 7311c981291950a4860e310ddd1c514215624daf Mon Sep 17 00:00:00 2001 From: you06 Date: Wed, 8 Feb 2023 20:23:44 +0800 Subject: [PATCH 12/13] check diff to avoid affected by other tests Signed-off-by: you06 --- telemetry/data_feature_usage_test.go | 46 ++++++++++++++-------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/telemetry/data_feature_usage_test.go b/telemetry/data_feature_usage_test.go index a42c35927abfe..f8e5701f0e69a 100644 --- a/telemetry/data_feature_usage_test.go +++ b/telemetry/data_feature_usage_test.go @@ -784,24 +784,21 @@ func TestStoreBatchCopr(t *testing.T) { tk := testkit.NewTestKit(t, store) tk.MustExec("use test") - usage, err := telemetry.GetFeatureUsage(tk.Session()) + init, err := telemetry.GetFeatureUsage(tk.Session()) require.NoError(t, err) - require.Equal(t, usage.StoreBatchCoprUsage.BatchSize, 4) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedQuery, int64(0)) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedQueryTask, int64(0)) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedCount, int64(0)) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedFallbackCount, int64(0)) + require.Equal(t, init.StoreBatchCoprUsage.BatchSize, 4) tk.MustExec("drop table if exists tele_batch_t") tk.MustExec("create table tele_batch_t (id int primary key, c int, k int, index i(k))") 
tk.MustExec("select * from tele_batch_t force index(i) where k between 1 and 10 and k % 2 != 0") - usage, err = telemetry.GetFeatureUsage(tk.Session()) + usage, err := telemetry.GetFeatureUsage(tk.Session()) require.NoError(t, err) require.Equal(t, usage.StoreBatchCoprUsage.BatchSize, 4) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedQuery, int64(1)) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedQueryTask, int64(0)) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedCount, int64(0)) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedFallbackCount, int64(0)) + diff := usage.StoreBatchCoprUsage.Sub(*init.StoreBatchCoprUsage) + require.Equal(t, diff.BatchedQuery, int64(1)) + require.Equal(t, diff.BatchedQueryTask, int64(0)) + require.Equal(t, diff.BatchedCount, int64(0)) + require.Equal(t, diff.BatchedFallbackCount, int64(0)) tk.MustExec("insert into tele_batch_t values(1, 1, 1), (2, 2, 2), (3, 3, 3), (5, 5, 5), (7, 7, 7)") require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/store/copr/setRangesPerTask", "return(1)")) @@ -813,10 +810,11 @@ func TestStoreBatchCopr(t *testing.T) { usage, err = telemetry.GetFeatureUsage(tk.Session()) require.NoError(t, err) require.Equal(t, usage.StoreBatchCoprUsage.BatchSize, 4) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedQuery, int64(2)) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedQueryTask, int64(2)) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedCount, int64(1)) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedFallbackCount, int64(0)) + diff = usage.StoreBatchCoprUsage.Sub(*init.StoreBatchCoprUsage) + require.Equal(t, diff.BatchedQuery, int64(2)) + require.Equal(t, diff.BatchedQueryTask, int64(2)) + require.Equal(t, diff.BatchedCount, int64(1)) + require.Equal(t, diff.BatchedFallbackCount, int64(0)) require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/store/copr/batchCopRegionError", "return")) defer func() { @@ -827,10 +825,11 @@ func TestStoreBatchCopr(t *testing.T) { usage, err = telemetry.GetFeatureUsage(tk.Session()) require.NoError(t, err) require.Equal(t, usage.StoreBatchCoprUsage.BatchSize, 4) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedQuery, int64(3)) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedQueryTask, int64(4)) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedCount, int64(1)) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedFallbackCount, int64(1)) + diff = usage.StoreBatchCoprUsage.Sub(*init.StoreBatchCoprUsage) + require.Equal(t, diff.BatchedQuery, int64(3)) + require.Equal(t, diff.BatchedQueryTask, int64(4)) + require.Equal(t, diff.BatchedCount, int64(1)) + require.Equal(t, diff.BatchedFallbackCount, int64(1)) tk.MustExec("set global tidb_store_batch_size = 0") tk.MustExec("set session tidb_store_batch_size = 0") @@ -839,8 +838,9 @@ func TestStoreBatchCopr(t *testing.T) { usage, err = telemetry.GetFeatureUsage(tk.Session()) require.NoError(t, err) require.Equal(t, usage.StoreBatchCoprUsage.BatchSize, 0) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedQuery, int64(3)) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedQueryTask, int64(4)) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedCount, int64(1)) - require.Equal(t, usage.StoreBatchCoprUsage.BatchedFallbackCount, int64(1)) + diff = usage.StoreBatchCoprUsage.Sub(*init.StoreBatchCoprUsage) + require.Equal(t, diff.BatchedQuery, int64(3)) + require.Equal(t, diff.BatchedQueryTask, int64(4)) + require.Equal(t, diff.BatchedCount, int64(1)) + require.Equal(t, diff.BatchedFallbackCount, int64(1)) } From 
a84c5db67278418fdb9ce0ba5c65eaf433d4596a Mon Sep 17 00:00:00 2001 From: you06 Date: Wed, 8 Feb 2023 21:09:05 +0800 Subject: [PATCH 13/13] fix runtime stats test Signed-off-by: you06 --- distsql/distsql_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/distsql/distsql_test.go b/distsql/distsql_test.go index f3988ea5f7c4d..d7a2447cf35b6 100644 --- a/distsql/distsql_test.go +++ b/distsql/distsql_test.go @@ -124,7 +124,7 @@ func TestSelectResultRuntimeStats(t *testing.T) { stmtStats.RegisterStats(1, s1) stmtStats.RegisterStats(1, &s2) stats := stmtStats.GetRootStats(1) - expect := "time:1s, loops:1, cop_task: {num: 4, max: 1s, min: 1ms, avg: 500.5ms, p95: 1s, max_proc_keys: 200, p95_proc_keys: 200, tot_proc: 2s, tot_wait: 2s, copr_cache_hit_ratio: 0.00, distsql_concurrency: 15}, backoff{RegionMiss: 2ms}" + expect := "time:1s, loops:1, cop_task: {num: 4, max: 1s, min: 1ms, avg: 500.5ms, p95: 1s, max_proc_keys: 200, p95_proc_keys: 200, tot_proc: 2s, tot_wait: 2s, copr_cache_hit_ratio: 0.00, max_distsql_concurrency: 15}, backoff{RegionMiss: 2ms}" require.Equal(t, expect, stats.String()) // Test for idempotence. require.Equal(t, expect, stats.String()) @@ -135,7 +135,7 @@ func TestSelectResultRuntimeStats(t *testing.T) { } stmtStats.RegisterStats(2, s1) stats = stmtStats.GetRootStats(2) - expect = "cop_task: {num: 2, max: 1s, min: 1ms, avg: 500.5ms, p95: 1s, max_proc_keys: 200, p95_proc_keys: 200, tot_proc: 1s, tot_wait: 1s, rpc_num: 1, rpc_time: 1s, copr_cache_hit_ratio: 0.00, distsql_concurrency: 15}, backoff{RegionMiss: 1ms}" + expect = "cop_task: {num: 2, max: 1s, min: 1ms, avg: 500.5ms, p95: 1s, max_proc_keys: 200, p95_proc_keys: 200, tot_proc: 1s, tot_wait: 1s, rpc_num: 1, rpc_time: 1s, copr_cache_hit_ratio: 0.00, max_distsql_concurrency: 15}, backoff{RegionMiss: 1ms}" require.Equal(t, expect, stats.String()) // Test for idempotence. require.Equal(t, expect, stats.String())
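
Illustrative sketch (appended for context, not part of the series): patch 12 changes TestStoreBatchCopr to assert on counter deltas computed with StoreBatchCoprUsage.Sub rather than on absolute values, so telemetry accumulated by other tests in the same process cannot skew the expectations. The same delta pattern applies to any consumer of these cumulative counters. The sketch below is a minimal, hypothetical usage example: the prev/curr window bookkeeping is an assumption for illustration, while GetStoreBatchCoprCounter, Sub and the counter fields are the ones added or extended in this series.

	package main

	import (
		"fmt"

		"github.com/pingcap/tidb/metrics"
	)

	func main() {
		// Snapshot the cumulative counters at the start of a reporting window.
		prev := metrics.GetStoreBatchCoprCounter()

		// ... queries run here; TelemetryStoreBatchedQueryCnt, TelemetryBatchedQueryTaskCnt,
		// TelemetryStoreBatchedCnt and TelemetryStoreBatchedFallbackCnt advance ...

		// Snapshot again and report only what happened inside this window.
		curr := metrics.GetStoreBatchCoprCounter()
		// Sub deliberately omits BatchSize: it is a configuration value, not a counter.
		delta := curr.Sub(prev)

		fmt.Printf("queries=%d tasks=%d batched=%d fallback=%d batch_size=%d\n",
			delta.BatchedQuery, delta.BatchedQueryTask,
			delta.BatchedCount, delta.BatchedFallbackCount,
			curr.BatchSize)
	}

Reporting deltas rather than raw counters is also why the test only pins BatchSize from the initial snapshot: it is read from the current global `tidb_store_batch_size` value each time, whereas the four batched-task counters are monotonic and only their differences are meaningful within a window.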