From 8cb634637c4f66fdd2f8b2a101a774fb1d3d0a88 Mon Sep 17 00:00:00 2001 From: tangenta Date: Thu, 22 Sep 2022 19:59:58 +0800 Subject: [PATCH 01/26] ddl: support tracing adding index --- ddl/ddl_worker_util.go | 156 +++++++++++++++++++++++++++++++++++++++++ ddl/index.go | 16 ++++- go.mod | 3 +- go.sum | 11 ++- 4 files changed, 183 insertions(+), 3 deletions(-) create mode 100644 ddl/ddl_worker_util.go diff --git a/ddl/ddl_worker_util.go b/ddl/ddl_worker_util.go new file mode 100644 index 0000000000000..42d0af336c6c2 --- /dev/null +++ b/ddl/ddl_worker_util.go @@ -0,0 +1,156 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ddl + +import ( + "bytes" + "context" + "fmt" + "math" + "strings" + "time" + + "github.com/pingcap/tidb/util/generic" + "github.com/pingcap/tidb/util/logutil" + minitrace "github.com/tikv/minitrace-go" + "github.com/tikv/minitrace-go/jaeger" + "go.uber.org/zap" + "golang.org/x/exp/slices" +) + +var timeDetails = generic.NewSyncMap[int64, *spanCtx](10) + +type spanCtx struct { + ctx context.Context + root minitrace.TraceHandle +} + +func injectSpan(jobID int64, event string) func() { + if sctx, ok := timeDetails.Load(jobID); ok { + hd := minitrace.StartSpan(sctx.ctx, event) + return func() { + hd.Finish() + } + } + return func() {} +} + +func initializeTrace(jobID int64) { + ctx, root := minitrace.StartRootSpan(context.Background(), + "add-index-worker", uint64(jobID), 0, nil) + timeDetails.Store(jobID, &spanCtx{ + ctx: ctx, + root: root, + }) +} + +func collectTrace(jobID int64) string { + if sctx, ok := timeDetails.Load(jobID); ok { + rootTrace, _ := sctx.root.Collect() + analyzed := analyzeTrace(rootTrace) + if len(rootTrace.Spans) < 1000 { + reportTrace(rootTrace) + } + return analyzed + } + return "" +} + +const batchSize = 512 + +func reportTrace(rootTrace minitrace.Trace) { + buf := bytes.NewBuffer(make([]uint8, 0, 4096)) + for _, subTrace := range splitTraces(rootTrace) { + buf.Reset() + trace := jaeger.MiniSpansToJaegerTrace("add-index", subTrace) + err := jaeger.ThriftCompactEncode(buf, trace) + if err != nil { + logutil.BgLogger().Warn("cannot collectTrace", zap.Error(err)) + return + } + err = jaeger.Send(buf.Bytes(), "127.0.0.1:6831") + if err != nil { + logutil.BgLogger().Warn("cannot collectTrace", zap.Error(err)) + return + } + } + +} + +func splitTraces(trace minitrace.Trace) []minitrace.Trace { + var traces []minitrace.Trace + for len(trace.Spans) > batchSize { + traces = append(traces, minitrace.Trace{ + TraceID: trace.TraceID, + Spans: trace.Spans[:batchSize], + }) + trace.Spans = trace.Spans[batchSize:] + } + traces = append(traces, minitrace.Trace{ + TraceID: trace.TraceID, + Spans: trace.Spans, + }) + return traces +} + +func analyzeTrace(trace minitrace.Trace) string { + groupByEvent := make(map[string][]*minitrace.Span, 16) + for i, span := range trace.Spans { + spans := groupByEvent[span.Event] + if len(spans) == 0 { + groupByEvent[span.Event] = []*minitrace.Span{&trace.Spans[i]} + } else { + 
groupByEvent[span.Event] = append(spans, &trace.Spans[i]) + } + } + var sb strings.Builder + sb.WriteString("{") + orderedEvents := make([]string, 0, len(groupByEvent)) + for event := range groupByEvent { + orderedEvents = append(orderedEvents, event) + } + slices.Sort(orderedEvents) + for i := 0; i < len(orderedEvents); i++ { + spans := groupByEvent[orderedEvents[i]] + sum := uint64(0) + min := uint64(math.MaxUint64) + max := uint64(0) + for _, span := range spans { + dur := span.DurationNs + sum += dur + if dur < min { + min = dur + } + if dur > max { + max = dur + } + } + avg := sum / uint64(len(spans)) + sb.WriteString(orderedEvents[i]) + sb.WriteString(":") + if len(spans) < 20 { + sb.WriteString(time.Duration(sum).String()) + } else { + sb.WriteString(fmt.Sprintf(`{sum: %s, min: %s, max: %s, avg: %s}`, + time.Duration(sum).String(), time.Duration(min).String(), + time.Duration(max).String(), time.Duration(avg).String())) + } + if i != len(orderedEvents)-1 { + sb.WriteString(", ") + } + } + sb.WriteString("}") + return sb.String() +} diff --git a/ddl/index.go b/ddl/index.go index 914e5e8ff48a3..0c1504ab46c08 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -600,11 +600,13 @@ func (w *worker) onCreateIndex(d *ddlCtx, t *meta.Meta, job *model.Job, isPK boo return ver, err } logutil.BgLogger().Info("[ddl] run add index job", zap.String("job", job.String()), zap.Reflect("indexInfo", indexInfo)) + initializeTrace(job.ID) } originalState := indexInfo.State switch indexInfo.State { case model.StateNone: // none -> delete only + defer injectSpan(job.ID, "none")() reorgTp := pickBackfillType(w, job) if reorgTp.NeedMergeProcess() { // Increase telemetryAddIndexIngestUsage @@ -620,6 +622,7 @@ func (w *worker) onCreateIndex(d *ddlCtx, t *meta.Meta, job *model.Job, isPK boo job.SchemaState = model.StateDeleteOnly case model.StateDeleteOnly: // delete only -> write only + defer injectSpan(job.ID, "delete-only")() indexInfo.State = model.StateWriteOnly _, err = checkPrimaryKeyNotNull(d, w, t, job, tblInfo, indexInfo) if err != nil { @@ -632,6 +635,7 @@ func (w *worker) onCreateIndex(d *ddlCtx, t *meta.Meta, job *model.Job, isPK boo job.SchemaState = model.StateWriteOnly case model.StateWriteOnly: // write only -> reorganization + defer injectSpan(job.ID, "write-only")() indexInfo.State = model.StateWriteReorganization _, err = checkPrimaryKeyNotNull(d, w, t, job, tblInfo, indexInfo) if err != nil { @@ -646,6 +650,7 @@ func (w *worker) onCreateIndex(d *ddlCtx, t *meta.Meta, job *model.Job, isPK boo job.SchemaState = model.StateWriteReorganization case model.StateWriteReorganization: // reorganization -> public + defer injectSpan(job.ID, "write-reorg")() tbl, err := getTable(d.store, schemaID, tblInfo) if err != nil { return ver, errors.Trace(err) @@ -676,10 +681,12 @@ func (w *worker) onCreateIndex(d *ddlCtx, t *meta.Meta, job *model.Job, isPK boo job.Args = []interface{}{indexInfo.ID, false /*if exists*/, getPartitionIDs(tbl.Meta())} // Finish this job. 
job.FinishTableJob(model.JobStateDone, model.StatePublic, ver, tblInfo) + details := collectTrace(job.ID) + logutil.BgLogger().Info("[ddl] finish add index job", zap.String("job", job.String()), + zap.String("time details", details)) default: err = dbterror.ErrInvalidDDLState.GenWithStackByArgs("index", tblInfo.State) } - return ver, errors.Trace(err) } @@ -778,6 +785,7 @@ func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Jo } switch indexInfo.BackfillState { case model.BackfillStateRunning: + defer injectSpan(job.ID, "write-reorg-backfill")() logutil.BgLogger().Info("[ddl] index backfill state running", zap.Int64("job ID", job.ID), zap.String("table", tbl.Meta().Name.O), zap.String("index", indexInfo.Name.O)) switch bfProcess { @@ -828,6 +836,7 @@ func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Jo ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true) return false, ver, errors.Trace(err) case model.BackfillStateReadyToMerge: + defer injectSpan(job.ID, "write-reorg-ready-to-merge")() logutil.BgLogger().Info("[ddl] index backfill state ready to merge", zap.Int64("job ID", job.ID), zap.String("table", tbl.Meta().Name.O), zap.String("index", indexInfo.Name.O)) indexInfo.BackfillState = model.BackfillStateMerging @@ -838,6 +847,7 @@ func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Jo ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true) return false, ver, errors.Trace(err) case model.BackfillStateMerging: + defer injectSpan(job.ID, "write-reorg-merging")() done, ver, err = runReorgJobAndHandleErr(w, d, t, job, tbl, indexInfo, true) if !done { return false, ver, err @@ -1474,6 +1484,7 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC oprStartTime := time.Now() ctx := kv.WithInternalSourceType(context.Background(), w.jobContext.ddlJobSourceType()) + defer injectSpan(w.reorgInfo.Job.ID, "fetch-create-txn")() errInTxn = kv.RunInNewTxn(ctx, w.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { taskCtx.addedCount = 0 taskCtx.scanCount = 0 @@ -1482,7 +1493,9 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC txn.SetOption(kv.ResourceGroupTagger, tagger) } + finishSpan := injectSpan(w.reorgInfo.Job.ID, "fetch-rows") idxRecords, nextKey, taskDone, err := w.fetchRowColVals(txn, handleRange) + finishSpan() if err != nil { return errors.Trace(err) } @@ -1494,6 +1507,7 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC return errors.Trace(err) } + defer injectSpan(w.reorgInfo.Job.ID, "create-records")() for _, idxRecord := range idxRecords { taskCtx.scanCount++ // The index is already exists, we skip it, no needs to backfill it. 
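A note on the call sites above: injectSpan starts the span at the moment it is called and returns a closure that finishes it, which is why every call site uses the defer injectSpan(...)() form. The trailing () defers only the returned finish function, so the span covers the remainder of the enclosing function or state transition. Below is a minimal, self-contained sketch of that pattern; plain wall-clock timing stands in for minitrace here, so it is an illustration of the shape of the helper, not the patch code itself.

package main

import (
	"fmt"
	"time"
)

// injectSpan mirrors the shape of the helper in ddl_worker_util.go: calling it
// starts a span, and invoking the returned closure finishes it. Wall-clock
// timing replaces minitrace.StartSpan in this sketch.
func injectSpan(jobID int64, event string) func() {
	start := time.Now()
	return func() {
		fmt.Printf("job %d: %s took %s\n", jobID, event, time.Since(start))
	}
}

func writeReorgStep(jobID int64) {
	// The trailing () matters: injectSpan runs immediately, only the finish
	// closure is deferred, so the span covers the whole step.
	defer injectSpan(jobID, "write-reorg")()
	time.Sleep(10 * time.Millisecond) // stand-in for the real reorg work
}

func main() {
	writeReorgStep(1)
}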
diff --git a/go.mod b/go.mod index 88b19ed5e8771..570fe47df39ff 100644 --- a/go.mod +++ b/go.mod @@ -58,7 +58,7 @@ require ( github.com/mgechev/revive v1.2.4-0.20220827111817-553604eaced5 github.com/ngaut/pools v0.0.0-20180318154953-b7bc8c42aac7 github.com/nishanths/predeclared v0.2.2 - github.com/opentracing/basictracer-go v1.0.0 + github.com/opentracing/basictracer-go v1.1.0 github.com/opentracing/opentracing-go v1.2.0 github.com/phayes/freeport v0.0.0-20180830031419-95f893ade6f2 github.com/pingcap/badger v1.5.1-0.20220314162537-ab58fbf40580 @@ -84,6 +84,7 @@ require ( github.com/tdakkota/asciicheck v0.1.1 github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2 github.com/tikv/client-go/v2 v2.0.1-0.20220913051514-ffaaf7131a8d + github.com/tikv/minitrace-go v0.0.0-20210623164044-529c511f20f8 github.com/tikv/pd/client v0.0.0-20220725055910-7187a7ab72db github.com/timakin/bodyclose v0.0.0-20210704033933-f49887972144 github.com/twmb/murmur3 v1.1.3 diff --git a/go.sum b/go.sum index 7cbe99ed6235e..07f8f16b71069 100644 --- a/go.sum +++ b/go.sum @@ -711,8 +711,9 @@ github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7J github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= github.com/onsi/gomega v1.18.1 h1:M1GfJqGRrBrrGGsbxzV5dqM2U2ApXefZCQpkukxYRLE= github.com/opentracing-contrib/go-stdlib v0.0.0-20170113013457-1de4cc2120e7/go.mod h1:PLldrQSroqzH70Xl+1DQcGnefIbqsKR7UDaiux3zV+w= -github.com/opentracing/basictracer-go v1.0.0 h1:YyUAhaEfjoWXclZVJ9sGoNct7j4TVk7lZWlQw5UXuoo= github.com/opentracing/basictracer-go v1.0.0/go.mod h1:QfBfYuafItcjQuMwinw9GhYKwFXS9KnPs5lxoYwgW74= +github.com/opentracing/basictracer-go v1.1.0 h1:Oa1fTSBvAl8pa3U+IJYqrKm0NALwH9OsgwOqDv4xJW0= +github.com/opentracing/basictracer-go v1.1.0/go.mod h1:V2HZueSJEp879yv285Aap1BS69fQMD+MNP1mRs6mBQc= github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= @@ -730,6 +731,7 @@ github.com/peterbourgon/g2s v0.0.0-20170223122336-d4e7ad98afea/go.mod h1:1VcHEd3 github.com/petermattis/goid v0.0.0-20170504144140-0ded85884ba5/go.mod h1:jvVRKCrJTQWu0XVbaOlby/2lO20uSCHEMzzplHXte1o= github.com/phayes/freeport v0.0.0-20180830031419-95f893ade6f2 h1:JhzVVoYvbOACxoUmOs6V/G4D5nPVUW73rKvXxP4XUJc= github.com/phayes/freeport v0.0.0-20180830031419-95f893ade6f2/go.mod h1:iIss55rKnNBTvrwdmkUpLnDpZoAHvWaiq5+iMmen4AE= +github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= github.com/pierrec/lz4 v2.6.0+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pierrec/lz4 v2.6.1+incompatible h1:9UY3+iC23yxF0UfGaYrGplQ+79Rg+h/q9FV9ix19jjM= github.com/pierrec/lz4 v2.6.1+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= @@ -850,6 +852,7 @@ github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0/go.mod h1:919Lwc github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/shurcooL/vfsgen v0.0.0-20180711163814-62bca832be04 h1:y0cMJ0qjii33BnD6tMGcF/+gHYsoKQ6tbwQpy233OII= github.com/shurcooL/vfsgen v0.0.0-20180711163814-62bca832be04/go.mod h1:TrYk7fJVaAttu97ZZKrO9UbRa8izdowaMIZcxYMbVaw= +github.com/silentred/gid v1.0.0/go.mod h1:DMQPn66uY+3ed7rWfzOVET7VbDBAhjz+6AmmlixUK08= github.com/sirupsen/logrus v1.2.0/go.mod 
h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= @@ -903,10 +906,13 @@ github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2 h1:mbAskLJ0oJf github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2/go.mod h1:2PfKggNGDuadAa0LElHrByyrz4JPZ9fFx6Gs7nx7ZZU= github.com/tikv/client-go/v2 v2.0.1-0.20220913051514-ffaaf7131a8d h1:p8XInTnkUlLabBT7bDS3aZCeemO6tJ/7b5mHN8WbSIE= github.com/tikv/client-go/v2 v2.0.1-0.20220913051514-ffaaf7131a8d/go.mod h1:6pedLz7wiINLHXwCT1+yMZmzuG42+ubtBkkfcwoukIo= +github.com/tikv/minitrace-go v0.0.0-20210623164044-529c511f20f8 h1:5j4UidaEmHWinqgzzO6alg1EW9qlO28gpKcOJj3088E= +github.com/tikv/minitrace-go v0.0.0-20210623164044-529c511f20f8/go.mod h1:ukJr0BfYeYbO3n15LAV2Dp4jvFpIPF2g14NU227ZTLY= github.com/tikv/pd/client v0.0.0-20220725055910-7187a7ab72db h1:r1eMh9Rny3hfWuBuxOnbsCRrR4FhthiNxLQ5rAUtaww= github.com/tikv/pd/client v0.0.0-20220725055910-7187a7ab72db/go.mod h1:ew8kS0yIcEaSetuuywkTLIUBR+sz3J5XvAYRae11qwc= github.com/timakin/bodyclose v0.0.0-20210704033933-f49887972144 h1:kl4KhGNsJIbDHS9/4U9yQo1UcPQM0kOMJHn29EoH/Ro= github.com/timakin/bodyclose v0.0.0-20210704033933-f49887972144/go.mod h1:Qimiffbc6q9tBWlVV6x0P9sat/ao1xEkREYPPj9hphk= +github.com/tinylib/msgp v1.1.5/go.mod h1:eQsjooMTnV42mHu917E26IogZ2930nFyBQdofk10Udg= github.com/tklauser/go-sysconf v0.3.9/go.mod h1:11DU/5sG7UexIrp/O6g35hrWzu0JxlwQ3LSFUzyeuhs= github.com/tklauser/go-sysconf v0.3.10 h1:IJ1AZGZRWbY8T5Vfk04D9WOA5WSejdflXxP03OUqALw= github.com/tklauser/go-sysconf v0.3.10/go.mod h1:C8XykCvCb+Gn0oNCWPIlcb0RuglQTYaQ2hGm7jmxEFk= @@ -916,6 +922,7 @@ github.com/tklauser/numcpus v0.4.0/go.mod h1:1+UI3pD8NW14VMwdgJNJ1ESk2UnwhAnz5hM github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 h1:uruHq4dN7GR16kFc5fp3d1RIYzJW5onx8Ybykw2YQFA= github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/ttacon/chalk v0.0.0-20160626202418-22c06c80ed31/go.mod h1:onvgF043R+lC5RZ8IT9rBXDaEDnpnw/Cl+HFiw+v/7Q= github.com/twmb/murmur3 v1.1.3 h1:D83U0XYKcHRYwYIpBKf3Pks91Z0Byda/9SJ8B6EMRcA= github.com/twmb/murmur3 v1.1.3/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= github.com/uber/jaeger-client-go v2.22.1+incompatible h1:NHcubEkVbahf9t3p75TOCR83gdUHXjRJvjoBh1yACsM= @@ -1126,6 +1133,7 @@ golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= @@ -1358,6 +1366,7 @@ golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc 
golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE= +golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201125231158-b5590deeca9b/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201201161351-ac6f37ff4c2a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= From a8e171b6869d31a438b1106d8d2e8339deddd047 Mon Sep 17 00:00:00 2001 From: tangenta Date: Thu, 22 Sep 2022 20:28:43 +0800 Subject: [PATCH 02/26] trace lightning import phase --- ddl/ddl_worker_util.go | 4 ++-- ddl/index.go | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ddl/ddl_worker_util.go b/ddl/ddl_worker_util.go index 42d0af336c6c2..2a86ff584f575 100644 --- a/ddl/ddl_worker_util.go +++ b/ddl/ddl_worker_util.go @@ -115,13 +115,13 @@ func analyzeTrace(trace minitrace.Trace) string { groupByEvent[span.Event] = append(spans, &trace.Spans[i]) } } - var sb strings.Builder - sb.WriteString("{") orderedEvents := make([]string, 0, len(groupByEvent)) for event := range groupByEvent { orderedEvents = append(orderedEvents, event) } slices.Sort(orderedEvents) + var sb strings.Builder + sb.WriteString("{") for i := 0; i < len(orderedEvents); i++ { spans := groupByEvent[orderedEvents[i]] sum := uint64(0) diff --git a/ddl/index.go b/ddl/index.go index 0c1504ab46c08..160141087d453 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -813,6 +813,7 @@ func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Jo if !done { return false, ver, nil } + defer injectSpan(job.ID, "write-reorg-lit-import")() err = bc.FinishImport(indexInfo.ID, indexInfo.Unique, tbl) if err != nil { if kv.ErrKeyExists.Equal(err) { @@ -836,7 +837,7 @@ func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Jo ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true) return false, ver, errors.Trace(err) case model.BackfillStateReadyToMerge: - defer injectSpan(job.ID, "write-reorg-ready-to-merge")() + defer injectSpan(job.ID, "write-reorg-ready-merge")() logutil.BgLogger().Info("[ddl] index backfill state ready to merge", zap.Int64("job ID", job.ID), zap.String("table", tbl.Meta().Name.O), zap.String("index", indexInfo.Name.O)) indexInfo.BackfillState = model.BackfillStateMerging From b61eacbfe92a03805da49d034b2b0922d6f9bfb0 Mon Sep 17 00:00:00 2001 From: tangenta Date: Thu, 22 Sep 2022 20:49:08 +0800 Subject: [PATCH 03/26] update bazel --- br/pkg/task/BUILD.bazel | 1 + ddl/BUILD.bazel | 4 ++++ executor/kvtest/BUILD.bazel | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/br/pkg/task/BUILD.bazel b/br/pkg/task/BUILD.bazel index 094c7ceb7ac95..2f39cc1a6f2fe 100644 --- a/br/pkg/task/BUILD.bazel +++ b/br/pkg/task/BUILD.bazel @@ -40,6 +40,7 @@ go_library( "//br/pkg/utils", "//br/pkg/version", "//config", + "//ddl", "//kv", "//parser/model", "//parser/mysql", diff --git a/ddl/BUILD.bazel b/ddl/BUILD.bazel index d691eb00e52f3..43976ce3c9562 100644 --- a/ddl/BUILD.bazel +++ b/ddl/BUILD.bazel @@ -21,6 +21,7 @@ go_library( "ddl_api.go", "ddl_tiflash_api.go", "ddl_worker.go", + "ddl_worker_util.go", 
"ddl_workerpool.go", "delete_range.go", "delete_range_util.go", @@ -99,6 +100,7 @@ go_library( "//util/domainutil", "//util/filter", "//util/gcutil", + "//util/generic", "//util/hack", "//util/logutil", "//util/mathutil", @@ -128,6 +130,8 @@ go_library( "@com_github_tikv_client_go_v2//tikv", "@com_github_tikv_client_go_v2//tikvrpc", "@com_github_tikv_client_go_v2//txnkv/rangetask", + "@com_github_tikv_minitrace_go//:go_default_library", + "@com_github_tikv_minitrace_go//jaeger:go_default_library", "@io_etcd_go_etcd_client_v3//:client", "@org_golang_x_exp//slices", "@org_uber_go_atomic//:atomic", diff --git a/executor/kvtest/BUILD.bazel b/executor/kvtest/BUILD.bazel index 457d5e0d3dcf1..c746c6013029f 100644 --- a/executor/kvtest/BUILD.bazel +++ b/executor/kvtest/BUILD.bazel @@ -7,8 +7,8 @@ go_test( "kv_test.go", "main_test.go", ], - race = "on", flaky = True, + race = "on", deps = [ "//config", "//meta/autoid", From 62889843b2af969c502d8ca3b4b0cc01624bd106 Mon Sep 17 00:00:00 2001 From: tangenta Date: Thu, 22 Sep 2022 21:02:35 +0800 Subject: [PATCH 04/26] update bazel --- DEPS.bzl | 42 ++++++++++++++++++++++++++++++++++++++++-- ddl/ddl_worker_util.go | 1 - go.mod | 2 +- 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/DEPS.bzl b/DEPS.bzl index f1e62f4b48de3..66471e15a7653 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -2645,8 +2645,8 @@ def go_deps(): name = "com_github_opentracing_basictracer_go", build_file_proto_mode = "disable_global", importpath = "github.com/opentracing/basictracer-go", - sum = "h1:YyUAhaEfjoWXclZVJ9sGoNct7j4TVk7lZWlQw5UXuoo=", - version = "v1.0.0", + sum = "h1:Oa1fTSBvAl8pa3U+IJYqrKm0NALwH9OsgwOqDv4xJW0=", + version = "v1.1.0", ) go_repository( name = "com_github_opentracing_contrib_go_stdlib", @@ -2750,6 +2750,14 @@ def go_deps(): sum = "h1:JhzVVoYvbOACxoUmOs6V/G4D5nPVUW73rKvXxP4XUJc=", version = "v0.0.0-20180830031419-95f893ade6f2", ) + go_repository( + name = "com_github_philhofer_fwd", + build_file_proto_mode = "disable", + importpath = "github.com/philhofer/fwd", + sum = "h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ=", + version = "v1.1.1", + ) + go_repository( name = "com_github_pierrec_lz4", build_file_proto_mode = "disable_global", @@ -3193,6 +3201,14 @@ def go_deps(): sum = "h1:y0cMJ0qjii33BnD6tMGcF/+gHYsoKQ6tbwQpy233OII=", version = "v0.0.0-20180711163814-62bca832be04", ) + go_repository( + name = "com_github_silentred_gid", + build_file_proto_mode = "disable", + importpath = "github.com/silentred/gid", + sum = "h1:JdsH8McqPUeY8IN4C0gxENnJG2zysvh+/xDJWhPvGVQ=", + version = "v1.0.0", + ) + go_repository( name = "com_github_sirupsen_logrus", build_file_proto_mode = "disable_global", @@ -3410,6 +3426,14 @@ def go_deps(): sum = "h1:p8XInTnkUlLabBT7bDS3aZCeemO6tJ/7b5mHN8WbSIE=", version = "v2.0.1-0.20220913051514-ffaaf7131a8d", ) + go_repository( + name = "com_github_tikv_minitrace_go", + build_file_proto_mode = "disable", + importpath = "github.com/tikv/minitrace-go", + sum = "h1:5j4UidaEmHWinqgzzO6alg1EW9qlO28gpKcOJj3088E=", + version = "v0.0.0-20210623164044-529c511f20f8", + ) + go_repository( name = "com_github_tikv_pd_client", build_file_proto_mode = "disable_global", @@ -3431,6 +3455,13 @@ def go_deps(): sum = "h1:phZCcypL/vtx6cGxObJgWZ5wexZF5SXFPLOM+ru0e/M=", version = "v0.1.0", ) + go_repository( + name = "com_github_tinylib_msgp", + build_file_proto_mode = "disable", + importpath = "github.com/tinylib/msgp", + sum = "h1:2gXmtWueD2HefZHQe1QOy9HVzmFrLOVvsXwXBQ0ayy0=", + version = "v1.1.5", + ) go_repository( name = 
"com_github_tklauser_go_sysconf", @@ -3467,6 +3498,13 @@ def go_deps(): sum = "h1:iAj0a8e6+dXSL7Liq0aXPox36FiN1dBbjA6lt9fl65s=", version = "v2.5.0", ) + go_repository( + name = "com_github_ttacon_chalk", + build_file_proto_mode = "disable", + importpath = "github.com/ttacon/chalk", + sum = "h1:OXcKh35JaYsGMRzpvFkLv/MEyPuL49CThT1pZ8aSml4=", + version = "v0.0.0-20160626202418-22c06c80ed31", + ) go_repository( name = "com_github_twmb_murmur3", diff --git a/ddl/ddl_worker_util.go b/ddl/ddl_worker_util.go index 2a86ff584f575..5ff5bd23135cf 100644 --- a/ddl/ddl_worker_util.go +++ b/ddl/ddl_worker_util.go @@ -86,7 +86,6 @@ func reportTrace(rootTrace minitrace.Trace) { return } } - } func splitTraces(trace minitrace.Trace) []minitrace.Trace { diff --git a/go.mod b/go.mod index 570fe47df39ff..4833c77a69040 100644 --- a/go.mod +++ b/go.mod @@ -19,6 +19,7 @@ require ( github.com/charithe/durationcheck v0.0.9 github.com/cheggaaa/pb/v3 v3.0.8 github.com/cheynewallace/tabby v1.1.1 + github.com/cockroachdb/errors v1.8.1 github.com/cockroachdb/pebble v0.0.0-20210719141320-8c3bd06debb5 github.com/coocood/freecache v1.2.1 github.com/coreos/go-semver v0.3.0 @@ -136,7 +137,6 @@ require ( github.com/cespare/xxhash v1.1.0 // indirect github.com/cespare/xxhash/v2 v2.1.2 // indirect github.com/chavacava/garif v0.0.0-20220630083739-93517212f375 // indirect - github.com/cockroachdb/errors v1.8.1 // indirect github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f // indirect github.com/cockroachdb/redact v1.0.8 // indirect github.com/cockroachdb/sentry-go v0.6.1-cockroachdb.2 // indirect From 8d5ec902e11485f2870847bcb049d4536b6b991e Mon Sep 17 00:00:00 2001 From: tangenta Date: Fri, 23 Sep 2022 17:20:16 +0800 Subject: [PATCH 05/26] update minitrace --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 4833c77a69040..66d50b3bf6a3a 100644 --- a/go.mod +++ b/go.mod @@ -85,7 +85,7 @@ require ( github.com/tdakkota/asciicheck v0.1.1 github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2 github.com/tikv/client-go/v2 v2.0.1-0.20220913051514-ffaaf7131a8d - github.com/tikv/minitrace-go v0.0.0-20210623164044-529c511f20f8 + github.com/tikv/minitrace-go v0.0.0-20220923091513-8e6316bb4097 github.com/tikv/pd/client v0.0.0-20220725055910-7187a7ab72db github.com/timakin/bodyclose v0.0.0-20210704033933-f49887972144 github.com/twmb/murmur3 v1.1.3 diff --git a/go.sum b/go.sum index 07f8f16b71069..211f252fc2f3c 100644 --- a/go.sum +++ b/go.sum @@ -908,6 +908,8 @@ github.com/tikv/client-go/v2 v2.0.1-0.20220913051514-ffaaf7131a8d h1:p8XInTnkUlL github.com/tikv/client-go/v2 v2.0.1-0.20220913051514-ffaaf7131a8d/go.mod h1:6pedLz7wiINLHXwCT1+yMZmzuG42+ubtBkkfcwoukIo= github.com/tikv/minitrace-go v0.0.0-20210623164044-529c511f20f8 h1:5j4UidaEmHWinqgzzO6alg1EW9qlO28gpKcOJj3088E= github.com/tikv/minitrace-go v0.0.0-20210623164044-529c511f20f8/go.mod h1:ukJr0BfYeYbO3n15LAV2Dp4jvFpIPF2g14NU227ZTLY= +github.com/tikv/minitrace-go v0.0.0-20220923091513-8e6316bb4097 h1:nvIrUVo5YJZMsCn6yTxrpgrokIo/wug5N/nL5mc7v50= +github.com/tikv/minitrace-go v0.0.0-20220923091513-8e6316bb4097/go.mod h1:ukJr0BfYeYbO3n15LAV2Dp4jvFpIPF2g14NU227ZTLY= github.com/tikv/pd/client v0.0.0-20220725055910-7187a7ab72db h1:r1eMh9Rny3hfWuBuxOnbsCRrR4FhthiNxLQ5rAUtaww= github.com/tikv/pd/client v0.0.0-20220725055910-7187a7ab72db/go.mod h1:ew8kS0yIcEaSetuuywkTLIUBR+sz3J5XvAYRae11qwc= github.com/timakin/bodyclose v0.0.0-20210704033933-f49887972144 h1:kl4KhGNsJIbDHS9/4U9yQo1UcPQM0kOMJHn29EoH/Ro= From 
2ccca06c640a9cdc4b38c612d5cdb7d95bb55f3a Mon Sep 17 00:00:00 2001 From: tangenta Date: Fri, 23 Sep 2022 18:05:01 +0800 Subject: [PATCH 06/26] delete unused trace --- ddl/ddl_worker_util.go | 1 + 1 file changed, 1 insertion(+) diff --git a/ddl/ddl_worker_util.go b/ddl/ddl_worker_util.go index 5ff5bd23135cf..2d41cfefb5a41 100644 --- a/ddl/ddl_worker_util.go +++ b/ddl/ddl_worker_util.go @@ -63,6 +63,7 @@ func collectTrace(jobID int64) string { if len(rootTrace.Spans) < 1000 { reportTrace(rootTrace) } + timeDetails.Delete(jobID) return analyzed } return "" From 5f915190964d2bf5d3849680126f89650a29e22a Mon Sep 17 00:00:00 2001 From: tangenta Date: Wed, 28 Sep 2022 18:13:07 +0800 Subject: [PATCH 07/26] print seconds instead --- ddl/ddl_worker_util.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ddl/ddl_worker_util.go b/ddl/ddl_worker_util.go index 2d41cfefb5a41..07cb6aec69b66 100644 --- a/ddl/ddl_worker_util.go +++ b/ddl/ddl_worker_util.go @@ -141,11 +141,11 @@ func analyzeTrace(trace minitrace.Trace) string { sb.WriteString(orderedEvents[i]) sb.WriteString(":") if len(spans) < 20 { - sb.WriteString(time.Duration(sum).String()) + sb.WriteString(fmt.Sprintf("%f", time.Duration(sum).Seconds())) } else { - sb.WriteString(fmt.Sprintf(`{sum: %s, min: %s, max: %s, avg: %s}`, - time.Duration(sum).String(), time.Duration(min).String(), - time.Duration(max).String(), time.Duration(avg).String())) + sb.WriteString(fmt.Sprintf(`{sum: %f, min: %f, max: %f, avg: %f}`, + time.Duration(sum).Seconds(), time.Duration(min).Seconds(), + time.Duration(max).Seconds(), time.Duration(avg).Seconds())) } if i != len(orderedEvents)-1 { sb.WriteString(", ") From b66b142dd4232e003871a04c2c40cb530fbc31c0 Mon Sep 17 00:00:00 2001 From: tangenta Date: Wed, 28 Sep 2022 20:20:56 +0800 Subject: [PATCH 08/26] add more details tracing --- ddl/backfilling.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ddl/backfilling.go b/ddl/backfilling.go index 0736cbb58215f..e1347400a8447 100644 --- a/ddl/backfilling.go +++ b/ddl/backfilling.go @@ -320,7 +320,9 @@ func (w *backfillWorker) run(d *ddlCtx, bf backfiller, job *model.Job) { // Dynamic change batch size. w.batchCnt = int(variable.GetDDLReorgBatchSize()) + finish := injectSpan(job.ID, "handle-backfill-task") result := w.handleBackfillTask(d, task, bf) + finish() w.resultCh <- result } logutil.BgLogger().Info("[ddl] backfill worker exit", @@ -467,6 +469,7 @@ func tryDecodeToHandleString(key kv.Key) string { // handleRangeTasks sends tasks to workers, and returns remaining kvRanges that is not handled. func (dc *ddlCtx) handleRangeTasks(sessPool *sessionPool, t table.Table, workers []*backfillWorker, reorgInfo *reorgInfo, totalAddedCount *int64, kvRanges []kv.KeyRange) ([]kv.KeyRange, error) { + defer injectSpan(reorgInfo.ID, "send-wait-tasks")() batchTasks := make([]*reorgBackfillTask, 0, len(workers)) physicalTableID := reorgInfo.PhysicalTableID @@ -618,10 +621,13 @@ func (dc *ddlCtx) writePhysicalTableRecord(sessPool *sessionPool, t table.Physic jc := dc.jobContext(job) for { + finish := injectSpan(job.ID, "split-table-ranges") kvRanges, err := splitTableRanges(t, reorgInfo.d.store, startKey, endKey) if err != nil { + finish() return errors.Trace(err) } + finish() // For dynamic adjust backfill worker number. 
if err := loadDDLReorgVars(dc.ctx, sessPool); err != nil { From ea2908d5999baad94383d4d931d9338f7975c37a Mon Sep 17 00:00:00 2001 From: tangenta Date: Thu, 29 Sep 2022 15:42:59 +0800 Subject: [PATCH 09/26] group event by worker ID --- ddl/backfilling.go | 2 +- ddl/index.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ddl/backfilling.go b/ddl/backfilling.go index e1347400a8447..0cb8877ed4eef 100644 --- a/ddl/backfilling.go +++ b/ddl/backfilling.go @@ -320,7 +320,7 @@ func (w *backfillWorker) run(d *ddlCtx, bf backfiller, job *model.Job) { // Dynamic change batch size. w.batchCnt = int(variable.GetDDLReorgBatchSize()) - finish := injectSpan(job.ID, "handle-backfill-task") + finish := injectSpan(job.ID, fmt.Sprintf("%s-%d", "handle-backfill-task", w.id)) result := w.handleBackfillTask(d, task, bf) finish() w.resultCh <- result diff --git a/ddl/index.go b/ddl/index.go index 160141087d453..f471a0cc881e7 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -1485,7 +1485,7 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC oprStartTime := time.Now() ctx := kv.WithInternalSourceType(context.Background(), w.jobContext.ddlJobSourceType()) - defer injectSpan(w.reorgInfo.Job.ID, "fetch-create-txn")() + defer injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d", "fetch-create-txn", w.id))() errInTxn = kv.RunInNewTxn(ctx, w.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { taskCtx.addedCount = 0 taskCtx.scanCount = 0 @@ -1494,7 +1494,7 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC txn.SetOption(kv.ResourceGroupTagger, tagger) } - finishSpan := injectSpan(w.reorgInfo.Job.ID, "fetch-rows") + finishSpan := injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d", "fetch-rows", w.id)) idxRecords, nextKey, taskDone, err := w.fetchRowColVals(txn, handleRange) finishSpan() if err != nil { @@ -1508,7 +1508,7 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC return errors.Trace(err) } - defer injectSpan(w.reorgInfo.Job.ID, "create-records")() + defer injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d", "create-records", w.id))() for _, idxRecord := range idxRecords { taskCtx.scanCount++ // The index is already exists, we skip it, no needs to backfill it. 
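The per-worker suffix added in this patch (for example "handle-backfill-task-3" or "fetch-rows-0") keeps spans from concurrent backfill workers in separate buckets, so analyzeTrace reports one aggregate per worker instead of mixing all workers together. Combined with the seconds formatting from the earlier "print seconds instead" patch, the "time details" field logged when the job finishes looks roughly like the line below; the event names come from the patches, while the numbers are invented purely for illustration:

{delete-only:0.000431, fetch-rows-0:{sum: 35.402118, min: 0.080021, max: 1.120933, avg: 0.276579}, handle-backfill-task-0:{sum: 90.113204, min: 0.201556, max: 2.933104, avg: 0.703228}, none:0.000012, write-only:0.000389, write-reorg:184.205113}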
From bb950ffb28b0dfbb60c02d29a5d8592b19aab6e0 Mon Sep 17 00:00:00 2001 From: tangenta Date: Mon, 10 Oct 2022 16:16:06 +0800 Subject: [PATCH 10/26] use copr to fetch row values --- DEPS.bzl | 4 +- ddl/BUILD.bazel | 5 +- ddl/backfilling.go | 7 + ddl/index.go | 28 +++- ddl/index_distsql.go | 260 +++++++++++++++++++++++++++++++ ddl/index_merge_tmp_test.go | 1 + sessionctx/variable/sysvar.go | 6 + sessionctx/variable/tidb_vars.go | 5 + table/tables/index.go | 6 + 9 files changed, 313 insertions(+), 9 deletions(-) create mode 100644 ddl/index_distsql.go diff --git a/DEPS.bzl b/DEPS.bzl index 66471e15a7653..0bbb1ceb3335a 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -3430,8 +3430,8 @@ def go_deps(): name = "com_github_tikv_minitrace_go", build_file_proto_mode = "disable", importpath = "github.com/tikv/minitrace-go", - sum = "h1:5j4UidaEmHWinqgzzO6alg1EW9qlO28gpKcOJj3088E=", - version = "v0.0.0-20210623164044-529c511f20f8", + sum = "h1:nvIrUVo5YJZMsCn6yTxrpgrokIo/wug5N/nL5mc7v50=", + version = "v0.0.0-20220923091513-8e6316bb4097", ) go_repository( diff --git a/ddl/BUILD.bazel b/ddl/BUILD.bazel index 43976ce3c9562..2d3a29a82b6f0 100644 --- a/ddl/BUILD.bazel +++ b/ddl/BUILD.bazel @@ -28,6 +28,7 @@ go_library( "foreign_key.go", "generated_column.go", "index.go", + "index_distsql.go", "index_merge_tmp.go", "job_table.go", "mock.go", @@ -130,8 +131,8 @@ go_library( "@com_github_tikv_client_go_v2//tikv", "@com_github_tikv_client_go_v2//tikvrpc", "@com_github_tikv_client_go_v2//txnkv/rangetask", - "@com_github_tikv_minitrace_go//:go_default_library", - "@com_github_tikv_minitrace_go//jaeger:go_default_library", + "@com_github_tikv_minitrace_go//:minitrace-go", + "@com_github_tikv_minitrace_go//jaeger", "@io_etcd_go_etcd_client_v3//:client", "@org_golang_x_exp//slices", "@org_uber_go_atomic//:atomic", diff --git a/ddl/backfilling.go b/ddl/backfilling.go index 0cb8877ed4eef..0a4bcf6270e7c 100644 --- a/ddl/backfilling.go +++ b/ddl/backfilling.go @@ -155,6 +155,13 @@ type reorgBackfillTask struct { endInclude bool } +func (r *reorgBackfillTask) excludedEndKey() kv.Key { + if r.endInclude { + return r.endKey.Next() + } + return r.endKey +} + func (r *reorgBackfillTask) String() string { physicalID := strconv.FormatInt(r.physicalTableID, 10) startKey := tryDecodeToHandleString(r.startKey) diff --git a/ddl/index.go b/ddl/index.go index f471a0cc881e7..c0e2bbd122c9c 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -39,6 +39,7 @@ import ( "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/sessionctx" + "github.com/pingcap/tidb/sessionctx/variable" "github.com/pingcap/tidb/table" "github.com/pingcap/tidb/table/tables" "github.com/pingcap/tidb/tablecodec" @@ -1185,6 +1186,7 @@ type addIndexWorker struct { baseIndexWorker index table.Index writerCtx *ingest.WriterContext + coprCtx *copContext // The following attributes are used to reduce memory allocation. 
idxKeyBufs [][]byte @@ -1218,6 +1220,13 @@ func newAddIndexWorker(sessCtx sessionctx.Context, id int, t table.PhysicalTable return nil, err } } + var coprCtx *copContext + if variable.EnableCoprRead.Load() { + coprCtx = newCopContext(t.Meta(), indexInfo) + logutil.BgLogger().Info("[ddl] fetch index values with coprocessor", + zap.String("table", t.Meta().Name.O), + zap.String("index", indexInfo.Name.O)) + } return &addIndexWorker{ baseIndexWorker: baseIndexWorker{ @@ -1232,6 +1241,7 @@ func newAddIndexWorker(sessCtx sessionctx.Context, id int, t table.PhysicalTable }, index: index, writerCtx: lwCtx, + coprCtx: coprCtx, }, nil } @@ -1319,6 +1329,7 @@ func (w *baseIndexWorker) updateRowDecoder(handle kv.Handle, rawRecord []byte) e // 3. Boolean indicates whether the task is done. // 4. error occurs in fetchRowColVals. nil if no error occurs. func (w *baseIndexWorker) fetchRowColVals(txn kv.Transaction, taskRange reorgBackfillTask) ([]*indexRecord, kv.Key, bool, error) { + defer injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d", "fetch-rows", w.id))() // TODO: use tableScan to prune columns. w.idxRecords = w.idxRecords[:0] startTime := time.Now() @@ -1486,7 +1497,7 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC oprStartTime := time.Now() ctx := kv.WithInternalSourceType(context.Background(), w.jobContext.ddlJobSourceType()) defer injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d", "fetch-create-txn", w.id))() - errInTxn = kv.RunInNewTxn(ctx, w.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { + errInTxn = kv.RunInNewTxn(ctx, w.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) (err error) { taskCtx.addedCount = 0 taskCtx.scanCount = 0 txn.SetOption(kv.Priority, w.priority) @@ -1494,9 +1505,16 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC txn.SetOption(kv.ResourceGroupTagger, tagger) } - finishSpan := injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d", "fetch-rows", w.id)) - idxRecords, nextKey, taskDone, err := w.fetchRowColVals(txn, handleRange) - finishSpan() + var ( + idxRecords []*indexRecord + nextKey kv.Key + taskDone bool + ) + if w.coprCtx != nil { + idxRecords, nextKey, taskDone, err = w.fetchRowColValsFromSelect(ctx, txn, handleRange) + } else { + idxRecords, nextKey, taskDone, err = w.fetchRowColVals(txn, handleRange) + } if err != nil { return errors.Trace(err) } @@ -1519,7 +1537,7 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC // When the backfill-merge process is used, the writes from DML are redirected to a temp index. // The write-conflict will be handled by the merge worker. Thus, the locks are unnecessary. - if !needMergeTmpIdx { + if !needMergeTmpIdx && idxRecord.key != nil { // We need to add this lock to make sure pessimistic transaction can realize this operation. // For the normal pessimistic transaction, it's ok. But if async commit is used, it may lead to inconsistent data and index. err := txn.LockKeys(context.Background(), new(kv.LockCtx), idxRecord.key) diff --git a/ddl/index_distsql.go b/ddl/index_distsql.go new file mode 100644 index 0000000000000..5769449e7d0cf --- /dev/null +++ b/ddl/index_distsql.go @@ -0,0 +1,260 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ddl + +import ( + "context" + "fmt" + "sync/atomic" + + "github.com/pingcap/tidb/distsql" + "github.com/pingcap/tidb/kv" + "github.com/pingcap/tidb/parser/model" + "github.com/pingcap/tidb/parser/mysql" + "github.com/pingcap/tidb/sessionctx" + "github.com/pingcap/tidb/sessionctx/stmtctx" + "github.com/pingcap/tidb/sessionctx/variable" + "github.com/pingcap/tidb/statistics" + "github.com/pingcap/tidb/table" + "github.com/pingcap/tidb/table/tables" + "github.com/pingcap/tidb/tablecodec" + "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/util/chunk" + "github.com/pingcap/tidb/util/codec" + "github.com/pingcap/tidb/util/timeutil" + "github.com/pingcap/tipb/go-tipb" +) + +type copContext struct { + colInfos []*model.ColumnInfo + fieldTps []*types.FieldType + srcChunk *chunk.Chunk + indexRecordChan chan *indexRecord + bfTasks map[string]struct{} + err atomic.Value +} + +func (c *copContext) isNewTask(task reorgBackfillTask) bool { + _, found := c.bfTasks[string(task.endKey)] + return !found +} + +func (c *copContext) recordTask(task reorgBackfillTask) { + c.bfTasks[string(task.endKey)] = struct{}{} +} + +func newCopContext(tblInfo *model.TableInfo, idxInfo *model.IndexInfo) *copContext { + colInfos := make([]*model.ColumnInfo, 0, len(idxInfo.Columns)) + fieldTps := make([]*types.FieldType, 0, len(idxInfo.Columns)) + for _, idxCol := range idxInfo.Columns { + c := tblInfo.Columns[idxCol.Offset] + colInfos = append(colInfos, c) + fieldTps = append(fieldTps, &c.FieldType) + } + + pkColInfos, pkFieldTps := buildHandleColInfoAndFieldTypes(tblInfo) + colInfos = append(colInfos, pkColInfos...) + fieldTps = append(fieldTps, pkFieldTps...) + + return &copContext{ + colInfos: colInfos, + fieldTps: fieldTps, + srcChunk: chunk.NewChunkWithCapacity(fieldTps, 1), + bfTasks: make(map[string]struct{}, 16), + } +} + +func (w *addIndexWorker) buildTableScan(ctx context.Context, txn kv.Transaction, start, end kv.Key) (distsql.SelectResult, error) { + dagPB, err := w.buildDAGPB(w.coprCtx.colInfos) + if err != nil { + return nil, err + } + + var builder distsql.RequestBuilder + kvReq, err := builder. + SetDAGRequest(dagPB). + SetStartTS(txn.StartTS()). + SetKeyRanges([]kv.KeyRange{{StartKey: start, EndKey: end}}). + SetKeepOrder(true). + SetFromSessionVars(w.sessCtx.GetSessionVars()). + SetFromInfoSchema(w.sessCtx.GetDomainInfoSchema()). 
+ Build() + if err != nil { + return nil, err + } + + kvReq.Concurrency = 1 + return distsql.Select(ctx, w.sessCtx, kvReq, w.coprCtx.fieldTps, statistics.NewQueryFeedback(0, nil, 0, false)) +} + +func (w *addIndexWorker) buildDAGPB(colInfos []*model.ColumnInfo) (*tipb.DAGRequest, error) { + dagReq := &tipb.DAGRequest{} + dagReq.TimeZoneName, dagReq.TimeZoneOffset = timeutil.Zone(w.sessCtx.GetSessionVars().Location()) + sc := w.sessCtx.GetSessionVars().StmtCtx + dagReq.Flags = sc.PushDownFlags() + for i := range colInfos { + dagReq.OutputOffsets = append(dagReq.OutputOffsets, uint32(i)) + } + execPB, err := w.constructTableScanPB(w.table.Meta(), colInfos) + if err != nil { + return nil, err + } + dagReq.Executors = append(dagReq.Executors, execPB) + distsql.SetEncodeType(w.sessCtx, dagReq) + return dagReq, nil +} + +func (w *addIndexWorker) constructTableScanPB(tblInfo *model.TableInfo, colInfos []*model.ColumnInfo) (*tipb.Executor, error) { + tblScan := tables.BuildTableScanFromInfos(tblInfo, colInfos) + tblScan.TableId = w.table.Meta().ID + err := setPBColumnsDefaultValue(w.sessCtx, tblScan.Columns, colInfos) + return &tipb.Executor{Tp: tipb.ExecType_TypeTableScan, TblScan: tblScan}, err +} + +func (w *addIndexWorker) fetchRowColValsFromSelect(ctx context.Context, txn kv.Transaction, + handleRange reorgBackfillTask) ([]*indexRecord, kv.Key, bool, error) { + sctx := w.sessCtx.GetSessionVars().StmtCtx + if w.coprCtx.isNewTask(handleRange) { + w.coprCtx.recordTask(handleRange) + w.coprCtx.indexRecordChan = make(chan *indexRecord, variable.MaxDDLReorgBatchSize) + go func() { + defer injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d", "fetch-rows", w.id))() + defer close(w.coprCtx.indexRecordChan) + srcResult, err := w.buildTableScan(w.jobContext.ddlJobCtx, txn, handleRange.startKey, handleRange.excludedEndKey()) + if err != nil { + w.coprCtx.err.Store(err) + return + } + srcChunk := w.coprCtx.srcChunk + for { + err := srcResult.Next(ctx, srcChunk) + if err != nil { + w.coprCtx.err.Store(err) + return + } + if srcChunk.NumRows() == 0 { + return + } + iter := chunk.NewIterator4Chunk(srcChunk) + for row := iter.Begin(); row != iter.End(); row = iter.Next() { + idxDt, hdDt := extractIdxValsAndHandle(row, w.index.Meta(), w.coprCtx.fieldTps) + handle, err := buildHandle(hdDt, w.table.Meta(), w.index.Meta(), sctx) + if err != nil { + w.coprCtx.err.Store(err) + return + } + rsData := tables.TryGetHandleRestoredDataWrapper(w.table, hdDt, nil, w.index.Meta()) + w.coprCtx.indexRecordChan <- &indexRecord{handle: handle, key: nil, vals: idxDt, rsData: rsData, skip: false} + } + } + }() + } + w.idxRecords = w.idxRecords[:0] + taskDone := false + var current kv.Handle + for { + record, ok := <-w.coprCtx.indexRecordChan + if !ok { // The channel is closed. 
+ taskDone = true + break + } + w.idxRecords = append(w.idxRecords, record) + current = record.handle + if len(w.idxRecords) >= w.batchCnt { + break + } + } + nextKey := handleRange.endKey + if current != nil { + nextKey = tablecodec.EncodeRecordKey(w.table.RecordPrefix(), current).Next() + } + err := w.coprCtx.err.Load() + if err != nil { + return nil, nil, false, err.(error) + } + return w.idxRecords, nextKey, taskDone, nil +} + +func buildHandleColInfoAndFieldTypes(tbInfo *model.TableInfo) ([]*model.ColumnInfo, []*types.FieldType) { + if tbInfo.PKIsHandle { + for i := range tbInfo.Columns { + if mysql.HasPriKeyFlag(tbInfo.Columns[i].GetFlag()) { + return []*model.ColumnInfo{tbInfo.Columns[i]}, []*types.FieldType{&tbInfo.Columns[i].FieldType} + } + } + } else if tbInfo.IsCommonHandle { + primaryIdx := tables.FindPrimaryIndex(tbInfo) + pkCols := make([]*model.ColumnInfo, 0, len(primaryIdx.Columns)) + pkFts := make([]*types.FieldType, 0, len(primaryIdx.Columns)) + for i := range tbInfo.Columns { + pkCols = append(pkCols, tbInfo.Columns[i]) + pkFts = append(pkFts, &tbInfo.Columns[i].FieldType) + } + return pkCols, pkFts + } + extra := model.NewExtraHandleColInfo() + return []*model.ColumnInfo{extra}, []*types.FieldType{&extra.FieldType} +} + +func extractIdxValsAndHandle(row chunk.Row, idxInfo *model.IndexInfo, fieldTps []*types.FieldType) ([]types.Datum, []types.Datum) { + datumBuf := make([]types.Datum, 0, len(fieldTps)) + idxColLen := len(idxInfo.Columns) + for i, ft := range fieldTps { + datumBuf = append(datumBuf, row.GetDatum(i, ft)) + } + return datumBuf[:idxColLen], datumBuf[idxColLen:] +} + +func buildHandle(pkDts []types.Datum, tblInfo *model.TableInfo, + idxInfo *model.IndexInfo, stmtCtx *stmtctx.StatementContext) (kv.Handle, error) { + if tblInfo.IsCommonHandle { + tablecodec.TruncateIndexValues(tblInfo, idxInfo, pkDts) + handleBytes, err := codec.EncodeKey(stmtCtx, nil, pkDts...) + if err != nil { + return nil, err + } + return kv.NewCommonHandle(handleBytes) + } + return kv.IntHandle(pkDts[0].GetInt64()), nil +} + +// setPBColumnsDefaultValue sets the default values of tipb.ColumnInfos. +func setPBColumnsDefaultValue(ctx sessionctx.Context, pbColumns []*tipb.ColumnInfo, columns []*model.ColumnInfo) error { + for i, c := range columns { + // For virtual columns, we set their default values to NULL so that TiKV will return NULL properly, + // They real values will be compute later. + if c.IsGenerated() && !c.GeneratedStored { + pbColumns[i].DefaultVal = []byte{codec.NilFlag} + } + if c.GetOriginDefaultValue() == nil { + continue + } + + sessVars := ctx.GetSessionVars() + originStrict := sessVars.StrictSQLMode + sessVars.StrictSQLMode = false + d, err := table.GetColOriginDefaultValue(ctx, c) + sessVars.StrictSQLMode = originStrict + if err != nil { + return err + } + + pbColumns[i].DefaultVal, err = tablecodec.EncodeValue(sessVars.StmtCtx, nil, d) + if err != nil { + return err + } + } + return nil +} diff --git a/ddl/index_merge_tmp_test.go b/ddl/index_merge_tmp_test.go index 24522a9548608..4089db162ac4b 100644 --- a/ddl/index_merge_tmp_test.go +++ b/ddl/index_merge_tmp_test.go @@ -41,6 +41,7 @@ func TestAddIndexMergeProcess(t *testing.T) { // Force onCreateIndex use the txn-merge process. 
ingest.LitInitialized = false tk.MustExec("set @@global.tidb_ddl_enable_fast_reorg = 1;") + tk.MustExec("set global tidb_ddl_enable_copr_read = 1;") var checkErr error var runDML, backfillDone bool diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go index 37fd015fce28f..1d949520e9aad 100644 --- a/sessionctx/variable/sysvar.go +++ b/sessionctx/variable/sysvar.go @@ -1826,6 +1826,12 @@ var defaultSysVars = []*SysVar{ EnableFastReorg.Store(TiDBOptOn(val)) return nil }}, + {Scope: ScopeGlobal, Name: TiDBDDLEnableCoprRead, Value: BoolToOnOff(DefTiDBDDLEnableCoprRead), Type: TypeBool, GetGlobal: func(sv *SessionVars) (string, error) { + return BoolToOnOff(EnableCoprRead.Load()), nil + }, SetGlobal: func(s *SessionVars, val string) error { + EnableCoprRead.Store(TiDBOptOn(val)) + return nil + }}, // This system var is set disk quota for lightning sort dir, from 100 GB to 1PB. {Scope: ScopeGlobal, Name: TiDBDDLDiskQuota, Value: strconv.Itoa(DefTiDBDDLDiskQuota), Type: TypeInt, MinValue: DefTiDBDDLDiskQuota, MaxValue: 1024 * 1024 * DefTiDBDDLDiskQuota / 100, GetGlobal: func(sv *SessionVars) (string, error) { return strconv.FormatUint(DDLDiskQuota.Load(), 10), nil diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index 06da8d8c53ce8..841f598e39f3e 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -837,6 +837,8 @@ const ( TiDBDDLEnableFastReorg = "tidb_ddl_enable_fast_reorg" // TiDBDDLDiskQuota used to set disk quota for lightning add index. TiDBDDLDiskQuota = "tidb_ddl_disk_quota" + // TiDBDDLEnableCoprRead is used to control whether to read with coprocessor for adding index. + TiDBDDLEnableCoprRead = "tidb_ddl_enable_copr_read" ) // TiDB intentional limits @@ -1058,6 +1060,7 @@ const ( DefTiDBEnableMDL = false DefTiFlashFastScan = false DefTiDBEnableFastReorg = false + DefTiDBDDLEnableCoprRead = false DefTiDBDDLDiskQuota = 100 * 1024 * 1024 * 1024 // 100GB DefExecutorConcurrency = 5 DefTiDBEnableGeneralPlanCache = false @@ -1118,6 +1121,8 @@ var ( EnableMDL = atomic.NewBool(DefTiDBEnableMDL) // EnableFastReorg indicates whether to use lightning to enhance DDL reorg performance. EnableFastReorg = atomic.NewBool(DefTiDBEnableFastReorg) + // EnableCoprRead indicates whether to read with coprocessor for adding index. + EnableCoprRead = atomic.NewBool(DefTiDBDDLEnableCoprRead) // DDLDiskQuota is the temporary variable for set disk quota for lightning DDLDiskQuota = atomic.NewUint64(DefTiDBDDLDiskQuota) // EnableForeignKey indicates whether to enable foreign key feature. diff --git a/table/tables/index.go b/table/tables/index.go index 77811eacc3a8c..a1570ddfd2fc2 100644 --- a/table/tables/index.go +++ b/table/tables/index.go @@ -16,6 +16,7 @@ package tables import ( "context" + "encoding/hex" "sync" "github.com/opentracing/opentracing-go" @@ -27,7 +28,9 @@ import ( "github.com/pingcap/tidb/table" "github.com/pingcap/tidb/tablecodec" "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tidb/util/rowcodec" + "go.uber.org/zap" ) // index is the data structure for index data in the KV store. 
@@ -115,6 +118,9 @@ func (c *index) Create(sctx sessionctx.Context, txn kv.Transaction, indexedValue if err != nil { return nil, err } + if c.idxInfo.Name.L == "i1" || c.idxInfo.Name.L == "i0" { + logutil.BgLogger().Info("index.Create()", zap.String("key", hex.EncodeToString(key))) + } var ( tempKey []byte From f2ee374aacc4713f1308b3ca444954e2be70e748 Mon Sep 17 00:00:00 2001 From: tangenta Date: Tue, 11 Oct 2022 15:02:29 +0800 Subject: [PATCH 11/26] support push down index record encoding --- ddl/index.go | 77 +++++++++++++------- ddl/index_distsql.go | 117 +++++++++++++++++++++++++++++++ distsql/request_builder.go | 11 +++ go.mod | 2 + go.sum | 6 +- kv/kv.go | 1 + sessionctx/variable/sysvar.go | 6 +- sessionctx/variable/tidb_vars.go | 4 +- table/tables/index.go | 6 -- 9 files changed, 189 insertions(+), 41 deletions(-) diff --git a/ddl/index.go b/ddl/index.go index c0e2bbd122c9c..2c8f765602662 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -1164,6 +1164,12 @@ type indexRecord struct { vals []types.Datum // It's the index values. rsData []types.Datum // It's the restored data for handle. skip bool // skip indicates that the index key is already exists, we should not add it. + idxKV *indexKV +} + +type indexKV struct { + key []byte + value []byte } type baseIndexWorker struct { @@ -1221,7 +1227,7 @@ func newAddIndexWorker(sessCtx sessionctx.Context, id int, t table.PhysicalTable } } var coprCtx *copContext - if variable.EnableCoprRead.Load() { + if variable.EnableCoprRead.Load() != "0" { coprCtx = newCopContext(t.Meta(), indexInfo) logutil.BgLogger().Info("[ddl] fetch index values with coprocessor", zap.String("table", t.Meta().Name.O), @@ -1506,12 +1512,18 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC } var ( - idxRecords []*indexRecord - nextKey kv.Key - taskDone bool + idxRecords []*indexRecord + nextKey kv.Key + taskDone bool + encPushDown bool ) if w.coprCtx != nil { - idxRecords, nextKey, taskDone, err = w.fetchRowColValsFromSelect(ctx, txn, handleRange) + encPushDown = variable.EnableCoprRead.Load() == "2" + if encPushDown { + idxRecords, nextKey, taskDone, err = w.fetchRowColValsFromCopr(ctx, txn, handleRange) + } else { + idxRecords, nextKey, taskDone, err = w.fetchRowColValsFromSelect(ctx, txn, handleRange) + } } else { idxRecords, nextKey, taskDone, err = w.fetchRowColVals(txn, handleRange) } @@ -1548,31 +1560,44 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC // Create the index. if w.writerCtx == nil { - handle, err := w.index.Create(w.sessCtx, txn, idxRecord.vals, idxRecord.handle, idxRecord.rsData, table.WithIgnoreAssertion, table.FromBackfill) - if err != nil { - if kv.ErrKeyExists.Equal(err) && idxRecord.handle.Equal(handle) { - // Index already exists, skip it. - continue + if encPushDown { + err := txn.GetMemBuffer().Set(idxRecord.idxKV.key, idxRecord.idxKV.value) + if err != nil { + return errors.Trace(err) + } + } else { + handle, err := w.index.Create(w.sessCtx, txn, idxRecord.vals, idxRecord.handle, idxRecord.rsData, table.WithIgnoreAssertion, table.FromBackfill) + if err != nil { + if kv.ErrKeyExists.Equal(err) && idxRecord.handle.Equal(handle) { + // Index already exists, skip it. + continue + } + return errors.Trace(err) } - - return errors.Trace(err) } } else { // The lightning environment is ready. 
- vars := w.sessCtx.GetSessionVars() - sCtx, writeBufs := vars.StmtCtx, vars.GetWriteStmtBufs() - key, distinct, err := w.index.GenIndexKey(sCtx, idxRecord.vals, idxRecord.handle, writeBufs.IndexKeyBuf) - if err != nil { - return errors.Trace(err) - } - idxVal, err := w.index.GenIndexValue(sCtx, distinct, idxRecord.vals, idxRecord.handle, idxRecord.rsData) - if err != nil { - return errors.Trace(err) - } - err = w.writerCtx.WriteRow(key, idxVal) - if err != nil { - return errors.Trace(err) + if encPushDown { + err = w.writerCtx.WriteRow(idxRecord.idxKV.key, idxRecord.idxKV.value) + if err != nil { + return errors.Trace(err) + } + } else { + vars := w.sessCtx.GetSessionVars() + sCtx, writeBufs := vars.StmtCtx, vars.GetWriteStmtBufs() + key, distinct, err := w.index.GenIndexKey(sCtx, idxRecord.vals, idxRecord.handle, writeBufs.IndexKeyBuf) + if err != nil { + return errors.Trace(err) + } + idxVal, err := w.index.GenIndexValue(sCtx, distinct, idxRecord.vals, idxRecord.handle, idxRecord.rsData) + if err != nil { + return errors.Trace(err) + } + err = w.writerCtx.WriteRow(key, idxVal) + if err != nil { + return errors.Trace(err) + } + writeBufs.IndexKeyBuf = key } - writeBufs.IndexKeyBuf = key } taskCtx.addedCount++ } diff --git a/ddl/index_distsql.go b/ddl/index_distsql.go index 5769449e7d0cf..0fe062a12315a 100644 --- a/ddl/index_distsql.go +++ b/ddl/index_distsql.go @@ -19,6 +19,7 @@ import ( "fmt" "sync/atomic" + "github.com/pingcap/errors" "github.com/pingcap/tidb/distsql" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/parser/model" @@ -31,6 +32,7 @@ import ( "github.com/pingcap/tidb/table/tables" "github.com/pingcap/tidb/tablecodec" "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/util" "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/codec" "github.com/pingcap/tidb/util/timeutil" @@ -99,6 +101,25 @@ func (w *addIndexWorker) buildTableScan(ctx context.Context, txn kv.Transaction, return distsql.Select(ctx, w.sessCtx, kvReq, w.coprCtx.fieldTps, statistics.NewQueryFeedback(0, nil, 0, false)) } +func (w *addIndexWorker) buildDDLPB(colInfos []*model.ColumnInfo) (*tipb.DDLRequest, error) { + ddlReq := &tipb.DDLRequest{} + ddlReq.TableInfo = new(tipb.TableInfo) + ddlReq.IndexInfo = new(tipb.IndexInfo) + ddlReq.TableInfo.TableId = w.table.Meta().ID + ddlReq.TableInfo.Columns = util.ColumnsToProto(colInfos, w.table.Meta().PKIsHandle) + ddlReq.IndexInfo.TableId = w.table.Meta().ID + ddlReq.IndexInfo.IndexId = w.index.Meta().ID + indexColInfos := make([]*model.ColumnInfo, 0, len(w.index.Meta().Columns)) + for _, idxCol := range w.index.Meta().Columns { + indexColInfos = append(indexColInfos, w.table.Cols()[idxCol.Offset].ColumnInfo) + } + ddlReq.IndexInfo.Columns = util.ColumnsToProto(indexColInfos, w.table.Meta().PKIsHandle) + ddlReq.Columns = ddlReq.TableInfo.Columns + ddlReq.IndexInfo.Unique = w.index.Meta().Unique + + return ddlReq, nil +} + func (w *addIndexWorker) buildDAGPB(colInfos []*model.ColumnInfo) (*tipb.DAGRequest, error) { dagReq := &tipb.DAGRequest{} dagReq.TimeZoneName, dagReq.TimeZoneOffset = timeutil.Zone(w.sessCtx.GetSessionVars().Location()) @@ -123,6 +144,102 @@ func (w *addIndexWorker) constructTableScanPB(tblInfo *model.TableInfo, colInfos return &tipb.Executor{Tp: tipb.ExecType_TypeTableScan, TblScan: tblScan}, err } +func (w *addIndexWorker) fetchRowColValsFromCopr(ctx context.Context, txn kv.Transaction, + handleRange reorgBackfillTask) ([]*indexRecord, kv.Key, bool, error) { + if w.coprCtx.isNewTask(handleRange) { + 
w.coprCtx.recordTask(handleRange) + w.coprCtx.indexRecordChan = make(chan *indexRecord, variable.MaxDDLReorgBatchSize) + go func() { + defer injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d", "fetch-rows", w.id))() + defer close(w.coprCtx.indexRecordChan) + resp, err := w.buildScanIndexKV(w.jobContext.ddlJobCtx, txn, handleRange.startKey, handleRange.excludedEndKey()) + if err != nil { + w.coprCtx.err.Store(err) + return + } + for { + data, err := resp.Next(ctx) + if err != nil { + w.coprCtx.err.Store(err) + return + } + if data == nil { + break + } + colResp := &tipb.DDLResponse{} + if err = colResp.Unmarshal(data.GetData()); err != nil { + w.coprCtx.err.Store(err) + return + } + for i := 0; i < len(colResp.Keys); i++ { + w.coprCtx.indexRecordChan <- &indexRecord{idxKV: &indexKV{key: colResp.Keys[i], value: colResp.Values[i]}} + } + } + }() + } + w.idxRecords = w.idxRecords[:0] + taskDone := false + var current *indexKV + for { + record, ok := <-w.coprCtx.indexRecordChan + if !ok { // The channel is closed. + taskDone = true + break + } + w.idxRecords = append(w.idxRecords, record) + current = record.idxKV + if len(w.idxRecords) >= w.batchCnt { + break + } + } + nextKey := handleRange.endKey + if current != nil { + h, err := tablecodec.DecodeIndexHandle(current.key, current.value, len(w.index.Meta().Columns)) + if err != nil { + return nil, nil, false, errors.Trace(err) + } + nextKey = tablecodec.EncodeRecordKey(w.table.RecordPrefix(), h).Next() + } + err := w.coprCtx.err.Load() + if err != nil { + return nil, nil, false, err.(error) + } + return w.idxRecords, nextKey, taskDone, nil +} + +func (w *addIndexWorker) buildScanIndexKV(ctx context.Context, txn kv.Transaction, start, end kv.Key) (kv.Response, error) { + ddlPB, err := w.buildDDLPB(w.coprCtx.colInfos) + if err != nil { + return nil, err + } + + ddlPB.Ranges = append(ddlPB.Ranges, tipb.KeyRange{Low: start, High: end}) + + var builder distsql.RequestBuilder + kvReq, err := builder. + SetDDLRequest(ddlPB). + SetStartTS(txn.StartTS()). + SetKeyRanges([]kv.KeyRange{{StartKey: start, EndKey: end}}). + SetKeepOrder(true). + SetFromSessionVars(w.sessCtx.GetSessionVars()). + SetFromInfoSchema(w.sessCtx.GetDomainInfoSchema()). + Build() + if err != nil { + return nil, err + } + + kvReq.Concurrency = 1 + option := &kv.ClientSendOption{ + SessionMemTracker: w.sessCtx.GetSessionVars().StmtCtx.MemTracker, + } + + resp := w.sessCtx.GetClient().Send(ctx, kvReq, w.sessCtx.GetSessionVars().KVVars, option) + if resp == nil { + return nil, errors.New("client returns nil response") + } + return resp, nil +} + func (w *addIndexWorker) fetchRowColValsFromSelect(ctx context.Context, txn kv.Transaction, handleRange reorgBackfillTask) ([]*indexRecord, kv.Key, bool, error) { sctx := w.sessCtx.GetSessionVars().StmtCtx diff --git a/distsql/request_builder.go b/distsql/request_builder.go index aae83a0dd0053..fa5bb73768731 100644 --- a/distsql/request_builder.go +++ b/distsql/request_builder.go @@ -181,6 +181,17 @@ func (builder *RequestBuilder) SetChecksumRequest(checksum *tipb.ChecksumRequest return builder } +// SetDDLRequest sets the request type to "ReqTypeDDL" and construct request data. +func (builder *RequestBuilder) SetDDLRequest(ddl *tipb.DDLRequest) *RequestBuilder { + if builder.err == nil { + builder.Request.Tp = kv.ReqTypeDDL + builder.Request.Data, builder.err = ddl.Marshal() + builder.Request.NotFillCache = true + } + + return builder +} + // SetKeyRanges sets "KeyRanges" for "kv.Request". 
func (builder *RequestBuilder) SetKeyRanges(keyRanges []kv.KeyRange) *RequestBuilder { builder.Request.KeyRanges = keyRanges diff --git a/go.mod b/go.mod index 66d50b3bf6a3a..43f0cbfd41a9b 100644 --- a/go.mod +++ b/go.mod @@ -244,3 +244,5 @@ replace ( github.com/pingcap/tidb/parser => ./parser go.opencensus.io => go.opencensus.io v0.23.1-0.20220331163232-052120675fac ) + +replace github.com/pingcap/tipb => github.com/wjhuang2016/tipb v0.0.0-20221008063631-6d62ac9c19a2 diff --git a/go.sum b/go.sum index 211f252fc2f3c..76e55d8631960 100644 --- a/go.sum +++ b/go.sum @@ -764,8 +764,6 @@ github.com/pingcap/log v1.1.0 h1:ELiPxACz7vdo1qAvvaWJg1NrYFoY6gqAh/+Uo6aXdD8= github.com/pingcap/log v1.1.0/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/sysutil v0.0.0-20220114020952-ea68d2dbf5b4 h1:HYbcxtnkN3s5tqrZ/z3eJS4j3Db8wMphEm1q10lY/TM= github.com/pingcap/sysutil v0.0.0-20220114020952-ea68d2dbf5b4/go.mod h1:sDCsM39cGiv2vwunZkaFA917vVkqDTGSPbbV7z4Oops= -github.com/pingcap/tipb v0.0.0-20220824081009-0714a57aff1d h1:kWYridgsn8xSKYJ2EkXp7uj5HwJnG5snpY3XP8oYmPU= -github.com/pingcap/tipb v0.0.0-20220824081009-0714a57aff1d/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= github.com/pkg/browser v0.0.0-20180916011732-0a3d74bf9ce4 h1:49lOXmGaUpV9Fz3gd7TFZY106KVlPVa5jcYD1gaQf98= github.com/pkg/browser v0.0.0-20180916011732-0a3d74bf9ce4/go.mod h1:4OwLy04Bl9Ef3GJJCoec+30X3LQs/0/m4HFRt/2LUSA= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -906,8 +904,6 @@ github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2 h1:mbAskLJ0oJf github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2/go.mod h1:2PfKggNGDuadAa0LElHrByyrz4JPZ9fFx6Gs7nx7ZZU= github.com/tikv/client-go/v2 v2.0.1-0.20220913051514-ffaaf7131a8d h1:p8XInTnkUlLabBT7bDS3aZCeemO6tJ/7b5mHN8WbSIE= github.com/tikv/client-go/v2 v2.0.1-0.20220913051514-ffaaf7131a8d/go.mod h1:6pedLz7wiINLHXwCT1+yMZmzuG42+ubtBkkfcwoukIo= -github.com/tikv/minitrace-go v0.0.0-20210623164044-529c511f20f8 h1:5j4UidaEmHWinqgzzO6alg1EW9qlO28gpKcOJj3088E= -github.com/tikv/minitrace-go v0.0.0-20210623164044-529c511f20f8/go.mod h1:ukJr0BfYeYbO3n15LAV2Dp4jvFpIPF2g14NU227ZTLY= github.com/tikv/minitrace-go v0.0.0-20220923091513-8e6316bb4097 h1:nvIrUVo5YJZMsCn6yTxrpgrokIo/wug5N/nL5mc7v50= github.com/tikv/minitrace-go v0.0.0-20220923091513-8e6316bb4097/go.mod h1:ukJr0BfYeYbO3n15LAV2Dp4jvFpIPF2g14NU227ZTLY= github.com/tikv/pd/client v0.0.0-20220725055910-7187a7ab72db h1:r1eMh9Rny3hfWuBuxOnbsCRrR4FhthiNxLQ5rAUtaww= @@ -940,6 +936,8 @@ github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPU github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= github.com/wangjohn/quickselect v0.0.0-20161129230411-ed8402a42d5f h1:9DDCDwOyEy/gId+IEMrFHLuQ5R/WV0KNxWLler8X2OY= github.com/wangjohn/quickselect v0.0.0-20161129230411-ed8402a42d5f/go.mod h1:8sdOQnirw1PrcnTJYkmW1iOHtUmblMmGdUOHyWYycLI= +github.com/wjhuang2016/tipb v0.0.0-20221008063631-6d62ac9c19a2 h1:13LEN/7sdwcoTRDlBxqPUixvUqPrzHzn3J0EaasrpXg= +github.com/wjhuang2016/tipb v0.0.0-20221008063631-6d62ac9c19a2/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= github.com/xdg/scram v1.0.3/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I= github.com/xdg/stringprep v1.0.3/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= diff --git a/kv/kv.go b/kv/kv.go 
index d39dc1ee5c862..0d312605aaf61 100644 --- a/kv/kv.go +++ b/kv/kv.go @@ -295,6 +295,7 @@ const ( ReqTypeDAG = 103 ReqTypeAnalyze = 104 ReqTypeChecksum = 105 + ReqTypeDDL = 106 ReqSubTypeBasic = 0 ReqSubTypeDesc = 10000 diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go index 1d949520e9aad..d844bf6f5d9a6 100644 --- a/sessionctx/variable/sysvar.go +++ b/sessionctx/variable/sysvar.go @@ -1826,10 +1826,10 @@ var defaultSysVars = []*SysVar{ EnableFastReorg.Store(TiDBOptOn(val)) return nil }}, - {Scope: ScopeGlobal, Name: TiDBDDLEnableCoprRead, Value: BoolToOnOff(DefTiDBDDLEnableCoprRead), Type: TypeBool, GetGlobal: func(sv *SessionVars) (string, error) { - return BoolToOnOff(EnableCoprRead.Load()), nil + {Scope: ScopeGlobal, Name: TiDBDDLEnableCoprRead, Value: DefTiDBDDLEnableCoprRead, PossibleValues: []string{"0", "1", "2"}, Type: TypeEnum, GetGlobal: func(sv *SessionVars) (string, error) { + return EnableCoprRead.Load(), nil }, SetGlobal: func(s *SessionVars, val string) error { - EnableCoprRead.Store(TiDBOptOn(val)) + EnableCoprRead.Store(val) return nil }}, // This system var is set disk quota for lightning sort dir, from 100 GB to 1PB. diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index 841f598e39f3e..a11e60b727609 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -1060,7 +1060,7 @@ const ( DefTiDBEnableMDL = false DefTiFlashFastScan = false DefTiDBEnableFastReorg = false - DefTiDBDDLEnableCoprRead = false + DefTiDBDDLEnableCoprRead = "0" DefTiDBDDLDiskQuota = 100 * 1024 * 1024 * 1024 // 100GB DefExecutorConcurrency = 5 DefTiDBEnableGeneralPlanCache = false @@ -1122,7 +1122,7 @@ var ( // EnableFastReorg indicates whether to use lightning to enhance DDL reorg performance. EnableFastReorg = atomic.NewBool(DefTiDBEnableFastReorg) // EnableCoprRead indicates whether to read with coprocessor for adding index. - EnableCoprRead = atomic.NewBool(DefTiDBDDLEnableCoprRead) + EnableCoprRead = atomic.NewString(DefTiDBDDLEnableCoprRead) // DDLDiskQuota is the temporary variable for set disk quota for lightning DDLDiskQuota = atomic.NewUint64(DefTiDBDDLDiskQuota) // EnableForeignKey indicates whether to enable foreign key feature. diff --git a/table/tables/index.go b/table/tables/index.go index a1570ddfd2fc2..77811eacc3a8c 100644 --- a/table/tables/index.go +++ b/table/tables/index.go @@ -16,7 +16,6 @@ package tables import ( "context" - "encoding/hex" "sync" "github.com/opentracing/opentracing-go" @@ -28,9 +27,7 @@ import ( "github.com/pingcap/tidb/table" "github.com/pingcap/tidb/tablecodec" "github.com/pingcap/tidb/types" - "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tidb/util/rowcodec" - "go.uber.org/zap" ) // index is the data structure for index data in the KV store. 
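The switch of tidb_ddl_enable_copr_read from a boolean to an enum gives the backfill path three read modes. A minimal, illustrative sketch of how the value is interpreted (the type and constant names below are informal labels for this note, not identifiers from the patch):

package main

import "fmt"

// copReadMode is an informal label for the three values accepted by
// tidb_ddl_enable_copr_read after this change.
type copReadMode int

const (
	modeTxnScan           copReadMode = iota // "0": scan rows through the transaction, the old behavior
	modeCoprScan                             // "1": scan rows via a coprocessor table scan, build index KVs in TiDB
	modeCoprScanAndEncode                    // "2": push both the scan and the index KV encoding down to the store
)

func parseCopReadMode(v string) copReadMode {
	switch v {
	case "1":
		return modeCoprScan
	case "2":
		return modeCoprScanAndEncode
	default:
		return modeTxnScan
	}
}

func (m copReadMode) String() string {
	return [...]string{"txn-scan", "copr-scan", "copr-scan-and-encode"}[m]
}

func main() {
	for _, v := range []string{"0", "1", "2"} {
		fmt.Printf("tidb_ddl_enable_copr_read=%s -> %s\n", v, parseCopReadMode(v))
	}
}

In the patch itself, "1" corresponds to the fetchRowColValsFromSelect path, while "2" routes through the new DDLRequest/ReqTypeDDL request type so the coprocessor returns already-encoded index key-value pairs.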
@@ -118,9 +115,6 @@ func (c *index) Create(sctx sessionctx.Context, txn kv.Transaction, indexedValue if err != nil { return nil, err } - if c.idxInfo.Name.L == "i1" || c.idxInfo.Name.L == "i0" { - logutil.BgLogger().Info("index.Create()", zap.String("key", hex.EncodeToString(key))) - } var ( tempKey []byte From 1ac68e4d25a7866504dcfd36a879fb9284a9f244 Mon Sep 17 00:00:00 2001 From: tangenta Date: Wed, 12 Oct 2022 20:50:20 +0800 Subject: [PATCH 12/26] support async read and write --- ddl/backfilling.go | 76 ++++++++----- ddl/column.go | 4 +- ddl/index.go | 26 ++--- ddl/index_distsql.go | 241 ++++++++++++++++++++--------------------- ddl/index_merge_tmp.go | 6 +- 5 files changed, 182 insertions(+), 171 deletions(-) diff --git a/ddl/backfilling.go b/ddl/backfilling.go index 0a4bcf6270e7c..50e22006c88ec 100644 --- a/ddl/backfilling.go +++ b/ddl/backfilling.go @@ -126,7 +126,7 @@ func (bWT backfillWorkerType) String() string { } type backfiller interface { - BackfillDataInTxn(handleRange reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) + BackfillDataInTxn(handleRange []*reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) AddMetricInfo(float64) } @@ -185,7 +185,7 @@ type backfillWorker struct { reorgInfo *reorgInfo batchCnt int sessCtx sessionctx.Context - taskCh chan *reorgBackfillTask + taskCh chan []*reorgBackfillTask resultCh chan *backfillResult table table.Table closed bool @@ -201,7 +201,7 @@ func newBackfillWorker(sessCtx sessionctx.Context, id int, t table.PhysicalTable reorgInfo: reorgInfo, batchCnt: int(variable.GetDDLReorgBatchSize()), sessCtx: sessCtx, - taskCh: make(chan *reorgBackfillTask, 1), + taskCh: make(chan []*reorgBackfillTask, 1), resultCh: make(chan *backfillResult, 1), priority: reorgInfo.Job.Priority, tp: tp, @@ -222,12 +222,11 @@ func closeBackfillWorkers(workers []*backfillWorker) { } // handleBackfillTask backfills range [task.startHandle, task.endHandle) handle's index to table. 
-func (w *backfillWorker) handleBackfillTask(d *ddlCtx, task *reorgBackfillTask, bf backfiller) *backfillResult { - handleRange := *task +func (w *backfillWorker) handleBackfillTask(d *ddlCtx, tasks []*reorgBackfillTask, bf backfiller) *backfillResult { result := &backfillResult{ err: nil, addedCount: 0, - nextKey: handleRange.startKey, + nextKey: tasks[0].startKey, } lastLogCount := 0 lastLogTime := time.Now() @@ -245,7 +244,7 @@ func (w *backfillWorker) handleBackfillTask(d *ddlCtx, task *reorgBackfillTask, return result } - taskCtx, err := bf.BackfillDataInTxn(handleRange) + taskCtx, err := bf.BackfillDataInTxn(tasks) if err != nil { result.err = err return result @@ -275,8 +274,9 @@ func (w *backfillWorker) handleBackfillTask(d *ddlCtx, task *reorgBackfillTask, zap.Float64("speed(rows/s)", float64(num)/time.Since(lastLogTime).Seconds())) lastLogTime = time.Now() } - - handleRange.startKey = taskCtx.nextKey + if len(tasks) == 1 { + tasks[0].startKey = taskCtx.nextKey + } if taskCtx.done { break } @@ -284,7 +284,7 @@ func (w *backfillWorker) handleBackfillTask(d *ddlCtx, task *reorgBackfillTask, logutil.BgLogger().Info("[ddl] backfill worker finish task", zap.Stringer("type", w.tp), zap.Int("workerID", w.id), - zap.String("task", task.String()), + zap.String("task", tasks[0].String()), zap.Int("addedCount", result.addedCount), zap.Int("scanCount", result.scanCount), zap.String("nextHandle", tryDecodeToHandleString(result.nextKey)), @@ -301,13 +301,13 @@ func (w *backfillWorker) run(d *ddlCtx, bf backfiller, job *model.Job) { }() defer util.Recover(metrics.LabelDDL, "backfillWorker.run", nil, false) for { - task, more := <-w.taskCh + tasks, more := <-w.taskCh if !more { break } d.setDDLLabelForTopSQL(job) - logutil.BgLogger().Debug("[ddl] backfill worker got task", zap.Int("workerID", w.id), zap.String("task", task.String())) + logutil.BgLogger().Debug("[ddl] backfill worker got task", zap.Int("workerID", w.id), zap.String("task", tasks[0].String())) failpoint.Inject("mockBackfillRunErr", func() { if w.id == 0 { result := &backfillResult{addedCount: 0, nextKey: nil, err: errors.Errorf("mock backfill error")} @@ -328,7 +328,7 @@ func (w *backfillWorker) run(d *ddlCtx, bf backfiller, job *model.Job) { // Dynamic change batch size. w.batchCnt = int(variable.GetDDLReorgBatchSize()) finish := injectSpan(job.ID, fmt.Sprintf("%s-%d", "handle-backfill-task", w.id)) - result := w.handleBackfillTask(d, task, bf) + result := w.handleBackfillTask(d, tasks, bf) finish() w.resultCh <- result } @@ -399,15 +399,17 @@ func waitTaskResults(workers []*backfillWorker, taskCnt int, // sendTasksAndWait sends tasks to workers, and waits for all the running workers to return results, // there are taskCnt running workers. 
-func (dc *ddlCtx) sendTasksAndWait(sessPool *sessionPool, reorgInfo *reorgInfo, totalAddedCount *int64, workers []*backfillWorker, batchTasks []*reorgBackfillTask) error { +func (dc *ddlCtx) sendTasksAndWait(sessPool *sessionPool, reorgInfo *reorgInfo, totalAddedCount *int64, workers []*backfillWorker, batchTasks [][]*reorgBackfillTask) error { + sentCnt := 0 for i, task := range batchTasks { - workers[i].taskCh <- task + if len(task) > 0 { + workers[i].taskCh <- task + sentCnt++ + } } - - startKey := batchTasks[0].startKey - taskCnt := len(batchTasks) + startKey := batchTasks[0][0].startKey startTime := time.Now() - nextKey, taskAddedCount, err := waitTaskResults(workers, taskCnt, totalAddedCount, startKey) + nextKey, taskAddedCount, err := waitTaskResults(workers, sentCnt, totalAddedCount, startKey) elapsedTime := time.Since(startTime) if err == nil { err = dc.isReorgRunnable(reorgInfo.Job) @@ -475,9 +477,13 @@ func tryDecodeToHandleString(key kv.Key) string { // handleRangeTasks sends tasks to workers, and returns remaining kvRanges that is not handled. func (dc *ddlCtx) handleRangeTasks(sessPool *sessionPool, t table.Table, workers []*backfillWorker, reorgInfo *reorgInfo, - totalAddedCount *int64, kvRanges []kv.KeyRange) ([]kv.KeyRange, error) { + totalAddedCount *int64, kvRanges []kv.KeyRange, readerCnt int) ([]kv.KeyRange, error) { defer injectSpan(reorgInfo.ID, "send-wait-tasks")() - batchTasks := make([]*reorgBackfillTask, 0, len(workers)) + workerCnt := len(workers) + batchTasks := make([][]*reorgBackfillTask, workerCnt) + for i := range batchTasks { + batchTasks[i] = make([]*reorgBackfillTask, 0, readerCnt) + } physicalTableID := reorgInfo.PhysicalTableID var prefix kv.Key @@ -487,6 +493,7 @@ func (dc *ddlCtx) handleRangeTasks(sessPool *sessionPool, t table.Table, workers prefix = t.RecordPrefix() } // Build reorg tasks. + processedRanges := 0 for i, keyRange := range kvRanges { endKey := keyRange.EndKey endK, err := getRangeEndKey(reorgInfo.d.jobContext(reorgInfo.Job), workers[0].sessCtx.GetStore(), workers[0].priority, prefix, keyRange.StartKey, endKey) @@ -504,14 +511,15 @@ func (dc *ddlCtx) handleRangeTasks(sessPool *sessionPool, t table.Table, workers endKey: endKey, // If the boundaries overlap, we should ignore the preceding endKey. endInclude: endK.Cmp(keyRange.EndKey) != 0 || i == len(kvRanges)-1} - batchTasks = append(batchTasks, task) - if len(batchTasks) >= len(workers) { + batchTasks[i%workerCnt] = append(batchTasks[i%workerCnt], task) + processedRanges++ + if processedRanges >= workerCnt*readerCnt { break } } - if len(batchTasks) == 0 { + if processedRanges == 0 { return nil, nil } @@ -521,9 +529,9 @@ func (dc *ddlCtx) handleRangeTasks(sessPool *sessionPool, t table.Table, workers return nil, errors.Trace(err) } - if len(batchTasks) < len(kvRanges) { + if processedRanges < len(kvRanges) { // There are kvRanges not handled. - remains := kvRanges[len(batchTasks):] + remains := kvRanges[processedRanges:] return remains, nil } @@ -640,7 +648,9 @@ func (dc *ddlCtx) writePhysicalTableRecord(sessPool *sessionPool, t table.Physic if err := loadDDLReorgVars(dc.ctx, sessPool); err != nil { logutil.BgLogger().Error("[ddl] load DDL reorganization variable failed", zap.Error(err)) } - workerCnt = variable.GetDDLReorgWorkerCounter() + workerCnt := variable.GetDDLReorgWorkerCounter() + var readerCnt int32 + readerCnt, workerCnt = readWriteRatio(workerCnt, bfWorkerType) rowFormat := variable.GetDDLReorgRowFormat() // If only have 1 range, we can only start 1 worker. 
if len(kvRanges) < int(workerCnt) { @@ -736,7 +746,7 @@ func (dc *ddlCtx) writePhysicalTableRecord(sessPool *sessionPool, t table.Physic return errors.New(ingest.LitErrGetBackendFail) } } - remains, err := dc.handleRangeTasks(sessPool, t, backfillWorkers, reorgInfo, &totalAddedCount, kvRanges) + remains, err := dc.handleRangeTasks(sessPool, t, backfillWorkers, reorgInfo, &totalAddedCount, kvRanges, int(readerCnt)) if err != nil { return errors.Trace(err) } @@ -749,6 +759,16 @@ func (dc *ddlCtx) writePhysicalTableRecord(sessPool *sessionPool, t table.Physic return nil } +// readWriteRatio split the available workers to read and write workers in 2:1 ratio. +func readWriteRatio(totalWorker int32, tp backfillWorkerType) (read int32, write int32) { + if tp != typeAddIndexWorker || variable.EnableCoprRead.Load() == "0" { + return 1, totalWorker + } + write = totalWorker / 3 + read = (totalWorker-write)/write + 1 + return read, write +} + // recordIterFunc is used for low-level record iteration. type recordIterFunc func(h kv.Handle, rowKey kv.Key, rawRecord []byte) (more bool, err error) diff --git a/ddl/column.go b/ddl/column.go index 9c29bceea943d..62d2dd14002f4 100644 --- a/ddl/column.go +++ b/ddl/column.go @@ -1317,7 +1317,7 @@ func (w *updateColumnWorker) cleanRowMap() { } // BackfillDataInTxn will backfill the table record in a transaction. A lock corresponds to a rowKey if the value of rowKey is changed. -func (w *updateColumnWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { +func (w *updateColumnWorker) BackfillDataInTxn(handleRange []*reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { oprStartTime := time.Now() ctx := kv.WithInternalSourceType(context.Background(), w.jobContext.ddlJobSourceType()) errInTxn = kv.RunInNewTxn(ctx, w.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { @@ -1328,7 +1328,7 @@ func (w *updateColumnWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (t txn.SetOption(kv.ResourceGroupTagger, tagger) } - rowRecords, nextKey, taskDone, err := w.fetchRowColVals(txn, handleRange) + rowRecords, nextKey, taskDone, err := w.fetchRowColVals(txn, *handleRange[0]) if err != nil { return errors.Trace(err) } diff --git a/ddl/index.go b/ddl/index.go index 2c8f765602662..ce739519f7909 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -1490,7 +1490,7 @@ func (w *addIndexWorker) batchCheckUniqueKey(txn kv.Transaction, idxRecords []*i // BackfillDataInTxn will backfill table index in a transaction. A lock corresponds to a rowKey if the value of rowKey is changed, // Note that index columns values may change, and an index is not allowed to be added, so the txn will rollback and retry. // BackfillDataInTxn will add w.batchCnt indices once, default value of w.batchCnt is 128. 
-func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { +func (w *addIndexWorker) BackfillDataInTxn(handleRange []*reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { failpoint.Inject("errorMockPanic", func(val failpoint.Value) { //nolint:forcetypeassert if val.(bool) { @@ -1512,20 +1512,14 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC } var ( - idxRecords []*indexRecord - nextKey kv.Key - taskDone bool - encPushDown bool + idxRecords []*indexRecord + nextKey kv.Key + taskDone bool ) if w.coprCtx != nil { - encPushDown = variable.EnableCoprRead.Load() == "2" - if encPushDown { - idxRecords, nextKey, taskDone, err = w.fetchRowColValsFromCopr(ctx, txn, handleRange) - } else { - idxRecords, nextKey, taskDone, err = w.fetchRowColValsFromSelect(ctx, txn, handleRange) - } + idxRecords, nextKey, taskDone, err = w.fetchRowColValsFromCop(ctx, txn, handleRange) } else { - idxRecords, nextKey, taskDone, err = w.fetchRowColVals(txn, handleRange) + idxRecords, nextKey, taskDone, err = w.fetchRowColVals(txn, *handleRange[0]) } if err != nil { return errors.Trace(err) @@ -1560,7 +1554,7 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC // Create the index. if w.writerCtx == nil { - if encPushDown { + if idxRecord.idxKV != nil { err := txn.GetMemBuffer().Set(idxRecord.idxKV.key, idxRecord.idxKV.value) if err != nil { return errors.Trace(err) @@ -1576,7 +1570,7 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC } } } else { // The lightning environment is ready. - if encPushDown { + if idxRecord.idxKV != nil { err = w.writerCtx.WriteRow(idxRecord.idxKV.key, idxRecord.idxKV.value) if err != nil { return errors.Trace(err) @@ -1775,7 +1769,7 @@ func newCleanUpIndexWorker(sessCtx sessionctx.Context, id int, t table.PhysicalT } } -func (w *cleanUpIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { +func (w *cleanUpIndexWorker) BackfillDataInTxn(handleRanges []*reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { failpoint.Inject("errorMockPanic", func(val failpoint.Value) { //nolint:forcetypeassert if val.(bool) { @@ -1793,7 +1787,7 @@ func (w *cleanUpIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (t txn.SetOption(kv.ResourceGroupTagger, tagger) } - idxRecords, nextKey, taskDone, err := w.fetchRowColVals(txn, handleRange) + idxRecords, nextKey, taskDone, err := w.fetchRowColVals(txn, *handleRanges[0]) if err != nil { return errors.Trace(err) } diff --git a/ddl/index_distsql.go b/ddl/index_distsql.go index 0fe062a12315a..d1ffe9db71715 100644 --- a/ddl/index_distsql.go +++ b/ddl/index_distsql.go @@ -17,6 +17,7 @@ package ddl import ( "context" "fmt" + "sync" "sync/atomic" "github.com/pingcap/errors" @@ -40,21 +41,52 @@ import ( ) type copContext struct { - colInfos []*model.ColumnInfo - fieldTps []*types.FieldType - srcChunk *chunk.Chunk + colInfos []*model.ColumnInfo + fieldTps []*types.FieldType + pushDownEncoding bool + + srcChunks []*chunk.Chunk indexRecordChan chan *indexRecord + doneChan chan struct{} bfTasks map[string]struct{} err atomic.Value + readerCnt atomic.Int32 + mu sync.Mutex } -func (c *copContext) isNewTask(task reorgBackfillTask) bool { - _, found := c.bfTasks[string(task.endKey)] - return !found +func (c *copContext) spawnCopRead(w *addIndexWorker, ctx context.Context, txn kv.Transaction, seq int, task *reorgBackfillTask) { 
+ if _, found := c.bfTasks[string(task.endKey)]; found { + // The task has been processed by an existing goroutine. + return + } + c.bfTasks[string(task.endKey)] = struct{}{} + c.readerCnt.Add(1) + go func() { + defer injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d-%d", "fetch-rows", w.id, seq))() + defer func() { + w.coprCtx.readerCnt.Add(-1) + w.coprCtx.doneChan <- struct{}{} + }() + var err error + if c.pushDownEncoding { + err = w.sendEncodedIdxRecords(ctx, txn, task.startKey, task.excludedEndKey()) + } else { + err = w.sendIdxRecords(ctx, txn, task.startKey, task.excludedEndKey(), seq) + } + if err != nil { + w.coprCtx.err.Store(err) + return + } + }() } -func (c *copContext) recordTask(task reorgBackfillTask) { - c.bfTasks[string(task.endKey)] = struct{}{} +func (c *copContext) getChunk(i int) *chunk.Chunk { + c.mu.Lock() + defer c.mu.Unlock() + for j := len(c.srcChunks); j <= i; j++ { + c.srcChunks = append(c.srcChunks, chunk.NewChunkWithCapacity(c.fieldTps, 1024)) + } + return c.srcChunks[i] } func newCopContext(tblInfo *model.TableInfo, idxInfo *model.IndexInfo) *copContext { @@ -71,10 +103,12 @@ func newCopContext(tblInfo *model.TableInfo, idxInfo *model.IndexInfo) *copConte fieldTps = append(fieldTps, pkFieldTps...) return &copContext{ - colInfos: colInfos, - fieldTps: fieldTps, - srcChunk: chunk.NewChunkWithCapacity(fieldTps, 1), - bfTasks: make(map[string]struct{}, 16), + colInfos: colInfos, + fieldTps: fieldTps, + pushDownEncoding: variable.EnableCoprRead.Load() == "2", + indexRecordChan: make(chan *indexRecord, variable.MaxDDLReorgBatchSize), + doneChan: make(chan struct{}, 1), + bfTasks: make(map[string]struct{}, 16), } } @@ -144,69 +178,6 @@ func (w *addIndexWorker) constructTableScanPB(tblInfo *model.TableInfo, colInfos return &tipb.Executor{Tp: tipb.ExecType_TypeTableScan, TblScan: tblScan}, err } -func (w *addIndexWorker) fetchRowColValsFromCopr(ctx context.Context, txn kv.Transaction, - handleRange reorgBackfillTask) ([]*indexRecord, kv.Key, bool, error) { - if w.coprCtx.isNewTask(handleRange) { - w.coprCtx.recordTask(handleRange) - w.coprCtx.indexRecordChan = make(chan *indexRecord, variable.MaxDDLReorgBatchSize) - go func() { - defer injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d", "fetch-rows", w.id))() - defer close(w.coprCtx.indexRecordChan) - resp, err := w.buildScanIndexKV(w.jobContext.ddlJobCtx, txn, handleRange.startKey, handleRange.excludedEndKey()) - if err != nil { - w.coprCtx.err.Store(err) - return - } - for { - data, err := resp.Next(ctx) - if err != nil { - w.coprCtx.err.Store(err) - return - } - if data == nil { - break - } - colResp := &tipb.DDLResponse{} - if err = colResp.Unmarshal(data.GetData()); err != nil { - w.coprCtx.err.Store(err) - return - } - for i := 0; i < len(colResp.Keys); i++ { - w.coprCtx.indexRecordChan <- &indexRecord{idxKV: &indexKV{key: colResp.Keys[i], value: colResp.Values[i]}} - } - } - }() - } - w.idxRecords = w.idxRecords[:0] - taskDone := false - var current *indexKV - for { - record, ok := <-w.coprCtx.indexRecordChan - if !ok { // The channel is closed. 
- taskDone = true - break - } - w.idxRecords = append(w.idxRecords, record) - current = record.idxKV - if len(w.idxRecords) >= w.batchCnt { - break - } - } - nextKey := handleRange.endKey - if current != nil { - h, err := tablecodec.DecodeIndexHandle(current.key, current.value, len(w.index.Meta().Columns)) - if err != nil { - return nil, nil, false, errors.Trace(err) - } - nextKey = tablecodec.EncodeRecordKey(w.table.RecordPrefix(), h).Next() - } - err := w.coprCtx.err.Load() - if err != nil { - return nil, nil, false, err.(error) - } - return w.idxRecords, nextKey, taskDone, nil -} - func (w *addIndexWorker) buildScanIndexKV(ctx context.Context, txn kv.Transaction, start, end kv.Key) (kv.Response, error) { ddlPB, err := w.buildDDLPB(w.coprCtx.colInfos) if err != nil { @@ -240,68 +211,94 @@ func (w *addIndexWorker) buildScanIndexKV(ctx context.Context, txn kv.Transactio return resp, nil } -func (w *addIndexWorker) fetchRowColValsFromSelect(ctx context.Context, txn kv.Transaction, - handleRange reorgBackfillTask) ([]*indexRecord, kv.Key, bool, error) { +func (w *addIndexWorker) sendIdxRecords(ctx context.Context, txn kv.Transaction, start, end kv.Key, seq int) error { sctx := w.sessCtx.GetSessionVars().StmtCtx - if w.coprCtx.isNewTask(handleRange) { - w.coprCtx.recordTask(handleRange) - w.coprCtx.indexRecordChan = make(chan *indexRecord, variable.MaxDDLReorgBatchSize) - go func() { - defer injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d", "fetch-rows", w.id))() - defer close(w.coprCtx.indexRecordChan) - srcResult, err := w.buildTableScan(w.jobContext.ddlJobCtx, txn, handleRange.startKey, handleRange.excludedEndKey()) + srcResult, err := w.buildTableScan(ctx, txn, start, end) + if err != nil { + return errors.Trace(err) + } + srcChunk := w.coprCtx.getChunk(seq) + for { + err := srcResult.Next(ctx, srcChunk) + if err != nil { + return errors.Trace(err) + } + if srcChunk.NumRows() == 0 { + return nil + } + iter := chunk.NewIterator4Chunk(srcChunk) + for row := iter.Begin(); row != iter.End(); row = iter.Next() { + idxDt, hdDt := extractIdxValsAndHandle(row, w.index.Meta(), w.coprCtx.fieldTps) + handle, err := buildHandle(hdDt, w.table.Meta(), w.index.Meta(), sctx) if err != nil { - w.coprCtx.err.Store(err) - return + return errors.Trace(err) } - srcChunk := w.coprCtx.srcChunk - for { - err := srcResult.Next(ctx, srcChunk) - if err != nil { - w.coprCtx.err.Store(err) - return - } - if srcChunk.NumRows() == 0 { - return - } - iter := chunk.NewIterator4Chunk(srcChunk) - for row := iter.Begin(); row != iter.End(); row = iter.Next() { - idxDt, hdDt := extractIdxValsAndHandle(row, w.index.Meta(), w.coprCtx.fieldTps) - handle, err := buildHandle(hdDt, w.table.Meta(), w.index.Meta(), sctx) - if err != nil { - w.coprCtx.err.Store(err) - return - } - rsData := tables.TryGetHandleRestoredDataWrapper(w.table, hdDt, nil, w.index.Meta()) - w.coprCtx.indexRecordChan <- &indexRecord{handle: handle, key: nil, vals: idxDt, rsData: rsData, skip: false} - } - } - }() + rsData := tables.TryGetHandleRestoredDataWrapper(w.table, hdDt, nil, w.index.Meta()) + w.coprCtx.indexRecordChan <- &indexRecord{handle: handle, key: nil, vals: idxDt, rsData: rsData, skip: false} + } + } +} + +func (w *addIndexWorker) sendEncodedIdxRecords(ctx context.Context, txn kv.Transaction, start, end kv.Key) error { + resp, err := w.buildScanIndexKV(w.jobContext.ddlJobCtx, txn, start, end) + if err != nil { + return errors.Trace(err) + } + for { + data, err := resp.Next(ctx) + if err != nil { + return errors.Trace(err) + } + if data == 
nil { + return nil + } + colResp := &tipb.DDLResponse{} + if err = colResp.Unmarshal(data.GetData()); err != nil { + return errors.Trace(err) + } + for i := 0; i < len(colResp.Keys); i++ { + w.coprCtx.indexRecordChan <- &indexRecord{idxKV: &indexKV{key: colResp.Keys[i], value: colResp.Values[i]}} + } + } +} + +func (w *addIndexWorker) fetchRowColValsFromCop(ctx context.Context, txn kv.Transaction, + handleRanges []*reorgBackfillTask) ([]*indexRecord, kv.Key, bool, error) { + for rid, hRange := range handleRanges { + w.coprCtx.spawnCopRead(w, ctx, txn, rid, hRange) } w.idxRecords = w.idxRecords[:0] taskDone := false - var current kv.Handle for { - record, ok := <-w.coprCtx.indexRecordChan - if !ok { // The channel is closed. - taskDone = true - break + select { + case record := <-w.coprCtx.indexRecordChan: + w.idxRecords = append(w.idxRecords, record) + case <-w.coprCtx.doneChan: + if w.coprCtx.readerCnt.Load() == 0 { + taskDone = true + for { // consume all the remaining records. + exit := false + select { + case record := <-w.coprCtx.indexRecordChan: + w.idxRecords = append(w.idxRecords, record) + default: + exit = true + } + if exit { + break + } + } + } } - w.idxRecords = append(w.idxRecords, record) - current = record.handle - if len(w.idxRecords) >= w.batchCnt { + if taskDone || len(w.idxRecords) >= w.batchCnt { break } } - nextKey := handleRange.endKey - if current != nil { - nextKey = tablecodec.EncodeRecordKey(w.table.RecordPrefix(), current).Next() - } err := w.coprCtx.err.Load() if err != nil { return nil, nil, false, err.(error) } - return w.idxRecords, nextKey, taskDone, nil + return w.idxRecords, handleRanges[0].startKey, taskDone, nil } func buildHandleColInfoAndFieldTypes(tbInfo *model.TableInfo) ([]*model.ColumnInfo, []*types.FieldType) { diff --git a/ddl/index_merge_tmp.go b/ddl/index_merge_tmp.go index 9159b47c47951..b90fc7a2292bd 100644 --- a/ddl/index_merge_tmp.go +++ b/ddl/index_merge_tmp.go @@ -102,7 +102,7 @@ func newMergeTempIndexWorker(sessCtx sessionctx.Context, id int, t table.Physica } // BackfillDataInTxn merge temp index data in txn. 
-func (w *mergeIndexWorker) BackfillDataInTxn(taskRange reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { +func (w *mergeIndexWorker) BackfillDataInTxn(taskRanges []*reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { oprStartTime := time.Now() ctx := kv.WithInternalSourceType(context.Background(), w.jobContext.ddlJobSourceType()) errInTxn = kv.RunInNewTxn(ctx, w.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { @@ -113,7 +113,7 @@ func (w *mergeIndexWorker) BackfillDataInTxn(taskRange reorgBackfillTask) (taskC txn.SetOption(kv.ResourceGroupTagger, tagger) } - tmpIdxRecords, nextKey, taskDone, err := w.fetchTempIndexVals(txn, taskRange) + tmpIdxRecords, nextKey, taskDone, err := w.fetchTempIndexVals(txn, taskRanges[0]) if err != nil { return errors.Trace(err) } @@ -156,7 +156,7 @@ func (w *mergeIndexWorker) BackfillDataInTxn(taskRange reorgBackfillTask) (taskC func (w *mergeIndexWorker) AddMetricInfo(cnt float64) { } -func (w *mergeIndexWorker) fetchTempIndexVals(txn kv.Transaction, taskRange reorgBackfillTask) ([]*temporaryIndexRecord, kv.Key, bool, error) { +func (w *mergeIndexWorker) fetchTempIndexVals(txn kv.Transaction, taskRange *reorgBackfillTask) ([]*temporaryIndexRecord, kv.Key, bool, error) { startTime := time.Now() w.tmpIdxRecords = w.tmpIdxRecords[:0] w.tmpIdxKeys = w.tmpIdxKeys[:0] From 68f56591ce7704420e808020a780224090d09b38 Mon Sep 17 00:00:00 2001 From: tangenta Date: Thu, 13 Oct 2022 16:04:40 +0800 Subject: [PATCH 13/26] change the read write ratio to 2 : 1 --- ddl/backfilling.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ddl/backfilling.go b/ddl/backfilling.go index 50e22006c88ec..a18643b955e80 100644 --- a/ddl/backfilling.go +++ b/ddl/backfilling.go @@ -764,9 +764,7 @@ func readWriteRatio(totalWorker int32, tp backfillWorkerType) (read int32, write if tp != typeAddIndexWorker || variable.EnableCoprRead.Load() == "0" { return 1, totalWorker } - write = totalWorker / 3 - read = (totalWorker-write)/write + 1 - return read, write + return totalWorker * 2, totalWorker } // recordIterFunc is used for low-level record iteration. 
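This patch only changes the return values of readWriteRatio, but it changes how scan ranges are fanned out to the backfill writers. A small standalone comparison of the helper before and after the change (the function names are illustrative, and the guard on the worker type and on tidb_ddl_enable_copr_read is omitted, so this is a sketch rather than the real helper):

package main

import "fmt"

// ratioBefore mirrors readWriteRatio as introduced in the previous patch:
// roughly one third of the reorg workers become writers, and `read` is the
// number of scan ranges (reader goroutines) handed to each writer.
func ratioBefore(totalWorker int32) (read, write int32) {
	write = totalWorker / 3
	read = (totalWorker-write)/write + 1
	return read, write
}

// ratioAfter mirrors the helper after this patch: every worker keeps writing,
// and each writer may be fed up to twice the worker count in scan ranges.
func ratioAfter(totalWorker int32) (read, write int32) {
	return totalWorker * 2, totalWorker
}

func main() {
	// ratioBefore divides by totalWorker/3, so use at least 3 workers here.
	for _, n := range []int32{4, 8, 16} {
		r1, w1 := ratioBefore(n)
		r2, w2 := ratioAfter(n)
		fmt.Printf("workers=%d  before: read=%d write=%d  after: read=%d write=%d\n",
			n, r1, w1, r2, w2)
	}
}

With the old formula each writer ended up with roughly three ranges; after the change all workers keep writing and handleRangeTasks may assign each of them up to twice the worker count in ranges.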
From 18b6d21df6a9767adc2307a9d696a43b23ba7c35 Mon Sep 17 00:00:00 2001 From: tangenta Date: Thu, 13 Oct 2022 22:48:54 +0800 Subject: [PATCH 14/26] pipeline copr reader and writer --- ddl/backfilling.go | 87 +++++++------- ddl/column.go | 4 +- ddl/index.go | 23 ++-- ddl/index_distsql.go | 241 ++++++++++++++++++++++---------------- ddl/index_merge_tmp.go | 6 +- executor/admin.go | 2 +- session/schema_amender.go | 2 +- table/tables/tables.go | 12 +- 8 files changed, 205 insertions(+), 172 deletions(-) diff --git a/ddl/backfilling.go b/ddl/backfilling.go index a18643b955e80..330fc31622a9c 100644 --- a/ddl/backfilling.go +++ b/ddl/backfilling.go @@ -126,7 +126,7 @@ func (bWT backfillWorkerType) String() string { } type backfiller interface { - BackfillDataInTxn(handleRange []*reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) + BackfillDataInTxn(handleRange reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) AddMetricInfo(float64) } @@ -185,12 +185,14 @@ type backfillWorker struct { reorgInfo *reorgInfo batchCnt int sessCtx sessionctx.Context - taskCh chan []*reorgBackfillTask + taskCh chan *reorgBackfillTask resultCh chan *backfillResult table table.Table closed bool priority int tp backfillWorkerType + + copReqReaders *copReqReaders } func newBackfillWorker(sessCtx sessionctx.Context, id int, t table.PhysicalTable, @@ -201,7 +203,7 @@ func newBackfillWorker(sessCtx sessionctx.Context, id int, t table.PhysicalTable reorgInfo: reorgInfo, batchCnt: int(variable.GetDDLReorgBatchSize()), sessCtx: sessCtx, - taskCh: make(chan []*reorgBackfillTask, 1), + taskCh: make(chan *reorgBackfillTask, 1), resultCh: make(chan *backfillResult, 1), priority: reorgInfo.Job.Priority, tp: tp, @@ -222,11 +224,12 @@ func closeBackfillWorkers(workers []*backfillWorker) { } // handleBackfillTask backfills range [task.startHandle, task.endHandle) handle's index to table. 
-func (w *backfillWorker) handleBackfillTask(d *ddlCtx, tasks []*reorgBackfillTask, bf backfiller) *backfillResult { +func (w *backfillWorker) handleBackfillTask(d *ddlCtx, task *reorgBackfillTask, bf backfiller) *backfillResult { + handleRange := *task result := &backfillResult{ err: nil, addedCount: 0, - nextKey: tasks[0].startKey, + nextKey: handleRange.startKey, } lastLogCount := 0 lastLogTime := time.Now() @@ -244,7 +247,7 @@ func (w *backfillWorker) handleBackfillTask(d *ddlCtx, tasks []*reorgBackfillTas return result } - taskCtx, err := bf.BackfillDataInTxn(tasks) + taskCtx, err := bf.BackfillDataInTxn(handleRange) if err != nil { result.err = err return result @@ -274,9 +277,7 @@ func (w *backfillWorker) handleBackfillTask(d *ddlCtx, tasks []*reorgBackfillTas zap.Float64("speed(rows/s)", float64(num)/time.Since(lastLogTime).Seconds())) lastLogTime = time.Now() } - if len(tasks) == 1 { - tasks[0].startKey = taskCtx.nextKey - } + handleRange.startKey = taskCtx.nextKey if taskCtx.done { break } @@ -284,7 +285,7 @@ func (w *backfillWorker) handleBackfillTask(d *ddlCtx, tasks []*reorgBackfillTas logutil.BgLogger().Info("[ddl] backfill worker finish task", zap.Stringer("type", w.tp), zap.Int("workerID", w.id), - zap.String("task", tasks[0].String()), + zap.String("task", task.String()), zap.Int("addedCount", result.addedCount), zap.Int("scanCount", result.scanCount), zap.String("nextHandle", tryDecodeToHandleString(result.nextKey)), @@ -301,13 +302,13 @@ func (w *backfillWorker) run(d *ddlCtx, bf backfiller, job *model.Job) { }() defer util.Recover(metrics.LabelDDL, "backfillWorker.run", nil, false) for { - tasks, more := <-w.taskCh + task, more := <-w.taskCh if !more { break } d.setDDLLabelForTopSQL(job) - logutil.BgLogger().Debug("[ddl] backfill worker got task", zap.Int("workerID", w.id), zap.String("task", tasks[0].String())) + logutil.BgLogger().Debug("[ddl] backfill worker got task", zap.Int("workerID", w.id), zap.String("task", task.String())) failpoint.Inject("mockBackfillRunErr", func() { if w.id == 0 { result := &backfillResult{addedCount: 0, nextKey: nil, err: errors.Errorf("mock backfill error")} @@ -328,7 +329,7 @@ func (w *backfillWorker) run(d *ddlCtx, bf backfiller, job *model.Job) { // Dynamic change batch size. w.batchCnt = int(variable.GetDDLReorgBatchSize()) finish := injectSpan(job.ID, fmt.Sprintf("%s-%d", "handle-backfill-task", w.id)) - result := w.handleBackfillTask(d, tasks, bf) + result := w.handleBackfillTask(d, task, bf) finish() w.resultCh <- result } @@ -399,17 +400,19 @@ func waitTaskResults(workers []*backfillWorker, taskCnt int, // sendTasksAndWait sends tasks to workers, and waits for all the running workers to return results, // there are taskCnt running workers. 
-func (dc *ddlCtx) sendTasksAndWait(sessPool *sessionPool, reorgInfo *reorgInfo, totalAddedCount *int64, workers []*backfillWorker, batchTasks [][]*reorgBackfillTask) error { - sentCnt := 0 +func (dc *ddlCtx) sendTasksAndWait(sessPool *sessionPool, reorgInfo *reorgInfo, totalAddedCount *int64, workers []*backfillWorker, batchTasks []*reorgBackfillTask) error { for i, task := range batchTasks { - if len(task) > 0 { - workers[i].taskCh <- task - sentCnt++ + w := workers[i] + w.taskCh <- task + if w.copReqReaders != nil { + w.copReqReaders.tasksCh <- task } } - startKey := batchTasks[0][0].startKey + + startKey := batchTasks[0].startKey + taskCnt := len(batchTasks) startTime := time.Now() - nextKey, taskAddedCount, err := waitTaskResults(workers, sentCnt, totalAddedCount, startKey) + nextKey, taskAddedCount, err := waitTaskResults(workers, taskCnt, totalAddedCount, startKey) elapsedTime := time.Since(startTime) if err == nil { err = dc.isReorgRunnable(reorgInfo.Job) @@ -477,13 +480,9 @@ func tryDecodeToHandleString(key kv.Key) string { // handleRangeTasks sends tasks to workers, and returns remaining kvRanges that is not handled. func (dc *ddlCtx) handleRangeTasks(sessPool *sessionPool, t table.Table, workers []*backfillWorker, reorgInfo *reorgInfo, - totalAddedCount *int64, kvRanges []kv.KeyRange, readerCnt int) ([]kv.KeyRange, error) { + totalAddedCount *int64, kvRanges []kv.KeyRange) ([]kv.KeyRange, error) { defer injectSpan(reorgInfo.ID, "send-wait-tasks")() - workerCnt := len(workers) - batchTasks := make([][]*reorgBackfillTask, workerCnt) - for i := range batchTasks { - batchTasks[i] = make([]*reorgBackfillTask, 0, readerCnt) - } + batchTasks := make([]*reorgBackfillTask, 0, len(workers)) physicalTableID := reorgInfo.PhysicalTableID var prefix kv.Key @@ -493,7 +492,6 @@ func (dc *ddlCtx) handleRangeTasks(sessPool *sessionPool, t table.Table, workers prefix = t.RecordPrefix() } // Build reorg tasks. - processedRanges := 0 for i, keyRange := range kvRanges { endKey := keyRange.EndKey endK, err := getRangeEndKey(reorgInfo.d.jobContext(reorgInfo.Job), workers[0].sessCtx.GetStore(), workers[0].priority, prefix, keyRange.StartKey, endKey) @@ -512,14 +510,13 @@ func (dc *ddlCtx) handleRangeTasks(sessPool *sessionPool, t table.Table, workers // If the boundaries overlap, we should ignore the preceding endKey. endInclude: endK.Cmp(keyRange.EndKey) != 0 || i == len(kvRanges)-1} - batchTasks[i%workerCnt] = append(batchTasks[i%workerCnt], task) - processedRanges++ - if processedRanges >= workerCnt*readerCnt { + batchTasks = append(batchTasks, task) + if len(batchTasks) >= len(workers) { break } } - if processedRanges == 0 { + if len(batchTasks) == 0 { return nil, nil } @@ -529,9 +526,9 @@ func (dc *ddlCtx) handleRangeTasks(sessPool *sessionPool, t table.Table, workers return nil, errors.Trace(err) } - if processedRanges < len(kvRanges) { + if len(batchTasks) < len(kvRanges) { // There are kvRanges not handled. - remains := kvRanges[processedRanges:] + remains := kvRanges[len(batchTasks):] return remains, nil } @@ -649,13 +646,13 @@ func (dc *ddlCtx) writePhysicalTableRecord(sessPool *sessionPool, t table.Physic logutil.BgLogger().Error("[ddl] load DDL reorganization variable failed", zap.Error(err)) } workerCnt := variable.GetDDLReorgWorkerCounter() - var readerCnt int32 - readerCnt, workerCnt = readWriteRatio(workerCnt, bfWorkerType) rowFormat := variable.GetDDLReorgRowFormat() // If only have 1 range, we can only start 1 worker. 
if len(kvRanges) < int(workerCnt) { workerCnt = int32(len(kvRanges)) } + var copReqReaders *copReqReaders + useCopRead := variable.EnableCoprRead.Load() != "0" // Enlarge the worker size. for i := len(backfillWorkers); i < int(workerCnt); i++ { sessCtx := newContext(reorgInfo.d.store) @@ -683,6 +680,16 @@ func (dc *ddlCtx) writePhysicalTableRecord(sessPool *sessionPool, t table.Physic if err != nil { return errors.Trace(err) } + if useCopRead && copReqReaders == nil { + indexInfo := model.FindIndexInfoByID(t.Meta().Indices, reorgInfo.currElement.ID) + copCtx := newCopContext(t.Meta(), indexInfo, sessCtx) + taskCh := make(chan *reorgBackfillTask, 64) + copReqReaders = newCopReqReaders(dc.ctx, copCtx, job.ID, int(workerCnt*2), taskCh) + logutil.BgLogger().Info("[ddl] fetch index values with coprocessor", + zap.String("table", t.Meta().Name.O), + zap.String("index", indexInfo.Name.O)) + } + idxWorker.backfillWorker.copReqReaders = copReqReaders backfillWorkers = append(backfillWorkers, idxWorker.backfillWorker) go idxWorker.backfillWorker.run(reorgInfo.d, idxWorker, job) case typeAddIndexMergeTmpWorker: @@ -746,7 +753,7 @@ func (dc *ddlCtx) writePhysicalTableRecord(sessPool *sessionPool, t table.Physic return errors.New(ingest.LitErrGetBackendFail) } } - remains, err := dc.handleRangeTasks(sessPool, t, backfillWorkers, reorgInfo, &totalAddedCount, kvRanges, int(readerCnt)) + remains, err := dc.handleRangeTasks(sessPool, t, backfillWorkers, reorgInfo, &totalAddedCount, kvRanges) if err != nil { return errors.Trace(err) } @@ -759,14 +766,6 @@ func (dc *ddlCtx) writePhysicalTableRecord(sessPool *sessionPool, t table.Physic return nil } -// readWriteRatio split the available workers to read and write workers in 2:1 ratio. -func readWriteRatio(totalWorker int32, tp backfillWorkerType) (read int32, write int32) { - if tp != typeAddIndexWorker || variable.EnableCoprRead.Load() == "0" { - return 1, totalWorker - } - return totalWorker * 2, totalWorker -} - // recordIterFunc is used for low-level record iteration. type recordIterFunc func(h kv.Handle, rowKey kv.Key, rawRecord []byte) (more bool, err error) diff --git a/ddl/column.go b/ddl/column.go index 62d2dd14002f4..9c29bceea943d 100644 --- a/ddl/column.go +++ b/ddl/column.go @@ -1317,7 +1317,7 @@ func (w *updateColumnWorker) cleanRowMap() { } // BackfillDataInTxn will backfill the table record in a transaction. A lock corresponds to a rowKey if the value of rowKey is changed. 
-func (w *updateColumnWorker) BackfillDataInTxn(handleRange []*reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { +func (w *updateColumnWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { oprStartTime := time.Now() ctx := kv.WithInternalSourceType(context.Background(), w.jobContext.ddlJobSourceType()) errInTxn = kv.RunInNewTxn(ctx, w.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { @@ -1328,7 +1328,7 @@ func (w *updateColumnWorker) BackfillDataInTxn(handleRange []*reorgBackfillTask) txn.SetOption(kv.ResourceGroupTagger, tagger) } - rowRecords, nextKey, taskDone, err := w.fetchRowColVals(txn, *handleRange[0]) + rowRecords, nextKey, taskDone, err := w.fetchRowColVals(txn, handleRange) if err != nil { return errors.Trace(err) } diff --git a/ddl/index.go b/ddl/index.go index ce739519f7909..5497d273c604d 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -39,7 +39,6 @@ import ( "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/sessionctx" - "github.com/pingcap/tidb/sessionctx/variable" "github.com/pingcap/tidb/table" "github.com/pingcap/tidb/table/tables" "github.com/pingcap/tidb/tablecodec" @@ -1226,13 +1225,6 @@ func newAddIndexWorker(sessCtx sessionctx.Context, id int, t table.PhysicalTable return nil, err } } - var coprCtx *copContext - if variable.EnableCoprRead.Load() != "0" { - coprCtx = newCopContext(t.Meta(), indexInfo) - logutil.BgLogger().Info("[ddl] fetch index values with coprocessor", - zap.String("table", t.Meta().Name.O), - zap.String("index", indexInfo.Name.O)) - } return &addIndexWorker{ baseIndexWorker: baseIndexWorker{ @@ -1247,7 +1239,6 @@ func newAddIndexWorker(sessCtx sessionctx.Context, id int, t table.PhysicalTable }, index: index, writerCtx: lwCtx, - coprCtx: coprCtx, }, nil } @@ -1297,7 +1288,7 @@ func (w *baseIndexWorker) getIndexRecord(idxInfo *model.IndexInfo, handle kv.Han idxVal[j] = idxColumnVal } - rsData := tables.TryGetHandleRestoredDataWrapper(w.table, nil, w.rowMap, idxInfo) + rsData := tables.TryGetHandleRestoredDataWrapper(w.table.Meta(), nil, w.rowMap, idxInfo) idxRecord := &indexRecord{handle: handle, key: recordKey, vals: idxVal, rsData: rsData} return idxRecord, nil } @@ -1490,7 +1481,7 @@ func (w *addIndexWorker) batchCheckUniqueKey(txn kv.Transaction, idxRecords []*i // BackfillDataInTxn will backfill table index in a transaction. A lock corresponds to a rowKey if the value of rowKey is changed, // Note that index columns values may change, and an index is not allowed to be added, so the txn will rollback and retry. // BackfillDataInTxn will add w.batchCnt indices once, default value of w.batchCnt is 128. 
-func (w *addIndexWorker) BackfillDataInTxn(handleRange []*reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { +func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { failpoint.Inject("errorMockPanic", func(val failpoint.Value) { //nolint:forcetypeassert if val.(bool) { @@ -1516,10 +1507,10 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange []*reorgBackfillTask) (ta nextKey kv.Key taskDone bool ) - if w.coprCtx != nil { - idxRecords, nextKey, taskDone, err = w.fetchRowColValsFromCop(ctx, txn, handleRange) + if w.copReqReaders != nil { + idxRecords, nextKey, taskDone, err = w.fetchRowColValsFromCop(handleRange) } else { - idxRecords, nextKey, taskDone, err = w.fetchRowColVals(txn, *handleRange[0]) + idxRecords, nextKey, taskDone, err = w.fetchRowColVals(txn, handleRange) } if err != nil { return errors.Trace(err) @@ -1769,7 +1760,7 @@ func newCleanUpIndexWorker(sessCtx sessionctx.Context, id int, t table.PhysicalT } } -func (w *cleanUpIndexWorker) BackfillDataInTxn(handleRanges []*reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { +func (w *cleanUpIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { failpoint.Inject("errorMockPanic", func(val failpoint.Value) { //nolint:forcetypeassert if val.(bool) { @@ -1787,7 +1778,7 @@ func (w *cleanUpIndexWorker) BackfillDataInTxn(handleRanges []*reorgBackfillTask txn.SetOption(kv.ResourceGroupTagger, tagger) } - idxRecords, nextKey, taskDone, err := w.fetchRowColVals(txn, *handleRanges[0]) + idxRecords, nextKey, taskDone, err := w.fetchRowColVals(txn, handleRange) if err != nil { return errors.Trace(err) } diff --git a/ddl/index_distsql.go b/ddl/index_distsql.go index d1ffe9db71715..9eb408a844d0f 100644 --- a/ddl/index_distsql.go +++ b/ddl/index_distsql.go @@ -19,6 +19,7 @@ import ( "fmt" "sync" "sync/atomic" + "time" "github.com/pingcap/errors" "github.com/pingcap/tidb/distsql" @@ -36,13 +37,18 @@ import ( "github.com/pingcap/tidb/util" "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/codec" + "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tidb/util/timeutil" "github.com/pingcap/tipb/go-tipb" + "go.uber.org/zap" ) type copContext struct { + tblInfo *model.TableInfo + idxInfo *model.IndexInfo colInfos []*model.ColumnInfo fieldTps []*types.FieldType + sessCtx sessionctx.Context pushDownEncoding bool srcChunks []*chunk.Chunk @@ -54,42 +60,91 @@ type copContext struct { mu sync.Mutex } -func (c *copContext) spawnCopRead(w *addIndexWorker, ctx context.Context, txn kv.Transaction, seq int, task *reorgBackfillTask) { - if _, found := c.bfTasks[string(task.endKey)]; found { - // The task has been processed by an existing goroutine. 
- return - } - c.bfTasks[string(task.endKey)] = struct{}{} - c.readerCnt.Add(1) - go func() { - defer injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d-%d", "fetch-rows", w.id, seq))() - defer func() { - w.coprCtx.readerCnt.Add(-1) - w.coprCtx.doneChan <- struct{}{} - }() - var err error - if c.pushDownEncoding { - err = w.sendEncodedIdxRecords(ctx, txn, task.startKey, task.excludedEndKey()) - } else { - err = w.sendIdxRecords(ctx, txn, task.startKey, task.excludedEndKey(), seq) +type copReqReaders struct { + tasksCh chan *reorgBackfillTask + readers []*copReqReader +} + +func (p *copReqReaders) getReader(task *reorgBackfillTask) *copReqReader { + for { + for _, r := range p.readers { + r.mu.Lock() + if string(r.currentTask.endKey) == string(task.endKey) { + r.mu.Unlock() + return r + } + r.mu.Unlock() } - if err != nil { - w.coprCtx.err.Store(err) - return + logutil.BgLogger().Info("[ddl] coprocessor reader not found, wait a while", + zap.String("task", task.String())) + time.Sleep(time.Millisecond * 300) + } +} + +type copReqReader struct { + id int + traceID int64 + copCtx *copContext + idxRecordChan chan *indexRecord + srcChunk *chunk.Chunk + err error + done chan struct{} + currentTask *reorgBackfillTask + mu sync.Mutex +} + +func (c *copReqReader) run(ctx context.Context, tasks chan *reorgBackfillTask) { + for { + select { + case task, ok := <-tasks: + if !ok { + return + } + c.mu.Lock() + c.currentTask = task + c.idxRecordChan = make(chan *indexRecord, variable.MaxDDLReorgBatchSize) + c.mu.Unlock() + finish := injectSpan(c.traceID, fmt.Sprintf("%s-%d", "fetch-rows", c.id)) + err := kv.RunInNewTxn(ctx, c.copCtx.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { + if c.copCtx.pushDownEncoding { + return c.copCtx.sendEncodedIdxRecords(ctx, c.idxRecordChan, txn, task.startKey, task.excludedEndKey()) + } else { + return c.copCtx.sendIdxRecords(ctx, c.idxRecordChan, c.srcChunk, txn, task.startKey, task.excludedEndKey()) + } + }) + finish() + c.mu.Lock() + c.err = err + c.mu.Unlock() + close(c.idxRecordChan) + <-c.done } - }() + } } -func (c *copContext) getChunk(i int) *chunk.Chunk { - c.mu.Lock() - defer c.mu.Unlock() - for j := len(c.srcChunks); j <= i; j++ { - c.srcChunks = append(c.srcChunks, chunk.NewChunkWithCapacity(c.fieldTps, 1024)) +func newCopReqReaders(ctx context.Context, copCtx *copContext, jobID int64, readerCnt int, tasks chan *reorgBackfillTask) *copReqReaders { + p := &copReqReaders{ + tasksCh: tasks, + } + for i := 0; i < readerCnt; i++ { + r := &copReqReader{ + id: i, + traceID: jobID, + copCtx: copCtx, + idxRecordChan: make(chan *indexRecord, variable.MaxDDLReorgBatchSize), + srcChunk: chunk.NewChunkWithCapacity(copCtx.fieldTps, 1024), + err: nil, + done: make(chan struct{}), + currentTask: nil, + mu: sync.Mutex{}, + } + p.readers = append(p.readers, r) + go r.run(ctx, tasks) } - return c.srcChunks[i] + return p } -func newCopContext(tblInfo *model.TableInfo, idxInfo *model.IndexInfo) *copContext { +func newCopContext(tblInfo *model.TableInfo, idxInfo *model.IndexInfo, sessCtx sessionctx.Context) *copContext { colInfos := make([]*model.ColumnInfo, 0, len(idxInfo.Columns)) fieldTps := make([]*types.FieldType, 0, len(idxInfo.Columns)) for _, idxCol := range idxInfo.Columns { @@ -103,8 +158,11 @@ func newCopContext(tblInfo *model.TableInfo, idxInfo *model.IndexInfo) *copConte fieldTps = append(fieldTps, pkFieldTps...) 
return &copContext{ + tblInfo: tblInfo, + idxInfo: idxInfo, colInfos: colInfos, fieldTps: fieldTps, + sessCtx: sessCtx, pushDownEncoding: variable.EnableCoprRead.Load() == "2", indexRecordChan: make(chan *indexRecord, variable.MaxDDLReorgBatchSize), doneChan: make(chan struct{}, 1), @@ -112,8 +170,8 @@ func newCopContext(tblInfo *model.TableInfo, idxInfo *model.IndexInfo) *copConte } } -func (w *addIndexWorker) buildTableScan(ctx context.Context, txn kv.Transaction, start, end kv.Key) (distsql.SelectResult, error) { - dagPB, err := w.buildDAGPB(w.coprCtx.colInfos) +func (c *copContext) buildTableScan(ctx context.Context, txn kv.Transaction, start, end kv.Key) (distsql.SelectResult, error) { + dagPB, err := buildDAGPB(c.sessCtx, c.tblInfo, c.colInfos) if err != nil { return nil, err } @@ -124,62 +182,62 @@ func (w *addIndexWorker) buildTableScan(ctx context.Context, txn kv.Transaction, SetStartTS(txn.StartTS()). SetKeyRanges([]kv.KeyRange{{StartKey: start, EndKey: end}}). SetKeepOrder(true). - SetFromSessionVars(w.sessCtx.GetSessionVars()). - SetFromInfoSchema(w.sessCtx.GetDomainInfoSchema()). + SetFromSessionVars(c.sessCtx.GetSessionVars()). + SetFromInfoSchema(c.sessCtx.GetDomainInfoSchema()). Build() if err != nil { return nil, err } kvReq.Concurrency = 1 - return distsql.Select(ctx, w.sessCtx, kvReq, w.coprCtx.fieldTps, statistics.NewQueryFeedback(0, nil, 0, false)) + return distsql.Select(ctx, c.sessCtx, kvReq, c.fieldTps, statistics.NewQueryFeedback(0, nil, 0, false)) } -func (w *addIndexWorker) buildDDLPB(colInfos []*model.ColumnInfo) (*tipb.DDLRequest, error) { +func buildDDLPB(tblInfo *model.TableInfo, idxInfo *model.IndexInfo, colInfos []*model.ColumnInfo) (*tipb.DDLRequest, error) { ddlReq := &tipb.DDLRequest{} ddlReq.TableInfo = new(tipb.TableInfo) ddlReq.IndexInfo = new(tipb.IndexInfo) - ddlReq.TableInfo.TableId = w.table.Meta().ID - ddlReq.TableInfo.Columns = util.ColumnsToProto(colInfos, w.table.Meta().PKIsHandle) - ddlReq.IndexInfo.TableId = w.table.Meta().ID - ddlReq.IndexInfo.IndexId = w.index.Meta().ID - indexColInfos := make([]*model.ColumnInfo, 0, len(w.index.Meta().Columns)) - for _, idxCol := range w.index.Meta().Columns { - indexColInfos = append(indexColInfos, w.table.Cols()[idxCol.Offset].ColumnInfo) + ddlReq.TableInfo.TableId = tblInfo.ID + ddlReq.TableInfo.Columns = util.ColumnsToProto(colInfos, tblInfo.PKIsHandle) + ddlReq.IndexInfo.TableId = tblInfo.ID + ddlReq.IndexInfo.IndexId = idxInfo.ID + indexColInfos := make([]*model.ColumnInfo, 0, len(idxInfo.Columns)) + for _, idxCol := range idxInfo.Columns { + indexColInfos = append(indexColInfos, tblInfo.Cols()[idxCol.Offset]) } - ddlReq.IndexInfo.Columns = util.ColumnsToProto(indexColInfos, w.table.Meta().PKIsHandle) + ddlReq.IndexInfo.Columns = util.ColumnsToProto(indexColInfos, tblInfo.PKIsHandle) ddlReq.Columns = ddlReq.TableInfo.Columns - ddlReq.IndexInfo.Unique = w.index.Meta().Unique + ddlReq.IndexInfo.Unique = idxInfo.Unique return ddlReq, nil } -func (w *addIndexWorker) buildDAGPB(colInfos []*model.ColumnInfo) (*tipb.DAGRequest, error) { +func buildDAGPB(sCtx sessionctx.Context, tblInfo *model.TableInfo, colInfos []*model.ColumnInfo) (*tipb.DAGRequest, error) { dagReq := &tipb.DAGRequest{} - dagReq.TimeZoneName, dagReq.TimeZoneOffset = timeutil.Zone(w.sessCtx.GetSessionVars().Location()) - sc := w.sessCtx.GetSessionVars().StmtCtx + dagReq.TimeZoneName, dagReq.TimeZoneOffset = timeutil.Zone(sCtx.GetSessionVars().Location()) + sc := sCtx.GetSessionVars().StmtCtx dagReq.Flags = sc.PushDownFlags() for i := 
range colInfos { dagReq.OutputOffsets = append(dagReq.OutputOffsets, uint32(i)) } - execPB, err := w.constructTableScanPB(w.table.Meta(), colInfos) + execPB, err := constructTableScanPB(sCtx, tblInfo, colInfos) if err != nil { return nil, err } dagReq.Executors = append(dagReq.Executors, execPB) - distsql.SetEncodeType(w.sessCtx, dagReq) + distsql.SetEncodeType(sCtx, dagReq) return dagReq, nil } -func (w *addIndexWorker) constructTableScanPB(tblInfo *model.TableInfo, colInfos []*model.ColumnInfo) (*tipb.Executor, error) { +func constructTableScanPB(sCtx sessionctx.Context, tblInfo *model.TableInfo, colInfos []*model.ColumnInfo) (*tipb.Executor, error) { tblScan := tables.BuildTableScanFromInfos(tblInfo, colInfos) - tblScan.TableId = w.table.Meta().ID - err := setPBColumnsDefaultValue(w.sessCtx, tblScan.Columns, colInfos) + tblScan.TableId = tblInfo.ID + err := setPBColumnsDefaultValue(sCtx, tblScan.Columns, colInfos) return &tipb.Executor{Tp: tipb.ExecType_TypeTableScan, TblScan: tblScan}, err } -func (w *addIndexWorker) buildScanIndexKV(ctx context.Context, txn kv.Transaction, start, end kv.Key) (kv.Response, error) { - ddlPB, err := w.buildDDLPB(w.coprCtx.colInfos) +func (c *copContext) buildScanIndexKV(ctx context.Context, txn kv.Transaction, start, end kv.Key) (kv.Response, error) { + ddlPB, err := buildDDLPB(c.tblInfo, c.idxInfo, c.colInfos) if err != nil { return nil, err } @@ -192,8 +250,8 @@ func (w *addIndexWorker) buildScanIndexKV(ctx context.Context, txn kv.Transactio SetStartTS(txn.StartTS()). SetKeyRanges([]kv.KeyRange{{StartKey: start, EndKey: end}}). SetKeepOrder(true). - SetFromSessionVars(w.sessCtx.GetSessionVars()). - SetFromInfoSchema(w.sessCtx.GetDomainInfoSchema()). + SetFromSessionVars(c.sessCtx.GetSessionVars()). + SetFromInfoSchema(c.sessCtx.GetDomainInfoSchema()). 
Build() if err != nil { return nil, err @@ -201,46 +259,46 @@ func (w *addIndexWorker) buildScanIndexKV(ctx context.Context, txn kv.Transactio kvReq.Concurrency = 1 option := &kv.ClientSendOption{ - SessionMemTracker: w.sessCtx.GetSessionVars().StmtCtx.MemTracker, + SessionMemTracker: c.sessCtx.GetSessionVars().StmtCtx.MemTracker, } - resp := w.sessCtx.GetClient().Send(ctx, kvReq, w.sessCtx.GetSessionVars().KVVars, option) + resp := c.sessCtx.GetClient().Send(ctx, kvReq, c.sessCtx.GetSessionVars().KVVars, option) if resp == nil { return nil, errors.New("client returns nil response") } return resp, nil } -func (w *addIndexWorker) sendIdxRecords(ctx context.Context, txn kv.Transaction, start, end kv.Key, seq int) error { - sctx := w.sessCtx.GetSessionVars().StmtCtx - srcResult, err := w.buildTableScan(ctx, txn, start, end) +func (c *copContext) sendIdxRecords(ctx context.Context, ch chan *indexRecord, srcChk *chunk.Chunk, + txn kv.Transaction, start, end kv.Key) error { + sctx := c.sessCtx.GetSessionVars().StmtCtx + srcResult, err := c.buildTableScan(ctx, txn, start, end) if err != nil { return errors.Trace(err) } - srcChunk := w.coprCtx.getChunk(seq) for { - err := srcResult.Next(ctx, srcChunk) + err := srcResult.Next(ctx, srcChk) if err != nil { return errors.Trace(err) } - if srcChunk.NumRows() == 0 { + if srcChk.NumRows() == 0 { return nil } - iter := chunk.NewIterator4Chunk(srcChunk) + iter := chunk.NewIterator4Chunk(srcChk) for row := iter.Begin(); row != iter.End(); row = iter.Next() { - idxDt, hdDt := extractIdxValsAndHandle(row, w.index.Meta(), w.coprCtx.fieldTps) - handle, err := buildHandle(hdDt, w.table.Meta(), w.index.Meta(), sctx) + idxDt, hdDt := extractIdxValsAndHandle(row, c.idxInfo, c.fieldTps) + handle, err := buildHandle(hdDt, c.tblInfo, c.idxInfo, sctx) if err != nil { return errors.Trace(err) } - rsData := tables.TryGetHandleRestoredDataWrapper(w.table, hdDt, nil, w.index.Meta()) - w.coprCtx.indexRecordChan <- &indexRecord{handle: handle, key: nil, vals: idxDt, rsData: rsData, skip: false} + rsData := tables.TryGetHandleRestoredDataWrapper(c.tblInfo, hdDt, nil, c.idxInfo) + ch <- &indexRecord{handle: handle, key: nil, vals: idxDt, rsData: rsData, skip: false} } } } -func (w *addIndexWorker) sendEncodedIdxRecords(ctx context.Context, txn kv.Transaction, start, end kv.Key) error { - resp, err := w.buildScanIndexKV(w.jobContext.ddlJobCtx, txn, start, end) +func (c *copContext) sendEncodedIdxRecords(ctx context.Context, ch chan *indexRecord, txn kv.Transaction, start, end kv.Key) error { + resp, err := c.buildScanIndexKV(ctx, txn, start, end) if err != nil { return errors.Trace(err) } @@ -257,48 +315,33 @@ func (w *addIndexWorker) sendEncodedIdxRecords(ctx context.Context, txn kv.Trans return errors.Trace(err) } for i := 0; i < len(colResp.Keys); i++ { - w.coprCtx.indexRecordChan <- &indexRecord{idxKV: &indexKV{key: colResp.Keys[i], value: colResp.Values[i]}} + ch <- &indexRecord{idxKV: &indexKV{key: colResp.Keys[i], value: colResp.Values[i]}} } } } -func (w *addIndexWorker) fetchRowColValsFromCop(ctx context.Context, txn kv.Transaction, - handleRanges []*reorgBackfillTask) ([]*indexRecord, kv.Key, bool, error) { - for rid, hRange := range handleRanges { - w.coprCtx.spawnCopRead(w, ctx, txn, rid, hRange) - } +func (w *addIndexWorker) fetchRowColValsFromCop(handleRange reorgBackfillTask) ([]*indexRecord, kv.Key, bool, error) { + copReader := w.copReqReaders.getReader(&handleRange) w.idxRecords = w.idxRecords[:0] taskDone := false for { select { - case record := 
<-w.coprCtx.indexRecordChan: - w.idxRecords = append(w.idxRecords, record) - case <-w.coprCtx.doneChan: - if w.coprCtx.readerCnt.Load() == 0 { + case record, more := <-copReader.idxRecordChan: + if !more { taskDone = true - for { // consume all the remaining records. - exit := false - select { - case record := <-w.coprCtx.indexRecordChan: - w.idxRecords = append(w.idxRecords, record) - default: - exit = true - } - if exit { - break - } - } + break } + w.idxRecords = append(w.idxRecords, record) } - if taskDone || len(w.idxRecords) >= w.batchCnt { + if len(w.idxRecords) >= w.batchCnt { + break + } + if taskDone { + copReader.done <- struct{}{} break } } - err := w.coprCtx.err.Load() - if err != nil { - return nil, nil, false, err.(error) - } - return w.idxRecords, handleRanges[0].startKey, taskDone, nil + return w.idxRecords, handleRange.startKey, taskDone, copReader.err } func buildHandleColInfoAndFieldTypes(tbInfo *model.TableInfo) ([]*model.ColumnInfo, []*types.FieldType) { diff --git a/ddl/index_merge_tmp.go b/ddl/index_merge_tmp.go index b90fc7a2292bd..9159b47c47951 100644 --- a/ddl/index_merge_tmp.go +++ b/ddl/index_merge_tmp.go @@ -102,7 +102,7 @@ func newMergeTempIndexWorker(sessCtx sessionctx.Context, id int, t table.Physica } // BackfillDataInTxn merge temp index data in txn. -func (w *mergeIndexWorker) BackfillDataInTxn(taskRanges []*reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { +func (w *mergeIndexWorker) BackfillDataInTxn(taskRange reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { oprStartTime := time.Now() ctx := kv.WithInternalSourceType(context.Background(), w.jobContext.ddlJobSourceType()) errInTxn = kv.RunInNewTxn(ctx, w.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { @@ -113,7 +113,7 @@ func (w *mergeIndexWorker) BackfillDataInTxn(taskRanges []*reorgBackfillTask) (t txn.SetOption(kv.ResourceGroupTagger, tagger) } - tmpIdxRecords, nextKey, taskDone, err := w.fetchTempIndexVals(txn, taskRanges[0]) + tmpIdxRecords, nextKey, taskDone, err := w.fetchTempIndexVals(txn, taskRange) if err != nil { return errors.Trace(err) } @@ -156,7 +156,7 @@ func (w *mergeIndexWorker) BackfillDataInTxn(taskRanges []*reorgBackfillTask) (t func (w *mergeIndexWorker) AddMetricInfo(cnt float64) { } -func (w *mergeIndexWorker) fetchTempIndexVals(txn kv.Transaction, taskRange *reorgBackfillTask) ([]*temporaryIndexRecord, kv.Key, bool, error) { +func (w *mergeIndexWorker) fetchTempIndexVals(txn kv.Transaction, taskRange reorgBackfillTask) ([]*temporaryIndexRecord, kv.Key, bool, error) { startTime := time.Now() w.tmpIdxRecords = w.tmpIdxRecords[:0] w.tmpIdxKeys = w.tmpIdxKeys[:0] diff --git a/executor/admin.go b/executor/admin.go index ba219b70b6db3..b69ef74b97f2a 100644 --- a/executor/admin.go +++ b/executor/admin.go @@ -380,7 +380,7 @@ func (e *RecoverIndexExec) fetchRecoverRows(ctx context.Context, srcResult dists } idxVals := extractIdxVals(row, e.idxValsBufs[result.scanRowCount], e.colFieldTypes, idxValLen) e.idxValsBufs[result.scanRowCount] = idxVals - rsData := tables.TryGetHandleRestoredDataWrapper(e.table, plannercore.GetCommonHandleDatum(e.handleCols, row), nil, e.index.Meta()) + rsData := tables.TryGetHandleRestoredDataWrapper(e.table.Meta(), plannercore.GetCommonHandleDatum(e.handleCols, row), nil, e.index.Meta()) e.recoverRows = append(e.recoverRows, recoverRows{handle: handle, idxVals: idxVals, rsData: rsData, skip: false}) result.scanRowCount++ result.currentHandle = handle diff --git a/session/schema_amender.go 
b/session/schema_amender.go index 955d30cc42ada..caaec7994ae3a 100644 --- a/session/schema_amender.go +++ b/session/schema_amender.go @@ -447,7 +447,7 @@ func (a *amendOperationAddIndexInfo) genIndexKeyValue(ctx context.Context, sctx idxVals = append(idxVals, chk.GetRow(0).GetDatum(oldCol.Offset, &oldCol.FieldType)) } - rsData := tables.TryGetHandleRestoredDataWrapper(a.tblInfoAtCommit, getCommonHandleDatum(a.tblInfoAtCommit, chk.GetRow(0)), nil, a.indexInfoAtCommit.Meta()) + rsData := tables.TryGetHandleRestoredDataWrapper(a.tblInfoAtCommit.Meta(), getCommonHandleDatum(a.tblInfoAtCommit, chk.GetRow(0)), nil, a.indexInfoAtCommit.Meta()) // Generate index key buf. newIdxKey, distinct, err := tablecodec.GenIndexKey(sctx.GetSessionVars().StmtCtx, diff --git a/table/tables/tables.go b/table/tables/tables.go index 05724a67fa311..e9b66bf445436 100644 --- a/table/tables/tables.go +++ b/table/tables/tables.go @@ -977,7 +977,7 @@ func (t *TableCommon) addIndices(sctx sessionctx.Context, recordID kv.Handle, r idxMeta := v.Meta() dupErr = kv.ErrKeyExists.FastGenByArgs(entryKey, idxMeta.Name.String()) } - rsData := TryGetHandleRestoredDataWrapper(t, r, nil, v.Meta()) + rsData := TryGetHandleRestoredDataWrapper(t.Meta(), r, nil, v.Meta()) if dupHandle, err := v.Create(sctx, txn, indexVals, recordID, rsData, opts...); err != nil { if kv.ErrKeyExists.Equal(err) { return dupHandle, dupErr @@ -1345,7 +1345,7 @@ func (t *TableCommon) buildIndexForRow(ctx sessionctx.Context, h kv.Handle, vals if untouched { opts = append(opts, table.IndexIsUntouched) } - rsData := TryGetHandleRestoredDataWrapper(t, newData, nil, idx.Meta()) + rsData := TryGetHandleRestoredDataWrapper(t.Meta(), newData, nil, idx.Meta()) if _, err := idx.Create(ctx, txn, vals, h, rsData, opts...); err != nil { if kv.ErrKeyExists.Equal(err) { // Make error message consistent with MySQL. @@ -1866,14 +1866,14 @@ func (t *TableCommon) GetSequenceCommon() *sequenceCommon { } // TryGetHandleRestoredDataWrapper tries to get the restored data for handle if needed. The argument can be a slice or a map. 
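// It returns nil unless the new collation framework is enabled and the table uses a
// common handle with a non-zero CommonHandleVersion.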
-func TryGetHandleRestoredDataWrapper(t table.Table, row []types.Datum, rowMap map[int64]types.Datum, idx *model.IndexInfo) []types.Datum { - if !collate.NewCollationEnabled() || !t.Meta().IsCommonHandle || t.Meta().CommonHandleVersion == 0 { +func TryGetHandleRestoredDataWrapper(tblInfo *model.TableInfo, row []types.Datum, rowMap map[int64]types.Datum, idx *model.IndexInfo) []types.Datum { + if !collate.NewCollationEnabled() || !tblInfo.IsCommonHandle || tblInfo.CommonHandleVersion == 0 { return nil } rsData := make([]types.Datum, 0, 4) - pkIdx := FindPrimaryIndex(t.Meta()) + pkIdx := FindPrimaryIndex(tblInfo) for _, pkIdxCol := range pkIdx.Columns { - pkCol := t.Meta().Columns[pkIdxCol.Offset] + pkCol := tblInfo.Columns[pkIdxCol.Offset] if !types.NeedRestoredData(&pkCol.FieldType) { continue } From ca070d0308f3860c5bdf7c6b2a498557ebdd8d64 Mon Sep 17 00:00:00 2001 From: tangenta Date: Thu, 13 Oct 2022 22:53:01 +0800 Subject: [PATCH 15/26] revert small changes --- ddl/backfilling.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ddl/backfilling.go b/ddl/backfilling.go index 330fc31622a9c..54a90e4b6a872 100644 --- a/ddl/backfilling.go +++ b/ddl/backfilling.go @@ -277,6 +277,7 @@ func (w *backfillWorker) handleBackfillTask(d *ddlCtx, task *reorgBackfillTask, zap.Float64("speed(rows/s)", float64(num)/time.Since(lastLogTime).Seconds())) lastLogTime = time.Now() } + handleRange.startKey = taskCtx.nextKey if taskCtx.done { break @@ -509,8 +510,8 @@ func (dc *ddlCtx) handleRangeTasks(sessPool *sessionPool, t table.Table, workers endKey: endKey, // If the boundaries overlap, we should ignore the preceding endKey. endInclude: endK.Cmp(keyRange.EndKey) != 0 || i == len(kvRanges)-1} - batchTasks = append(batchTasks, task) + if len(batchTasks) >= len(workers) { break } @@ -645,7 +646,7 @@ func (dc *ddlCtx) writePhysicalTableRecord(sessPool *sessionPool, t table.Physic if err := loadDDLReorgVars(dc.ctx, sessPool); err != nil { logutil.BgLogger().Error("[ddl] load DDL reorganization variable failed", zap.Error(err)) } - workerCnt := variable.GetDDLReorgWorkerCounter() + workerCnt = variable.GetDDLReorgWorkerCounter() rowFormat := variable.GetDDLReorgRowFormat() // If only have 1 range, we can only start 1 worker. if len(kvRanges) < int(workerCnt) { From 0c3c0b1f1299776e289c1bd168d690f4ce178083 Mon Sep 17 00:00:00 2001 From: tangenta Date: Fri, 14 Oct 2022 11:33:25 +0800 Subject: [PATCH 16/26] send next cop request without blocking --- ddl/index.go | 1 - ddl/index_distsql.go | 60 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/ddl/index.go b/ddl/index.go index 5497d273c604d..f9c1d1d39d0c0 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -1191,7 +1191,6 @@ type addIndexWorker struct { baseIndexWorker index table.Index writerCtx *ingest.WriterContext - coprCtx *copContext // The following attributes are used to reduce memory allocation. 
idxKeyBufs [][]byte diff --git a/ddl/index_distsql.go b/ddl/index_distsql.go index 9eb408a844d0f..631013801e035 100644 --- a/ddl/index_distsql.go +++ b/ddl/index_distsql.go @@ -63,24 +63,50 @@ type copContext struct { type copReqReaders struct { tasksCh chan *reorgBackfillTask readers []*copReqReader + results map[string]struct { + ch chan *indexRecord + err error + } + mu sync.Mutex } -func (p *copReqReaders) getReader(task *reorgBackfillTask) *copReqReader { +func (p *copReqReaders) getResult(task *reorgBackfillTask) (chan *indexRecord, error) { for { for _, r := range p.readers { r.mu.Lock() - if string(r.currentTask.endKey) == string(task.endKey) { + if r.currentTask != nil && string(r.currentTask.endKey) == string(task.endKey) { r.mu.Unlock() - return r + return r.idxRecordChan, r.err } r.mu.Unlock() } - logutil.BgLogger().Info("[ddl] coprocessor reader not found, wait a while", + p.mu.Lock() + if res, ok := p.results[string(task.endKey)]; ok { + p.mu.Unlock() + return res.ch, res.err + } + p.mu.Unlock() + logutil.BgLogger().Info("[ddl] coprocessor result not found, wait a while", zap.String("task", task.String())) time.Sleep(time.Millisecond * 300) } } +func (p *copReqReaders) appendResult(task *reorgBackfillTask, ch chan *indexRecord, err error) { + p.mu.Lock() + p.results[string(task.endKey)] = struct { + ch chan *indexRecord + err error + }{ch, err} + p.mu.Unlock() +} + +func (p *copReqReaders) deleteResult(task *reorgBackfillTask) { + p.mu.Lock() + delete(p.results, string(task.endKey)) + p.mu.Unlock() +} + type copReqReader struct { id int traceID int64 @@ -88,9 +114,9 @@ type copReqReader struct { idxRecordChan chan *indexRecord srcChunk *chunk.Chunk err error - done chan struct{} currentTask *reorgBackfillTask mu sync.Mutex + onReadDone func(task *reorgBackfillTask, ch chan *indexRecord, err error) } func (c *copReqReader) run(ctx context.Context, tasks chan *reorgBackfillTask) { @@ -117,7 +143,7 @@ func (c *copReqReader) run(ctx context.Context, tasks chan *reorgBackfillTask) { c.err = err c.mu.Unlock() close(c.idxRecordChan) - <-c.done + c.onReadDone(c.currentTask, c.idxRecordChan, err) } } } @@ -125,6 +151,12 @@ func (c *copReqReader) run(ctx context.Context, tasks chan *reorgBackfillTask) { func newCopReqReaders(ctx context.Context, copCtx *copContext, jobID int64, readerCnt int, tasks chan *reorgBackfillTask) *copReqReaders { p := &copReqReaders{ tasksCh: tasks, + readers: make([]*copReqReader, 0, readerCnt), + results: make(map[string]struct { + ch chan *indexRecord + err error + }), + mu: sync.Mutex{}, } for i := 0; i < readerCnt; i++ { r := &copReqReader{ @@ -134,9 +166,9 @@ func newCopReqReaders(ctx context.Context, copCtx *copContext, jobID int64, read idxRecordChan: make(chan *indexRecord, variable.MaxDDLReorgBatchSize), srcChunk: chunk.NewChunkWithCapacity(copCtx.fieldTps, 1024), err: nil, - done: make(chan struct{}), currentTask: nil, mu: sync.Mutex{}, + onReadDone: p.appendResult, } p.readers = append(p.readers, r) go r.run(ctx, tasks) @@ -321,27 +353,29 @@ func (c *copContext) sendEncodedIdxRecords(ctx context.Context, ch chan *indexRe } func (w *addIndexWorker) fetchRowColValsFromCop(handleRange reorgBackfillTask) ([]*indexRecord, kv.Key, bool, error) { - copReader := w.copReqReaders.getReader(&handleRange) + ch, err := w.copReqReaders.getResult(&handleRange) w.idxRecords = w.idxRecords[:0] taskDone := false for { select { - case record, more := <-copReader.idxRecordChan: + case record, more := <-ch: if !more { taskDone = true break } w.idxRecords = 
append(w.idxRecords, record) } - if len(w.idxRecords) >= w.batchCnt { + if taskDone { + _, err = w.copReqReaders.getResult(&handleRange) + w.copReqReaders.deleteResult(&handleRange) break } - if taskDone { - copReader.done <- struct{}{} + if len(w.idxRecords) >= w.batchCnt { break } } - return w.idxRecords, handleRange.startKey, taskDone, copReader.err + + return w.idxRecords, handleRange.startKey, taskDone, err } func buildHandleColInfoAndFieldTypes(tbInfo *model.TableInfo) ([]*model.ColumnInfo, []*types.FieldType) { From 093a620ce69181293fa692c088955455af6b9a85 Mon Sep 17 00:00:00 2001 From: tangenta Date: Fri, 14 Oct 2022 14:38:44 +0800 Subject: [PATCH 17/26] add tracing for cop-read and fetch-chan-rows --- ddl/ddl_worker_util.go | 5 ++--- ddl/index.go | 2 ++ ddl/index_distsql.go | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ddl/ddl_worker_util.go b/ddl/ddl_worker_util.go index 07cb6aec69b66..40dec3447de99 100644 --- a/ddl/ddl_worker_util.go +++ b/ddl/ddl_worker_util.go @@ -137,15 +137,14 @@ func analyzeTrace(trace minitrace.Trace) string { max = dur } } - avg := sum / uint64(len(spans)) sb.WriteString(orderedEvents[i]) sb.WriteString(":") if len(spans) < 20 { sb.WriteString(fmt.Sprintf("%f", time.Duration(sum).Seconds())) } else { - sb.WriteString(fmt.Sprintf(`{sum: %f, min: %f, max: %f, avg: %f}`, + sb.WriteString(fmt.Sprintf(`{sum: %f, min: %f, max: %f, cnt: %d}`, time.Duration(sum).Seconds(), time.Duration(min).Seconds(), - time.Duration(max).Seconds(), time.Duration(avg).Seconds())) + time.Duration(max).Seconds(), len(spans))) } if i != len(orderedEvents)-1 { sb.WriteString(", ") diff --git a/ddl/index.go b/ddl/index.go index f9c1d1d39d0c0..7c0a47b3ee2df 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -1506,11 +1506,13 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC nextKey kv.Key taskDone bool ) + finish := injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d", "fetch-rows", w.id)) if w.copReqReaders != nil { idxRecords, nextKey, taskDone, err = w.fetchRowColValsFromCop(handleRange) } else { idxRecords, nextKey, taskDone, err = w.fetchRowColVals(txn, handleRange) } + finish() if err != nil { return errors.Trace(err) } diff --git a/ddl/index_distsql.go b/ddl/index_distsql.go index 631013801e035..28266adbf4b8f 100644 --- a/ddl/index_distsql.go +++ b/ddl/index_distsql.go @@ -130,7 +130,7 @@ func (c *copReqReader) run(ctx context.Context, tasks chan *reorgBackfillTask) { c.currentTask = task c.idxRecordChan = make(chan *indexRecord, variable.MaxDDLReorgBatchSize) c.mu.Unlock() - finish := injectSpan(c.traceID, fmt.Sprintf("%s-%d", "fetch-rows", c.id)) + finish := injectSpan(c.traceID, fmt.Sprintf("cop-read-%d", c.id)) err := kv.RunInNewTxn(ctx, c.copCtx.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { if c.copCtx.pushDownEncoding { return c.copCtx.sendEncodedIdxRecords(ctx, c.idxRecordChan, txn, task.startKey, task.excludedEndKey()) From 03ec3af09276d6468b1bdc269d2864ca24a824d7 Mon Sep 17 00:00:00 2001 From: tangenta Date: Fri, 14 Oct 2022 16:24:07 +0800 Subject: [PATCH 18/26] writers can receive index records from all the ranges --- ddl/index_distsql.go | 134 +++++++++++++------------------------------ 1 file changed, 41 insertions(+), 93 deletions(-) diff --git a/ddl/index_distsql.go b/ddl/index_distsql.go index 28266adbf4b8f..9d389554c17cd 100644 --- a/ddl/index_distsql.go +++ b/ddl/index_distsql.go @@ -19,7 +19,6 @@ import ( "fmt" "sync" "sync/atomic" - "time" "github.com/pingcap/errors" 
"github.com/pingcap/tidb/distsql" @@ -37,10 +36,9 @@ import ( "github.com/pingcap/tidb/util" "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/codec" - "github.com/pingcap/tidb/util/logutil" + "github.com/pingcap/tidb/util/generic" "github.com/pingcap/tidb/util/timeutil" "github.com/pingcap/tipb/go-tipb" - "go.uber.org/zap" ) type copContext struct { @@ -61,118 +59,66 @@ type copContext struct { } type copReqReaders struct { - tasksCh chan *reorgBackfillTask - readers []*copReqReader - results map[string]struct { - ch chan *indexRecord - err error - } - mu sync.Mutex -} - -func (p *copReqReaders) getResult(task *reorgBackfillTask) (chan *indexRecord, error) { - for { - for _, r := range p.readers { - r.mu.Lock() - if r.currentTask != nil && string(r.currentTask.endKey) == string(task.endKey) { - r.mu.Unlock() - return r.idxRecordChan, r.err - } - r.mu.Unlock() - } - p.mu.Lock() - if res, ok := p.results[string(task.endKey)]; ok { - p.mu.Unlock() - return res.ch, res.err - } - p.mu.Unlock() - logutil.BgLogger().Info("[ddl] coprocessor result not found, wait a while", - zap.String("task", task.String())) - time.Sleep(time.Millisecond * 300) - } -} - -func (p *copReqReaders) appendResult(task *reorgBackfillTask, ch chan *indexRecord, err error) { - p.mu.Lock() - p.results[string(task.endKey)] = struct { - ch chan *indexRecord - err error - }{ch, err} - p.mu.Unlock() -} - -func (p *copReqReaders) deleteResult(task *reorgBackfillTask) { - p.mu.Lock() - delete(p.results, string(task.endKey)) - p.mu.Unlock() + tasksCh chan *reorgBackfillTask + resultsCh chan *indexRecord + results generic.SyncMap[string, error] } type copReqReader struct { - id int - traceID int64 - copCtx *copContext + id int + traceID int64 + copCtx *copContext + srcChunk *chunk.Chunk + idxRecordChan chan *indexRecord - srcChunk *chunk.Chunk - err error - currentTask *reorgBackfillTask - mu sync.Mutex - onReadDone func(task *reorgBackfillTask, ch chan *indexRecord, err error) + results *generic.SyncMap[string, error] } -func (c *copReqReader) run(ctx context.Context, tasks chan *reorgBackfillTask) { +func (c *copReqReader) run(ctx context.Context, wg *sync.WaitGroup, tasks chan *reorgBackfillTask) { for { select { case task, ok := <-tasks: if !ok { + wg.Done() return } - c.mu.Lock() - c.currentTask = task - c.idxRecordChan = make(chan *indexRecord, variable.MaxDDLReorgBatchSize) - c.mu.Unlock() finish := injectSpan(c.traceID, fmt.Sprintf("cop-read-%d", c.id)) err := kv.RunInNewTxn(ctx, c.copCtx.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { if c.copCtx.pushDownEncoding { - return c.copCtx.sendEncodedIdxRecords(ctx, c.idxRecordChan, txn, task.startKey, task.excludedEndKey()) + return c.copCtx.sendEncodedIdxRecords(ctx, c.idxRecordChan, txn.StartTS(), task.startKey, task.excludedEndKey()) } else { - return c.copCtx.sendIdxRecords(ctx, c.idxRecordChan, c.srcChunk, txn, task.startKey, task.excludedEndKey()) + return c.copCtx.sendIdxRecords(ctx, c.idxRecordChan, c.srcChunk, txn.StartTS(), task.startKey, task.excludedEndKey()) } }) finish() - c.mu.Lock() - c.err = err - c.mu.Unlock() - close(c.idxRecordChan) - c.onReadDone(c.currentTask, c.idxRecordChan, err) + c.results.Store(string(task.endKey), err) } } } func newCopReqReaders(ctx context.Context, copCtx *copContext, jobID int64, readerCnt int, tasks chan *reorgBackfillTask) *copReqReaders { p := &copReqReaders{ - tasksCh: tasks, - readers: make([]*copReqReader, 0, readerCnt), - results: make(map[string]struct { - ch chan 
*indexRecord - err error - }), - mu: sync.Mutex{}, + tasksCh: tasks, + resultsCh: make(chan *indexRecord, int(variable.MaxDDLReorgBatchSize)*readerCnt), + results: generic.NewSyncMap[string, error](readerCnt), } + wg := &sync.WaitGroup{} for i := 0; i < readerCnt; i++ { + wg.Add(1) r := &copReqReader{ id: i, traceID: jobID, copCtx: copCtx, - idxRecordChan: make(chan *indexRecord, variable.MaxDDLReorgBatchSize), + idxRecordChan: p.resultsCh, srcChunk: chunk.NewChunkWithCapacity(copCtx.fieldTps, 1024), - err: nil, - currentTask: nil, - mu: sync.Mutex{}, - onReadDone: p.appendResult, + results: &p.results, } - p.readers = append(p.readers, r) - go r.run(ctx, tasks) + go r.run(ctx, wg, tasks) } + go func() { + wg.Wait() + close(p.resultsCh) + }() return p } @@ -202,7 +148,7 @@ func newCopContext(tblInfo *model.TableInfo, idxInfo *model.IndexInfo, sessCtx s } } -func (c *copContext) buildTableScan(ctx context.Context, txn kv.Transaction, start, end kv.Key) (distsql.SelectResult, error) { +func (c *copContext) buildTableScan(ctx context.Context, startTS uint64, start, end kv.Key) (distsql.SelectResult, error) { dagPB, err := buildDAGPB(c.sessCtx, c.tblInfo, c.colInfos) if err != nil { return nil, err @@ -211,7 +157,7 @@ func (c *copContext) buildTableScan(ctx context.Context, txn kv.Transaction, sta var builder distsql.RequestBuilder kvReq, err := builder. SetDAGRequest(dagPB). - SetStartTS(txn.StartTS()). + SetStartTS(startTS). SetKeyRanges([]kv.KeyRange{{StartKey: start, EndKey: end}}). SetKeepOrder(true). SetFromSessionVars(c.sessCtx.GetSessionVars()). @@ -268,7 +214,7 @@ func constructTableScanPB(sCtx sessionctx.Context, tblInfo *model.TableInfo, col return &tipb.Executor{Tp: tipb.ExecType_TypeTableScan, TblScan: tblScan}, err } -func (c *copContext) buildScanIndexKV(ctx context.Context, txn kv.Transaction, start, end kv.Key) (kv.Response, error) { +func (c *copContext) buildScanIndexKV(ctx context.Context, startTS uint64, start, end kv.Key) (kv.Response, error) { ddlPB, err := buildDDLPB(c.tblInfo, c.idxInfo, c.colInfos) if err != nil { return nil, err @@ -279,7 +225,7 @@ func (c *copContext) buildScanIndexKV(ctx context.Context, txn kv.Transaction, s var builder distsql.RequestBuilder kvReq, err := builder. SetDDLRequest(ddlPB). - SetStartTS(txn.StartTS()). + SetStartTS(startTS). SetKeyRanges([]kv.KeyRange{{StartKey: start, EndKey: end}}). SetKeepOrder(true). SetFromSessionVars(c.sessCtx.GetSessionVars()). 
@@ -302,9 +248,9 @@ func (c *copContext) buildScanIndexKV(ctx context.Context, txn kv.Transaction, s } func (c *copContext) sendIdxRecords(ctx context.Context, ch chan *indexRecord, srcChk *chunk.Chunk, - txn kv.Transaction, start, end kv.Key) error { + startTS uint64, start, end kv.Key) error { sctx := c.sessCtx.GetSessionVars().StmtCtx - srcResult, err := c.buildTableScan(ctx, txn, start, end) + srcResult, err := c.buildTableScan(ctx, startTS, start, end) if err != nil { return errors.Trace(err) } @@ -329,8 +275,8 @@ func (c *copContext) sendIdxRecords(ctx context.Context, ch chan *indexRecord, s } } -func (c *copContext) sendEncodedIdxRecords(ctx context.Context, ch chan *indexRecord, txn kv.Transaction, start, end kv.Key) error { - resp, err := c.buildScanIndexKV(ctx, txn, start, end) +func (c *copContext) sendEncodedIdxRecords(ctx context.Context, ch chan *indexRecord, startTS uint64, start, end kv.Key) error { + resp, err := c.buildScanIndexKV(ctx, startTS, start, end) if err != nil { return errors.Trace(err) } @@ -353,21 +299,23 @@ func (c *copContext) sendEncodedIdxRecords(ctx context.Context, ch chan *indexRe } func (w *addIndexWorker) fetchRowColValsFromCop(handleRange reorgBackfillTask) ([]*indexRecord, kv.Key, bool, error) { - ch, err := w.copReqReaders.getResult(&handleRange) w.idxRecords = w.idxRecords[:0] taskDone := false + var err error + curRangeKey := string(handleRange.endKey) for { select { - case record, more := <-ch: + case record, more := <-w.copReqReaders.resultsCh: if !more { taskDone = true break } w.idxRecords = append(w.idxRecords, record) + default: + err, taskDone = w.copReqReaders.results.Load(curRangeKey) } if taskDone { - _, err = w.copReqReaders.getResult(&handleRange) - w.copReqReaders.deleteResult(&handleRange) + w.copReqReaders.results.Delete(curRangeKey) break } if len(w.idxRecords) >= w.batchCnt { From dd35bfa461754f9c1f40f21e653aa4fe14516499 Mon Sep 17 00:00:00 2001 From: tangenta Date: Tue, 18 Oct 2022 18:56:42 +0800 Subject: [PATCH 19/26] refactor send reorg task code --- ddl/backfilling.go | 156 ++++++++++++++++++++++--------------------- ddl/index_distsql.go | 5 +- 2 files changed, 85 insertions(+), 76 deletions(-) diff --git a/ddl/backfilling.go b/ddl/backfilling.go index 54a90e4b6a872..88ac78549483d 100644 --- a/ddl/backfilling.go +++ b/ddl/backfilling.go @@ -203,25 +203,12 @@ func newBackfillWorker(sessCtx sessionctx.Context, id int, t table.PhysicalTable reorgInfo: reorgInfo, batchCnt: int(variable.GetDDLReorgBatchSize()), sessCtx: sessCtx, - taskCh: make(chan *reorgBackfillTask, 1), - resultCh: make(chan *backfillResult, 1), priority: reorgInfo.Job.Priority, tp: tp, } } -func (w *backfillWorker) Close() { - if !w.closed { - w.closed = true - close(w.taskCh) - } -} - -func closeBackfillWorkers(workers []*backfillWorker) { - for _, worker := range workers { - worker.Close() - } -} +func (w *backfillWorker) Close() {} // handleBackfillTask backfills range [task.startHandle, task.endHandle) handle's index to table. 
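// The returned backfillResult carries the added row count, the next key to resume from, and
// any error, which the caller uses to update the reorg progress.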
func (w *backfillWorker) handleBackfillTask(d *ddlCtx, task *reorgBackfillTask, bf backfiller) *backfillResult { @@ -294,45 +281,51 @@ func (w *backfillWorker) handleBackfillTask(d *ddlCtx, task *reorgBackfillTask, return result } -func (w *backfillWorker) run(d *ddlCtx, bf backfiller, job *model.Job) { +func (w *backfillWorker) run(ctx context.Context, d *ddlCtx, bf backfiller, job *model.Job) { logutil.BgLogger().Info("[ddl] backfill worker start", zap.Stringer("type", w.tp), zap.Int("workerID", w.id)) - defer func() { - w.resultCh <- &backfillResult{err: dbterror.ErrReorgPanic} - }() defer util.Recover(metrics.LabelDDL, "backfillWorker.run", nil, false) for { - task, more := <-w.taskCh - if !more { - break - } - d.setDDLLabelForTopSQL(job) - - logutil.BgLogger().Debug("[ddl] backfill worker got task", zap.Int("workerID", w.id), zap.String("task", task.String())) - failpoint.Inject("mockBackfillRunErr", func() { - if w.id == 0 { - result := &backfillResult{addedCount: 0, nextKey: nil, err: errors.Errorf("mock backfill error")} - w.resultCh <- result - failpoint.Continue() + exit := false + select { + case task, more := <-w.taskCh: + if !more { + exit = true + break } - }) + d.setDDLLabelForTopSQL(job) + + logutil.BgLogger().Debug("[ddl] backfill worker got task", zap.Int("workerID", w.id), zap.String("task", task.String())) + failpoint.Inject("mockBackfillRunErr", func() { + if w.id == 0 { + result := &backfillResult{addedCount: 0, nextKey: nil, err: errors.Errorf("mock backfill error")} + w.resultCh <- result + failpoint.Continue() + } + }) - failpoint.Inject("mockHighLoadForAddIndex", func() { - sqlPrefixes := []string{"alter"} - topsql.MockHighCPULoad(job.Query, sqlPrefixes, 5) - }) + failpoint.Inject("mockHighLoadForAddIndex", func() { + sqlPrefixes := []string{"alter"} + topsql.MockHighCPULoad(job.Query, sqlPrefixes, 5) + }) - failpoint.Inject("mockBackfillSlow", func() { - time.Sleep(100 * time.Millisecond) - }) + failpoint.Inject("mockBackfillSlow", func() { + time.Sleep(100 * time.Millisecond) + }) - // Dynamic change batch size. - w.batchCnt = int(variable.GetDDLReorgBatchSize()) - finish := injectSpan(job.ID, fmt.Sprintf("%s-%d", "handle-backfill-task", w.id)) - result := w.handleBackfillTask(d, task, bf) - finish() - w.resultCh <- result + // Dynamic change batch size. + w.batchCnt = int(variable.GetDDLReorgBatchSize()) + finish := injectSpan(job.ID, fmt.Sprintf("%s-%d", "handle-backfill-task", w.id)) + result := w.handleBackfillTask(d, task, bf) + finish() + w.resultCh <- result + case <-ctx.Done(): + exit = true + } + if exit { + break + } } logutil.BgLogger().Info("[ddl] backfill worker exit", zap.Stringer("type", w.tp), @@ -368,7 +361,7 @@ func splitTableRanges(t table.PhysicalTable, store kv.Storage, startKey, endKey return ranges, nil } -func waitTaskResults(workers []*backfillWorker, taskCnt int, +func waitTaskResults(resultsCh chan *backfillResult, taskCnt int, totalAddedCount *int64, startKey kv.Key) (kv.Key, int64, error) { var ( addedCount int64 @@ -376,8 +369,7 @@ func waitTaskResults(workers []*backfillWorker, taskCnt int, firstErr error ) for i := 0; i < taskCnt; i++ { - worker := workers[i] - result := <-worker.resultCh + result := <-resultsCh if firstErr == nil && result.err != nil { firstErr = result.err // We should wait all working workers exits, any way. 
@@ -385,7 +377,8 @@ func waitTaskResults(workers []*backfillWorker, taskCnt int, } if result.err != nil { - logutil.BgLogger().Warn("[ddl] backfill worker failed", zap.Int("workerID", worker.id), + logutil.BgLogger().Warn("[ddl] backfill worker failed", + zap.String("result next key", hex.EncodeToString(result.nextKey)), zap.Error(result.err)) } @@ -401,19 +394,19 @@ func waitTaskResults(workers []*backfillWorker, taskCnt int, // sendTasksAndWait sends tasks to workers, and waits for all the running workers to return results, // there are taskCnt running workers. -func (dc *ddlCtx) sendTasksAndWait(sessPool *sessionPool, reorgInfo *reorgInfo, totalAddedCount *int64, workers []*backfillWorker, batchTasks []*reorgBackfillTask) error { - for i, task := range batchTasks { - w := workers[i] - w.taskCh <- task - if w.copReqReaders != nil { - w.copReqReaders.tasksCh <- task +func (dc *ddlCtx) sendTasksAndWait(sessPool *sessionPool, reorgInfo *reorgInfo, totalAddedCount *int64, + batchTasks []*reorgBackfillTask, rTaskCh, wTaskCh chan *reorgBackfillTask, resCh chan *backfillResult) error { + for _, task := range batchTasks { + if rTaskCh != nil { + rTaskCh <- task } + wTaskCh <- task } startKey := batchTasks[0].startKey taskCnt := len(batchTasks) startTime := time.Now() - nextKey, taskAddedCount, err := waitTaskResults(workers, taskCnt, totalAddedCount, startKey) + nextKey, taskAddedCount, err := waitTaskResults(resCh, taskCnt, totalAddedCount, startKey) elapsedTime := time.Since(startTime) if err == nil { err = dc.isReorgRunnable(reorgInfo.Job) @@ -424,7 +417,6 @@ func (dc *ddlCtx) sendTasksAndWait(sessPool *sessionPool, reorgInfo *reorgInfo, err1 := reorgInfo.UpdateReorgMeta(nextKey, sessPool) metrics.BatchAddIdxHistogram.WithLabelValues(metrics.LblError).Observe(elapsedTime.Seconds()) logutil.BgLogger().Warn("[ddl] backfill worker handle batch tasks failed", - zap.Stringer("type", workers[0].tp), zap.ByteString("elementType", reorgInfo.currElement.TypeKey), zap.Int64("elementID", reorgInfo.currElement.ID), zap.Int64("totalAddedCount", *totalAddedCount), @@ -441,7 +433,6 @@ func (dc *ddlCtx) sendTasksAndWait(sessPool *sessionPool, reorgInfo *reorgInfo, dc.getReorgCtx(reorgInfo.Job).setNextKey(nextKey) metrics.BatchAddIdxHistogram.WithLabelValues(metrics.LblOK).Observe(elapsedTime.Seconds()) logutil.BgLogger().Info("[ddl] backfill workers successfully processed batch", - zap.Stringer("type", workers[0].tp), zap.ByteString("elementType", reorgInfo.currElement.TypeKey), zap.Int64("elementID", reorgInfo.currElement.ID), zap.Int64("totalAddedCount", *totalAddedCount), @@ -480,10 +471,10 @@ func tryDecodeToHandleString(key kv.Key) string { } // handleRangeTasks sends tasks to workers, and returns remaining kvRanges that is not handled. 
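// The batched tasks are delivered to the coprocessor reader channel (when one is set up) and
// to the shared backfill worker channel; per-task results come back on a single result channel.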
-func (dc *ddlCtx) handleRangeTasks(sessPool *sessionPool, t table.Table, workers []*backfillWorker, reorgInfo *reorgInfo, - totalAddedCount *int64, kvRanges []kv.KeyRange) ([]kv.KeyRange, error) { +func (dc *ddlCtx) handleRangeTasks(sessPool *sessionPool, t table.Table, batchSize int, reorgInfo *reorgInfo, + totalAddedCount *int64, kvRanges []kv.KeyRange, rTaskCh, wTaskCh chan *reorgBackfillTask, retCh chan *backfillResult) ([]kv.KeyRange, error) { defer injectSpan(reorgInfo.ID, "send-wait-tasks")() - batchTasks := make([]*reorgBackfillTask, 0, len(workers)) + batchTasks := make([]*reorgBackfillTask, 0, batchSize) physicalTableID := reorgInfo.PhysicalTableID var prefix kv.Key @@ -492,10 +483,11 @@ func (dc *ddlCtx) handleRangeTasks(sessPool *sessionPool, t table.Table, workers } else { prefix = t.RecordPrefix() } + job := reorgInfo.Job // Build reorg tasks. for i, keyRange := range kvRanges { endKey := keyRange.EndKey - endK, err := getRangeEndKey(reorgInfo.d.jobContext(reorgInfo.Job), workers[0].sessCtx.GetStore(), workers[0].priority, prefix, keyRange.StartKey, endKey) + endK, err := getRangeEndKey(reorgInfo.d.jobContext(job), dc.store, job.Priority, prefix, keyRange.StartKey, endKey) if err != nil { logutil.BgLogger().Info("[ddl] send range task to workers, get reverse key failed", zap.Error(err)) } else { @@ -512,7 +504,7 @@ func (dc *ddlCtx) handleRangeTasks(sessPool *sessionPool, t table.Table, workers endInclude: endK.Cmp(keyRange.EndKey) != 0 || i == len(kvRanges)-1} batchTasks = append(batchTasks, task) - if len(batchTasks) >= len(workers) { + if len(batchTasks) >= batchSize { break } } @@ -522,7 +514,7 @@ func (dc *ddlCtx) handleRangeTasks(sessPool *sessionPool, t table.Table, workers } // Wait tasks finish. - err := dc.sendTasksAndWait(sessPool, reorgInfo, totalAddedCount, workers, batchTasks) + err := dc.sendTasksAndWait(sessPool, reorgInfo, totalAddedCount, batchTasks, rTaskCh, wTaskCh, retCh) if err != nil { return nil, errors.Trace(err) } @@ -585,6 +577,8 @@ func setSessCtxLocation(sctx sessionctx.Context, info *reorgInfo) error { return nil } +const backfillTaskBatchSize = 1024 + // writePhysicalTableRecord handles the "add index" or "modify/change column" reorganization state for a non-partitioned table or a partition. // For a partitioned table, it should be handled partition by partition. // @@ -628,10 +622,11 @@ func (dc *ddlCtx) writePhysicalTableRecord(sessPool *sessionPool, t table.Physic // variable.ddlReorgWorkerCounter can be modified by system variable "tidb_ddl_reorg_worker_cnt". 
workerCnt := variable.GetDDLReorgWorkerCounter() backfillWorkers := make([]*backfillWorker, 0, workerCnt) - defer func() { - closeBackfillWorkers(backfillWorkers) - }() + backfillCtxCancels := make([]func(), 0, workerCnt) jc := dc.jobContext(job) + wTaskCh := make(chan *reorgBackfillTask, backfillTaskBatchSize) + resultCh := make(chan *backfillResult, backfillTaskBatchSize) + var rTaskCh chan *reorgBackfillTask for { finish := injectSpan(job.ID, "split-table-ranges") @@ -674,6 +669,8 @@ func (dc *ddlCtx) writePhysicalTableRecord(sessPool *sessionPool, t table.Physic sessCtx.GetSessionVars().StmtCtx.DividedByZeroAsWarning = !sqlMode.HasStrictMode() sessCtx.GetSessionVars().StmtCtx.IgnoreZeroInDate = !sqlMode.HasStrictMode() || sqlMode.HasAllowInvalidDatesMode() sessCtx.GetSessionVars().StmtCtx.NoZeroDate = sqlMode.HasStrictMode() + bfCtx, cancel := context.WithCancel(dc.ctx) + backfillCtxCancels = append(backfillCtxCancels, cancel) switch bfWorkerType { case typeAddIndexWorker: @@ -684,38 +681,42 @@ func (dc *ddlCtx) writePhysicalTableRecord(sessPool *sessionPool, t table.Physic if useCopRead && copReqReaders == nil { indexInfo := model.FindIndexInfoByID(t.Meta().Indices, reorgInfo.currElement.ID) copCtx := newCopContext(t.Meta(), indexInfo, sessCtx) - taskCh := make(chan *reorgBackfillTask, 64) - copReqReaders = newCopReqReaders(dc.ctx, copCtx, job.ID, int(workerCnt*2), taskCh) + rTaskCh = make(chan *reorgBackfillTask, backfillTaskBatchSize) + copReqReaders = newCopReqReaders(dc.ctx, copCtx, job.ID, int(workerCnt*2), rTaskCh) logutil.BgLogger().Info("[ddl] fetch index values with coprocessor", zap.String("table", t.Meta().Name.O), zap.String("index", indexInfo.Name.O)) } idxWorker.backfillWorker.copReqReaders = copReqReaders backfillWorkers = append(backfillWorkers, idxWorker.backfillWorker) - go idxWorker.backfillWorker.run(reorgInfo.d, idxWorker, job) + go idxWorker.backfillWorker.run(bfCtx, reorgInfo.d, idxWorker, job) case typeAddIndexMergeTmpWorker: tmpIdxWorker := newMergeTempIndexWorker(sessCtx, i, t, reorgInfo, jc) backfillWorkers = append(backfillWorkers, tmpIdxWorker.backfillWorker) - go tmpIdxWorker.backfillWorker.run(reorgInfo.d, tmpIdxWorker, job) + go tmpIdxWorker.backfillWorker.run(bfCtx, reorgInfo.d, tmpIdxWorker, job) case typeUpdateColumnWorker: // Setting InCreateOrAlterStmt tells the difference between SELECT casting and ALTER COLUMN casting. sessCtx.GetSessionVars().StmtCtx.InCreateOrAlterStmt = true updateWorker := newUpdateColumnWorker(sessCtx, i, t, decodeColMap, reorgInfo, jc) backfillWorkers = append(backfillWorkers, updateWorker.backfillWorker) - go updateWorker.backfillWorker.run(reorgInfo.d, updateWorker, job) + go updateWorker.backfillWorker.run(bfCtx, reorgInfo.d, updateWorker, job) case typeCleanUpIndexWorker: idxWorker := newCleanUpIndexWorker(sessCtx, i, t, decodeColMap, reorgInfo, jc) backfillWorkers = append(backfillWorkers, idxWorker.backfillWorker) - go idxWorker.backfillWorker.run(reorgInfo.d, idxWorker, job) + go idxWorker.backfillWorker.run(bfCtx, reorgInfo.d, idxWorker, job) default: return errors.New("unknow backfill type") } + lastBfWorker := backfillWorkers[i] + lastBfWorker.taskCh = wTaskCh + lastBfWorker.resultCh = resultCh } // Shrink the worker size. 
if len(backfillWorkers) > int(workerCnt) { - workers := backfillWorkers[workerCnt:] backfillWorkers = backfillWorkers[:workerCnt] - closeBackfillWorkers(workers) + for i := int(workerCnt); i < len(backfillWorkers); i++ { + backfillCtxCancels[i]() + } } failpoint.Inject("checkBackfillWorkerNum", func(val failpoint.Value) { @@ -754,12 +755,17 @@ func (dc *ddlCtx) writePhysicalTableRecord(sessPool *sessionPool, t table.Physic return errors.New(ingest.LitErrGetBackendFail) } } - remains, err := dc.handleRangeTasks(sessPool, t, backfillWorkers, reorgInfo, &totalAddedCount, kvRanges) + remains, err := dc.handleRangeTasks(sessPool, t, backfillTaskBatchSize, reorgInfo, &totalAddedCount, kvRanges, rTaskCh, wTaskCh, resultCh) if err != nil { return errors.Trace(err) } if len(remains) == 0 { + if rTaskCh != nil { + close(rTaskCh) + } + close(wTaskCh) + close(resultCh) break } startKey = remains[0].StartKey diff --git a/ddl/index_distsql.go b/ddl/index_distsql.go index 9d389554c17cd..de2e6f7ffa1dc 100644 --- a/ddl/index_distsql.go +++ b/ddl/index_distsql.go @@ -19,6 +19,7 @@ import ( "fmt" "sync" "sync/atomic" + "time" "github.com/pingcap/errors" "github.com/pingcap/tidb/distsql" @@ -303,6 +304,7 @@ func (w *addIndexWorker) fetchRowColValsFromCop(handleRange reorgBackfillTask) ( taskDone := false var err error curRangeKey := string(handleRange.endKey) + timer := time.NewTimer(200 * time.Millisecond) for { select { case record, more := <-w.copReqReaders.resultsCh: @@ -311,7 +313,7 @@ func (w *addIndexWorker) fetchRowColValsFromCop(handleRange reorgBackfillTask) ( break } w.idxRecords = append(w.idxRecords, record) - default: + case <-timer.C: err, taskDone = w.copReqReaders.results.Load(curRangeKey) } if taskDone { @@ -323,6 +325,7 @@ func (w *addIndexWorker) fetchRowColValsFromCop(handleRange reorgBackfillTask) ( } } + timer.Stop() return w.idxRecords, handleRange.startKey, taskDone, err } From 111fd5ce911442a79cb4b076d820b99fcde1f0bd Mon Sep 17 00:00:00 2001 From: tangenta Date: Tue, 18 Oct 2022 21:54:46 +0800 Subject: [PATCH 20/26] use a separate kv structure for copr push down impl --- ddl/backfilling.go | 6 +- ddl/index.go | 171 ++++++++++++++++++++++++------------------- ddl/index_distsql.go | 68 +++++++++-------- 3 files changed, 138 insertions(+), 107 deletions(-) diff --git a/ddl/backfilling.go b/ddl/backfilling.go index 88ac78549483d..6199e7ed40ba2 100644 --- a/ddl/backfilling.go +++ b/ddl/backfilling.go @@ -42,6 +42,7 @@ import ( "github.com/pingcap/tidb/util" "github.com/pingcap/tidb/util/dbterror" "github.com/pingcap/tidb/util/logutil" + "github.com/pingcap/tidb/util/mathutil" decoder "github.com/pingcap/tidb/util/rowDecoder" "github.com/pingcap/tidb/util/timeutil" "github.com/pingcap/tidb/util/topsql" @@ -649,6 +650,9 @@ func (dc *ddlCtx) writePhysicalTableRecord(sessPool *sessionPool, t table.Physic } var copReqReaders *copReqReaders useCopRead := variable.EnableCoprRead.Load() != "0" + if useCopRead { + workerCnt = mathutil.Max(workerCnt/2, 1) + } // Enlarge the worker size. 
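	// When coprocessor read is enabled, workerCnt was halved above because a matching number
	// of coprocessor readers is started below to feed these workers.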
for i := len(backfillWorkers); i < int(workerCnt); i++ { sessCtx := newContext(reorgInfo.d.store) @@ -682,7 +686,7 @@ func (dc *ddlCtx) writePhysicalTableRecord(sessPool *sessionPool, t table.Physic indexInfo := model.FindIndexInfoByID(t.Meta().Indices, reorgInfo.currElement.ID) copCtx := newCopContext(t.Meta(), indexInfo, sessCtx) rTaskCh = make(chan *reorgBackfillTask, backfillTaskBatchSize) - copReqReaders = newCopReqReaders(dc.ctx, copCtx, job.ID, int(workerCnt*2), rTaskCh) + copReqReaders = newCopReqReaders(dc.ctx, copCtx, job.ID, int(workerCnt), rTaskCh) logutil.BgLogger().Info("[ddl] fetch index values with coprocessor", zap.String("table", t.Meta().Name.O), zap.String("index", indexInfo.Name.O)) diff --git a/ddl/index.go b/ddl/index.go index 7c0a47b3ee2df..ce472e1b753c4 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -1163,12 +1163,6 @@ type indexRecord struct { vals []types.Datum // It's the index values. rsData []types.Datum // It's the restored data for handle. skip bool // skip indicates that the index key is already exists, we should not add it. - idxKV *indexKV -} - -type indexKV struct { - key []byte - value []byte } type baseIndexWorker struct { @@ -1178,10 +1172,11 @@ type baseIndexWorker struct { metricCounter prometheus.Counter // The following attributes are used to reduce memory allocation. - defaultVals []types.Datum - idxRecords []*indexRecord - rowMap map[int64]types.Datum - rowDecoder *decoder.RowDecoder + defaultVals []types.Datum + idxRecords []*indexRecord + idxKVRecords []idxKV + rowMap map[int64]types.Datum + rowDecoder *decoder.RowDecoder sqlMode mysql.SQLMode jobContext *JobContext @@ -1503,12 +1498,24 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC var ( idxRecords []*indexRecord + idxKVs []idxKV nextKey kv.Key taskDone bool ) finish := injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d", "fetch-rows", w.id)) + var pushDownKVEncoding bool if w.copReqReaders != nil { - idxRecords, nextKey, taskDone, err = w.fetchRowColValsFromCop(handleRange) + pushDownKVEncoding = w.copReqReaders.kvResultsCh != nil + r := w.copReqReaders + if !pushDownKVEncoding { + w.idxRecords = w.idxRecords[:0] + w.idxRecords, nextKey, taskDone, err = fetchRowColValsFromCop(w, r.resultsCh, w.idxRecords, handleRange) + idxRecords = w.idxRecords + } else { + w.idxKVRecords = w.idxKVRecords[:0] + w.idxKVRecords, nextKey, taskDone, err = fetchRowColValsFromCop(w, r.kvResultsCh, w.idxKVRecords, handleRange) + idxKVs = w.idxKVRecords + } } else { idxRecords, nextKey, taskDone, err = w.fetchRowColVals(txn, handleRange) } @@ -1518,81 +1525,95 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC } taskCtx.nextKey = nextKey taskCtx.done = taskDone + if !pushDownKVEncoding { + err = w.batchCheckUniqueKey(txn, idxRecords) + if err != nil { + return errors.Trace(err) + } + return w.createIndexRecords(idxRecords, &taskCtx, needMergeTmpIdx, txn) + } else { + return w.createIndexKVs(idxKVs, &taskCtx, txn) + } + }) + logSlowOperations(time.Since(oprStartTime), "AddIndexBackfillDataInTxn", 3000) - err = w.batchCheckUniqueKey(txn, idxRecords) - if err != nil { - return errors.Trace(err) + return +} + +func (w *addIndexWorker) createIndexRecords(idxRecords []*indexRecord, taskCtx *backfillTaskContext, needMergeTmpIdx bool, txn kv.Transaction) error { + defer injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d", "create-records", w.id))() + for _, rec := range idxRecords { + taskCtx.scanCount++ + // The index is already exists, we skip it, no 
needs to backfill it. + // The following update, delete, insert on these rows, TiDB can handle it correctly. + if rec.skip { + continue } - defer injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d", "create-records", w.id))() - for _, idxRecord := range idxRecords { - taskCtx.scanCount++ - // The index is already exists, we skip it, no needs to backfill it. - // The following update, delete, insert on these rows, TiDB can handle it correctly. - if idxRecord.skip { - continue + // When the backfill-merge process is used, the writes from DML are redirected to a temp index. + // The write-conflict will be handled by the merge worker. Thus, the locks are unnecessary. + if !needMergeTmpIdx && rec.key != nil { + // We need to add this lock to make sure pessimistic transaction can realize this operation. + // For the normal pessimistic transaction, it's ok. But if async commit is used, it may lead to inconsistent data and index. + err := txn.LockKeys(context.Background(), new(kv.LockCtx), rec.key) + if err != nil { + return errors.Trace(err) } + } - // When the backfill-merge process is used, the writes from DML are redirected to a temp index. - // The write-conflict will be handled by the merge worker. Thus, the locks are unnecessary. - if !needMergeTmpIdx && idxRecord.key != nil { - // We need to add this lock to make sure pessimistic transaction can realize this operation. - // For the normal pessimistic transaction, it's ok. But if async commit is used, it may lead to inconsistent data and index. - err := txn.LockKeys(context.Background(), new(kv.LockCtx), idxRecord.key) - if err != nil { - return errors.Trace(err) + // Create the index. + if w.writerCtx == nil { + handle, err := w.index.Create(w.sessCtx, txn, rec.vals, rec.handle, rec.rsData, table.WithIgnoreAssertion, table.FromBackfill) + if err != nil { + if kv.ErrKeyExists.Equal(err) && rec.handle.Equal(handle) { + // Index already exists, skip it. + continue } + return errors.Trace(err) + } + } else { // The lightning environment is ready. + vars := w.sessCtx.GetSessionVars() + sCtx, writeBufs := vars.StmtCtx, vars.GetWriteStmtBufs() + key, distinct, err := w.index.GenIndexKey(sCtx, rec.vals, rec.handle, writeBufs.IndexKeyBuf) + if err != nil { + return errors.Trace(err) } + idxVal, err := w.index.GenIndexValue(sCtx, distinct, rec.vals, rec.handle, rec.rsData) + if err != nil { + return errors.Trace(err) + } + err = w.writerCtx.WriteRow(key, idxVal) + if err != nil { + return errors.Trace(err) + } + writeBufs.IndexKeyBuf = key + } + taskCtx.addedCount++ + } + return nil +} - // Create the index. - if w.writerCtx == nil { - if idxRecord.idxKV != nil { - err := txn.GetMemBuffer().Set(idxRecord.idxKV.key, idxRecord.idxKV.value) - if err != nil { - return errors.Trace(err) - } - } else { - handle, err := w.index.Create(w.sessCtx, txn, idxRecord.vals, idxRecord.handle, idxRecord.rsData, table.WithIgnoreAssertion, table.FromBackfill) - if err != nil { - if kv.ErrKeyExists.Equal(err) && idxRecord.handle.Equal(handle) { - // Index already exists, skip it. - continue - } - return errors.Trace(err) - } - } - } else { // The lightning environment is ready. 
- if idxRecord.idxKV != nil { - err = w.writerCtx.WriteRow(idxRecord.idxKV.key, idxRecord.idxKV.value) - if err != nil { - return errors.Trace(err) - } - } else { - vars := w.sessCtx.GetSessionVars() - sCtx, writeBufs := vars.StmtCtx, vars.GetWriteStmtBufs() - key, distinct, err := w.index.GenIndexKey(sCtx, idxRecord.vals, idxRecord.handle, writeBufs.IndexKeyBuf) - if err != nil { - return errors.Trace(err) - } - idxVal, err := w.index.GenIndexValue(sCtx, distinct, idxRecord.vals, idxRecord.handle, idxRecord.rsData) - if err != nil { - return errors.Trace(err) - } - err = w.writerCtx.WriteRow(key, idxVal) - if err != nil { - return errors.Trace(err) - } - writeBufs.IndexKeyBuf = key +func (w *addIndexWorker) createIndexKVs(idxKVs []idxKV, taskCtx *backfillTaskContext, txn kv.Transaction) error { + defer injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d", "create-records", w.id))() + for _, idxKV := range idxKVs { + taskCtx.scanCount++ + // Create the index. + if w.writerCtx == nil { + err := txn.GetMemBuffer().Set(idxKV.key, idxKV.val) + if err != nil { + return errors.Trace(err) + } + } else { // The lightning environment is ready. + if len(idxKVs) != 0 { + err := w.writerCtx.WriteRow(idxKV.key, idxKV.val) + if err != nil { + return errors.Trace(err) } } - taskCtx.addedCount++ } - - return nil - }) - logSlowOperations(time.Since(oprStartTime), "AddIndexBackfillDataInTxn", 3000) - - return + taskCtx.addedCount++ + } + return nil } func (w *worker) addPhysicalTableIndex(t table.PhysicalTable, reorgInfo *reorgInfo) error { diff --git a/ddl/index_distsql.go b/ddl/index_distsql.go index de2e6f7ffa1dc..53494bda46339 100644 --- a/ddl/index_distsql.go +++ b/ddl/index_distsql.go @@ -18,7 +18,6 @@ import ( "context" "fmt" "sync" - "sync/atomic" "time" "github.com/pingcap/errors" @@ -49,20 +48,13 @@ type copContext struct { fieldTps []*types.FieldType sessCtx sessionctx.Context pushDownEncoding bool - - srcChunks []*chunk.Chunk - indexRecordChan chan *indexRecord - doneChan chan struct{} - bfTasks map[string]struct{} - err atomic.Value - readerCnt atomic.Int32 - mu sync.Mutex } type copReqReaders struct { - tasksCh chan *reorgBackfillTask - resultsCh chan *indexRecord - results generic.SyncMap[string, error] + tasksCh chan *reorgBackfillTask + resultsCh chan *indexRecord + kvResultsCh chan idxKV + results generic.SyncMap[string, error] } type copReqReader struct { @@ -72,6 +64,7 @@ type copReqReader struct { srcChunk *chunk.Chunk idxRecordChan chan *indexRecord + idxKVChan chan idxKV results *generic.SyncMap[string, error] } @@ -86,7 +79,7 @@ func (c *copReqReader) run(ctx context.Context, wg *sync.WaitGroup, tasks chan * finish := injectSpan(c.traceID, fmt.Sprintf("cop-read-%d", c.id)) err := kv.RunInNewTxn(ctx, c.copCtx.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { if c.copCtx.pushDownEncoding { - return c.copCtx.sendEncodedIdxRecords(ctx, c.idxRecordChan, txn.StartTS(), task.startKey, task.excludedEndKey()) + return c.copCtx.sendEncodedIdxRecords(ctx, c.idxKVChan, txn.StartTS(), task.startKey, task.excludedEndKey()) } else { return c.copCtx.sendIdxRecords(ctx, c.idxRecordChan, c.srcChunk, txn.StartTS(), task.startKey, task.excludedEndKey()) } @@ -99,9 +92,13 @@ func (c *copReqReader) run(ctx context.Context, wg *sync.WaitGroup, tasks chan * func newCopReqReaders(ctx context.Context, copCtx *copContext, jobID int64, readerCnt int, tasks chan *reorgBackfillTask) *copReqReaders { p := &copReqReaders{ - tasksCh: tasks, - resultsCh: make(chan *indexRecord, 
int(variable.MaxDDLReorgBatchSize)*readerCnt), - results: generic.NewSyncMap[string, error](readerCnt), + tasksCh: tasks, + results: generic.NewSyncMap[string, error](readerCnt), + } + if copCtx.pushDownEncoding { + p.kvResultsCh = make(chan idxKV, int(variable.MaxDDLReorgBatchSize)) + } else { + p.resultsCh = make(chan *indexRecord, int(variable.MaxDDLReorgBatchSize)) } wg := &sync.WaitGroup{} for i := 0; i < readerCnt; i++ { @@ -111,6 +108,7 @@ func newCopReqReaders(ctx context.Context, copCtx *copContext, jobID int64, read traceID: jobID, copCtx: copCtx, idxRecordChan: p.resultsCh, + idxKVChan: p.kvResultsCh, srcChunk: chunk.NewChunkWithCapacity(copCtx.fieldTps, 1024), results: &p.results, } @@ -118,7 +116,11 @@ func newCopReqReaders(ctx context.Context, copCtx *copContext, jobID int64, read } go func() { wg.Wait() - close(p.resultsCh) + if copCtx.pushDownEncoding { + close(p.kvResultsCh) + } else { + close(p.resultsCh) + } }() return p } @@ -136,17 +138,15 @@ func newCopContext(tblInfo *model.TableInfo, idxInfo *model.IndexInfo, sessCtx s colInfos = append(colInfos, pkColInfos...) fieldTps = append(fieldTps, pkFieldTps...) - return &copContext{ + copCtx := &copContext{ tblInfo: tblInfo, idxInfo: idxInfo, colInfos: colInfos, fieldTps: fieldTps, sessCtx: sessCtx, pushDownEncoding: variable.EnableCoprRead.Load() == "2", - indexRecordChan: make(chan *indexRecord, variable.MaxDDLReorgBatchSize), - doneChan: make(chan struct{}, 1), - bfTasks: make(map[string]struct{}, 16), } + return copCtx } func (c *copContext) buildTableScan(ctx context.Context, startTS uint64, start, end kv.Key) (distsql.SelectResult, error) { @@ -276,11 +276,17 @@ func (c *copContext) sendIdxRecords(ctx context.Context, ch chan *indexRecord, s } } -func (c *copContext) sendEncodedIdxRecords(ctx context.Context, ch chan *indexRecord, startTS uint64, start, end kv.Key) error { +type idxKV struct { + key kv.Key + val []byte +} + +func (c *copContext) sendEncodedIdxRecords(ctx context.Context, ch chan idxKV, startTS uint64, start, end kv.Key) error { resp, err := c.buildScanIndexKV(ctx, startTS, start, end) if err != nil { return errors.Trace(err) } + colResp := &tipb.DDLResponse{} for { data, err := resp.Next(ctx) if err != nil { @@ -289,30 +295,31 @@ func (c *copContext) sendEncodedIdxRecords(ctx context.Context, ch chan *indexRe if data == nil { return nil } - colResp := &tipb.DDLResponse{} + colResp.Reset() + colResp.Keys = make([][]byte, 0, 2*1024*1024) + colResp.Values = make([][]byte, 0, 2*1024*1024) if err = colResp.Unmarshal(data.GetData()); err != nil { return errors.Trace(err) } for i := 0; i < len(colResp.Keys); i++ { - ch <- &indexRecord{idxKV: &indexKV{key: colResp.Keys[i], value: colResp.Values[i]}} + ch <- idxKV{key: colResp.Keys[i], val: colResp.Values[i]} } } } -func (w *addIndexWorker) fetchRowColValsFromCop(handleRange reorgBackfillTask) ([]*indexRecord, kv.Key, bool, error) { - w.idxRecords = w.idxRecords[:0] +func fetchRowColValsFromCop[V *indexRecord | idxKV](w *addIndexWorker, ch chan V, buf []V, handleRange reorgBackfillTask) ([]V, kv.Key, bool, error) { taskDone := false var err error curRangeKey := string(handleRange.endKey) timer := time.NewTimer(200 * time.Millisecond) for { select { - case record, more := <-w.copReqReaders.resultsCh: + case record, more := <-ch: if !more { taskDone = true break } - w.idxRecords = append(w.idxRecords, record) + buf = append(buf, record) case <-timer.C: err, taskDone = w.copReqReaders.results.Load(curRangeKey) } @@ -320,13 +327,12 @@ func (w *addIndexWorker) 
fetchRowColValsFromCop(handleRange reorgBackfillTask) ( w.copReqReaders.results.Delete(curRangeKey) break } - if len(w.idxRecords) >= w.batchCnt { + if len(buf) >= w.batchCnt { break } } - timer.Stop() - return w.idxRecords, handleRange.startKey, taskDone, err + return buf, handleRange.startKey, taskDone, err } func buildHandleColInfoAndFieldTypes(tbInfo *model.TableInfo) ([]*model.ColumnInfo, []*types.FieldType) { From 720dd542e63a8bd53aae4fc61662960fe89fa190 Mon Sep 17 00:00:00 2001 From: tangenta Date: Wed, 19 Oct 2022 00:33:45 +0800 Subject: [PATCH 21/26] record time detail at a fine-grained level --- ddl/index_distsql.go | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/ddl/index_distsql.go b/ddl/index_distsql.go index 53494bda46339..63fdccfe1ea4d 100644 --- a/ddl/index_distsql.go +++ b/ddl/index_distsql.go @@ -79,9 +79,11 @@ func (c *copReqReader) run(ctx context.Context, wg *sync.WaitGroup, tasks chan * finish := injectSpan(c.traceID, fmt.Sprintf("cop-read-%d", c.id)) err := kv.RunInNewTxn(ctx, c.copCtx.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { if c.copCtx.pushDownEncoding { - return c.copCtx.sendEncodedIdxRecords(ctx, c.idxKVChan, txn.StartTS(), task.startKey, task.excludedEndKey()) + return c.copCtx.sendEncodedIdxRecords(ctx, c.idxKVChan, txn.StartTS(), + task.startKey, task.excludedEndKey(), c.traceID, c.id) } else { - return c.copCtx.sendIdxRecords(ctx, c.idxRecordChan, c.srcChunk, txn.StartTS(), task.startKey, task.excludedEndKey()) + return c.copCtx.sendIdxRecords(ctx, c.idxRecordChan, c.srcChunk, txn.StartTS(), + task.startKey, task.excludedEndKey()) } }) finish() @@ -281,33 +283,40 @@ type idxKV struct { val []byte } -func (c *copContext) sendEncodedIdxRecords(ctx context.Context, ch chan idxKV, startTS uint64, start, end kv.Key) error { +func (c *copContext) sendEncodedIdxRecords(ctx context.Context, ch chan idxKV, startTS uint64, + start, end kv.Key, traceID int64, wid int) error { resp, err := c.buildScanIndexKV(ctx, startTS, start, end) if err != nil { return errors.Trace(err) } colResp := &tipb.DDLResponse{} for { + finish := injectSpan(traceID, fmt.Sprintf("cop-req-%d", wid)) data, err := resp.Next(ctx) if err != nil { + finish() return errors.Trace(err) } if data == nil { + finish() return nil } colResp.Reset() colResp.Keys = make([][]byte, 0, 2*1024*1024) colResp.Values = make([][]byte, 0, 2*1024*1024) if err = colResp.Unmarshal(data.GetData()); err != nil { + finish() return errors.Trace(err) } + finish() for i := 0; i < len(colResp.Keys); i++ { ch <- idxKV{key: colResp.Keys[i], val: colResp.Values[i]} } } } -func fetchRowColValsFromCop[V *indexRecord | idxKV](w *addIndexWorker, ch chan V, buf []V, handleRange reorgBackfillTask) ([]V, kv.Key, bool, error) { +func fetchRowColValsFromCop[V *indexRecord | idxKV](w *addIndexWorker, ch chan V, buf []V, + handleRange reorgBackfillTask) ([]V, kv.Key, bool, error) { taskDone := false var err error curRangeKey := string(handleRange.endKey) From 929605c3bcb54da4212e5e3a3966e6ecc3d08f28 Mon Sep 17 00:00:00 2001 From: tangenta Date: Wed, 19 Oct 2022 17:41:45 +0800 Subject: [PATCH 22/26] push down sort --- ddl/index_distsql.go | 9 +++++---- go.mod | 2 +- go.sum | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/ddl/index_distsql.go b/ddl/index_distsql.go index 63fdccfe1ea4d..7fd1377be650d 100644 --- a/ddl/index_distsql.go +++ b/ddl/index_distsql.go @@ -224,6 +224,8 @@ func (c *copContext) buildScanIndexKV(ctx context.Context, startTS 
uint64, start } ddlPB.Ranges = append(ddlPB.Ranges, tipb.KeyRange{Low: start, High: end}) + needSort := variable.EnableCoprRead.Load() == "3" + ddlPB.Sort = &needSort var builder distsql.RequestBuilder kvReq, err := builder. @@ -302,15 +304,14 @@ func (c *copContext) sendEncodedIdxRecords(ctx context.Context, ch chan idxKV, s return nil } colResp.Reset() - colResp.Keys = make([][]byte, 0, 2*1024*1024) - colResp.Values = make([][]byte, 0, 2*1024*1024) + colResp.Kv = make([]*tipb.KVPair, 0, 2*1024*1024) if err = colResp.Unmarshal(data.GetData()); err != nil { finish() return errors.Trace(err) } finish() - for i := 0; i < len(colResp.Keys); i++ { - ch <- idxKV{key: colResp.Keys[i], val: colResp.Values[i]} + for i := 0; i < len(colResp.Kv); i++ { + ch <- idxKV{key: colResp.Kv[i].Key, val: colResp.Kv[i].Value} } } } diff --git a/go.mod b/go.mod index 43f0cbfd41a9b..b068f76512d06 100644 --- a/go.mod +++ b/go.mod @@ -245,4 +245,4 @@ replace ( go.opencensus.io => go.opencensus.io v0.23.1-0.20220331163232-052120675fac ) -replace github.com/pingcap/tipb => github.com/wjhuang2016/tipb v0.0.0-20221008063631-6d62ac9c19a2 +replace github.com/pingcap/tipb => github.com/wjhuang2016/tipb v0.0.0-20221019083954-87f880600301 diff --git a/go.sum b/go.sum index 76e55d8631960..07b8eaedd474d 100644 --- a/go.sum +++ b/go.sum @@ -936,8 +936,8 @@ github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPU github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= github.com/wangjohn/quickselect v0.0.0-20161129230411-ed8402a42d5f h1:9DDCDwOyEy/gId+IEMrFHLuQ5R/WV0KNxWLler8X2OY= github.com/wangjohn/quickselect v0.0.0-20161129230411-ed8402a42d5f/go.mod h1:8sdOQnirw1PrcnTJYkmW1iOHtUmblMmGdUOHyWYycLI= -github.com/wjhuang2016/tipb v0.0.0-20221008063631-6d62ac9c19a2 h1:13LEN/7sdwcoTRDlBxqPUixvUqPrzHzn3J0EaasrpXg= -github.com/wjhuang2016/tipb v0.0.0-20221008063631-6d62ac9c19a2/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= +github.com/wjhuang2016/tipb v0.0.0-20221019083954-87f880600301 h1:/uBAla6grIWlpJVLX3tGU5hAzOjZWUAGjdzKtNVoIO8= +github.com/wjhuang2016/tipb v0.0.0-20221019083954-87f880600301/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= github.com/xdg/scram v1.0.3/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I= github.com/xdg/stringprep v1.0.3/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= From 2a2fad1ca0ac265d1ec3fef6500c5f5d5c1f0c60 Mon Sep 17 00:00:00 2001 From: tangenta Date: Thu, 20 Oct 2022 13:40:27 +0800 Subject: [PATCH 23/26] Revert "push down sort" This reverts commit 929605c3bcb54da4212e5e3a3966e6ecc3d08f28. --- ddl/index_distsql.go | 9 ++++----- go.mod | 2 +- go.sum | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/ddl/index_distsql.go b/ddl/index_distsql.go index 7fd1377be650d..63fdccfe1ea4d 100644 --- a/ddl/index_distsql.go +++ b/ddl/index_distsql.go @@ -224,8 +224,6 @@ func (c *copContext) buildScanIndexKV(ctx context.Context, startTS uint64, start } ddlPB.Ranges = append(ddlPB.Ranges, tipb.KeyRange{Low: start, High: end}) - needSort := variable.EnableCoprRead.Load() == "3" - ddlPB.Sort = &needSort var builder distsql.RequestBuilder kvReq, err := builder. 
@@ -304,14 +302,15 @@ func (c *copContext) sendEncodedIdxRecords(ctx context.Context, ch chan idxKV, s return nil } colResp.Reset() - colResp.Kv = make([]*tipb.KVPair, 0, 2*1024*1024) + colResp.Keys = make([][]byte, 0, 2*1024*1024) + colResp.Values = make([][]byte, 0, 2*1024*1024) if err = colResp.Unmarshal(data.GetData()); err != nil { finish() return errors.Trace(err) } finish() - for i := 0; i < len(colResp.Kv); i++ { - ch <- idxKV{key: colResp.Kv[i].Key, val: colResp.Kv[i].Value} + for i := 0; i < len(colResp.Keys); i++ { + ch <- idxKV{key: colResp.Keys[i], val: colResp.Values[i]} } } } diff --git a/go.mod b/go.mod index b068f76512d06..43f0cbfd41a9b 100644 --- a/go.mod +++ b/go.mod @@ -245,4 +245,4 @@ replace ( go.opencensus.io => go.opencensus.io v0.23.1-0.20220331163232-052120675fac ) -replace github.com/pingcap/tipb => github.com/wjhuang2016/tipb v0.0.0-20221019083954-87f880600301 +replace github.com/pingcap/tipb => github.com/wjhuang2016/tipb v0.0.0-20221008063631-6d62ac9c19a2 diff --git a/go.sum b/go.sum index 07b8eaedd474d..76e55d8631960 100644 --- a/go.sum +++ b/go.sum @@ -936,8 +936,8 @@ github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPU github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= github.com/wangjohn/quickselect v0.0.0-20161129230411-ed8402a42d5f h1:9DDCDwOyEy/gId+IEMrFHLuQ5R/WV0KNxWLler8X2OY= github.com/wangjohn/quickselect v0.0.0-20161129230411-ed8402a42d5f/go.mod h1:8sdOQnirw1PrcnTJYkmW1iOHtUmblMmGdUOHyWYycLI= -github.com/wjhuang2016/tipb v0.0.0-20221019083954-87f880600301 h1:/uBAla6grIWlpJVLX3tGU5hAzOjZWUAGjdzKtNVoIO8= -github.com/wjhuang2016/tipb v0.0.0-20221019083954-87f880600301/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= +github.com/wjhuang2016/tipb v0.0.0-20221008063631-6d62ac9c19a2 h1:13LEN/7sdwcoTRDlBxqPUixvUqPrzHzn3J0EaasrpXg= +github.com/wjhuang2016/tipb v0.0.0-20221008063631-6d62ac9c19a2/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= github.com/xdg/scram v1.0.3/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I= github.com/xdg/stringprep v1.0.3/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= From bdeaec2e7eda2df6f776463d01a91ae25f75d502 Mon Sep 17 00:00:00 2001 From: tangenta Date: Thu, 20 Oct 2022 14:15:23 +0800 Subject: [PATCH 24/26] remove unnecessary time record --- ddl/index.go | 1 - 1 file changed, 1 deletion(-) diff --git a/ddl/index.go b/ddl/index.go index ce472e1b753c4..f7e00195e163b 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -1320,7 +1320,6 @@ func (w *baseIndexWorker) updateRowDecoder(handle kv.Handle, rawRecord []byte) e // 3. Boolean indicates whether the task is done. // 4. error occurs in fetchRowColVals. nil if no error occurs. func (w *baseIndexWorker) fetchRowColVals(txn kv.Transaction, taskRange reorgBackfillTask) ([]*indexRecord, kv.Key, bool, error) { - defer injectSpan(w.reorgInfo.Job.ID, fmt.Sprintf("%s-%d", "fetch-rows", w.id))() // TODO: use tableScan to prune columns. 
w.idxRecords = w.idxRecords[:0] startTime := time.Now() From bfe69cad71a1c69d6fd7243bfa60e0aab295a587 Mon Sep 17 00:00:00 2001 From: tangenta Date: Thu, 20 Oct 2022 14:33:37 +0800 Subject: [PATCH 25/26] reduce unnecessary memory allocation --- ddl/ingest/engine.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ddl/ingest/engine.go b/ddl/ingest/engine.go index d875d78e346d0..f4a028bad7698 100644 --- a/ddl/ingest/engine.go +++ b/ddl/ingest/engine.go @@ -184,9 +184,9 @@ func (ei *engineInfo) newWriterContext(workerID int) (*WriterContext, error) { // WriteRow Write one row into local writer buffer. func (wCtx *WriterContext) WriteRow(key, idxVal []byte) error { - kvs := make([]common.KvPair, 1) + var kvs [1]common.KvPair kvs[0].Key = key kvs[0].Val = idxVal - row := kv.MakeRowsFromKvPairs(kvs) + row := kv.MakeRowsFromKvPairs(kvs[:]) return wCtx.lWrite.WriteRows(wCtx.ctx, nil, row) } From ebf2617155b4df0f5d39d84900836bf74201a465 Mon Sep 17 00:00:00 2001 From: tangenta Date: Thu, 20 Oct 2022 14:37:14 +0800 Subject: [PATCH 26/26] trace sendIdxRecords --- ddl/index_distsql.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ddl/index_distsql.go b/ddl/index_distsql.go index 63fdccfe1ea4d..f29f9d597b1a5 100644 --- a/ddl/index_distsql.go +++ b/ddl/index_distsql.go @@ -83,7 +83,7 @@ func (c *copReqReader) run(ctx context.Context, wg *sync.WaitGroup, tasks chan * task.startKey, task.excludedEndKey(), c.traceID, c.id) } else { return c.copCtx.sendIdxRecords(ctx, c.idxRecordChan, c.srcChunk, txn.StartTS(), - task.startKey, task.excludedEndKey()) + task.startKey, task.excludedEndKey(), c.traceID, c.id) } }) finish() @@ -251,21 +251,25 @@ func (c *copContext) buildScanIndexKV(ctx context.Context, startTS uint64, start } func (c *copContext) sendIdxRecords(ctx context.Context, ch chan *indexRecord, srcChk *chunk.Chunk, - startTS uint64, start, end kv.Key) error { + startTS uint64, start, end kv.Key, traceID int64, wid int) error { sctx := c.sessCtx.GetSessionVars().StmtCtx srcResult, err := c.buildTableScan(ctx, startTS, start, end) if err != nil { return errors.Trace(err) } for { + finish := injectSpan(traceID, fmt.Sprintf("cop-req-%d", wid)) err := srcResult.Next(ctx, srcChk) if err != nil { + finish() return errors.Trace(err) } if srcChk.NumRows() == 0 { + finish() return nil } iter := chunk.NewIterator4Chunk(srcChk) + finish() for row := iter.Begin(); row != iter.End(); row = iter.Next() { idxDt, hdDt := extractIdxValsAndHandle(row, c.idxInfo, c.fieldTps) handle, err := buildHandle(hdDt, c.tblInfo, c.idxInfo, sctx)