Skip to content

Commit

Permalink
statistics: use DatumMapCache to remove duplicate code (#46364)
Browse files Browse the repository at this point in the history
ref #46158
  • Loading branch information
hawkingrei authored Aug 24, 2023
1 parent ad668cd commit 097c13f
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 66 deletions.
1 change: 1 addition & 0 deletions statistics/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ go_library(
"analyze_jobs.go",
"builder.go",
"cmsketch.go",
"cmsketch_util.go",
"column.go",
"debugtrace.go",
"estimate.go",
Expand Down
28 changes: 5 additions & 23 deletions statistics/cmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ import (
"github.com/pingcap/tidb/tablecodec"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/dbterror"
"github.com/pingcap/tidb/util/hack"
"github.com/pingcap/tidb/util/mathutil"
Expand Down Expand Up @@ -805,7 +804,7 @@ func MergePartTopN2GlobalTopN(loc *time.Location, version int, topNs []*TopN, n
counter := make(map[hack.MutableString]float64)
// datumMap is used to store the mapping from the string type to datum type.
// The datum is used to find the value in the histogram.
datumMap := make(map[hack.MutableString]types.Datum)
datumMap := newDatumMapCache()
for i, topN := range topNs {
if atomic.LoadUint32(killed) == 1 {
return nil, nil, nil, errors.Trace(ErrQueryInterrupted)
Expand All @@ -832,29 +831,12 @@ func MergePartTopN2GlobalTopN(loc *time.Location, version int, topNs []*TopN, n
continue
}
// Get the encodedVal from the hists[j]
datum, exists := datumMap[encodedVal]
datum, exists := datumMap.Get(encodedVal)
if !exists {
// If the datumMap does not have the encodedVal datum,
// we should generate the datum based on the encoded value.
// This part is copied from the function MergePartitionHist2GlobalHist.
var d types.Datum
if isIndex {
d.SetBytes(val.Encoded)
} else {
var err error
if types.IsTypeTime(hists[0].Tp.GetType()) {
// Handle date time values specially since they are encoded to int and we'll get int values if using DecodeOne.
_, d, err = codec.DecodeAsDateTime(val.Encoded, hists[0].Tp.GetType(), loc)
} else if types.IsTypeFloat(hists[0].Tp.GetType()) {
_, d, err = codec.DecodeAsFloat32(val.Encoded, hists[0].Tp.GetType())
} else {
_, d, err = codec.DecodeOne(val.Encoded)
}
if err != nil {
return nil, nil, nil, err
}
d, err := datumMap.Put(val, encodedVal, hists[0].Tp.GetType(), isIndex, loc)
if err != nil {
return nil, nil, nil, err
}
datumMap[encodedVal] = d
datum = d
}
// Get the row count which the value is equal to the encodedVal from histogram.
Expand Down
69 changes: 69 additions & 0 deletions statistics/cmsketch_util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package statistics

import (
"time"

"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/hack"
)

// datumMapCache is a small cache of datums keyed by hack.MutableString.
// Entries are added through Put and looked up through Get.
type datumMapCache struct {
	// datumMap holds the cached datum for each key.
	datumMap map[hack.MutableString]types.Datum
}

// newDatumMapCache returns a datumMapCache whose underlying map is already
// allocated and empty, so it can be written to immediately.
func newDatumMapCache() *datumMapCache {
	cache := new(datumMapCache)
	cache.datumMap = map[hack.MutableString]types.Datum{}
	return cache
}

// Get looks up key in the cache. It returns the stored datum and true when
// the key is present; otherwise it returns the zero datum and false.
func (d *datumMapCache) Get(key hack.MutableString) (val types.Datum, ok bool) {
	cached, found := d.datumMap[key]
	return cached, found
}

// Put converts val into a datum with topNMetaToDatum and, when the conversion
// succeeds, stores the datum in the cache under encodedVal.
//
// The converted datum is returned together with any conversion error. Nothing
// is stored in the cache when an error occurs.
func (d *datumMapCache) Put(val TopNMeta, encodedVal hack.MutableString,
	tp byte, isIndex bool, loc *time.Location) (dat types.Datum, err error) {
	decoded, decodeErr := topNMetaToDatum(val, tp, isIndex, loc)
	if decodeErr == nil {
		// Only successfully converted values are cached.
		d.datumMap[encodedVal] = decoded
	}
	return decoded, decodeErr
}

// topNMetaToDatum converts the encoded value carried by val into a datum.
//
// When isIndex is true the encoded bytes are stored in the datum as-is.
// Otherwise the bytes are decoded according to tp:
//   - types for which types.IsTypeTime(tp) holds are decoded with
//     codec.DecodeAsDateTime using loc, since date time values are encoded to
//     int and DecodeOne would yield int values;
//   - types for which types.IsTypeFloat(tp) holds are decoded with
//     codec.DecodeAsFloat32;
//   - everything else is decoded with codec.DecodeOne.
//
// It returns the decoded datum and any error reported by the decoder.
func topNMetaToDatum(val TopNMeta,
	tp byte, isIndex bool, loc *time.Location) (dat types.Datum, err error) {
	if isIndex {
		dat.SetBytes(val.Encoded)
		return dat, nil
	}
	// Assign directly to the named results; the previous version declared a
	// second, shadowing err inside the else branch.
	switch {
	case types.IsTypeTime(tp):
		// Handle date time values specially since they are encoded to int and we'll get int values if using DecodeOne.
		_, dat, err = codec.DecodeAsDateTime(val.Encoded, tp, loc)
	case types.IsTypeFloat(tp):
		_, dat, err = codec.DecodeAsFloat32(val.Encoded, tp)
	default:
		_, dat, err = codec.DecodeOne(val.Encoded)
	}
	return dat, err
}
19 changes: 3 additions & 16 deletions statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -1331,22 +1331,9 @@ func MergePartitionHist2GlobalHist(sc *stmtctx.StatementContext, hists []*Histog

for _, meta := range popedTopN {
totCount += int64(meta.Count)
var d types.Datum
if isIndex {
d.SetBytes(meta.Encoded)
} else {
var err error
if types.IsTypeTime(hists[0].Tp.GetType()) {
// handle datetime values specially since they are encoded to int and we'll get int values if using DecodeOne.
_, d, err = codec.DecodeAsDateTime(meta.Encoded, hists[0].Tp.GetType(), sc.TimeZone)
} else if types.IsTypeFloat(hists[0].Tp.GetType()) {
_, d, err = codec.DecodeAsFloat32(meta.Encoded, hists[0].Tp.GetType())
} else {
_, d, err = codec.DecodeOne(meta.Encoded)
}
if err != nil {
return nil, err
}
d, err := topNMetaToDatum(meta, hists[0].Tp.GetType(), isIndex, sc.TimeZone)
if err != nil {
return nil, err
}
if minValue == nil {
minValue = d.Clone()
Expand Down
34 changes: 7 additions & 27 deletions statistics/merge_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ import (
"time"

"github.com/pingcap/errors"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/hack"
)

Expand Down Expand Up @@ -106,7 +104,7 @@ func (worker *topnStatsMergeWorker) Run(timeZone *time.Location, isIndex bool,
counter := make(map[hack.MutableString]float64)
// datumMap is used to store the mapping from the string type to datum type.
// The datum is used to find the value in the histogram.
datumMap := make(map[hack.MutableString]types.Datum)
datumMap := newDatumMapCache()

for i, topN := range checkTopNs {
if atomic.LoadUint32(worker.killed) == 1 {
Expand Down Expand Up @@ -138,31 +136,14 @@ func (worker *topnStatsMergeWorker) Run(timeZone *time.Location, isIndex bool,
continue
}
// Get the encodedVal from the hists[j]
datum, exists := datumMap[encodedVal]
datum, exists := datumMap.Get(encodedVal)
if !exists {
// If the datumMap does not have the encodedVal datum,
// we should generate the datum based on the encoded value.
// This part is copied from the function MergePartitionHist2GlobalHist.
var d types.Datum
if isIndex {
d.SetBytes(val.Encoded)
} else {
var err error
if types.IsTypeTime(allHists[0].Tp.GetType()) {
// handle datetime values specially since they are encoded to int and we'll get int values if using DecodeOne.
_, d, err = codec.DecodeAsDateTime(val.Encoded, allHists[0].Tp.GetType(), timeZone)
} else if types.IsTypeFloat(allHists[0].Tp.GetType()) {
_, d, err = codec.DecodeAsFloat32(val.Encoded, allHists[0].Tp.GetType())
} else {
_, d, err = codec.DecodeOne(val.Encoded)
}
if err != nil {
resp.Err = err
worker.respCh <- resp
return
}
d, err := datumMap.Put(val, encodedVal, allHists[0].Tp.GetType(), isIndex, timeZone)
if err != nil {
resp.Err = err
worker.respCh <- resp
return
}
datumMap[encodedVal] = d
datum = d
}
// Get the row count which the value is equal to the encodedVal from histogram.
Expand All @@ -177,7 +158,6 @@ func (worker *topnStatsMergeWorker) Run(timeZone *time.Location, isIndex bool,
}
}
}

numTop := len(counter)
if numTop == 0 {
worker.respCh <- resp
Expand Down

0 comments on commit 097c13f

Please sign in to comment.