Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: using standard lib slices instead of exp slices #45996

Merged
merged 1 commit into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion statistics/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ go_test(
"@com_github_pingcap_failpoint//:failpoint",
"@com_github_pingcap_log//:log",
"@com_github_stretchr_testify//require",
"@org_golang_x_exp//slices",
"@org_uber_go_goleak//:goleak",
"@org_uber_go_zap//:zap",
],
Expand Down
47 changes: 24 additions & 23 deletions statistics/cmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ package statistics

import (
"bytes"
"cmp"
"fmt"
"math"
"reflect"
"slices"
"sort"
"strings"
"sync/atomic"
Expand All @@ -39,7 +41,6 @@ import (
"github.com/pingcap/tidb/util/mathutil"
"github.com/pingcap/tipb/go-tipb"
"github.com/twmb/murmur3"
"golang.org/x/exp/slices"
)

// topNThreshold is the minimum ratio of the number of topN elements in CMSketch, 10 means 1 / 10 = 10%.
Expand Down Expand Up @@ -265,12 +266,12 @@ func queryValue(sctx sessionctx.Context, c *CMSketch, t *TopN, val types.Datum)
if sctx != nil {
sc = sctx.GetSessionVars().StmtCtx
}
bytes, err := tablecodec.EncodeValue(sc, nil, val)
rawData, err := tablecodec.EncodeValue(sc, nil, val)
if err != nil {
return 0, errors.Trace(err)
}
h1, h2 := murmur3.Sum128(bytes)
if ret, ok := t.QueryTopN(sctx, bytes); ok {
h1, h2 := murmur3.Sum128(rawData)
if ret, ok := t.QueryTopN(sctx, rawData); ok {
return ret, nil
}
return c.queryHashValue(sctx, h1, h2), nil
Expand All @@ -289,7 +290,7 @@ func (c *CMSketch) QueryBytes(d []byte) uint64 {
func (c *CMSketch) queryHashValue(sctx sessionctx.Context, h1, h2 uint64) (result uint64) {
vals := make([]uint32, c.depth)
originVals := make([]uint32, c.depth)
min := uint32(math.MaxUint32)
minValue := uint32(math.MaxUint32)
useDefaultValue := false
if sctx != nil && sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
debugtrace.EnterContextCommon(sctx)
Expand All @@ -309,8 +310,8 @@ func (c *CMSketch) queryHashValue(sctx sessionctx.Context, h1, h2 uint64) (resul
for i := range c.table {
j := (h1 + h2*uint64(i)) % uint64(c.width)
originVals[i] = c.table[i][j]
if min > c.table[i][j] {
min = c.table[i][j]
if minValue > c.table[i][j] {
minValue = c.table[i][j]
}
noise := (c.count - uint64(c.table[i][j])) / (uint64(c.width) - 1)
if uint64(c.table[i][j]) == 0 {
Expand All @@ -323,8 +324,8 @@ func (c *CMSketch) queryHashValue(sctx sessionctx.Context, h1, h2 uint64) (resul
}
slices.Sort(vals)
res := vals[(c.depth-1)/2] + (vals[c.depth/2]-vals[(c.depth-1)/2])/2
if res > min+temp {
res = min + temp
if res > minValue+temp {
res = minValue + temp
}
if res == 0 {
return uint64(0)
Expand Down Expand Up @@ -641,11 +642,11 @@ func (c *TopN) findTopN(d []byte) int {
}
match := false
idx := sort.Search(len(c.TopN), func(i int) bool {
cmp := bytes.Compare(c.TopN[i].Encoded, d)
if cmp == 0 {
cmpRst := bytes.Compare(c.TopN[i].Encoded, d)
if cmpRst == 0 {
match = true
}
return cmp >= 0
return cmpRst >= 0
})
if !match {
return -1
Expand All @@ -660,11 +661,11 @@ func (c *TopN) LowerBound(d []byte) (idx int, match bool) {
return 0, false
}
idx = sort.Search(len(c.TopN), func(i int) bool {
cmp := bytes.Compare(c.TopN[i].Encoded, d)
if cmp == 0 {
cmpRst := bytes.Compare(c.TopN[i].Encoded, d)
if cmpRst == 0 {
match = true
}
return cmp >= 0
return cmpRst >= 0
})
return idx, match
}
Expand Down Expand Up @@ -699,8 +700,8 @@ func (c *TopN) Sort() {
if c == nil {
return
}
slices.SortFunc(c.TopN, func(i, j TopNMeta) bool {
return bytes.Compare(i.Encoded, j.Encoded) < 0
slices.SortFunc(c.TopN, func(i, j TopNMeta) int {
return bytes.Compare(i.Encoded, j.Encoded)
})
}

Expand Down Expand Up @@ -919,11 +920,11 @@ func checkEmptyTopNs(topNs []*TopN) bool {

// SortTopnMeta sort topnMeta
func SortTopnMeta(topnMetas []TopNMeta) []TopNMeta {
slices.SortFunc(topnMetas, func(i, j TopNMeta) bool {
slices.SortFunc(topnMetas, func(i, j TopNMeta) int {
if i.Count != j.Count {
return i.Count > j.Count
return cmp.Compare(j.Count, i.Count)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please add a note here?

}
return bytes.Compare(i.Encoded, j.Encoded) < 0
return bytes.Compare(i.Encoded, j.Encoded)
})
return topnMetas
}
Expand All @@ -934,11 +935,11 @@ func GetMergedTopNFromSortedSlice(sorted []TopNMeta, n uint32) (*TopN, []TopNMet
}

func getMergedTopNFromSortedSlice(sorted []TopNMeta, n uint32) (*TopN, []TopNMeta) {
slices.SortFunc(sorted, func(i, j TopNMeta) bool {
slices.SortFunc(sorted, func(i, j TopNMeta) int {
if i.Count != j.Count {
return i.Count > j.Count
return cmp.Compare(j.Count, i.Count)
}
return bytes.Compare(i.Encoded, j.Encoded) < 0
return bytes.Compare(i.Encoded, j.Encoded)
})
n = mathutil.Min(uint32(len(sorted)), n)

Expand Down
12 changes: 12 additions & 0 deletions statistics/cmsketch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,18 @@ func TestMergePartTopN2GlobalTopNWithoutHists(t *testing.T) {
require.Len(t, leftTopN, 1, "should have 1 left topN")
}

func TestSortTopnMeta(t *testing.T) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍🏿

data := []TopNMeta{{
Encoded: []byte("a"),
Count: 1,
}, {
Encoded: []byte("b"),
Count: 2,
}}
sortedData := SortTopnMeta(data)
require.Equal(t, uint64(2), sortedData[0].Count)
}

func TestMergePartTopN2GlobalTopNWithHists(t *testing.T) {
loc := time.UTC
sc := &stmtctx.StatementContext{TimeZone: loc}
Expand Down
8 changes: 4 additions & 4 deletions statistics/feedback.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"encoding/gob"
"math"
"math/rand"
"slices"
"sort"
goatomic "sync/atomic"
"time"
Expand All @@ -41,7 +42,6 @@ import (
"github.com/pingcap/tidb/util/ranger"
"go.uber.org/atomic"
"go.uber.org/zap"
"golang.org/x/exp/slices"
)

// Feedback represents the total scan count in range [lower, upper).
Expand Down Expand Up @@ -353,19 +353,19 @@ func NonOverlappedFeedbacks(sc *stmtctx.StatementContext, fbs []Feedback) ([]Fee
// Sort feedbacks by end point and start point incrementally, then pick every feedback that is not overlapped
// with the previous chosen feedbacks.
var existsErr bool
slices.SortFunc(fbs, func(i, j Feedback) bool {
slices.SortFunc(fbs, func(i, j Feedback) int {
res, err := i.Upper.Compare(sc, j.Upper, collate.GetBinaryCollator())
if err != nil {
existsErr = true
}
if existsErr || res != 0 {
return res < 0
return res
}
res, err = i.Lower.Compare(sc, j.Lower, collate.GetBinaryCollator())
if err != nil {
existsErr = true
}
return res < 0
return res
})
if existsErr {
return fbs, false
Expand Down
1 change: 0 additions & 1 deletion statistics/handle/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ go_library(
"@com_github_pingcap_log//:log",
"@com_github_pingcap_tipb//go-tipb",
"@com_github_tikv_client_go_v2//oracle",
"@org_golang_x_exp//slices",
"@org_uber_go_atomic//:atomic",
"@org_uber_go_zap//:zap",
],
Expand Down
7 changes: 3 additions & 4 deletions statistics/handle/handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"context"
"encoding/json"
"fmt"
"slices"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -50,7 +51,6 @@ import (
"github.com/tikv/client-go/v2/oracle"
atomic2 "go.uber.org/atomic"
"go.uber.org/zap"
"golang.org/x/exp/slices"
)

const (
Expand Down Expand Up @@ -976,9 +976,8 @@ func (*Handle) mergeGlobalStatsTopNByConcurrency(mergeConcurrency, mergeBatchSiz
// Remove the value from the Hists.
if len(removeTopn) > 0 {
tmp := removeTopn
slices.SortFunc(tmp, func(i, j statistics.TopNMeta) bool {
cmpResult := bytes.Compare(i.Encoded, j.Encoded)
return cmpResult < 0
slices.SortFunc(tmp, func(i, j statistics.TopNMeta) int {
return bytes.Compare(i.Encoded, j.Encoded)
})
wrapper.AllHg[i].RemoveVals(tmp)
}
Expand Down
9 changes: 5 additions & 4 deletions statistics/handle/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ package handle

import (
"bytes"
"cmp"
"context"
"fmt"
"math"
"math/rand"
"slices"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -48,7 +50,6 @@ import (
"github.com/pingcap/tidb/util/timeutil"
"go.uber.org/atomic"
"go.uber.org/zap"
"golang.org/x/exp/slices"
)

type tableDeltaMap map[int64]variable.TableDelta
Expand Down Expand Up @@ -961,11 +962,11 @@ func (h *Handle) DumpColStatsUsageToKV() error {
for id, t := range colMap {
pairs = append(pairs, pair{tblColID: id, lastUsedAt: t.UTC().Format(types.TimeFormat)})
}
slices.SortFunc(pairs, func(i, j pair) bool {
slices.SortFunc(pairs, func(i, j pair) int {
if i.tblColID.TableID == j.tblColID.TableID {
return i.tblColID.ID < j.tblColID.ID
return cmp.Compare(i.tblColID.ID, j.tblColID.ID)
}
return i.tblColID.TableID < j.tblColID.TableID
return cmp.Compare(i.tblColID.TableID, j.tblColID.TableID)
})
// Use batch insert to reduce cost.
for i := 0; i < len(pairs); i += batchInsertSize {
Expand Down
2 changes: 1 addition & 1 deletion statistics/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package statistics
import (
"bytes"
"math"
"slices"
"strings"

"github.com/pingcap/failpoint"
Expand All @@ -33,7 +34,6 @@ import (
"github.com/pingcap/tidb/util/mathutil"
"github.com/pingcap/tidb/util/ranger"
"github.com/twmb/murmur3"
"golang.org/x/exp/slices"
)

// Index represents an index histogram.
Expand Down
2 changes: 1 addition & 1 deletion statistics/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"os"
"regexp"
"runtime/pprof"
"slices"
"testing"
"time"

Expand All @@ -44,7 +45,6 @@ import (
"github.com/pingcap/tidb/util/mock"
"github.com/pingcap/tidb/util/ranger"
"github.com/stretchr/testify/require"
"golang.org/x/exp/slices"
)

func TestCollationColumnEstimate(t *testing.T) {
Expand Down