From 4ec740d3fec3f9e79ee965cae8f0b6028219f0d7 Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Mon, 28 Aug 2023 10:26:06 +0800 Subject: [PATCH] This is an automated cherry-pick of #46433 Signed-off-by: ti-chi-bot --- br/pkg/lightning/checkpoints/BUILD.bazel | 1 - br/pkg/lightning/checkpoints/checkpoints.go | 14 ++- executor/BUILD.bazel | 3 + executor/inspection_result.go | 25 +++-- executor/mem_reader.go | 51 +++++++++ executor/union_scan.go | 33 ++++++ expression/scalar_function.go | 116 ++++++++++++++++++++ parser/goyacc/BUILD.bazel | 1 - parser/goyacc/main.go | 2 +- statistics/cmsketch.go | 26 ++++- 10 files changed, 251 insertions(+), 21 deletions(-) diff --git a/br/pkg/lightning/checkpoints/BUILD.bazel b/br/pkg/lightning/checkpoints/BUILD.bazel index 35312b44a4998..d7567f0f4e8c6 100644 --- a/br/pkg/lightning/checkpoints/BUILD.bazel +++ b/br/pkg/lightning/checkpoints/BUILD.bazel @@ -26,7 +26,6 @@ go_library( "//util/sqlexec", "@com_github_joho_sqltocsv//:sqltocsv", "@com_github_pingcap_errors//:errors", - "@org_golang_x_exp//slices", "@org_uber_go_zap//:zap", ], ) diff --git a/br/pkg/lightning/checkpoints/checkpoints.go b/br/pkg/lightning/checkpoints/checkpoints.go index d20134660de9c..e6e1ee053bc36 100644 --- a/br/pkg/lightning/checkpoints/checkpoints.go +++ b/br/pkg/lightning/checkpoints/checkpoints.go @@ -15,6 +15,7 @@ package checkpoints import ( + "cmp" "context" "database/sql" "encoding/json" @@ -22,6 +23,7 @@ import ( "io" "math" "path" + "slices" "sort" "strings" "sync" @@ -38,7 +40,6 @@ import ( "github.com/pingcap/tidb/br/pkg/version/build" "github.com/pingcap/tidb/util/mathutil" "go.uber.org/zap" - "golang.org/x/exp/slices" ) type CheckpointStatus uint8 @@ -229,6 +230,13 @@ func (key *ChunkCheckpointKey) String() string { return fmt.Sprintf("%s:%d", key.Path, key.Offset) } +func (key *ChunkCheckpointKey) compare(other *ChunkCheckpointKey) int { + if c := cmp.Compare(key.Path, other.Path); c != 0 { + return c + } + return cmp.Compare(key.Offset, other.Offset) +} + func (key *ChunkCheckpointKey) less(other *ChunkCheckpointKey) bool { switch { case key.Path < other.Path: @@ -1257,8 +1265,8 @@ func (cpdb *FileCheckpointsDB) Get(_ context.Context, tableName string) (*TableC }) } - slices.SortFunc(engine.Chunks, func(i, j *ChunkCheckpoint) bool { - return i.Key.less(&j.Key) + slices.SortFunc(engine.Chunks, func(i, j *ChunkCheckpoint) int { + return i.Key.compare(&j.Key) }) cp.Engines[engineID] = engine diff --git a/executor/BUILD.bazel b/executor/BUILD.bazel index dbe42439f4162..3987f06d2ed7b 100644 --- a/executor/BUILD.bazel +++ b/executor/BUILD.bazel @@ -237,8 +237,11 @@ go_library( "@org_golang_google_grpc//codes", "@org_golang_google_grpc//credentials", "@org_golang_google_grpc//status", +<<<<<<< HEAD "@org_golang_x_exp//maps", "@org_golang_x_exp//slices", +======= +>>>>>>> c11a9992882 (*: use std/slices to replace exp/slices (#46433)) "@org_golang_x_sync//errgroup", "@org_uber_go_atomic//:atomic", "@org_uber_go_zap//:zap", diff --git a/executor/inspection_result.go b/executor/inspection_result.go index 55793a7729274..ca2f04e5e9976 100644 --- a/executor/inspection_result.go +++ b/executor/inspection_result.go @@ -15,9 +15,11 @@ package executor import ( + "cmp" "context" "fmt" "math" + "slices" "strconv" "strings" @@ -34,7 +36,6 @@ import ( "github.com/pingcap/tidb/util/set" "github.com/pingcap/tidb/util/size" "github.com/pingcap/tidb/util/sqlexec" - "golang.org/x/exp/slices" ) type ( @@ -169,20 +170,22 @@ func (e *inspectionResultRetriever) retrieve(ctx context.Context, sctx sessionct continue } // make result stable - slices.SortFunc(results, func(i, j inspectionResult) bool { - if i.degree != j.degree { - return i.degree > j.degree + slices.SortFunc(results, func(i, j inspectionResult) int { + if c := cmp.Compare(i.degree, j.degree); c != 0 { + return -c } - if lhs, rhs := i.item, j.item; lhs != rhs { - return lhs < rhs + // lhs and rhs + if c := cmp.Compare(i.item, j.item); c != 0 { + return c } - if i.actual != j.actual { - return i.actual < j.actual + if c := cmp.Compare(i.actual, j.actual); c != 0 { + return c } - if lhs, rhs := i.tp, j.tp; lhs != rhs { - return lhs < rhs + // lhs and rhs + if c := cmp.Compare(i.tp, j.tp); c != 0 { + return c } - return i.instance < j.instance + return cmp.Compare(i.instance, j.instance) }) for _, result := range results { if len(result.instance) == 0 { diff --git a/executor/mem_reader.go b/executor/mem_reader.go index 38424799922df..7ca26802efc3d 100644 --- a/executor/mem_reader.go +++ b/executor/mem_reader.go @@ -16,6 +16,7 @@ package executor import ( "context" + "slices" "github.com/opentracing/opentracing-go" "github.com/pingcap/errors" @@ -34,6 +35,10 @@ import ( "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/codec" "github.com/pingcap/tidb/util/rowcodec" +<<<<<<< HEAD +======= + "github.com/pingcap/tidb/util/tracing" +>>>>>>> c11a9992882 (*: use std/slices to replace exp/slices (#46433)) ) type memReader interface { @@ -137,9 +142,22 @@ func (m *memIndexReader) getMemRows(ctx context.Context) ([][]types.Datum, error if err != nil { return nil, err } +<<<<<<< HEAD // TODO: After refine `IterReverse`, remove below logic and use `IterReverse` when do reverse scan. if m.desc { reverseDatumSlice(m.addedRows) +======= + + if m.keepOrder && m.table.GetPartitionInfo() != nil { + slices.SortFunc(m.addedRows, func(a, b []types.Datum) int { + ret, err1 := m.compare(m.ctx.GetSessionVars().StmtCtx, a, b) + if err1 != nil { + err = err1 + } + return ret + }) + return m.addedRows, err +>>>>>>> c11a9992882 (*: use std/slices to replace exp/slices (#46433)) } return m.addedRows, nil } @@ -278,9 +296,21 @@ func (m *memTableReader) getMemRows(ctx context.Context) ([][]types.Datum, error return nil, err } +<<<<<<< HEAD // TODO: After refine `IterReverse`, remove below logic and use `IterReverse` when do reverse scan. if m.desc { reverseDatumSlice(m.addedRows) +======= + if m.keepOrder && m.table.GetPartitionInfo() != nil { + slices.SortFunc(m.addedRows, func(a, b []types.Datum) int { + ret, err1 := m.compare(m.ctx.GetSessionVars().StmtCtx, a, b) + if err1 != nil { + err = err1 + } + return ret + }) + return m.addedRows, err +>>>>>>> c11a9992882 (*: use std/slices to replace exp/slices (#46433)) } return m.addedRows, nil } @@ -736,7 +766,28 @@ func (m *memIndexMergeReader) getMemRows(ctx context.Context) ([][]types.Datum, }, } +<<<<<<< HEAD return memTblReader.getMemRows(ctx) +======= + rows, err := memTblReader.getMemRows(ctx) + if err != nil { + return nil, err + } + + // Didn't set keepOrder = true for memTblReader, + // In indexMerge, non-partitioned tables are also need reordered. + if m.keepOrder { + slices.SortFunc(rows, func(a, b []types.Datum) int { + ret, err1 := m.compare(m.ctx.GetSessionVars().StmtCtx, a, b) + if err1 != nil { + err = err1 + } + return ret + }) + } + + return rows, err +>>>>>>> c11a9992882 (*: use std/slices to replace exp/slices (#46433)) } // Union all handles of all partial paths. diff --git a/executor/union_scan.go b/executor/union_scan.go index a23cd8b8c7873..e3e5e39c9a5e9 100644 --- a/executor/union_scan.go +++ b/executor/union_scan.go @@ -208,10 +208,15 @@ func (us *UnionScanExec) getOneRow(ctx context.Context) ([]types.Datum, error) { } else if snapshotRow == nil { row = addedRow } else { +<<<<<<< HEAD isSnapshotRow, err = us.shouldPickFirstRow(snapshotRow, addedRow) +======= + isSnapshotRowInt, err := us.compare(us.Ctx().GetSessionVars().StmtCtx, snapshotRow, addedRow) +>>>>>>> c11a9992882 (*: use std/slices to replace exp/slices (#46433)) if err != nil { return nil, err } + isSnapshotRow = isSnapshotRowInt < 0 if isSnapshotRow { row = snapshotRow } else { @@ -300,9 +305,24 @@ func (us *UnionScanExec) shouldPickFirstRow(a, b []types.Datum) (bool, error) { return isFirstRow, nil } +<<<<<<< HEAD func (us *UnionScanExec) compare(a, b []types.Datum) (int, error) { sc := us.ctx.GetSessionVars().StmtCtx for _, colOff := range us.usedIndex { +======= +type compareExec struct { + collators []collate.Collator + // usedIndex is the column offsets of the index which Src executor has used. + usedIndex []int + desc bool + // handleCols is the handle's position of the below scan plan. + handleCols plannercore.HandleCols +} + +func (ce compareExec) compare(sctx *stmtctx.StatementContext, a, b []types.Datum) (ret int, err error) { + var cmp int + for _, colOff := range ce.usedIndex { +>>>>>>> c11a9992882 (*: use std/slices to replace exp/slices (#46433)) aColumn := a[colOff] bColumn := b[colOff] cmp, err := aColumn.Compare(sc, &bColumn, us.collators[colOff]) @@ -312,6 +332,19 @@ func (us *UnionScanExec) compare(a, b []types.Datum) (int, error) { if cmp != 0 { return cmp, nil } +<<<<<<< HEAD } return us.belowHandleCols.Compare(a, b, us.collators) +======= + if ce.desc { + return -cmp, nil + } + return cmp, nil + } + cmp, err = ce.handleCols.Compare(a, b, ce.collators) + if ce.desc { + return -cmp, err + } + return cmp, err +>>>>>>> c11a9992882 (*: use std/slices to replace exp/slices (#46433)) } diff --git a/expression/scalar_function.go b/expression/scalar_function.go index 131e018fba550..b073a48a9415a 100644 --- a/expression/scalar_function.go +++ b/expression/scalar_function.go @@ -17,6 +17,10 @@ package expression import ( "bytes" "fmt" +<<<<<<< HEAD +======= + "slices" +>>>>>>> c11a9992882 (*: use std/slices to replace exp/slices (#46433)) "unsafe" "github.com/pingcap/errors" @@ -445,6 +449,118 @@ func (sf *ScalarFunction) HashCode(sc *stmtctx.StatementContext) []byte { return sf.hashcode } +<<<<<<< HEAD +======= +// ExpressionsSemanticEqual is used to judge whether two expression tree is semantic equivalent. +func ExpressionsSemanticEqual(ctx sessionctx.Context, expr1, expr2 Expression) bool { + sc := ctx.GetSessionVars().StmtCtx + sc.CanonicalHashCode = true + defer func() { + sc.CanonicalHashCode = false + }() + return bytes.Equal(expr1.HashCode(sc), expr2.HashCode(sc)) +} + +// simpleCanonicalizedHashCode is used to judge whether two expression is semantically equal. +func simpleCanonicalizedHashCode(sf *ScalarFunction, sc *stmtctx.StatementContext) { + if sf.canonicalhashcode != nil { + sf.canonicalhashcode = sf.canonicalhashcode[:0] + } + sf.canonicalhashcode = append(sf.canonicalhashcode, scalarFunctionFlag) + + argsHashCode := make([][]byte, 0, len(sf.GetArgs())) + for _, arg := range sf.GetArgs() { + argsHashCode = append(argsHashCode, arg.HashCode(sc)) + } + switch sf.FuncName.L { + case ast.Plus, ast.Mul, ast.EQ, ast.In, ast.LogicOr, ast.LogicAnd: + // encode original function name. + sf.canonicalhashcode = codec.EncodeCompactBytes(sf.canonicalhashcode, hack.Slice(sf.FuncName.L)) + // reorder parameters hashcode, eg: a+b and b+a should has the same hashcode here. + slices.SortFunc(argsHashCode, func(i, j []byte) int { + return bytes.Compare(i, j) + }) + for _, argCode := range argsHashCode { + sf.canonicalhashcode = append(sf.canonicalhashcode, argCode...) + } + + case ast.GE, ast.LE: // directed binary OP: a >= b and b <= a should have the same hashcode. + // encode GE function name. + sf.canonicalhashcode = codec.EncodeCompactBytes(sf.canonicalhashcode, hack.Slice(ast.GE)) + // encode GE function name and switch the args order. + if sf.FuncName.L == ast.GE { + for _, argCode := range argsHashCode { + sf.canonicalhashcode = append(sf.canonicalhashcode, argCode...) + } + } else { + for i := len(argsHashCode) - 1; i >= 0; i-- { + sf.canonicalhashcode = append(sf.canonicalhashcode, argsHashCode[i]...) + } + } + case ast.GT, ast.LT: + sf.canonicalhashcode = codec.EncodeCompactBytes(sf.canonicalhashcode, hack.Slice(ast.GT)) + if sf.FuncName.L == ast.GT { + for _, argCode := range argsHashCode { + sf.canonicalhashcode = append(sf.canonicalhashcode, argCode...) + } + } else { + for i := len(argsHashCode) - 1; i >= 0; i-- { + sf.canonicalhashcode = append(sf.canonicalhashcode, argsHashCode[i]...) + } + } + case ast.UnaryNot: + child, ok := sf.GetArgs()[0].(*ScalarFunction) + if !ok { + // encode original function name. + sf.canonicalhashcode = codec.EncodeCompactBytes(sf.canonicalhashcode, hack.Slice(sf.FuncName.L)) + // use the origin arg hash code. + for _, argCode := range argsHashCode { + sf.canonicalhashcode = append(sf.canonicalhashcode, argCode...) + } + } else { + childArgsHashCode := make([][]byte, 0, len(child.GetArgs())) + for _, arg := range child.GetArgs() { + childArgsHashCode = append(childArgsHashCode, arg.HashCode(sc)) + } + switch child.FuncName.L { + case ast.GT: // not GT ==> LE ==> use GE and switch args + sf.canonicalhashcode = codec.EncodeCompactBytes(sf.canonicalhashcode, hack.Slice(ast.GE)) + for i := len(childArgsHashCode) - 1; i >= 0; i-- { + sf.canonicalhashcode = append(sf.canonicalhashcode, childArgsHashCode[i]...) + } + case ast.LT: // not LT ==> GE + sf.canonicalhashcode = codec.EncodeCompactBytes(sf.canonicalhashcode, hack.Slice(ast.GE)) + for _, argCode := range childArgsHashCode { + sf.canonicalhashcode = append(sf.canonicalhashcode, argCode...) + } + case ast.GE: // not GE ==> LT ==> use GT and switch args + sf.canonicalhashcode = codec.EncodeCompactBytes(sf.canonicalhashcode, hack.Slice(ast.GT)) + for i := len(childArgsHashCode) - 1; i >= 0; i-- { + sf.canonicalhashcode = append(sf.canonicalhashcode, childArgsHashCode[i]...) + } + case ast.LE: // not LE ==> GT + sf.canonicalhashcode = codec.EncodeCompactBytes(sf.canonicalhashcode, hack.Slice(ast.GT)) + for _, argCode := range childArgsHashCode { + sf.canonicalhashcode = append(sf.canonicalhashcode, argCode...) + } + } + } + default: + // encode original function name. + sf.canonicalhashcode = codec.EncodeCompactBytes(sf.canonicalhashcode, hack.Slice(sf.FuncName.L)) + for _, argCode := range argsHashCode { + sf.canonicalhashcode = append(sf.canonicalhashcode, argCode...) + } + // Cast is a special case. The RetType should also be considered as an argument. + // Please see `newFunctionImpl()` for detail. + if sf.FuncName.L == ast.Cast { + evalTp := sf.RetType.EvalType() + sf.canonicalhashcode = append(sf.canonicalhashcode, byte(evalTp)) + } + } +} + +>>>>>>> c11a9992882 (*: use std/slices to replace exp/slices (#46433)) // ReHashCode is used after we change the argument in place. func ReHashCode(sf *ScalarFunction, sc *stmtctx.StatementContext) { sf.hashcode = sf.hashcode[:0] diff --git a/parser/goyacc/BUILD.bazel b/parser/goyacc/BUILD.bazel index e8582c1c4a2ab..51d0991ed1b0c 100644 --- a/parser/goyacc/BUILD.bazel +++ b/parser/goyacc/BUILD.bazel @@ -14,7 +14,6 @@ go_library( "@com_github_cznic_sortutil//:sortutil", "@com_github_cznic_strutil//:strutil", "@com_github_pingcap_errors//:errors", - "@org_golang_x_exp//slices", "@org_modernc_parser//yacc", "@org_modernc_y//:y", ], diff --git a/parser/goyacc/main.go b/parser/goyacc/main.go index cc7589773baa1..ff382d914707b 100644 --- a/parser/goyacc/main.go +++ b/parser/goyacc/main.go @@ -136,13 +136,13 @@ import ( "log" "os" "runtime" + "slices" "sort" "strings" "github.com/cznic/mathutil" "github.com/cznic/sortutil" "github.com/cznic/strutil" - "golang.org/x/exp/slices" parser "modernc.org/parser/yacc" "modernc.org/y" ) diff --git a/statistics/cmsketch.go b/statistics/cmsketch.go index 9d6a9a9cd3fa9..155c896c7bd7f 100644 --- a/statistics/cmsketch.go +++ b/statistics/cmsketch.go @@ -35,7 +35,6 @@ import ( "github.com/pingcap/tidb/util/codec" "github.com/pingcap/tidb/util/dbterror" "github.com/pingcap/tidb/util/hack" - "github.com/pingcap/tidb/util/mathutil" "github.com/pingcap/tipb/go-tipb" "github.com/twmb/murmur3" "golang.org/x/exp/slices" @@ -99,6 +98,20 @@ func newTopNHelper(sample [][]byte, numTop uint32) *topNHelper { } } sort.SliceStable(sorted, func(i, j int) bool { return sorted[i].cnt > sorted[j].cnt }) +<<<<<<< HEAD +======= + failpoint.Inject("StabilizeV1AnalyzeTopN", func(val failpoint.Value) { + if val.(bool) { + // The earlier TopN entry will modify the CMSketch, therefore influence later TopN entry's row count. + // So we need to make the order here fully deterministic to make the stats from analyze ver1 stable. + // See (*SampleCollector).ExtractTopN(), which calls this function, for details + sort.SliceStable(sorted, func(i, j int) bool { + return sorted[i].cnt > sorted[j].cnt || + (sorted[i].cnt == sorted[j].cnt && string(sorted[i].data) < string(sorted[j].data)) + }) + } + }) +>>>>>>> c11a9992882 (*: use std/slices to replace exp/slices (#46433)) var ( sumTopN uint64 @@ -131,7 +144,7 @@ func NewCMSketchAndTopN(d, w int32, sample [][]byte, numTop uint32, rowCount uin helper := newTopNHelper(sample, numTop) // rowCount is not a accurate value when fast analyzing // In some cases, if user triggers fast analyze when rowCount is close to sampleSize, unexpected bahavior might happen. - rowCount = mathutil.Max(rowCount, uint64(len(sample))) + rowCount = max(rowCount, uint64(len(sample))) estimateNDV, scaleRatio := calculateEstimateNDV(helper, rowCount) defaultVal := calculateDefaultVal(helper, estimateNDV, scaleRatio, rowCount) c, t := buildCMSAndTopN(helper, d, w, scaleRatio, defaultVal) @@ -170,7 +183,7 @@ func calculateDefaultVal(helper *topNHelper, estimateNDV, scaleRatio, rowCount u return 1 } estimateRemainingCount := rowCount - (helper.sampleSize-helper.onlyOnceItems)*scaleRatio - return estimateRemainingCount / mathutil.Max(1, estimateNDV-sampleNDV+helper.onlyOnceItems) + return estimateRemainingCount / max(1, estimateNDV-sampleNDV+helper.onlyOnceItems) } // MemoryUsage returns the total memory usage of a CMSketch. @@ -352,7 +365,7 @@ func (c *CMSketch) MergeCMSketch4IncrementalAnalyze(rc *CMSketch, numTopN uint32 for i := range c.table { c.count = 0 for j := range c.table[i] { - c.table[i][j] = mathutil.Max(c.table[i][j], rc.table[i][j]) + c.table[i][j] = max(c.table[i][j], rc.table[i][j]) c.count += uint64(c.table[i][j]) } } @@ -486,8 +499,13 @@ func (c *CMSketch) GetWidthAndDepth() (int32, int32) { // CalcDefaultValForAnalyze calculate the default value for Analyze. // The value of it is count / NDV in CMSketch. This means count and NDV are not include topN. +<<<<<<< HEAD func (c *CMSketch) CalcDefaultValForAnalyze(NDV uint64) { c.defaultValue = c.count / mathutil.Max(1, NDV) +======= +func (c *CMSketch) CalcDefaultValForAnalyze(ndv uint64) { + c.defaultValue = c.count / max(1, ndv) +>>>>>>> c11a9992882 (*: use std/slices to replace exp/slices (#46433)) } // TopN stores most-common values, which is used to estimate point queries.