diff --git a/distsql/request_builder.go b/distsql/request_builder.go index 06734fbc45d06..92532de0b7d1d 100644 --- a/distsql/request_builder.go +++ b/distsql/request_builder.go @@ -269,7 +269,10 @@ func IndexRangesToKVRanges(sc *stmtctx.StatementContext, tid, idxID int64, range feedbackRanges = append(feedbackRanges, &ranger.Range{LowVal: []types.Datum{types.NewBytesDatum(low)}, HighVal: []types.Datum{types.NewBytesDatum(high)}, LowExclude: false, HighExclude: true}) } - feedbackRanges = fb.Hist.SplitRange(sc, feedbackRanges, true) + feedbackRanges, ok := fb.Hist.SplitRange(sc, feedbackRanges, true) + if !ok { + fb.Invalidate() + } krs := make([]kv.KeyRange, 0, len(feedbackRanges)) for _, ran := range feedbackRanges { low, high := ran.LowVal[0].GetBytes(), ran.HighVal[0].GetBytes() diff --git a/executor/executor_test.go b/executor/executor_test.go index c21b8104fae28..6434a799f573c 100644 --- a/executor/executor_test.go +++ b/executor/executor_test.go @@ -49,6 +49,7 @@ import ( "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/sessionctx/variable" + "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/store/mockstore" "github.com/pingcap/tidb/store/mockstore/mocktikv" "github.com/pingcap/tidb/store/tikv" @@ -3757,6 +3758,21 @@ func (s *testSuite) TestSplitIndexRegion(c *C) { c.Assert(terr.Code(), Equals, terror.ErrCode(mysql.WarnDataTruncated)) } +func (s *testSuite) TestUnsignedFeedback(c *C) { + tk := testkit.NewTestKit(c, s.store) + oriProbability := statistics.FeedbackProbability.Load() + statistics.FeedbackProbability.Store(1.0) + defer func() { statistics.FeedbackProbability.Store(oriProbability) }() + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a bigint unsigned, b int, primary key(a))") + tk.MustExec("insert into t values (1,1),(2,2)") + tk.MustExec("analyze table t") + tk.MustQuery("select count(distinct b) from t").Check(testkit.Rows("2")) + result := tk.MustQuery("explain analyze select count(distinct b) from t") + c.Assert(result.Rows()[2][3], Equals, "table:t, range:[0,+inf], keep order:false") +} + type testOOMSuite struct { store kv.Storage do *domain.Domain diff --git a/executor/table_reader.go b/executor/table_reader.go index a441e00755668..327b148b02de7 100644 --- a/executor/table_reader.go +++ b/executor/table_reader.go @@ -103,7 +103,11 @@ func (e *TableReaderExecutor) Open(ctx context.Context) error { e.resultHandler = &tableResultHandler{} if e.feedback != nil && e.feedback.Hist != nil { // EncodeInt don't need *statement.Context. - e.ranges = e.feedback.Hist.SplitRange(nil, e.ranges, false) + var ok bool + e.ranges, ok = e.feedback.Hist.SplitRange(nil, e.ranges, false) + if !ok { + e.feedback.Invalidate() + } } firstPartRanges, secondPartRanges := splitRanges(e.ranges, e.keepOrder, e.desc) firstResult, err := e.buildResp(ctx, firstPartRanges) diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index c3322b9aae47c..09cff86980d69 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -376,19 +376,21 @@ func (ds *DataSource) deriveTablePathStats(path *accessPath) (bool, error) { path.tableFilters = ds.pushedDownConds var pkCol *expression.Column columnLen := len(ds.schema.Columns) - if columnLen > 0 && ds.schema.Columns[columnLen-1].ID == model.ExtraHandleID { - pkCol = ds.schema.Columns[columnLen-1] - } else if ds.tableInfo.PKIsHandle { + isUnsigned := false + if ds.tableInfo.PKIsHandle { if pkColInfo := ds.tableInfo.GetPkColInfo(); pkColInfo != nil { + isUnsigned = mysql.HasUnsignedFlag(pkColInfo.Flag) pkCol = expression.ColInfo2Col(ds.schema.Columns, pkColInfo) } + } else if columnLen > 0 && ds.schema.Columns[columnLen-1].ID == model.ExtraHandleID { + pkCol = ds.schema.Columns[columnLen-1] } if pkCol == nil { - path.ranges = ranger.FullIntRange(false) + path.ranges = ranger.FullIntRange(isUnsigned) return false, nil } - path.ranges = ranger.FullIntRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag)) + path.ranges = ranger.FullIntRange(isUnsigned) if len(ds.pushedDownConds) == 0 { return false, nil } diff --git a/statistics/handle/update.go b/statistics/handle/update.go index f5dd421adc4d5..1084fc5dc1528 100644 --- a/statistics/handle/update.go +++ b/statistics/handle/update.go @@ -972,7 +972,11 @@ func (h *Handle) dumpRangeFeedback(sc *stmtctx.StatementContext, ran *ranger.Ran ran.HighVal[0] = statistics.GetMaxValue(q.Hist.Tp) } } - ranges := q.Hist.SplitRange(sc, []*ranger.Range{ran}, q.Tp == statistics.IndexType) + ranges, ok := q.Hist.SplitRange(sc, []*ranger.Range{ran}, q.Tp == statistics.IndexType) + if !ok { + logutil.Logger(context.Background()).Debug("type of histogram and ranges mismatch") + return nil + } counts := make([]float64, 0, len(ranges)) sum := 0.0 for i, r := range ranges { diff --git a/statistics/handle/update_test.go b/statistics/handle/update_test.go index f91b9c77587d6..d44ddf7d49fb9 100644 --- a/statistics/handle/update_test.go +++ b/statistics/handle/update_test.go @@ -708,7 +708,7 @@ func (s *testStatsSuite) TestSplitRange(c *C) { HighExclude: t.exclude[i+1], }) } - ranges = h.SplitRange(nil, ranges, false) + ranges, _ = h.SplitRange(nil, ranges, false) var ranStrs []string for _, ran := range ranges { ranStrs = append(ranStrs, ran.String()) diff --git a/statistics/histogram.go b/statistics/histogram.go index 951070a45fe20..c16dd954628a6 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -395,10 +395,48 @@ func validRange(sc *stmtctx.StatementContext, ran *ranger.Range, encoded bool) b return bytes.Compare(low, high) < 0 } +func checkKind(vals []types.Datum, kind byte) bool { + if kind == types.KindString { + kind = types.KindBytes + } + for _, val := range vals { + valKind := val.Kind() + if valKind == types.KindNull || valKind == types.KindMinNotNull || valKind == types.KindMaxValue { + continue + } + if valKind == types.KindString { + valKind = types.KindBytes + } + if valKind != kind { + return false + } + // Only check the first non-null value. + break + } + return true +} + +func (hg *Histogram) typeMatch(ranges []*ranger.Range) bool { + kind := hg.GetLower(0).Kind() + for _, ran := range ranges { + if !checkKind(ran.LowVal, kind) || !checkKind(ran.HighVal, kind) { + return false + } + } + return true +} + // SplitRange splits the range according to the histogram upper bound. Note that we treat last bucket's upper bound // as inf, so all the split Ranges will totally fall in one of the (-inf, u(0)], (u(0), u(1)],...(u(n-3), u(n-2)], // (u(n-2), +inf), where n is the number of buckets, u(i) is the i-th bucket's upper bound. -func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, ranges []*ranger.Range, encoded bool) []*ranger.Range { +func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, oldRanges []*ranger.Range, encoded bool) ([]*ranger.Range, bool) { + if !hg.typeMatch(oldRanges) { + return oldRanges, false + } + ranges := make([]*ranger.Range, 0, len(oldRanges)) + for _, ran := range oldRanges { + ranges = append(ranges, ran.Clone()) + } split := make([]*ranger.Range, 0, len(ranges)) for len(ranges) > 0 { // Find the last bound that greater or equal to the LowVal. @@ -447,7 +485,7 @@ func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, ranges []*ranger.R } } } - return split + return split, true } func (hg *Histogram) bucketCount(idx int) int64 { @@ -954,7 +992,11 @@ func (coll *HistColl) NewHistCollBySelectivity(sc *stmtctx.StatementContext, sta } newCol.Histogram = *NewHistogram(oldCol.ID, int64(float64(oldCol.NDV)*node.Selectivity), 0, 0, oldCol.Tp, chunk.InitialCapacity, 0) var err error - splitRanges := oldCol.Histogram.SplitRange(sc, node.Ranges, false) + splitRanges, ok := oldCol.Histogram.SplitRange(sc, node.Ranges, false) + if !ok { + logutil.Logger(context.Background()).Warn("[Histogram-in-plan]: the type of histogram and ranges mismatch") + continue + } // Deal with some corner case. if len(splitRanges) > 0 { // Deal with NULL values.