Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

*: fix bug when unsigned histogram meets signed ranges in feedback #10415

Merged
merged 4 commits into from
May 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion distsql/request_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,10 @@ func IndexRangesToKVRanges(sc *stmtctx.StatementContext, tid, idxID int64, range
feedbackRanges = append(feedbackRanges, &ranger.Range{LowVal: []types.Datum{types.NewBytesDatum(low)},
HighVal: []types.Datum{types.NewBytesDatum(high)}, LowExclude: false, HighExclude: true})
}
feedbackRanges = fb.Hist.SplitRange(sc, feedbackRanges, true)
feedbackRanges, ok := fb.Hist.SplitRange(sc, feedbackRanges, true)
if !ok {
fb.Invalidate()
}
krs := make([]kv.KeyRange, 0, len(feedbackRanges))
for _, ran := range feedbackRanges {
low, high := ran.LowVal[0].GetBytes(), ran.HighVal[0].GetBytes()
Expand Down
16 changes: 16 additions & 0 deletions executor/executor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import (
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/store/mockstore"
"github.com/pingcap/tidb/store/mockstore/mocktikv"
"github.com/pingcap/tidb/store/tikv"
Expand Down Expand Up @@ -3757,6 +3758,21 @@ func (s *testSuite) TestSplitIndexRegion(c *C) {
c.Assert(terr.Code(), Equals, terror.ErrCode(mysql.WarnDataTruncated))
}

// TestUnsignedFeedback runs an aggregate query over a table whose primary key
// is an unsigned bigint while statistics.FeedbackProbability is set to 1.0,
// and asserts that the explained table scan uses the range [0,+inf].
func (s *testSuite) TestUnsignedFeedback(c *C) {
	tk := testkit.NewTestKit(c, s.store)

	// Raise the feedback probability for this test only (presumably so that
	// feedback is always collected) and put the previous value back on exit.
	savedProbability := statistics.FeedbackProbability.Load()
	statistics.FeedbackProbability.Store(1.0)
	defer func() {
		statistics.FeedbackProbability.Store(savedProbability)
	}()

	// Build and analyze a small table keyed by an unsigned bigint.
	setup := []string{
		"use test",
		"drop table if exists t",
		"create table t(a bigint unsigned, b int, primary key(a))",
		"insert into t values (1,1),(2,2)",
		"analyze table t",
	}
	for _, stmt := range setup {
		tk.MustExec(stmt)
	}

	tk.MustQuery("select count(distinct b) from t").Check(testkit.Rows("2"))

	// The third row of the explain output is expected to describe the scan on t.
	explainRows := tk.MustQuery("explain analyze select count(distinct b) from t").Rows()
	c.Assert(explainRows[2][3], Equals, "table:t, range:[0,+inf], keep order:false")
}

type testOOMSuite struct {
store kv.Storage
do *domain.Domain
Expand Down
6 changes: 5 additions & 1 deletion executor/table_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,11 @@ func (e *TableReaderExecutor) Open(ctx context.Context) error {
e.resultHandler = &tableResultHandler{}
if e.feedback != nil && e.feedback.Hist != nil {
// EncodeInt doesn't need *stmtctx.StatementContext.
e.ranges = e.feedback.Hist.SplitRange(nil, e.ranges, false)
var ok bool
e.ranges, ok = e.feedback.Hist.SplitRange(nil, e.ranges, false)
if !ok {
e.feedback.Invalidate()
}
}
firstPartRanges, secondPartRanges := splitRanges(e.ranges, e.keepOrder, e.desc)
firstResult, err := e.buildResp(ctx, firstPartRanges)
Expand Down
12 changes: 7 additions & 5 deletions planner/core/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,19 +376,21 @@ func (ds *DataSource) deriveTablePathStats(path *accessPath) (bool, error) {
path.tableFilters = ds.pushedDownConds
var pkCol *expression.Column
columnLen := len(ds.schema.Columns)
if columnLen > 0 && ds.schema.Columns[columnLen-1].ID == model.ExtraHandleID {
pkCol = ds.schema.Columns[columnLen-1]
} else if ds.tableInfo.PKIsHandle {
isUnsigned := false
if ds.tableInfo.PKIsHandle {
if pkColInfo := ds.tableInfo.GetPkColInfo(); pkColInfo != nil {
isUnsigned = mysql.HasUnsignedFlag(pkColInfo.Flag)
pkCol = expression.ColInfo2Col(ds.schema.Columns, pkColInfo)
}
} else if columnLen > 0 && ds.schema.Columns[columnLen-1].ID == model.ExtraHandleID {
pkCol = ds.schema.Columns[columnLen-1]
}
if pkCol == nil {
path.ranges = ranger.FullIntRange(false)
path.ranges = ranger.FullIntRange(isUnsigned)
return false, nil
}

path.ranges = ranger.FullIntRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag))
path.ranges = ranger.FullIntRange(isUnsigned)
if len(ds.pushedDownConds) == 0 {
return false, nil
}
Expand Down
6 changes: 5 additions & 1 deletion statistics/handle/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -972,7 +972,11 @@ func (h *Handle) dumpRangeFeedback(sc *stmtctx.StatementContext, ran *ranger.Ran
ran.HighVal[0] = statistics.GetMaxValue(q.Hist.Tp)
}
}
ranges := q.Hist.SplitRange(sc, []*ranger.Range{ran}, q.Tp == statistics.IndexType)
ranges, ok := q.Hist.SplitRange(sc, []*ranger.Range{ran}, q.Tp == statistics.IndexType)
if !ok {
logutil.Logger(context.Background()).Debug("type of histogram and ranges mismatch")
return nil
}
counts := make([]float64, 0, len(ranges))
sum := 0.0
for i, r := range ranges {
Expand Down
2 changes: 1 addition & 1 deletion statistics/handle/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -708,7 +708,7 @@ func (s *testStatsSuite) TestSplitRange(c *C) {
HighExclude: t.exclude[i+1],
})
}
ranges = h.SplitRange(nil, ranges, false)
ranges, _ = h.SplitRange(nil, ranges, false)
var ranStrs []string
for _, ran := range ranges {
ranStrs = append(ranStrs, ran.String())
Expand Down
48 changes: 45 additions & 3 deletions statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -395,10 +395,48 @@ func validRange(sc *stmtctx.StatementContext, ran *ranger.Range, encoded bool) b
return bytes.Compare(low, high) < 0
}

func checkKind(vals []types.Datum, kind byte) bool {
if kind == types.KindString {
kind = types.KindBytes
}
for _, val := range vals {
alivxxx marked this conversation as resolved.
Show resolved Hide resolved
valKind := val.Kind()
if valKind == types.KindNull || valKind == types.KindMinNotNull || valKind == types.KindMaxValue {
continue
}
if valKind == types.KindString {
valKind = types.KindBytes
}
if valKind != kind {
return false
}
// Only check the first non-null value.
break
}
return true
}

// typeMatch reports whether the low and high values of every range pass
// checkKind against the kind of the histogram's first lower bound.
func (hg *Histogram) typeMatch(ranges []*ranger.Range) bool {
	histKind := hg.GetLower(0).Kind()
	for _, r := range ranges {
		if checkKind(r.LowVal, histKind) && checkKind(r.HighVal, histKind) {
			continue
		}
		// The first mismatching bound settles the result.
		return false
	}
	return true
}

// SplitRange splits the ranges according to the histogram's upper bounds. Note that we treat the last bucket's upper
// bound as inf, so every split range falls entirely within one of (-inf, u(0)], (u(0), u(1)], ..., (u(n-3), u(n-2)],
// (u(n-2), +inf), where n is the number of buckets and u(i) is the i-th bucket's upper bound.
func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, ranges []*ranger.Range, encoded bool) []*ranger.Range {
func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, oldRanges []*ranger.Range, encoded bool) ([]*ranger.Range, bool) {
if !hg.typeMatch(oldRanges) {
qw4990 marked this conversation as resolved.
Show resolved Hide resolved
return oldRanges, false
}
ranges := make([]*ranger.Range, 0, len(oldRanges))
for _, ran := range oldRanges {
ranges = append(ranges, ran.Clone())
qw4990 marked this conversation as resolved.
Show resolved Hide resolved
}
split := make([]*ranger.Range, 0, len(ranges))
for len(ranges) > 0 {
// Find the last bound that is greater than or equal to the LowVal.
Expand Down Expand Up @@ -447,7 +485,7 @@ func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, ranges []*ranger.R
}
}
}
return split
return split, true
}

func (hg *Histogram) bucketCount(idx int) int64 {
Expand Down Expand Up @@ -954,7 +992,11 @@ func (coll *HistColl) NewHistCollBySelectivity(sc *stmtctx.StatementContext, sta
}
newCol.Histogram = *NewHistogram(oldCol.ID, int64(float64(oldCol.NDV)*node.Selectivity), 0, 0, oldCol.Tp, chunk.InitialCapacity, 0)
var err error
splitRanges := oldCol.Histogram.SplitRange(sc, node.Ranges, false)
splitRanges, ok := oldCol.Histogram.SplitRange(sc, node.Ranges, false)
if !ok {
logutil.Logger(context.Background()).Warn("[Histogram-in-plan]: the type of histogram and ranges mismatch")
continue
}
// Deal with some corner case.
if len(splitRanges) > 0 {
// Deal with NULL values.
Expand Down