Skip to content

Commit

Permalink
plan: handle DNF expressions in Selectivity (pingcap#9282)
Browse files Browse the repository at this point in the history
  • Loading branch information
eurekaka committed Feb 21, 2019
1 parent 0964ad4 commit fa51f92
Show file tree
Hide file tree
Showing 9 changed files with 109 additions and 49 deletions.
7 changes: 7 additions & 0 deletions cmd/explaintest/r/explain_easy_stats.result
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,10 @@ explain select * from index_prune WHERE a = 1010010404050976781 AND b = 26467085
id count task operator info
Point_Get_1 1.00 root table:index_prune, index:a b
drop table if exists t1, t2, t3, index_prune;
drop table if exists tbl;
create table tbl(column1 int, column2 int, index idx(column1, column2));
load stats 's/explain_easy_stats_tbl_dnf.json';
explain select * from tbl where (column1=0 and column2=1) or (column1=1 and column2=3) or (column1=2 and column2=5);
id count task operator info
IndexReader_9 3.00 root index:IndexScan_8
└─IndexScan_8 3.00 cop table:tbl, index:column1, column2, range:[0 1,0 1], [1 3,1 3], [2 5,2 5], keep order:false
1 change: 1 addition & 0 deletions cmd/explaintest/s/explain_easy_stats_tbl_dnf.json

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions cmd/explaintest/t/explain_easy_stats.test
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,8 @@ explain select * from index_prune WHERE a = 1010010404050976781 AND b = 26467085
explain select * from index_prune WHERE a = 1010010404050976781 AND b = 26467085526790 GROUP BY b ORDER BY a limit 1;

drop table if exists t1, t2, t3, index_prune;

drop table if exists tbl;
create table tbl(column1 int, column2 int, index idx(column1, column2));
load stats 's/explain_easy_stats_tbl_dnf.json';
explain select * from tbl where (column1=0 and column2=1) or (column1=1 and column2=3) or (column1=2 and column2=5);
15 changes: 7 additions & 8 deletions planner/core/common_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,15 +259,14 @@ func (e *Execute) rebuildRange(p Plan) error {

func (e *Execute) buildRangeForIndexScan(sctx sessionctx.Context, is *PhysicalIndexScan) ([]*ranger.Range, error) {
idxCols, colLengths := expression.IndexInfo2Cols(is.schema.Columns, is.Index)
ranges := ranger.FullRange()
if len(idxCols) > 0 {
var err error
ranges, _, _, _, err = ranger.DetachCondAndBuildRangeForIndex(sctx, is.AccessCondition, idxCols, colLengths)
if err != nil {
return nil, errors.Trace(err)
}
if len(idxCols) == 0 {
return ranger.FullRange(), nil
}
res, err := ranger.DetachCondAndBuildRangeForIndex(sctx, is.AccessCondition, idxCols, colLengths)
if err != nil {
return nil, err
}
return ranges, nil
return res.Ranges, nil
}

// Deallocate represents deallocate plan.
Expand Down
9 changes: 4 additions & 5 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package core
import (
"math"

"github.com/pingcap/errors"
"github.com/pingcap/parser/ast"
"github.com/pingcap/parser/model"
"github.com/pingcap/parser/mysql"
Expand Down Expand Up @@ -530,20 +529,20 @@ func (p *LogicalJoin) buildRangeForIndexJoin(indexInfo *model.IndexInfo, innerPl
// In `buildFakeEqCondsForIndexJoin`, we construct the equal conditions for join keys and remove filters that contain the join keys' column.
// When t1.a = t2.a and t1.a > 1, we can also guarantee that t1.a > 1 won't be chosen as the access condition.
// So the equal conditions we built can be successfully used to build a range if they can be used. They won't be affected by the existing filters.
ranges, accesses, moreRemained, _, err := ranger.DetachCondAndBuildRangeForIndex(p.ctx, access, idxCols, colLengths)
res, err := ranger.DetachCondAndBuildRangeForIndex(p.ctx, access, idxCols, colLengths)
if err != nil {
terror.Log(errors.Trace(err))
terror.Log(err)
return nil, nil, nil
}

// We should guarantee that all of the join's equal conditions are used.
for _, eqCond := range eqConds {
if !expression.Contains(accesses, eqCond) {
if !expression.Contains(res.AccessConds, eqCond) {
return nil, nil, nil
}
}

return ranges, append(remained, moreRemained...), keyOff2IdxOff
return res.Ranges, append(remained, res.RemainedConds...), keyOff2IdxOff
}

func (p *LogicalJoin) buildFakeEqCondsForIndexJoin(keys, idxCols []*expression.Column, colLengths []int,
Expand Down
11 changes: 7 additions & 4 deletions planner/core/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -442,19 +442,22 @@ func (ds *DataSource) deriveTablePathStats(path *accessPath) (bool, error) {
// And it will check whether this index is full matched by point query. We will use this check to
// determine whether we remove other paths or not.
func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
var err error
sc := ds.ctx.GetSessionVars().StmtCtx
path.ranges = ranger.FullRange()
path.countAfterAccess = float64(ds.statisticTable.Count)
path.idxCols, path.idxColLens = expression.IndexInfo2Cols(ds.schema.Columns, path.index)
if len(path.idxCols) != 0 {
path.ranges, path.accessConds, path.tableFilters, path.eqCondCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, path.idxCols, path.idxColLens)
res, err := ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, path.idxCols, path.idxColLens)
if err != nil {
return false, errors.Trace(err)
return false, err
}
path.ranges = res.Ranges
path.accessConds = res.AccessConds
path.tableFilters = res.RemainedConds
path.eqCondCount = res.EqCondCount
path.countAfterAccess, err = ds.stats.HistColl.GetRowCountByIndexRanges(sc, path.index.ID, path.ranges)
if err != nil {
return false, errors.Trace(err)
return false, err
}
} else {
path.tableFilters = ds.pushedDownConds
Expand Down
34 changes: 25 additions & 9 deletions statistics/selectivity.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ type exprSet struct {
ranges []*ranger.Range
// numCols is the number of columns contained in the index or column(which is always 1).
numCols int
// partCover indicates whether the bit in the mask is for a full cover or partial cover. It is only true
// when the condition is a DNF expression on index, and the expression is not totally extracted as access condition.
partCover bool
}

// The type of the exprSet.
Expand Down Expand Up @@ -140,7 +143,6 @@ func isColEqCorCol(filter expression.Expression) *expression.Column {
// Selectivity is a function that calculates the selectivity of the expressions.
// The definition of selectivity is (row count after filter / row count before filter).
// And exprs must be CNF now, in other words, `exprs[0] and exprs[1] and ... and exprs[len - 1]` should be held when you call this.
// TODO: support expressions that the top layer is a DNF.
// Currently the time complexity is O(n^2).
func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Expression) (float64, error) {
// If table's count is zero or conditions are empty, we should return 100% selectivity.
Expand Down Expand Up @@ -175,7 +177,7 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
for id, colInfo := range coll.Columns {
col := expression.ColInfo2Col(extractedCols, colInfo.Info)
if col != nil {
maskCovered, ranges, err := getMaskAndRanges(ctx, remainedExprs, ranger.ColumnRangeType, nil, col)
maskCovered, ranges, _, err := getMaskAndRanges(ctx, remainedExprs, ranger.ColumnRangeType, nil, col)
if err != nil {
return 0, errors.Trace(err)
}
Expand All @@ -192,11 +194,11 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
for i := 0; i < len(idxCols); i++ {
lengths = append(lengths, idxInfo.Info.Columns[i].Length)
}
maskCovered, ranges, err := getMaskAndRanges(ctx, remainedExprs, ranger.IndexRangeType, lengths, idxCols...)
maskCovered, ranges, partCover, err := getMaskAndRanges(ctx, remainedExprs, ranger.IndexRangeType, lengths, idxCols...)
if err != nil {
return 0, errors.Trace(err)
}
sets = append(sets, &exprSet{tp: indexType, ID: id, mask: maskCovered, ranges: ranges, numCols: len(idxInfo.Info.Columns)})
sets = append(sets, &exprSet{tp: indexType, ID: id, mask: maskCovered, ranges: ranges, numCols: len(idxInfo.Info.Columns), partCover: partCover})
}
}
sets = getUsableSetsByGreedy(sets)
Expand All @@ -220,6 +222,13 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
return 0, errors.Trace(err)
}
ret *= rowCount / float64(coll.Count)
// If `partCover` is true, it means that the conditions are in DNF form, and only part
// of the DNF expressions are extracted as access conditions, so in addition to the selectivity
// of the extracted access conditions, we multiply by another selectionFactor for the residual
// conditions.
if set.partCover {
ret *= selectionFactor
}
}
// If there's still conditions which cannot be calculated, we will multiply a selectionFactor.
if mask > 0 {
Expand All @@ -229,20 +238,27 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
}

func getMaskAndRanges(ctx sessionctx.Context, exprs []expression.Expression, rangeType ranger.RangeType,
lengths []int, cols ...*expression.Column) (mask int64, ranges []*ranger.Range, err error) {
lengths []int, cols ...*expression.Column) (mask int64, ranges []*ranger.Range, partCover bool, err error) {
sc := ctx.GetSessionVars().StmtCtx
var accessConds []expression.Expression
isDNF := false
var accessConds, remainedConds []expression.Expression
switch rangeType {
case ranger.ColumnRangeType:
accessConds = ranger.ExtractAccessConditionsForColumn(exprs, cols[0].ColName)
ranges, err = ranger.BuildColumnRange(accessConds, sc, cols[0].RetType)
case ranger.IndexRangeType:
ranges, accessConds, err = ranger.DetachSimpleCondAndBuildRangeForIndex(ctx, exprs, cols, lengths)
var res *ranger.DetachRangeResult
res, err = ranger.DetachCondAndBuildRangeForIndex(ctx, exprs, cols, lengths)
ranges, accessConds, remainedConds, isDNF = res.Ranges, res.AccessConds, res.RemainedConds, res.IsDNFCond
default:
panic("should never be here")
}
if err != nil {
return 0, nil, errors.Trace(err)
return 0, nil, false, err
}
if isDNF && len(accessConds) > 0 {
mask |= 1
return mask, ranges, len(remainedConds) > 0, nil
}
for i := range exprs {
for j := range accessConds {
Expand All @@ -252,7 +268,7 @@ func getMaskAndRanges(ctx sessionctx.Context, exprs []expression.Expression, ran
}
}
}
return mask, ranges, nil
return mask, ranges, false, nil
}

// getUsableSetsByGreedy will select the indices and pk used for calculate selectivity by greedy algorithm.
Expand Down
60 changes: 45 additions & 15 deletions util/ranger/detacher.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,16 +139,17 @@ func getEqOrInColOffset(expr expression.Expression, cols []*expression.Column) i
// It will first find the point query column and then extract the range query column.
// considerDNF is true means it will try to extract access conditions from the DNF expressions.
func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column,
tpSlice []*types.FieldType, lengths []int, considerDNF bool) ([]*Range, []expression.Expression, []expression.Expression, int, error) {
tpSlice []*types.FieldType, lengths []int, considerDNF bool) (*DetachRangeResult, error) {
var (
eqCount int
ranges []*Range
err error
)
res := &DetachRangeResult{}

accessConds, filterConds, newConditions, emptyRange := extractEqAndInCondition(sctx, conditions, cols, lengths)
if emptyRange {
return ranges, nil, nil, 0, nil
return res, nil
}

for ; eqCount < len(accessConds); eqCount++ {
Expand All @@ -164,9 +165,13 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex
filterConds = append(filterConds, newConditions...)
ranges, err = buildCNFIndexRange(sctx.GetSessionVars().StmtCtx, cols, tpSlice, lengths, eqOrInCount, accessConds)
if err != nil {
return nil, nil, nil, 0, errors.Trace(err)
return res, err
}
return ranges, accessConds, filterConds, eqCount, nil
res.Ranges = ranges
res.AccessConds = accessConds
res.RemainedConds = filterConds
res.EqCondCount = eqCount
return res, nil
}
checker := &conditionChecker{
colName: cols[eqOrInCount].ColName,
Expand All @@ -187,7 +192,11 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex
}
}
ranges, err = buildCNFIndexRange(sctx.GetSessionVars().StmtCtx, cols, tpSlice, lengths, eqOrInCount, accessConds)
return ranges, accessConds, filterConds, eqCount, errors.Trace(err)
res.Ranges = ranges
res.AccessConds = accessConds
res.RemainedConds = filterConds
res.EqCondCount = eqCount
return res, err
}

func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression,
Expand Down Expand Up @@ -261,10 +270,13 @@ func detachDNFCondAndBuildRangeForIndex(sctx sessionctx.Context, condition *expr
if sf, ok := item.(*expression.ScalarFunction); ok && sf.FuncName.L == ast.LogicAnd {
cnfItems := expression.FlattenCNFConditions(sf)
var accesses, filters []expression.Expression
ranges, accesses, filters, _, err := detachCNFCondAndBuildRangeForIndex(sctx, cnfItems, cols, newTpSlice, lengths, true)
res, err := detachCNFCondAndBuildRangeForIndex(sctx, cnfItems, cols, newTpSlice, lengths, true)
if err != nil {
return nil, nil, false, nil
}
ranges := res.Ranges
accesses = res.AccessConds
filters = res.RemainedConds
if len(accesses) == 0 {
return FullRange(), nil, true, nil
}
Expand Down Expand Up @@ -298,11 +310,25 @@ func detachDNFCondAndBuildRangeForIndex(sctx sessionctx.Context, condition *expr
return totalRanges, []expression.Expression{expression.ComposeDNFCondition(sctx, newAccessItems...)}, hasResidual, nil
}

// DetachRangeResult wraps up results when detaching conditions and building ranges.
// It is the value returned by DetachCondAndBuildRangeForIndex.
type DetachRangeResult struct {
// Ranges is the ranges extracted and built from conditions.
Ranges []*Range
// AccessConds is the extracted conditions for access.
AccessConds []expression.Expression
// RemainedConds is the filter conditions which should be kept after access.
// When the top layer is a DNF and some part of it cannot be used to build ranges,
// DetachCondAndBuildRangeForIndex stores the complete input conditions here.
RemainedConds []expression.Expression
// EqCondCount is the number of equal conditions extracted.
// DetachCondAndBuildRangeForIndex does not set it when the top layer is a DNF,
// so it keeps its zero value in that case.
EqCondCount int
// IsDNFCond indicates whether the top layer of conditions is in DNF.
IsDNFCond bool
}

// DetachCondAndBuildRangeForIndex will detach the index filters from table filters.
// If the top layer is DNF, we return a int slice which is eqAndInCount of every DNF item.
// Otherwise just one number is returned.
// The returned values are encapsulated into a struct DetachRangeResult, see its comments for explanation.
func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column,
lengths []int) ([]*Range, []expression.Expression, []expression.Expression, int, error) {
lengths []int) (*DetachRangeResult, error) {
res := &DetachRangeResult{}
newTpSlice := make([]*types.FieldType, 0, len(cols))
for _, col := range cols {
newTpSlice = append(newTpSlice, newFieldType(col.RetType))
Expand All @@ -311,13 +337,17 @@ func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expre
if sf, ok := conditions[0].(*expression.ScalarFunction); ok && sf.FuncName.L == ast.LogicOr {
ranges, accesses, hasResidual, err := detachDNFCondAndBuildRangeForIndex(sctx, sf, cols, newTpSlice, lengths)
if err != nil {
return nil, nil, nil, 0, errors.Trace(err)
return res, errors.Trace(err)
}
res.Ranges = ranges
res.AccessConds = accesses
res.IsDNFCond = true
// If some part of this DNF cannot be used to calculate ranges, then the whole DNF should be kept as a filter condition.
if hasResidual {
return ranges, accesses, conditions, 0, nil
res.RemainedConds = conditions
return res, nil
}
return ranges, accesses, nil, 0, nil
return res, nil
}
}
return detachCNFCondAndBuildRangeForIndex(sctx, conditions, cols, newTpSlice, lengths, true)
Expand All @@ -326,13 +356,13 @@ func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expre
// DetachSimpleCondAndBuildRangeForIndex will detach the index filters from table filters.
// It will find the point query column firstly and then extract the range query column.
func DetachSimpleCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expression.Expression,
cols []*expression.Column, lengths []int) (ranges []*Range, accessConds []expression.Expression, err error) {
cols []*expression.Column, lengths []int) ([]*Range, []expression.Expression, error) {
newTpSlice := make([]*types.FieldType, 0, len(cols))
for _, col := range cols {
newTpSlice = append(newTpSlice, newFieldType(col.RetType))
}
ranges, accessConds, _, _, err = detachCNFCondAndBuildRangeForIndex(sctx, conditions, cols, newTpSlice, lengths, false)
return ranges, accessConds, errors.Trace(err)
res, err := detachCNFCondAndBuildRangeForIndex(sctx, conditions, cols, newTpSlice, lengths, false)
return res.Ranges, res.AccessConds, err
}

func removeAccessConditions(conditions, accessConds []expression.Expression) []expression.Expression {
Expand Down
16 changes: 8 additions & 8 deletions util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -594,11 +594,11 @@ func (s *testRangerSuite) TestIndexRange(c *C) {
}
cols, lengths := expression.IndexInfo2Cols(selection.Schema().Columns, tbl.Indices[tt.indexPos])
c.Assert(cols, NotNil)
ranges, conds, filter, _, err := ranger.DetachCondAndBuildRangeForIndex(ctx, conds, cols, lengths)
res, err := ranger.DetachCondAndBuildRangeForIndex(ctx, conds, cols, lengths)
c.Assert(err, IsNil)
c.Assert(fmt.Sprintf("%s", conds), Equals, tt.accessConds, Commentf("wrong access conditions for expr: %s", tt.exprStr))
c.Assert(fmt.Sprintf("%s", filter), Equals, tt.filterConds, Commentf("wrong filter conditions for expr: %s", tt.exprStr))
got := fmt.Sprintf("%v", ranges)
c.Assert(fmt.Sprintf("%s", res.AccessConds), Equals, tt.accessConds, Commentf("wrong access conditions for expr: %s", tt.exprStr))
c.Assert(fmt.Sprintf("%s", res.RemainedConds), Equals, tt.filterConds, Commentf("wrong filter conditions for expr: %s", tt.exprStr))
got := fmt.Sprintf("%v", res.Ranges)
c.Assert(got, Equals, tt.resultStr, Commentf("different for expr %s", tt.exprStr))
}
}
Expand Down Expand Up @@ -681,11 +681,11 @@ func (s *testRangerSuite) TestIndexRangeForUnsignedInt(c *C) {
}
cols, lengths := expression.IndexInfo2Cols(selection.Schema().Columns, tbl.Indices[tt.indexPos])
c.Assert(cols, NotNil)
ranges, conds, filter, _, err := ranger.DetachCondAndBuildRangeForIndex(ctx, conds, cols, lengths)
res, err := ranger.DetachCondAndBuildRangeForIndex(ctx, conds, cols, lengths)
c.Assert(err, IsNil)
c.Assert(fmt.Sprintf("%s", conds), Equals, tt.accessConds, Commentf("wrong access conditions for expr: %s", tt.exprStr))
c.Assert(fmt.Sprintf("%s", filter), Equals, tt.filterConds, Commentf("wrong filter conditions for expr: %s", tt.exprStr))
got := fmt.Sprintf("%v", ranges)
c.Assert(fmt.Sprintf("%s", res.AccessConds), Equals, tt.accessConds, Commentf("wrong access conditions for expr: %s", tt.exprStr))
c.Assert(fmt.Sprintf("%s", res.RemainedConds), Equals, tt.filterConds, Commentf("wrong filter conditions for expr: %s", tt.exprStr))
got := fmt.Sprintf("%v", res.Ranges)
c.Assert(got, Equals, tt.resultStr, Commentf("different for expr %s", tt.exprStr))
}
}
Expand Down

0 comments on commit fa51f92

Please sign in to comment.