Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

plan: handle DNF expressions in Selectivity (#9282) #9405

Merged
merged 3 commits into from
Feb 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions cmd/explaintest/r/explain_easy_stats.result
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,10 @@ explain select * from index_prune WHERE a = 1010010404050976781 AND b = 26467085
id count task operator info
Point_Get_1 1.00 root table:index_prune, index:a b
drop table if exists t1, t2, t3, index_prune;
drop table if exists tbl;
create table tbl(column1 int, column2 int, index idx(column1, column2));
load stats 's/explain_easy_stats_tbl_dnf.json';
explain select * from tbl where (column1=0 and column2=1) or (column1=1 and column2=3) or (column1=2 and column2=5);
id count task operator info
IndexReader_6 3.00 root index:IndexScan_5
└─IndexScan_5 3.00 cop table:tbl, index:column1, column2, range:[0 1,0 1], [1 3,1 3], [2 5,2 5], keep order:false
1 change: 1 addition & 0 deletions cmd/explaintest/s/explain_easy_stats_tbl_dnf.json

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions cmd/explaintest/t/explain_easy_stats.test
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,8 @@ explain select * from index_prune WHERE a = 1010010404050976781 AND b = 26467085
explain select * from index_prune WHERE a = 1010010404050976781 AND b = 26467085526790 GROUP BY b ORDER BY a limit 1;

drop table if exists t1, t2, t3, index_prune;

drop table if exists tbl;
create table tbl(column1 int, column2 int, index idx(column1, column2));
load stats 's/explain_easy_stats_tbl_dnf.json';
explain select * from tbl where (column1=0 and column2=1) or (column1=1 and column2=3) or (column1=2 and column2=5);
15 changes: 7 additions & 8 deletions planner/core/common_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,15 +259,14 @@ func (e *Execute) rebuildRange(p Plan) error {

func (e *Execute) buildRangeForIndexScan(sctx sessionctx.Context, is *PhysicalIndexScan) ([]*ranger.Range, error) {
idxCols, colLengths := expression.IndexInfo2Cols(is.schema.Columns, is.Index)
ranges := ranger.FullRange()
if len(idxCols) > 0 {
var err error
ranges, _, _, _, err = ranger.DetachCondAndBuildRangeForIndex(sctx, is.AccessCondition, idxCols, colLengths)
if err != nil {
return nil, errors.Trace(err)
}
if len(idxCols) == 0 {
return ranger.FullRange(), nil
}
res, err := ranger.DetachCondAndBuildRangeForIndex(sctx, is.AccessCondition, idxCols, colLengths)
if err != nil {
return nil, err
}
return ranges, nil
return res.Ranges, nil
}

// Deallocate represents deallocate plan.
Expand Down
9 changes: 4 additions & 5 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package core
import (
"math"

"github.com/pingcap/errors"
"github.com/pingcap/parser/ast"
"github.com/pingcap/parser/model"
"github.com/pingcap/parser/mysql"
Expand Down Expand Up @@ -530,20 +529,20 @@ func (p *LogicalJoin) buildRangeForIndexJoin(indexInfo *model.IndexInfo, innerPl
// In `buildFakeEqCondsForIndexJoin`, we construct the equal conditions for join keys and remove filters that contain the join keys' column.
// When t1.a = t2.a and t1.a > 1, we can also guarantee that t1.a > 1 won't be chosen as the access condition.
// So the equal conditions we built can be successfully used to build a range if they can be used. They won't be affected by the existing filters.
ranges, accesses, moreRemained, _, err := ranger.DetachCondAndBuildRangeForIndex(p.ctx, access, idxCols, colLengths)
res, err := ranger.DetachCondAndBuildRangeForIndex(p.ctx, access, idxCols, colLengths)
if err != nil {
terror.Log(errors.Trace(err))
terror.Log(err)
return nil, nil, nil
}

// We should guarantee that all of the join's equal conditions are used.
for _, eqCond := range eqConds {
if !expression.Contains(accesses, eqCond) {
if !expression.Contains(res.AccessConds, eqCond) {
return nil, nil, nil
}
}

return ranges, append(remained, moreRemained...), keyOff2IdxOff
return res.Ranges, append(remained, res.RemainedConds...), keyOff2IdxOff
}

func (p *LogicalJoin) buildFakeEqCondsForIndexJoin(keys, idxCols []*expression.Column, colLengths []int,
Expand Down
11 changes: 7 additions & 4 deletions planner/core/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -442,19 +442,22 @@ func (ds *DataSource) deriveTablePathStats(path *accessPath) (bool, error) {
// And it will check whether this index is full matched by point query. We will use this check to
// determine whether we remove other paths or not.
func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
var err error
sc := ds.ctx.GetSessionVars().StmtCtx
path.ranges = ranger.FullRange()
path.countAfterAccess = float64(ds.statisticTable.Count)
path.idxCols, path.idxColLens = expression.IndexInfo2Cols(ds.schema.Columns, path.index)
if len(path.idxCols) != 0 {
path.ranges, path.accessConds, path.tableFilters, path.eqCondCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, path.idxCols, path.idxColLens)
res, err := ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, path.idxCols, path.idxColLens)
if err != nil {
return false, errors.Trace(err)
return false, err
}
path.ranges = res.Ranges
path.accessConds = res.AccessConds
path.tableFilters = res.RemainedConds
path.eqCondCount = res.EqCondCount
path.countAfterAccess, err = ds.stats.HistColl.GetRowCountByIndexRanges(sc, path.index.ID, path.ranges)
if err != nil {
return false, errors.Trace(err)
return false, err
}
} else {
path.tableFilters = ds.pushedDownConds
Expand Down
34 changes: 25 additions & 9 deletions statistics/selectivity.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ type exprSet struct {
ranges []*ranger.Range
// numCols is the number of columns contained in the index or column(which is always 1).
numCols int
// partCover indicates whether the bit in the mask is for a full cover or partial cover. It is only true
// when the condition is a DNF expression on index, and the expression is not totally extracted as access condition.
partCover bool
}

// The type of the exprSet.
Expand Down Expand Up @@ -140,7 +143,6 @@ func isColEqCorCol(filter expression.Expression) *expression.Column {
// Selectivity is a function that calculates the selectivity of the expressions.
// The definition of selectivity is (row count after filter / row count before filter).
// And exprs must be CNF now, in other words, `exprs[0] and exprs[1] and ... and exprs[len - 1]` should be held when you call this.
// TODO: support expressions that the top layer is a DNF.
// Currently the time complexity is O(n^2).
func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Expression) (float64, error) {
// If table's count is zero or conditions are empty, we should return 100% selectivity.
Expand Down Expand Up @@ -175,7 +177,7 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
for id, colInfo := range coll.Columns {
col := expression.ColInfo2Col(extractedCols, colInfo.Info)
if col != nil {
maskCovered, ranges, err := getMaskAndRanges(ctx, remainedExprs, ranger.ColumnRangeType, nil, col)
maskCovered, ranges, _, err := getMaskAndRanges(ctx, remainedExprs, ranger.ColumnRangeType, nil, col)
if err != nil {
return 0, errors.Trace(err)
}
Expand All @@ -192,11 +194,11 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
for i := 0; i < len(idxCols); i++ {
lengths = append(lengths, idxInfo.Info.Columns[i].Length)
}
maskCovered, ranges, err := getMaskAndRanges(ctx, remainedExprs, ranger.IndexRangeType, lengths, idxCols...)
maskCovered, ranges, partCover, err := getMaskAndRanges(ctx, remainedExprs, ranger.IndexRangeType, lengths, idxCols...)
if err != nil {
return 0, errors.Trace(err)
}
sets = append(sets, &exprSet{tp: indexType, ID: id, mask: maskCovered, ranges: ranges, numCols: len(idxInfo.Info.Columns)})
sets = append(sets, &exprSet{tp: indexType, ID: id, mask: maskCovered, ranges: ranges, numCols: len(idxInfo.Info.Columns), partCover: partCover})
}
}
sets = getUsableSetsByGreedy(sets)
Expand All @@ -220,6 +222,13 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
return 0, errors.Trace(err)
}
ret *= rowCount / float64(coll.Count)
// If `partCover` is true, it means that the conditions are in DNF form, and only part
// of the DNF expressions are extracted as access conditions. So, besides the selectivity
// of the extracted access conditions, we multiply by another selectionFactor for the residual
// conditions.
if set.partCover {
ret *= selectionFactor
}
}
// If there's still conditions which cannot be calculated, we will multiply a selectionFactor.
if mask > 0 {
Expand All @@ -229,20 +238,27 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
}

func getMaskAndRanges(ctx sessionctx.Context, exprs []expression.Expression, rangeType ranger.RangeType,
lengths []int, cols ...*expression.Column) (mask int64, ranges []*ranger.Range, err error) {
lengths []int, cols ...*expression.Column) (mask int64, ranges []*ranger.Range, partCover bool, err error) {
sc := ctx.GetSessionVars().StmtCtx
var accessConds []expression.Expression
isDNF := false
var accessConds, remainedConds []expression.Expression
switch rangeType {
case ranger.ColumnRangeType:
accessConds = ranger.ExtractAccessConditionsForColumn(exprs, cols[0].ColName)
ranges, err = ranger.BuildColumnRange(accessConds, sc, cols[0].RetType)
case ranger.IndexRangeType:
ranges, accessConds, err = ranger.DetachSimpleCondAndBuildRangeForIndex(ctx, exprs, cols, lengths)
var res *ranger.DetachRangeResult
res, err = ranger.DetachCondAndBuildRangeForIndex(ctx, exprs, cols, lengths)
ranges, accessConds, remainedConds, isDNF = res.Ranges, res.AccessConds, res.RemainedConds, res.IsDNFCond
default:
panic("should never be here")
}
if err != nil {
return 0, nil, errors.Trace(err)
return 0, nil, false, err
}
if isDNF && len(accessConds) > 0 {
mask |= 1
return mask, ranges, len(remainedConds) > 0, nil
}
for i := range exprs {
for j := range accessConds {
Expand All @@ -252,7 +268,7 @@ func getMaskAndRanges(ctx sessionctx.Context, exprs []expression.Expression, ran
}
}
}
return mask, ranges, nil
return mask, ranges, false, nil
}

// getUsableSetsByGreedy will select the indices and pk used for calculate selectivity by greedy algorithm.
Expand Down
60 changes: 45 additions & 15 deletions util/ranger/detacher.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,16 +139,17 @@ func getEqOrInColOffset(expr expression.Expression, cols []*expression.Column) i
// It will first find the point query column and then extract the range query column.
// considerDNF is true means it will try to extract access conditions from the DNF expressions.
func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column,
tpSlice []*types.FieldType, lengths []int, considerDNF bool) ([]*Range, []expression.Expression, []expression.Expression, int, error) {
tpSlice []*types.FieldType, lengths []int, considerDNF bool) (*DetachRangeResult, error) {
var (
eqCount int
ranges []*Range
err error
)
res := &DetachRangeResult{}

accessConds, filterConds, newConditions, emptyRange := extractEqAndInCondition(sctx, conditions, cols, lengths)
if emptyRange {
return ranges, nil, nil, 0, nil
return res, nil
}

for ; eqCount < len(accessConds); eqCount++ {
Expand All @@ -164,9 +165,13 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex
filterConds = append(filterConds, newConditions...)
ranges, err = buildCNFIndexRange(sctx.GetSessionVars().StmtCtx, cols, tpSlice, lengths, eqOrInCount, accessConds)
if err != nil {
return nil, nil, nil, 0, errors.Trace(err)
return res, err
}
return ranges, accessConds, filterConds, eqCount, nil
res.Ranges = ranges
res.AccessConds = accessConds
res.RemainedConds = filterConds
res.EqCondCount = eqCount
return res, nil
}
checker := &conditionChecker{
colName: cols[eqOrInCount].ColName,
Expand All @@ -187,7 +192,11 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex
}
}
ranges, err = buildCNFIndexRange(sctx.GetSessionVars().StmtCtx, cols, tpSlice, lengths, eqOrInCount, accessConds)
return ranges, accessConds, filterConds, eqCount, errors.Trace(err)
res.Ranges = ranges
res.AccessConds = accessConds
res.RemainedConds = filterConds
res.EqCondCount = eqCount
return res, err
}

func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression,
Expand Down Expand Up @@ -261,10 +270,13 @@ func detachDNFCondAndBuildRangeForIndex(sctx sessionctx.Context, condition *expr
if sf, ok := item.(*expression.ScalarFunction); ok && sf.FuncName.L == ast.LogicAnd {
cnfItems := expression.FlattenCNFConditions(sf)
var accesses, filters []expression.Expression
ranges, accesses, filters, _, err := detachCNFCondAndBuildRangeForIndex(sctx, cnfItems, cols, newTpSlice, lengths, true)
res, err := detachCNFCondAndBuildRangeForIndex(sctx, cnfItems, cols, newTpSlice, lengths, true)
if err != nil {
return nil, nil, false, nil
}
ranges := res.Ranges
accesses = res.AccessConds
filters = res.RemainedConds
if len(accesses) == 0 {
return FullRange(), nil, true, nil
}
Expand Down Expand Up @@ -298,11 +310,25 @@ func detachDNFCondAndBuildRangeForIndex(sctx sessionctx.Context, condition *expr
return totalRanges, []expression.Expression{expression.ComposeDNFCondition(sctx, newAccessItems...)}, hasResidual, nil
}

// DetachRangeResult wraps up the results of detaching conditions and building ranges.
// It is the value returned by DetachCondAndBuildRangeForIndex.
type DetachRangeResult struct {
// Ranges is the ranges extracted and built from conditions.
Ranges []*Range
// AccessConds is the extracted conditions for access.
AccessConds []expression.Expression
// RemainedConds is the filter conditions which should be kept after access.
// For a top-level DNF condition that cannot be fully converted into ranges,
// this holds the whole original condition list.
RemainedConds []expression.Expression
// EqCondCount is the number of equal conditions extracted.
// It is left as zero when the top layer of conditions is in DNF.
EqCondCount int
// IsDNFCond indicates if the top layer of conditions are in DNF.
IsDNFCond bool
}

// DetachCondAndBuildRangeForIndex will detach the index filters from table filters.
// If the top layer is DNF, we return a int slice which is eqAndInCount of every DNF item.
// Otherwise just one number is returned.
// The returned values are encapsulated into a struct DetachRangeResult, see its comments for explanation.
func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column,
lengths []int) ([]*Range, []expression.Expression, []expression.Expression, int, error) {
lengths []int) (*DetachRangeResult, error) {
res := &DetachRangeResult{}
newTpSlice := make([]*types.FieldType, 0, len(cols))
for _, col := range cols {
newTpSlice = append(newTpSlice, newFieldType(col.RetType))
Expand All @@ -311,13 +337,17 @@ func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expre
if sf, ok := conditions[0].(*expression.ScalarFunction); ok && sf.FuncName.L == ast.LogicOr {
ranges, accesses, hasResidual, err := detachDNFCondAndBuildRangeForIndex(sctx, sf, cols, newTpSlice, lengths)
if err != nil {
return nil, nil, nil, 0, errors.Trace(err)
return res, errors.Trace(err)
}
res.Ranges = ranges
res.AccessConds = accesses
res.IsDNFCond = true
// If part of this DNF cannot be used to calculate ranges, then the whole DNF should be pushed down as a filter condition.
if hasResidual {
return ranges, accesses, conditions, 0, nil
res.RemainedConds = conditions
return res, nil
}
return ranges, accesses, nil, 0, nil
return res, nil
}
}
return detachCNFCondAndBuildRangeForIndex(sctx, conditions, cols, newTpSlice, lengths, true)
Expand All @@ -326,13 +356,13 @@ func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expre
// DetachSimpleCondAndBuildRangeForIndex will detach the index filters from table filters.
// It will find the point query column firstly and then extract the range query column.
func DetachSimpleCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expression.Expression,
cols []*expression.Column, lengths []int) (ranges []*Range, accessConds []expression.Expression, err error) {
cols []*expression.Column, lengths []int) ([]*Range, []expression.Expression, error) {
newTpSlice := make([]*types.FieldType, 0, len(cols))
for _, col := range cols {
newTpSlice = append(newTpSlice, newFieldType(col.RetType))
}
ranges, accessConds, _, _, err = detachCNFCondAndBuildRangeForIndex(sctx, conditions, cols, newTpSlice, lengths, false)
return ranges, accessConds, errors.Trace(err)
res, err := detachCNFCondAndBuildRangeForIndex(sctx, conditions, cols, newTpSlice, lengths, false)
return res.Ranges, res.AccessConds, err
}

func removeAccessConditions(conditions, accessConds []expression.Expression) []expression.Expression {
Expand Down
16 changes: 8 additions & 8 deletions util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -594,11 +594,11 @@ func (s *testRangerSuite) TestIndexRange(c *C) {
}
cols, lengths := expression.IndexInfo2Cols(selection.Schema().Columns, tbl.Indices[tt.indexPos])
c.Assert(cols, NotNil)
ranges, conds, filter, _, err := ranger.DetachCondAndBuildRangeForIndex(ctx, conds, cols, lengths)
res, err := ranger.DetachCondAndBuildRangeForIndex(ctx, conds, cols, lengths)
c.Assert(err, IsNil)
c.Assert(fmt.Sprintf("%s", conds), Equals, tt.accessConds, Commentf("wrong access conditions for expr: %s", tt.exprStr))
c.Assert(fmt.Sprintf("%s", filter), Equals, tt.filterConds, Commentf("wrong filter conditions for expr: %s", tt.exprStr))
got := fmt.Sprintf("%v", ranges)
c.Assert(fmt.Sprintf("%s", res.AccessConds), Equals, tt.accessConds, Commentf("wrong access conditions for expr: %s", tt.exprStr))
c.Assert(fmt.Sprintf("%s", res.RemainedConds), Equals, tt.filterConds, Commentf("wrong filter conditions for expr: %s", tt.exprStr))
got := fmt.Sprintf("%v", res.Ranges)
c.Assert(got, Equals, tt.resultStr, Commentf("different for expr %s", tt.exprStr))
}
}
Expand Down Expand Up @@ -681,11 +681,11 @@ func (s *testRangerSuite) TestIndexRangeForUnsignedInt(c *C) {
}
cols, lengths := expression.IndexInfo2Cols(selection.Schema().Columns, tbl.Indices[tt.indexPos])
c.Assert(cols, NotNil)
ranges, conds, filter, _, err := ranger.DetachCondAndBuildRangeForIndex(ctx, conds, cols, lengths)
res, err := ranger.DetachCondAndBuildRangeForIndex(ctx, conds, cols, lengths)
c.Assert(err, IsNil)
c.Assert(fmt.Sprintf("%s", conds), Equals, tt.accessConds, Commentf("wrong access conditions for expr: %s", tt.exprStr))
c.Assert(fmt.Sprintf("%s", filter), Equals, tt.filterConds, Commentf("wrong filter conditions for expr: %s", tt.exprStr))
got := fmt.Sprintf("%v", ranges)
c.Assert(fmt.Sprintf("%s", res.AccessConds), Equals, tt.accessConds, Commentf("wrong access conditions for expr: %s", tt.exprStr))
c.Assert(fmt.Sprintf("%s", res.RemainedConds), Equals, tt.filterConds, Commentf("wrong filter conditions for expr: %s", tt.exprStr))
got := fmt.Sprintf("%v", res.Ranges)
c.Assert(got, Equals, tt.resultStr, Commentf("different for expr %s", tt.exprStr))
}
}
Expand Down