planner: generate IndexMergePath in physical optimization #10512

Merged 24 commits on Jul 5, 2019
141 changes: 141 additions & 0 deletions planner/core/indexmerge_test.go
@@ -0,0 +1,141 @@
// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package core

import (
. "github.com/pingcap/check"
"github.com/pingcap/parser"
"github.com/pingcap/parser/ast"
"github.com/pingcap/parser/model"
"github.com/pingcap/tidb/infoschema"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/util/testleak"
)

var _ = Suite(&testIndexMergeSuite{})

type testIndexMergeSuite struct {
*parser.Parser

is infoschema.InfoSchema
ctx sessionctx.Context
}

func (s *testIndexMergeSuite) SetUpSuite(c *C) {
s.is = infoschema.MockInfoSchema([]*model.TableInfo{MockTable(), MockView()})
s.ctx = MockContext()
s.Parser = parser.New()
}

func getIndexMergePathDigest(paths []*accessPath, startIndex int) string {
if len(paths) == startIndex {
return "[]"
}
idxMergeDisgest := "["
for i := startIndex; i < len(paths); i++ {
if i != startIndex {
idxMergeDisgest += ","
}
path := paths[i]
idxMergeDisgest += "{Idxs:["
for j := 0; j < len(path.partialIndexPaths); j++ {
if j > 0 {
idxMergeDisgest += ","
}
idxMergeDisgest += path.partialIndexPaths[j].index.Name.L
}
idxMergeDisgest += "],TbFilters:["
for j := 0; j < len(path.tableFilters); j++ {
if j > 0 {
idxMergeDisgest += ","
}
idxMergeDisgest += path.tableFilters[j].String()
}
idxMergeDisgest += "]}"
}
idxMergeDisgest += "]"
return idxMergeDisgest
}

func (s *testIndexMergeSuite) TestIndexMergePathGeneration(c *C) {
defer testleak.AfterTest(c)()
tests := []struct {
sql string
idxMergeDigest string
}{
{
sql: "select * from t",
idxMergeDigest: "[]",
},
{
sql: "select * from t where c < 1",
idxMergeDigest: "[]",
},
{
sql: "select * from t where c < 1 or f > 2",
idxMergeDigest: "[{Idxs:[c_d_e,f_g],TbFilters:[]}]",
},
{
sql: "select * from t where (c < 1 or f > 2) and (c > 5 or f < 7)",
idxMergeDigest: "[{Idxs:[c_d_e,f_g],TbFilters:[or(gt(test.t.c, 5), lt(test.t.f, 7))]}," +
"{Idxs:[c_d_e,f_g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2))]}]",
},
{
sql: "select * from t where (c < 1 or f > 2) and (c > 5 or f < 7) and (c < 1 or g > 2)",
idxMergeDigest: "[{Idxs:[c_d_e,f_g],TbFilters:[or(gt(test.t.c, 5), lt(test.t.f, 7)),or(lt(test.t.c, 1), gt(test.t.g, 2))]}," +
"{Idxs:[c_d_e,f_g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2)),or(lt(test.t.c, 1), gt(test.t.g, 2))]}," +
"{Idxs:[c_d_e,g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2)),or(gt(test.t.c, 5), lt(test.t.f, 7))]}]",
},
{
sql: "select * from t where (c < 1 or f > 2) and (c > 5 or f < 7) and (e < 1 or f > 2)",
idxMergeDigest: "[{Idxs:[c_d_e,f_g],TbFilters:[or(gt(test.t.c, 5), lt(test.t.f, 7)),or(lt(test.t.e, 1), gt(test.t.f, 2))]}," +
"{Idxs:[c_d_e,f_g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2)),or(lt(test.t.e, 1), gt(test.t.f, 2))]}]",
},
}
for i, tc := range tests {
comment := Commentf("case:%v sql:%s", i, tc.sql)
stmt, err := s.ParseOneStmt(tc.sql, "", "")
c.Assert(err, IsNil, comment)
Preprocess(s.ctx, stmt, s.is)
builder := &PlanBuilder{
ctx: MockContext(),
is: s.is,
colMapper: make(map[*ast.ColumnNameExpr]int),
}
p, err := builder.Build(stmt)
if err != nil {
c.Assert(err.Error(), Equals, tc.idxMergeDigest, comment)
continue
}
c.Assert(err, IsNil)
p, err = logicalOptimize(builder.optFlag, p.(LogicalPlan))
c.Assert(err, IsNil)
lp := p.(LogicalPlan)
c.Assert(err, IsNil)
var ds *DataSource
for ds == nil {
switch v := lp.(type) {
case *DataSource:
ds = v
default:
lp = lp.Children()[0]
}
}
ds.ctx.GetSessionVars().EnableIndexMerge = true
idxMergeStartIndex := len(ds.possibleAccessPaths)
_, err = lp.recursiveDeriveStats()
c.Assert(err, IsNil)
c.Assert(getIndexMergePathDigest(ds.possibleAccessPaths, idxMergeStartIndex), Equals, tc.idxMergeDigest)
}
}
21 changes: 13 additions & 8 deletions planner/core/logical_plans.go
@@ -351,7 +351,8 @@ type DataSource struct {
handleCol *expression.Column
}

// accessPath tells how we access one index or just access table.
// accessPath indicates the way we access a table: by using single index, or by using multiple indexes,
// or just by using table scan.
type accessPath struct {
Review comment (Member):
I hope that you can do cleanup here (of course, no need to do it in this PR).
Splitting the struct accessPath into accessPath and singleIndexPath, or some other name, would be better.

Reply (Contributor, Author):
OK, I will consider it later.
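As a rough illustration of that suggestion (an editor's sketch only; the type names singleIndexPath and indexMergePath and the exact field split are assumptions, not part of this PR), the struct could be separated into a common part plus index-specific and merge-specific parts:

// Hypothetical sketch of the suggested split; field types mirror the existing accessPath.
package sketch

import (
	"github.com/pingcap/parser/model"
	"github.com/pingcap/tidb/expression"
	"github.com/pingcap/tidb/util/ranger"
)

// accessPath would keep only what is common to every access method.
type accessPath struct {
	ranges           []*ranger.Range
	accessConds      []expression.Expression
	tableFilters     []expression.Expression
	countAfterAccess float64
	forced           bool
}

// singleIndexPath would add the index-specific fields.
type singleIndexPath struct {
	accessPath
	index      *model.IndexInfo
	idxCols    []*expression.Column
	idxColLens []int
}

// indexMergePath would hold the partial single-index paths of an index merge.
type indexMergePath struct {
	accessPath
	partialIndexPaths []*singleIndexPath
}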

index *model.IndexInfo
idxCols []*expression.Column
@@ -369,15 +370,18 @@ type accessPath struct {
isTablePath bool
// forced means this path is generated by `use/force index()`.
forced bool
// partialIndexPaths stores all index access paths.
// If there are extra filters, store them in tableFilters.
partialIndexPaths []*accessPath
}

// deriveTablePathStats will fill in the information that the accessPath needs.
// And it will check whether the primary key is covered only by point queries.
func (ds *DataSource) deriveTablePathStats(path *accessPath) (bool, error) {
func (ds *DataSource) deriveTablePathStats(path *accessPath, conds []expression.Expression) (bool, error) {
var err error
sc := ds.ctx.GetSessionVars().StmtCtx
path.countAfterAccess = float64(ds.statisticTable.Count)
path.tableFilters = ds.pushedDownConds
path.tableFilters = conds
var pkCol *expression.Column
columnLen := len(ds.schema.Columns)
isUnsigned := false
@@ -395,10 +399,10 @@ func (ds *DataSource) deriveTablePathStats(path *accessPath) (bool, error) {
}

path.ranges = ranger.FullIntRange(isUnsigned)
if len(ds.pushedDownConds) == 0 {
if len(conds) == 0 {
return false, nil
}
path.accessConds, path.tableFilters = ranger.DetachCondsForColumn(ds.ctx, ds.pushedDownConds, pkCol)
path.accessConds, path.tableFilters = ranger.DetachCondsForColumn(ds.ctx, conds, pkCol)
// If there's no access cond, we try to find whether there's an expression containing a correlated column
// that can be used to access data.
corColInAccessConds := false
@@ -478,7 +482,8 @@ func (ds *DataSource) getHandleCol() *expression.Column {
// deriveIndexPathStats will fill in the information that the accessPath needs.
// And it will check whether this index is fully matched by point queries. We will use this check to
// determine whether we remove other paths or not.
func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
// conds is the conditions used to generate the DetachRangeResult for path.
func (ds *DataSource) deriveIndexPathStats(path *accessPath, conds []expression.Expression) (bool, error) {
sc := ds.ctx.GetSessionVars().StmtCtx
path.ranges = ranger.FullRange()
path.countAfterAccess = float64(ds.statisticTable.Count)
@@ -492,7 +497,7 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
}
eqOrInCount := 0
if len(path.idxCols) != 0 {
res, err := ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, path.idxCols, path.idxColLens)
res, err := ranger.DetachCondAndBuildRangeForIndex(ds.ctx, conds, path.idxCols, path.idxColLens)
if err != nil {
return false, err
}
@@ -506,7 +511,7 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
return false, err
}
} else {
path.tableFilters = ds.pushedDownConds
path.tableFilters = conds
}
if eqOrInCount == len(path.accessConds) {
accesses, remained := path.splitCorColAccessCondFromFilters(eqOrInCount)
127 changes: 125 additions & 2 deletions planner/core/stats.go
@@ -16,6 +16,7 @@ package core
import (
"math"

"github.com/pingcap/parser/ast"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/planner/property"
"github.com/pingcap/tidb/statistics"
@@ -123,7 +124,7 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo) (*property.S
ds.deriveStatsByFilter(ds.pushedDownConds)
for _, path := range ds.possibleAccessPaths {
if path.isTablePath {
noIntervalRanges, err := ds.deriveTablePathStats(path)
noIntervalRanges, err := ds.deriveTablePathStats(path, ds.pushedDownConds)
if err != nil {
return nil, err
}
@@ -135,7 +136,7 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo) (*property.S
}
continue
}
noIntervalRanges, err := ds.deriveIndexPathStats(path)
noIntervalRanges, err := ds.deriveIndexPathStats(path, ds.pushedDownConds)
if err != nil {
return nil, err
}
@@ -146,9 +147,131 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo) (*property.S
break
}
}
// Consider the IndexMergePath. Now, we only generate `IndexMergePath` in the DNF case.
if len(ds.pushedDownConds) > 0 && len(ds.possibleAccessPaths) > 1 && ds.ctx.GetSessionVars().EnableIndexMerge {
needConsiderIndexMerge := true
for i := 1; i < len(ds.possibleAccessPaths); i++ {
if len(ds.possibleAccessPaths[i].accessConds) != 0 {
needConsiderIndexMerge = false
break
}
}
if needConsiderIndexMerge {
ds.generateIndexMergeOrPaths()
}
}
return ds.stats, nil
}

// generateIndexMergeOrPaths generates all possible IndexMergeOrPaths.
func (ds *DataSource) generateIndexMergeOrPaths() {
usedIndexCount := len(ds.possibleAccessPaths)
for i, cond := range ds.pushedDownConds {
sf, ok := cond.(*expression.ScalarFunction)
if !ok || sf.FuncName.L != ast.LogicOr {
continue
}
var partialPaths = make([]*accessPath, 0, usedIndexCount)
dnfItems := expression.FlattenDNFConditions(sf)
for _, item := range dnfItems {
cnfItems := expression.SplitCNFItems(item)
itemPaths := ds.accessPathsForConds(cnfItems, usedIndexCount)
if len(itemPaths) == 0 {
partialPaths = nil
break
}
partialPath := ds.buildIndexMergePartialPath(itemPaths)
if partialPath == nil {
partialPaths = nil
break
}
partialPaths = append(partialPaths, partialPath)
}
if len(partialPaths) > 1 {
possiblePath := ds.buildIndexMergeOrPath(partialPaths, i)
if possiblePath != nil {
ds.possibleAccessPaths = append(ds.possibleAccessPaths, possiblePath)
}
}
}
}
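To make the walk above concrete, here is an editor's minimal, self-contained model of the DNF handling (plain strings stand in for expression.Expression, and a fake index chooser replaces accessPathsForConds plus buildIndexMergePartialPath; this is a sketch of the idea, not the TiDB API). The point it shows: every OR item must be answerable by some index, otherwise no IndexMergePath candidate is produced for that condition.

package main

import (
	"fmt"
	"strings"
)

// bestIndexFor pretends there are single-column indexes on c, e, f and g, and
// that an OR item is coverable only when it is a simple comparison on one of them.
func bestIndexFor(item string) (string, bool) {
	for _, col := range []string{"c", "e", "f", "g"} {
		if strings.HasPrefix(item, col+" ") {
			return "idx_" + col, true
		}
	}
	return "", false
}

func main() {
	// One pushed-down condition in DNF form: "c < 1 or f > 2".
	dnfItems := []string{"c < 1", "f > 2"}

	var partialIdxs []string
	for _, item := range dnfItems {
		idx, ok := bestIndexFor(item)
		if !ok {
			partialIdxs = nil // one OR item has no usable index: give up on this condition
			break
		}
		partialIdxs = append(partialIdxs, idx)
	}
	if len(partialIdxs) > 1 {
		// Prints: index-merge candidate over [idx_c idx_f]
		fmt.Println("index-merge candidate over", partialIdxs)
	}
}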

// accessPathsForConds generates all possible index paths for conditions.
func (ds *DataSource) accessPathsForConds(conditions []expression.Expression, usedIndexCount int) []*accessPath {
var results = make([]*accessPath, 0, usedIndexCount)
for i := 0; i < usedIndexCount; i++ {
path := &accessPath{}
if ds.possibleAccessPaths[i].isTablePath {
path.isTablePath = true
noIntervalRanges, err := ds.deriveTablePathStats(path, conditions)
if err != nil {
logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err))
continue
}
// If we have point or empty range, just remove other possible paths.
if noIntervalRanges || len(path.ranges) == 0 {
// Re-slice via append instead of indexing results[0], which would panic while results is still empty.
results = append(results[:0], path)
break
}
} else {
path.index = ds.possibleAccessPaths[i].index
noIntervalRanges, err := ds.deriveIndexPathStats(path, conditions)
if err != nil {
logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err))
continue
}
// If we have empty range, or point range on unique index, just remove other possible paths.
if (noIntervalRanges && path.index.Unique) || len(path.ranges) == 0 {
// Re-slice via append instead of indexing results[0], which would panic while results is still empty.
results = append(results[:0], path)
break
}
}
// If accessConds is empty or tableFilter is not empty, we ignore the access path.
// These conditions are currently too strict.
// For example, for a SQL like `select * from t where a > 1 or (b < 2 and c > 3)` and a table `t` with
// separate indexes on a and b, we could still generate an `IndexMergePath` with table filter `a > 1 or (b < 2 and c > 3)`.
// TODO: solve the above case
if len(path.tableFilters) > 0 || len(path.accessConds) == 0 {
continue
}
results = append(results, path)
}
return results
}

// buildIndexMergePartialPath chooses the best index path from all possible paths.
// For now we just choose the index with the most columns.
// We should improve this strategy, because it is not always better to choose the index
// with the most columns, e.g., if the filter is c > 1 and the input indexes are c and c_d_e,
// the former is enough, and it is less expensive in execution than the latter.
// TODO: improve the strategy of the partial path selection
func (ds *DataSource) buildIndexMergePartialPath(indexAccessPaths []*accessPath) *accessPath {
if len(indexAccessPaths) == 1 {
return indexAccessPaths[0]
}

maxColsIndex := 0
maxCols := len(indexAccessPaths[0].idxCols)
for i := 1; i < len(indexAccessPaths); i++ {
current := len(indexAccessPaths[i].idxCols)
if current > maxCols {
maxColsIndex = i
maxCols = current
}
}
return indexAccessPaths[maxColsIndex]
}

// buildIndexMergeOrPath generates one possible IndexMergePath.
func (ds *DataSource) buildIndexMergeOrPath(partialPaths []*accessPath, current int) *accessPath {
indexMergePath := &accessPath{partialIndexPaths: partialPaths}
indexMergePath.tableFilters = append(indexMergePath.tableFilters, ds.pushedDownConds[:current]...)
indexMergePath.tableFilters = append(indexMergePath.tableFilters, ds.pushedDownConds[current+1:]...)
return indexMergePath
}
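A tiny self-contained illustration of the tableFilters construction above (editor's sketch, with plain strings in place of expression.Expression): every pushed-down condition except the OR condition at position current is kept as a table filter of the merge path, which is what the TbFilters parts of the expected digests in the new test reflect.

package main

import "fmt"

func main() {
	pushedDownConds := []string{"c < 1 or f > 2", "c > 5 or f < 7", "e < 1 or f > 2"}
	current := 0 // the OR condition the partial index paths were built from

	var tableFilters []string
	tableFilters = append(tableFilters, pushedDownConds[:current]...)
	tableFilters = append(tableFilters, pushedDownConds[current+1:]...)

	// Prints: [c > 5 or f < 7 e < 1 or f > 2]
	fmt.Println(tableFilters)
}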

// DeriveStats implement LogicalPlan DeriveStats interface.
func (p *LogicalSelection) DeriveStats(childStats []*property.StatsInfo) (*property.StatsInfo, error) {
p.stats = childStats[0].Scale(selectionFactor)
1 change: 1 addition & 0 deletions session/session.go
@@ -1697,6 +1697,7 @@ var builtinGlobalVariable = []string{
variable.TiDBEnableWindowFunction,
variable.TiDBEnableFastAnalyze,
variable.TiDBExpensiveQueryTimeThreshold,
variable.TiDBEnableIndexMerge,
}

var (