Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: avoid double scan for index prefix col is (not) null #38555

Merged
merged 25 commits into from
Oct 24, 2022
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions executor/set_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,20 @@ func TestSetVar(t *testing.T) {
tk.MustExec("set global tidb_auto_analyze_partition_batch_size = 9999")
tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("1024")) // max value is 1024

// test variable 'tidb_opt_prefix_index_single_scan'
// global scope
tk.MustQuery("select @@global.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("1")) // default value
tk.MustExec("set global tidb_opt_prefix_index_single_scan = 0")
tk.MustQuery("select @@global.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("0"))
tk.MustExec("set global tidb_opt_prefix_index_single_scan = 1")
tk.MustQuery("select @@global.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("1"))
// session scope
tk.MustQuery("select @@session.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("1")) // default value
tk.MustExec("set session tidb_opt_prefix_index_single_scan = 0")
tk.MustQuery("select @@session.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("0"))
tk.MustExec("set session tidb_opt_prefix_index_single_scan = 1")
tk.MustQuery("select @@session.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("1"))

// test tidb_opt_range_max_size
tk.MustQuery("select @@tidb_opt_range_max_size").Check(testkit.Rows("67108864"))
tk.MustExec("set global tidb_opt_range_max_size = -1")
Expand Down
2 changes: 1 addition & 1 deletion planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -1148,7 +1148,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
cop.commonHandleCols = ds.commonHandleCols
}
is.initSchema(append(path.FullIdxCols, ds.commonHandleCols...), cop.tablePlan != nil)
indexConds, tblConds := ds.splitIndexFilterConditions(filterConds, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
indexConds, tblConds := ds.splitIndexFilterConditions(filterConds, path.FullIdxCols, path.FullIdxColLens)
if maxOneRow {
// Theoretically, this line is unnecessary because row count estimation of join should guarantee rowCount is not larger
// than 1.0; however, there may be rowCount larger than 1.0 in reality, e.g, pseudo statistics cases, which does not reflect
Expand Down
93 changes: 73 additions & 20 deletions planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -1118,7 +1118,7 @@ func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, c

func (ds *DataSource) convertToPartialIndexScan(prop *property.PhysicalProperty, path *util.AccessPath) (indexPlan PhysicalPlan) {
is := ds.getOriginalPhysicalIndexScan(prop, path, false, false)
// TODO: Consider using isCoveringIndex() to avoid another TableRead
// TODO: Consider using indexCoveringColumns() to avoid another TableRead
indexConds := path.IndexFilters
if indexConds != nil {
var selectivity float64
Expand Down Expand Up @@ -1281,33 +1281,80 @@ func extractFiltersForIndexMerge(sc *stmtctx.StatementContext, client kv.Client,
return
}

func indexCoveringCol(col *expression.Column, indexCols []*expression.Column, idxColLens []int) bool {
func indexColsCoveringCol(col *expression.Column, indexCols []*expression.Column, idxColLens []int, ignoreLen bool) bool {
xuyifangreeneyes marked this conversation as resolved.
Show resolved Hide resolved
for i, indexCol := range indexCols {
isFullLen := idxColLens[i] == types.UnspecifiedLength || idxColLens[i] == col.RetType.GetFlen()
if indexCol != nil && col.EqualByExprAndID(nil, indexCol) && isFullLen {
if indexCol == nil || !col.EqualByExprAndID(nil, indexCol) {
continue
}
if ignoreLen || idxColLens[i] == types.UnspecifiedLength || idxColLens[i] == col.RetType.GetFlen() {
return true
}
}
return false
}

func (ds *DataSource) isCoveringIndex(columns, indexColumns []*expression.Column, idxColLens []int, tblInfo *model.TableInfo) bool {
// indexCoveringColumn checks whether the given column can be served entirely
// from the index side (plain index columns plus the clustered-index common
// handle columns) without an extra table read. When ignoreLen is true,
// prefix-index columns are accepted as covering even though they store a
// truncated value.
func (ds *DataSource) indexCoveringColumn(column *expression.Column, indexColumns []*expression.Column, idxColLens []int, ignoreLen bool) bool {
	// An integer primary key used as the row handle is always available with the index row.
	if ds.tableInfo.PKIsHandle && mysql.HasPriKeyFlag(column.RetType.GetFlag()) {
		return true
	}
	// The extra handle column (_tidb_rowid) is always available with the index row.
	if column.ID == model.ExtraHandleID {
		return true
	}
	coveredByPlainIndex := indexColsCoveringCol(column, indexColumns, idxColLens, ignoreLen)
	coveredByClusteredIndex := indexColsCoveringCol(column, ds.commonHandleCols, ds.commonHandleLens, ignoreLen)
	if !coveredByPlainIndex && !coveredByClusteredIndex {
		return false
	}
	// NOTE(review): for non-binary string columns under new collation, a
	// version-0 common handle alone is treated as not covering — presumably
	// because the stored key cannot restore the original string value; confirm
	// against the common-handle encoding.
	isClusteredNewCollationIdx := collate.NewCollationEnabled() &&
		column.GetType().EvalType() == types.ETString &&
		!mysql.HasBinaryFlag(column.GetType().GetFlag())
	if !coveredByPlainIndex && coveredByClusteredIndex && isClusteredNewCollationIdx && ds.table.Meta().CommonHandleVersion == 0 {
		return false
	}
	return true
}

func (ds *DataSource) indexCoveringColumns(columns, indexColumns []*expression.Column, idxColLens []int) bool {
xuyifangreeneyes marked this conversation as resolved.
Show resolved Hide resolved
for _, col := range columns {
if tblInfo.PKIsHandle && mysql.HasPriKeyFlag(col.RetType.GetFlag()) {
continue
if !ds.indexCoveringColumn(col, indexColumns, idxColLens, false) {
return false
}
if col.ID == model.ExtraHandleID {
continue
}
return true
}

func (ds *DataSource) indexCoveringCondition(condition expression.Expression, indexColumns []*expression.Column, idxColLens []int) bool {
xuyifangreeneyes marked this conversation as resolved.
Show resolved Hide resolved
xuyifangreeneyes marked this conversation as resolved.
Show resolved Hide resolved
switch v := condition.(type) {
case *expression.Column:
return ds.indexCoveringColumn(v, indexColumns, idxColLens, false)
case *expression.ScalarFunction:
// Even if the index only contains prefix `col`, the index can cover `col is null`.
if v.FuncName.L == ast.IsNull {
if col, ok := v.GetArgs()[0].(*expression.Column); ok {
return ds.indexCoveringColumn(col, indexColumns, idxColLens, true)
}
}
coveredByPlainIndex := indexCoveringCol(col, indexColumns, idxColLens)
coveredByClusteredIndex := indexCoveringCol(col, ds.commonHandleCols, ds.commonHandleLens)
if !coveredByPlainIndex && !coveredByClusteredIndex {
return false
for _, arg := range v.GetArgs() {
if !ds.indexCoveringCondition(arg, indexColumns, idxColLens) {
return false
}
}
isClusteredNewCollationIdx := collate.NewCollationEnabled() &&
col.GetType().EvalType() == types.ETString &&
!mysql.HasBinaryFlag(col.GetType().GetFlag())
if !coveredByPlainIndex && coveredByClusteredIndex && isClusteredNewCollationIdx && ds.table.Meta().CommonHandleVersion == 0 {
return true
}
return true
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe we need to return false here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure about whether to return true or false here. On the one hand, it is safer to return false. On the other hand, the old implementation is ds.isIndexCoveringColumns(expression.ExtractColumns(cond), indexColumns, idxColLens). If the condition is neither Column nor ScalarFunction, no column is extracted and isIndexCoveringColumns returns true.

}

func (ds *DataSource) isSingleScan(indexColumns []*expression.Column, idxColLens []int) bool {
if !ds.ctx.GetSessionVars().OptPrefixIndexSingleScan || ds.colsRequiringFullLen == nil {
// ds.colsRequiringFullLen is set at (*DataSource).PruneColumns. In some cases we don't reach (*DataSource).PruneColumns
xuyifangreeneyes marked this conversation as resolved.
Show resolved Hide resolved
// and ds.colsRequiringFullLen is nil, so we fall back to ds.indexCoveringColumns(ds.schema.Columns, indexColumns, idxColLens).
return ds.indexCoveringColumns(ds.schema.Columns, indexColumns, idxColLens)
}
if !ds.indexCoveringColumns(ds.colsRequiringFullLen, indexColumns, idxColLens) {
return false
}
for _, cond := range ds.allConds {
if !ds.indexCoveringCondition(cond, indexColumns, idxColLens) {
return false
}
}
Expand Down Expand Up @@ -1575,11 +1622,17 @@ func matchIndicesProp(idxCols []*expression.Column, colLens []int, propItems []p
return true
}

func (ds *DataSource) splitIndexFilterConditions(conditions []expression.Expression, indexColumns []*expression.Column, idxColLens []int,
table *model.TableInfo) (indexConds, tableConds []expression.Expression) {
func (ds *DataSource) splitIndexFilterConditions(conditions []expression.Expression, indexColumns []*expression.Column,
idxColLens []int) (indexConds, tableConds []expression.Expression) {
var indexConditions, tableConditions []expression.Expression
for _, cond := range conditions {
if ds.isCoveringIndex(expression.ExtractColumns(cond), indexColumns, idxColLens, table) {
var covered bool
if ds.ctx.GetSessionVars().OptPrefixIndexSingleScan {
covered = ds.indexCoveringCondition(cond, indexColumns, idxColLens)
} else {
covered = ds.indexCoveringColumns(expression.ExtractColumns(cond), indexColumns, idxColLens)
}
if covered {
indexConditions = append(indexConditions, cond)
} else {
tableConditions = append(tableConditions, cond)
Expand Down
39 changes: 39 additions & 0 deletions planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7528,6 +7528,10 @@ func TestCorColRangeWithRangeMaxSize(t *testing.T) {
tk.MustExec("insert into t3 values (2), (4)")
tk.MustExec("insert into mysql.opt_rule_blacklist value(\"decorrelate\")")
tk.MustExec("admin reload opt_rule_blacklist")
defer func() {
tk.MustExec("delete from mysql.opt_rule_blacklist where name = \"decorrelate\"")
tk.MustExec("admin reload opt_rule_blacklist")
}()

// Correlated column in index range.
tk.MustExec("set @@tidb_opt_range_max_size=1000")
Expand Down Expand Up @@ -7693,3 +7697,38 @@ func TestOuterJoinEliminationForIssue18216(t *testing.T) {
tk.MustExec("select group_concat(c order by (select group_concat(c order by a) from t2 where a=t1.a)) from t1; ")
tk.MustQuery("select group_concat(c order by (select group_concat(c order by c) from t2 where a=t1.a), c desc) from t1;").Check(testkit.Rows("2,1,4,3"))
}

// TestNullConditionForPrefixIndex checks that `is null` / `is not null`
// filters on prefix-index columns are planned as a single index scan (no
// extra table read) when tidb_opt_prefix_index_single_scan is on. Expected
// plans and results come from the integration-suite golden data.
func TestNullConditionForPrefixIndex(t *testing.T) {
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")
	// idx2 contains a prefix column c2(5); idx on t2 is a pure prefix index.
	tk.MustExec(`CREATE TABLE t1 (
id char(1) DEFAULT NULL,
c1 varchar(255) DEFAULT NULL,
c2 text DEFAULT NULL,
KEY idx1 (c1),
KEY idx2 (c1,c2(5))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin`)
	tk.MustExec("create table t2(a int, b varchar(10), index idx(b(5)))")
	tk.MustExec("set tidb_opt_prefix_index_single_scan = 1")
	// Rows cover non-null, empty-string, and NULL values for the prefix columns.
	tk.MustExec("insert into t1 values ('a', '0xfff', '111111'), ('b', '0xfff', '222222'), ('c', '0xfff', ''), ('d', '0xfff', null)")
	tk.MustExec("insert into t2 values (1, 'aaaaaa'), (2, 'bbb'), (3, ''), (4, null)")

	var input []string
	var output []struct {
		SQL    string
		Plan   []string
		Result []string
	}
	integrationSuiteData := core.GetIntegrationSuiteData()
	integrationSuiteData.LoadTestCases(t, &input, &output)
	for i, tt := range input {
		// In record mode the golden file is rewritten; otherwise plans and
		// results are checked against the recorded expectations below.
		testdata.OnRecord(func() {
			output[i].SQL = tt
			output[i].Plan = testdata.ConvertRowsToStrings(tk.MustQuery("explain format='brief' " + tt).Rows())
			output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(tt).Sort().Rows())
		})
		tk.MustQuery("explain format='brief' " + tt).Check(testkit.Rows(output[i].Plan...))
		tk.MustQuery(tt).Sort().Check(testkit.Rows(output[i].Result...))
	}
}
8 changes: 6 additions & 2 deletions planner/core/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -1220,6 +1220,10 @@ type DataSource struct {
// contain unique index and the first field is tidb_shard(),
// such as (tidb_shard(a), a ...), the fields are more than 2
containExprPrefixUk bool

// colsRequiringFullLen is the columns that must be fetched with full length.
// It is used to decide whether single scan is enough when reading from an index.
colsRequiringFullLen []*expression.Column
}

// ExtractCorrelatedCols implements LogicalPlan interface.
Expand Down Expand Up @@ -1343,7 +1347,7 @@ func (ds *DataSource) Convert2Gathers() (gathers []LogicalPlan) {
path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.schema.Columns, path.Index)
path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.schema.Columns, path.Index)
// If index columns can cover all of the needed columns, we can use a IndexGather + IndexScan.
if ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) {
if ds.isSingleScan(path.FullIdxCols, path.FullIdxColLens) {
gathers = append(gathers, ds.buildIndexGather(path))
}
// TODO: If index columns can not cover the schema, use IndexLookUpGather.
Expand Down Expand Up @@ -1548,7 +1552,7 @@ func (ds *DataSource) deriveIndexPathStats(path *util.AccessPath, _ []expression
}
}
var indexFilters []expression.Expression
indexFilters, path.TableFilters = ds.splitIndexFilterConditions(path.TableFilters, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
indexFilters, path.TableFilters = ds.splitIndexFilterConditions(path.TableFilters, path.FullIdxCols, path.FullIdxColLens)
path.IndexFilters = append(path.IndexFilters, indexFilters...)
// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
Expand Down
8 changes: 8 additions & 0 deletions planner/core/rule_column_pruning.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,14 @@ func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column, opt *log

originSchemaColumns := ds.schema.Columns
originColumns := ds.Columns

ds.colsRequiringFullLen = make([]*expression.Column, 0, len(used))
for i, col := range ds.schema.Columns {
if used[i] || (ds.containExprPrefixUk && expression.GcColumnExprIsTidbShard(col.VirtualExpr)) {
ds.colsRequiringFullLen = append(ds.colsRequiringFullLen, col)
}
}

for i := len(used) - 1; i >= 0; i-- {
if !used[i] && !exprUsed[i] {
// If ds has a shard index, and the column is generated column by `tidb_shard()`
Expand Down
2 changes: 1 addition & 1 deletion planner/core/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ func (ds *DataSource) derivePathStatsAndTryHeuristics() error {
path.IsSingleScan = true
} else {
ds.deriveIndexPathStats(path, ds.pushedDownConds, false)
path.IsSingleScan = ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
path.IsSingleScan = ds.isSingleScan(path.FullIdxCols, path.FullIdxColLens)
}
// Try some heuristic rules to select access path.
if len(path.Ranges) == 0 {
Expand Down
15 changes: 15 additions & 0 deletions planner/core/testdata/integration_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -1011,5 +1011,20 @@
"set @@tidb_opt_range_max_size = 300",
"explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.e where t1.b > t2.f and t1.b < t2.f + 10"
]
},
{
"name": "TestNullConditionForPrefixIndex",
"cases": [
"select count(1) from t1 where c1 = '0xfff' and c2 is not null",
"select count(1) from t1 where c1 = '0xfff' and c2 is null",
"select count(1) from t1 where c1 >= '0xfff' and c2 is not null",
"select count(1) from t1 where c1 >= '0xfff' and c2 is null",
"select c2 from t1 use index(idx2) where c1 = '0xfff' and c2 is not null",
"select c2 from t1 use index(idx2) where c1 = '0xfff' and c2 is null",
"select c2 from t1 use index(idx2) where c1 >= '0xfff' and c2 is not null",
"select c2 from t1 use index(idx2) where c1 >= '0xfff' and c2 is null",
"select b from t2 where b is not null",
"select b from t2 where b is null"
]
}
]
Loading