planner: Use/force to apply prefer range scan (#56928) #58444

Merged
2 changes: 2 additions & 0 deletions pkg/executor/test/issuetest/executor_issue_test.go
@@ -321,12 +321,14 @@ func TestIndexJoin31494(t *testing.T) {
insertStr += fmt.Sprintf(", (%d, %d)", i, i)
}
tk.MustExec(insertStr)
tk.MustExec("analyze table t1")
tk.MustExec("create table t2(a int(11) default null, b int(11) default null, c int(11) default null)")
insertStr = "insert into t2 values(1, 1, 1)"
for i := 1; i < 32768; i++ {
insertStr += fmt.Sprintf(", (%d, %d, %d)", i, i, i)
}
tk.MustExec(insertStr)
tk.MustExec("analyze table t2")
sm := &testkit.MockSessionManager{
PS: make([]*util.ProcessInfo, 0),
}
1 change: 1 addition & 0 deletions pkg/planner/core/BUILD.bazel
@@ -324,6 +324,7 @@ go_test(
"//pkg/util/ranger",
"//pkg/util/stmtsummary",
"//pkg/util/tracing",
"//tests/realtikvtest",
"@com_github_docker_go_units//:go-units",
"@com_github_golang_snappy//:snappy",
"@com_github_pingcap_errors//:errors",
6 changes: 3 additions & 3 deletions pkg/planner/core/casetest/dag/testdata/plan_suite_out.json
@@ -48,7 +48,7 @@
},
{
"SQL": "select c from t order by t.a limit 1",
"Best": "IndexReader(Index(t.c_d_e)[[NULL,+inf]]->TopN([test.t.a],0,1))->TopN([test.t.a],0,1)->Projection"
"Best": "TableReader(Table(t)->Limit)->Limit->Projection"
},
{
"SQL": "select c from t order by t.a + t.b limit 1",
@@ -165,7 +165,7 @@
},
{
"SQL": "select * from t t1 join t t2 on t1.b = t2.a order by t1.a",
"Best": "LeftHashJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.b,test.t.a)->Sort"
"Best": "IndexJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.b,test.t.a)"
},
{
"SQL": "select * from t t1 join t t2 on t1.b = t2.a order by t1.a limit 1",
@@ -508,7 +508,7 @@
},
{
"SQL": "select a from t union all (select c from t) order by a limit 1",
"Best": "UnionAll{IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a],0,1))->TopN([test.t.a],0,1)->IndexReader(Index(t.c_d_e)[[NULL,+inf]]->Limit)->Limit}->TopN([Column#25],0,1)"
"Best": "UnionAll{TableReader(Table(t)->Limit)->Limit->IndexReader(Index(t.c_d_e)[[NULL,+inf]]->Limit)->Limit}->TopN([Column#25],0,1)"
}
]
},
@@ -2257,11 +2257,11 @@
"Cases": [
{
"SQL": "select max(a) from t;",
"Best": "IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a true],0,1))->TopN([test.t.a true],0,1)->StreamAgg"
"Best": "TableReader(Table(t)->Limit)->Limit->StreamAgg"
},
{
"SQL": "select min(a) from t;",
"Best": "IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a],0,1))->TopN([test.t.a],0,1)->StreamAgg"
"Best": "TableReader(Table(t)->Limit)->Limit->StreamAgg"
},
{
"SQL": "select min(c_str) from t;",
@@ -2277,7 +2277,7 @@
},
{
"SQL": "select max(a), min(a) from t;",
"Best": "LeftHashJoin{IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a true],0,1))->TopN([test.t.a true],0,1)->StreamAgg->IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a],0,1))->TopN([test.t.a],0,1)->StreamAgg}"
"Best": "LeftHashJoin{TableReader(Table(t)->Limit)->Limit->StreamAgg->TableReader(Table(t)->Limit)->Limit->StreamAgg}"
},
{
"SQL": "select max(a), min(a) from t where a > 10",
@@ -2289,7 +2289,7 @@
},
{
"SQL": "select max(a), max(c), min(f) from t",
"Best": "LeftHashJoin{LeftHashJoin{IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a true],0,1))->TopN([test.t.a true],0,1)->StreamAgg->IndexReader(Index(t.c_d_e)[[NULL,+inf]]->Limit)->Limit->StreamAgg}->IndexReader(Index(t.f)[[NULL,+inf]]->Limit)->Limit->StreamAgg}"
"Best": "LeftHashJoin{LeftHashJoin{TableReader(Table(t)->Limit)->Limit->StreamAgg->IndexReader(Index(t.c_d_e)[[NULL,+inf]]->Limit)->Limit->StreamAgg}->IndexReader(Index(t.f)[[NULL,+inf]]->Limit)->Limit->StreamAgg}"
},
{
"SQL": "select max(a), max(b) from t",
@@ -125,13 +125,14 @@
"Query": "explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c",
"Result": [
"Projection 1.00 root test.t.a, test.t.b, test.t.c, test.tp.a, test.tp.b, test.tp.c",
"└─HashJoin 1.00 root inner join, equal:[eq(test.tp.c, test.t.b)]",
"└─IndexJoin 1.00 root inner join, inner:IndexLookUp, outer key:test.tp.c, inner key:test.t.b, equal cond:eq(test.tp.c, test.t.b)",
" ├─TableReader(Build) 1.00 root partition:p1 data:Selection",
" │ └─Selection 1.00 cop[tikv] eq(test.tp.a, 10), not(isnull(test.tp.c))",
" │ └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]",
" └─TableReader(Probe) 3.00 root data:Selection",
" └─Selection 3.00 cop[tikv] not(isnull(test.t.b))",
" └─TableFullScan 3.00 cop[tikv] table:t keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]"
" └─IndexLookUp(Probe) 1.00 root ",
" ├─Selection(Build) 1.00 cop[tikv] not(isnull(test.t.b))",
" │ └─IndexRangeScan 1.00 cop[tikv] table:t, index:idx(b) range: decided by [eq(test.t.b, test.tp.c)], keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]",
" └─TableRowIDScan(Probe) 1.00 cop[tikv] table:t keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]"
]
},
{
@@ -428,8 +428,8 @@
"Plan": [
" TableReader root ",
" └─ExchangeSender cop[tiflash] ",
" └─Selection cop[tiflash] gt(test.t1.a, ?)",
" └─TableFullScan cop[tiflash] table:t1, range:[?,?], pushed down filter:gt(test.t1.b, ?), gt(test.t1.c, ?), keep order:false"
" └─Selection cop[tiflash] gt(test.t1.c, ?)",
" └─TableFullScan cop[tiflash] table:t1, range:[?,?], pushed down filter:gt(test.t1.a, ?), gt(test.t1.b, ?), keep order:false"
]
},
{
@@ -463,8 +463,8 @@
"Plan": [
" TableReader root ",
" └─ExchangeSender cop[tiflash] ",
" └─Selection cop[tiflash] gt(test.t1.b, ?), gt(test.t1.c, ?), or(gt(test.t1.a, ?), lt(test.t1.b, ?))",
" └─TableFullScan cop[tiflash] table:t1, range:[?,?], pushed down filter:gt(test.t1.a, ?), keep order:false"
" └─Selection cop[tiflash] gt(test.t1.a, ?), gt(test.t1.c, ?), or(gt(test.t1.a, ?), lt(test.t1.b, ?))",
" └─TableFullScan cop[tiflash] table:t1, range:[?,?], pushed down filter:gt(test.t1.b, ?), keep order:false"
]
},
{
9 changes: 5 additions & 4 deletions pkg/planner/core/find_best_task.go
@@ -1172,13 +1172,15 @@ func skylinePruning(ds *logicalop.DataSource, prop *property.PhysicalProperty) [
}
}

preferRange := ds.SCtx().GetSessionVars().GetAllowPreferRangeScan() && (ds.TableStats.HistColl.Pseudo || ds.TableStats.RowCount < 1)
// If we've forced an index merge - we want to keep these plans
preferMerge := len(ds.IndexMergeHints) > 0 || fixcontrol.GetBoolWithDefault(
ds.SCtx().GetSessionVars().GetOptimizerFixControlMap(),
fixcontrol.Fix52869,
false,
)
// tidb_opt_prefer_range_scan is the master switch to control index preference
preferRange := ds.SCtx().GetSessionVars().GetAllowPreferRangeScan() &&
(preferMerge || (ds.TableStats.HistColl.Pseudo || ds.TableStats.RowCount < 1))
if preferRange && len(candidates) > 1 {
// If a candidate path is TiFlash-path or forced-path or MV index, we just keep them. For other candidate paths, if there exists
// any range scan path, we remove full scan paths and keep range scan paths.
Expand All @@ -1197,9 +1199,8 @@ func skylinePruning(ds *logicalop.DataSource, prop *property.PhysicalProperty) [
}
if !ranger.HasFullRange(c.path.Ranges, unsignedIntHandle) {
// Preference plans with equals/IN predicates or where there is more filtering in the index than against the table
equalPlan := c.path.EqCondCount > 0 || c.path.EqOrInCondCount > 0
indexFilters := len(c.path.TableFilters) < len(c.path.IndexFilters)
if preferMerge || (((equalPlan || indexFilters) && prop.IsSortItemEmpty()) || c.isMatchProp) {
indexFilters := c.path.EqCondCount > 0 || c.path.EqOrInCondCount > 0 || len(c.path.TableFilters) < len(c.path.IndexFilters)
if preferMerge || (indexFilters && (prop.IsSortItemEmpty() || c.isMatchProp)) {
preferredPaths = append(preferredPaths, c)
hasRangeScanPath = true
}
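The candidate filter in the hunk above boils down to a single predicate. Below is a minimal, self-contained sketch of that decision, not code from the PR: keepRangeCandidate and the pathInfo struct are hypothetical stand-ins for the planner's candidate-path fields, written out only to make the pruning rule easier to read.

// pathInfo is a simplified stand-in for the fields of a candidate access path
// that the pruning rule above inspects.
type pathInfo struct {
	eqCondCount     int  // number of equal conditions on the index
	eqOrInCondCount int  // number of equal/IN conditions on the index
	tableFilters    int  // len(path.TableFilters)
	indexFilters    int  // len(path.IndexFilters)
	isMatchProp     bool // path already provides the required order
}

// keepRangeCandidate paraphrases the pruning rule: keep a range-scan candidate when an
// index merge is forced, or when the index does some filtering (equal/IN conditions, or
// more filters pushed into the index than left on the table) and either no ordering is
// required or the path already matches the required order.
func keepRangeCandidate(preferMerge, sortItemEmpty bool, p pathInfo) bool {
	indexFilters := p.eqCondCount > 0 || p.eqOrInCondCount > 0 || p.tableFilters < p.indexFilters
	return preferMerge || (indexFilters && (sortItemEmpty || p.isMatchProp))
}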
2 changes: 1 addition & 1 deletion pkg/planner/core/logical_plans_test.go
@@ -2020,7 +2020,7 @@ func TestSkylinePruning(t *testing.T) {
},
{
sql: "select * from pt2_global_index where b > 1 order by b",
result: "b_global,b_c_global",
result: "PRIMARY_KEY,b_global,b_c_global",
},
{
sql: "select b from pt2_global_index where b > 1 order by b",
85 changes: 61 additions & 24 deletions pkg/planner/core/plan_cost_ver2.go
@@ -163,30 +163,8 @@ func (p *PhysicalTableScan) GetPlanCostVer2(taskType property.TaskType, option *
if p.StoreType == kv.TiFlash {
p.PlanCostVer2 = costusage.SumCostVer2(p.PlanCostVer2, scanCostVer2(option, TiFlashStartupRowPenalty, rowSize, scanFactor))
} else if !p.isChildOfIndexLookUp {
// Apply cost penalty for full scans that carry high risk of underestimation
sessionVars := p.SCtx().GetSessionVars()
allowPreferRangeScan := sessionVars.GetAllowPreferRangeScan()
tblColHists := p.tblColHists

// hasUnreliableStats is a check for pseudo or zero stats
hasUnreliableStats := tblColHists.Pseudo || tblColHists.RealtimeCount < 1
// hasHighModifyCount tracks the high risk of a tablescan where auto-analyze had not yet updated the table row count
hasHighModifyCount := tblColHists.ModifyCount > tblColHists.RealtimeCount
// hasLowEstimate is a check to capture a unique customer case where modifyCount is used for tablescan estimate (but it not adequately understood why)
hasLowEstimate := rows > 1 && tblColHists.ModifyCount < tblColHists.RealtimeCount && int64(rows) <= tblColHists.ModifyCount
// preferRangeScan check here is same as in skylinePruning
preferRangeScanCondition := allowPreferRangeScan && (hasUnreliableStats || hasHighModifyCount || hasLowEstimate)
var unsignedIntHandle bool
if p.Table.PKIsHandle {
if pkColInfo := p.Table.GetPkColInfo(); pkColInfo != nil {
unsignedIntHandle = mysql.HasUnsignedFlag(pkColInfo.GetFlag())
}
}
hasFullRangeScan := ranger.HasFullRange(p.Ranges, unsignedIntHandle)

shouldApplyPenalty := hasFullRangeScan && preferRangeScanCondition
if shouldApplyPenalty {
newRowCount := max(MaxPenaltyRowCount, max(float64(tblColHists.ModifyCount), float64(tblColHists.RealtimeCount)))
newRowCount := getTableScanPenalty(p, rows)
if newRowCount > 0 {
p.PlanCostVer2 = costusage.SumCostVer2(p.PlanCostVer2, scanCostVer2(option, newRowCount, rowSize, scanFactor))
}
}
@@ -935,6 +913,65 @@ func doubleReadCostVer2(option *optimizetrace.PlanCostOption, numTasks float64,
func() string { return fmt.Sprintf("doubleRead(tasks(%v)*%v)", numTasks, requestFactor) })
}

func getTableScanPenalty(p *PhysicalTableScan, rows float64) (rowPenalty float64) {
// Apply cost penalty for full scans that carry a high risk of underestimation. Exclude scans
// that are the child of an index scan or that are a TableRangeScan.
if len(p.rangeInfo) > 0 {
return float64(0)
}
var unsignedIntHandle bool
if p.Table.PKIsHandle {
if pkColInfo := p.Table.GetPkColInfo(); pkColInfo != nil {
unsignedIntHandle = mysql.HasUnsignedFlag(pkColInfo.GetFlag())
}
}
hasFullRangeScan := ranger.HasFullRange(p.Ranges, unsignedIntHandle)
if !hasFullRangeScan {
return float64(0)
}

sessionVars := p.SCtx().GetSessionVars()
allowPreferRangeScan := sessionVars.GetAllowPreferRangeScan()
tblColHists := p.tblColHists
originalRows := int64(tblColHists.GetAnalyzeRowCount())

// hasUnreliableStats is a check for pseudo or zero stats
hasUnreliableStats := tblColHists.Pseudo || originalRows < 1
// hasHighModifyCount tracks the high risk of a table scan where auto-analyze has not yet updated the table row count
hasHighModifyCount := tblColHists.ModifyCount > originalRows
// hasLowEstimate is a check to capture a unique customer case where modifyCount is used for the table scan estimate (but it is not adequately understood why)
hasLowEstimate := rows > 1 && tblColHists.ModifyCount < originalRows && int64(rows) <= tblColHists.ModifyCount
// the preferRangeScan check here is the same as in skylinePruning
preferRangeScanCondition := allowPreferRangeScan && (hasUnreliableStats || hasHighModifyCount || hasLowEstimate)

// differentiate a FullTableScan from a partition level scan - so we shouldn't penalize these
hasPartitionScan := false
if p.PlanPartInfo != nil {
if len(p.PlanPartInfo.PruningConds) > 0 {
hasPartitionScan = true
}
}

// GetIndexForce assumes that the USE/FORCE index is to force a range scan, and thus the
// penalty is applied to a full table scan (not range scan). This may also penalize a
// full table scan where USE/FORCE was applied to the primary key.
hasIndexForce := sessionVars.StmtCtx.GetIndexForce()
shouldApplyPenalty := hasFullRangeScan && (hasIndexForce || preferRangeScanCondition)
if shouldApplyPenalty {
// MySQL will increase the cost of table scan if FORCE index is used. TiDB takes this one
// step further - because we don't differentiate USE/FORCE - the added penalty applies to
// both, and it also applies to any full table scan in the query. Use "max" to get the minimum
// number of rows to add as a penalty to the table scan.
minRows := max(MaxPenaltyRowCount, rows)
if hasPartitionScan {
return minRows
}
// If it isn't a partitioned table - choose the max that includes ModifyCount
return max(minRows, float64(tblColHists.ModifyCount))
}
return float64(0)
}

// In Cost Ver2, we hide cost factors from users and deprecate SQL variables like `tidb_opt_scan_factor`.
type costVer2Factors struct {
TiDBTemp costusage.CostVer2Factor // operations on TiDB temporary table
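The penalty that getTableScanPenalty returns is just a chain of max calls over the estimated rows, a fixed floor, and ModifyCount. A minimal sketch of that arithmetic with made-up numbers follows; penaltyRows is a hypothetical helper, and 1000 is only an assumed value for the package's MaxPenaltyRowCount constant.

// maxPenaltyRowCount stands in for the package constant MaxPenaltyRowCount; 1000 is an assumed value.
const maxPenaltyRowCount = 1000.0

// penaltyRows mirrors the tail of getTableScanPenalty above: a penalized partition-level scan
// gets max(maxPenaltyRowCount, rows), while a non-partitioned full scan additionally considers ModifyCount.
func penaltyRows(rows, modifyCount float64, hasPartitionScan bool) float64 {
	minRows := max(maxPenaltyRowCount, rows)
	if hasPartitionScan {
		return minRows
	}
	return max(minRows, modifyCount)
}

For example, penaltyRows(200, 50000, false) returns 50000 because ModifyCount dominates, while penaltyRows(200, 50000, true) returns 1000, so a pruned partition scan is penalized far less than a full scan of a heavily modified table.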
46 changes: 43 additions & 3 deletions pkg/planner/core/plan_cost_ver2_test.go
@@ -33,6 +33,7 @@ import (
"github.com/pingcap/tidb/pkg/planner/util/optimizetrace"
"github.com/pingcap/tidb/pkg/sessiontxn"
"github.com/pingcap/tidb/pkg/testkit"
"github.com/pingcap/tidb/tests/realtikvtest"
"github.com/stretchr/testify/require"
)

@@ -59,9 +60,9 @@ func TestCostModelVer2ScanRowSize(t *testing.T) {
{"select a, b from t use index(abc) where a=1 and b=1", "scan(1*logrowsize(48)*tikv_scan_factor(40.7))"},
{"select a, b, c from t use index(abc) where a=1 and b=1 and c=1", "scan(1*logrowsize(48)*tikv_scan_factor(40.7))"},
// table scan row-size is always equal to row-size(*)
{"select a from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(10000*logrowsize(80)*tikv_scan_factor(40.7)))"},
{"select a, d from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(10000*logrowsize(80)*tikv_scan_factor(40.7)))"},
{"select * from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(10000*logrowsize(80)*tikv_scan_factor(40.7)))"},
{"select a from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(80)*tikv_scan_factor(40.7)))"},
{"select a, d from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(80)*tikv_scan_factor(40.7)))"},
{"select * from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(80)*tikv_scan_factor(40.7)))"},
}
for _, c := range cases {
rs := tk.MustQuery("explain analyze format=true_card_cost " + c.query).Rows()
@@ -162,3 +163,42 @@ func BenchmarkGetPlanCost(b *testing.B) {
_, _ = core.GetPlanCost(phyPlan, property.RootTaskType, optimizetrace.NewDefaultPlanCostOption().WithCostFlag(costusage.CostFlagRecalculate))
}
}

func TestTableScanCostWithForce(t *testing.T) {
store, dom := realtikvtest.CreateMockStoreAndDomainAndSetup(t)
defer func() {
tk := testkit.NewTestKit(t, store)
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
dom.StatsHandle().Clear()
}()

tk := testkit.NewTestKit(t, store)
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("create table t(a int, b int, primary key (a))")

// Insert some data
tk.MustExec("insert into t values (1,1),(2,2),(3,3),(4,4),(5,5)")

// Analyze table to update statistics
tk.MustExec("analyze table t")

// Test TableFullScan with and without FORCE INDEX
rs := tk.MustQuery("explain analyze format=verbose select * from t").Rows()
planCost1 := rs[0][2].(string)
rs = tk.MustQuery("explain analyze format=verbose select * from t force index(PRIMARY)").Rows()
planCost2 := rs[0][2].(string)

// Query with FORCE should be more expensive than query without
require.Less(t, planCost1, planCost2)

// Test TableRangeScan with and without FORCE INDEX
rs = tk.MustQuery("explain analyze format=verbose select * from t where a > 1").Rows()
planCost1 = rs[0][2].(string)
rs = tk.MustQuery("explain analyze format=verbose select * from t force index(PRIMARY) where a > 1").Rows()
planCost2 = rs[0][2].(string)

// Query costs should be equal since FORCE cost penalty does not apply to range scan
require.Equal(t, planCost1, planCost2)
}
27 changes: 17 additions & 10 deletions pkg/planner/core/stats.go
@@ -160,7 +160,11 @@ func deriveStats4DataSource(lp base.LogicalPlan, colGroups [][]*expression.Colum
if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
debugTraceAccessPaths(ds.SCtx(), ds.PossibleAccessPaths)
}
ds.AccessPathMinSelectivity = getMinSelectivityFromPaths(ds.PossibleAccessPaths, float64(ds.TblColHists.RealtimeCount))
indexForce := false
ds.AccessPathMinSelectivity, indexForce = getGeneralAttributesFromPaths(ds.PossibleAccessPaths, float64(ds.TblColHists.RealtimeCount))
if indexForce {
ds.SCtx().GetSessionVars().StmtCtx.SetIndexForce()
}

return ds.StatsInfo(), nil
}
@@ -404,21 +408,24 @@ func detachCondAndBuildRangeForPath(
return err
}

func getMinSelectivityFromPaths(paths []*util.AccessPath, totalRowCount float64) float64 {
func getGeneralAttributesFromPaths(paths []*util.AccessPath, totalRowCount float64) (float64, bool) {
minSelectivity := 1.0
if totalRowCount <= 0 {
return minSelectivity
}
indexForce := false
for _, path := range paths {
// For table path and index merge path, AccessPath.CountAfterIndex is not set and meaningless,
// but we still consider their AccessPath.CountAfterAccess.
if path.IsTablePath() || path.PartialIndexPaths != nil {
minSelectivity = min(minSelectivity, path.CountAfterAccess/totalRowCount)
continue
if totalRowCount > 0 {
if path.IsTablePath() || path.PartialIndexPaths != nil {
minSelectivity = min(minSelectivity, path.CountAfterAccess/totalRowCount)
} else {
minSelectivity = min(minSelectivity, path.CountAfterIndex/totalRowCount)
}
}
if !indexForce && path.Forced {
indexForce = true
}
minSelectivity = min(minSelectivity, path.CountAfterIndex/totalRowCount)
}
return minSelectivity
return minSelectivity, indexForce
}

func getGroupNDVs(ds *logicalop.DataSource, colGroups [][]*expression.Column) []property.GroupNDV {