Skip to content

Commit

Permalink
planner: increase tablescan cost for high risk scenarios (#56203)
Browse files Browse the repository at this point in the history
close #56012
  • Loading branch information
terry1purcell authored Sep 26, 2024
1 parent 448d569 commit a3a4511
Show file tree
Hide file tree
Showing 20 changed files with 159 additions and 120 deletions.
2 changes: 1 addition & 1 deletion pkg/executor/test/fktest/foreign_key_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1680,7 +1680,7 @@ func TestForeignKeyOnUpdateCascade(t *testing.T) {
tk.MustExec("update t1 set a=101 where id = 1")
tk.MustExec("insert into t2 (id, a, b, name) values (1, 1, 1, 'a')")
tk.MustExec("update t1 set b=102 where id = 2")
tk.MustQuery("select * from t1").Check(testkit.Rows("1 101 1", "2 1 102"))
tk.MustQuery("select * from t1").Sort().Check(testkit.Rows("1 101 1", "2 1 102"))
tk.MustQuery("select id, a, b, name from t2").Check(testkit.Rows("1 1 102 a"))
err := tk.ExecToErr("insert into t2 (id, a, b, name) values (3, 1, 1, 'e')")
require.Error(t, err)
Expand Down
2 changes: 1 addition & 1 deletion pkg/executor/union_scan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ func TestIssue28073(t *testing.T) {

tk.MustExec("begin")
tk.MustExec("insert into t2 (c_int, c_str) values (2, 'romantic grothendieck')")
tk.MustQuery("select * from t2 left join t1 on t1.c_int = t2.c_int for update").Sort().Check(
tk.MustQuery("select * from t2 use index(primary) left join t1 use index(primary) on t1.c_int = t2.c_int for update").Sort().Check(
testkit.Rows(
"1 flamboyant mcclintock 1 flamboyant mcclintock",
"2 romantic grothendieck <nil> <nil>",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,15 @@
"children": [
{
"name": "TableFullScan_4",
"cost": 2273079.737793511,
"cost": 2500387.7115728618,
"est_rows": 10000,
"act_rows": 2,
"task_type": 2,
"store_type": 2,
"operator_info": "keep order:false, stats:pseudo"
}
],
"cost": 236018.64918623405,
"cost": 251172.51410485746,
"est_rows": 10000,
"act_rows": 2,
"task_type": 1,
Expand Down Expand Up @@ -104,7 +104,7 @@
"children": [
{
"name": "TableFullScan_16",
"cost": 2273079.737793511,
"cost": 2500387.7115728618,
"est_rows": 10000,
"act_rows": 4,
"task_type": 2,
Expand All @@ -115,31 +115,31 @@
"labels": [
1
],
"cost": 193778.64918623405,
"cost": 208932.51410485746,
"est_rows": 10000,
"act_rows": 4,
"task_type": 1,
"store_type": 1,
"operator_info": "data:TableFullScan_16"
}
],
"cost": 976848.9825195674,
"cost": 992002.8474381908,
"est_rows": 100000000,
"act_rows": 8,
"task_type": 1,
"store_type": 1,
"operator_info": "CARTESIAN inner join"
}
],
"cost": 998976848.9825196,
"cost": 998992002.8474382,
"est_rows": 100000000,
"act_rows": 8,
"task_type": 1,
"store_type": 1,
"operator_info": "cast(test.t.a, decimal(10,0) BINARY)->Column#8"
}
],
"cost": 1996978357.5625196,
"cost": 1996993511.427438,
"est_rows": 1,
"act_rows": 1,
"task_type": 1,
Expand Down Expand Up @@ -184,22 +184,22 @@
"children": [
{
"name": "TableFullScan_5",
"cost": 2273079.737793511,
"cost": 2500387.7115728618,
"est_rows": 10000,
"act_rows": 2,
"task_type": 2,
"store_type": 2,
"operator_info": "keep order:false, stats:pseudo"
}
],
"cost": 2772079.737793511,
"cost": 2999387.7115728618,
"est_rows": 3333.3333333333335,
"task_type": 2,
"store_type": 2,
"operator_info": "gt(test.t.a, 100)"
}
],
"cost": 198885.3158529007,
"cost": 214039.18077152412,
"est_rows": 3333.3333333333335,
"task_type": 1,
"store_type": 1,
Expand All @@ -222,15 +222,15 @@
"children": [
{
"name": "TableFullScan_35",
"cost": 2273079.737793511,
"cost": 2500387.7115728618,
"est_rows": 10000,
"act_rows": 2,
"task_type": 2,
"store_type": 2,
"operator_info": "keep order:false, stats:pseudo"
}
],
"cost": 2772079.737793511,
"cost": 2999387.7115728618,
"est_rows": 9990,
"act_rows": 2,
"task_type": 2,
Expand All @@ -241,7 +241,7 @@
"labels": [
2
],
"cost": 269200.8358529007,
"cost": 284354.70077152416,
"est_rows": 9990,
"act_rows": 2,
"task_type": 1,
Expand All @@ -256,15 +256,15 @@
"children": [
{
"name": "TableFullScan_32",
"cost": 2273079.737793511,
"cost": 2500387.7115728618,
"est_rows": 10000,
"act_rows": 4,
"task_type": 2,
"store_type": 2,
"operator_info": "keep order:false, stats:pseudo"
}
],
"cost": 2772079.737793511,
"cost": 2999387.7115728618,
"est_rows": 9990,
"act_rows": 4,
"task_type": 2,
Expand All @@ -275,15 +275,15 @@
"labels": [
1
],
"cost": 269200.8358529007,
"cost": 284354.70077152416,
"est_rows": 9990,
"act_rows": 4,
"task_type": 1,
"store_type": 1,
"operator_info": "data:Selection_33"
}
],
"cost": 1800237.0717058014,
"cost": 1830544.8015430481,
"est_rows": 12487.5,
"task_type": 1,
"store_type": 1,
Expand Down Expand Up @@ -389,22 +389,22 @@
"children": [
{
"name": "TableFullScan_6",
"cost": 2273079.737793511,
"cost": 2500387.7115728618,
"est_rows": 10000,
"act_rows": 4,
"task_type": 2,
"store_type": 2,
"operator_info": "keep order:false, stats:pseudo"
}
],
"cost": 2772079.737793511,
"cost": 2999387.7115728618,
"est_rows": 3333.3333333333335,
"task_type": 2,
"store_type": 2,
"operator_info": "gt(test.t2.b, 10)"
}
],
"cost": 227045.3158529007,
"cost": 242199.18077152412,
"est_rows": 3333.3333333333335,
"task_type": 1,
"store_type": 1,
Expand Down Expand Up @@ -441,15 +441,15 @@
"children": [
{
"name": "TableFullScan_6",
"cost": 2273079.737793511,
"cost": 2500387.7115728618,
"est_rows": 10000,
"act_rows": 2,
"task_type": 2,
"store_type": 2,
"operator_info": "keep order:false, stats:pseudo"
}
],
"cost": 236018.64918623405,
"cost": 251172.51410485746,
"est_rows": 10000,
"act_rows": 2,
"task_type": 1,
Expand All @@ -475,15 +475,15 @@
"children": [
{
"name": "TableFullScan_5",
"cost": 2273079.737793511,
"cost": 2500387.7115728618,
"est_rows": 10000,
"act_rows": 8,
"task_type": 2,
"store_type": 2,
"operator_info": "keep order:false, stats:pseudo"
}
],
"cost": 193778.64918623405,
"cost": 208932.51410485746,
"est_rows": 10000,
"act_rows": 8,
"task_type": 1,
Expand Down
4 changes: 2 additions & 2 deletions pkg/planner/core/casetest/dag/testdata/plan_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@
},
{
"SQL": "select * from t t1 join t t2 on t1.b = t2.a order by t1.a",
"Best": "IndexJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.b,test.t.a)"
"Best": "LeftHashJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.b,test.t.a)->Sort"
},
{
"SQL": "select * from t t1 join t t2 on t1.b = t2.a order by t1.a limit 1",
Expand Down Expand Up @@ -366,7 +366,7 @@
},
{
"SQL": "select t.c in (select count(*) from t s, t t1 where s.a = t.a and s.a = t1.a) from t",
"Best": "Apply{IndexReader(Index(t.c_d_e)[[NULL,+inf]])->MergeInnerJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.a)->StreamAgg}->Projection"
"Best": "Apply{IndexReader(Index(t.c_d_e)[[NULL,+inf]])->IndexJoin{TableReader(Table(t))->TableReader(Table(t)->Sel([eq(test.t.a, test.t.a)]))}(test.t.a,test.t.a)->StreamAgg}->Projection"
},
{
"SQL": "select (select count(*) from t s, t t1 where s.a = t.a and s.a = t1.a) from t",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ func TestParallelApplyWarnning(t *testing.T) {
tk.MustExec("create table t2 (a int, b int, c int, key(a));")
tk.MustExec("create table t3(a int, b int, c int, key(a));")
tk.MustExec("set tidb_enable_parallel_apply=on;")
tk.MustQuery("select (select 1 from t2, t3 where t2.a=t3.a and t2.b > t1.b) from t1;")
tk.MustQuery("select (select /*+ inl_hash_join(t2, t3) */ 1 from t2, t3 where t2.a=t3.a and t2.b > t1.b) from t1;")
tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Some apply operators can not be executed in parallel: *core.PhysicalIndexHashJoin doesn't support cloning"))
}
Original file line number Diff line number Diff line change
Expand Up @@ -2273,7 +2273,7 @@
},
{
"SQL": "select max(a), min(a) from t;",
"Best": "RightHashJoin{TableReader(Table(t)->Limit)->Limit->StreamAgg->TableReader(Table(t)->Limit)->Limit->StreamAgg}"
"Best": "LeftHashJoin{TableReader(Table(t)->Limit)->Limit->StreamAgg->TableReader(Table(t)->Limit)->Limit->StreamAgg}"
},
{
"SQL": "select max(a), min(a) from t where a > 10",
Expand All @@ -2285,7 +2285,7 @@
},
{
"SQL": "select max(a), max(c), min(f) from t",
"Best": "LeftHashJoin{RightHashJoin{TableReader(Table(t)->Limit)->Limit->StreamAgg->IndexReader(Index(t.c_d_e)[[NULL,+inf]]->Limit)->Limit->StreamAgg}->IndexReader(Index(t.f)[[NULL,+inf]]->Limit)->Limit->StreamAgg}"
"Best": "LeftHashJoin{LeftHashJoin{TableReader(Table(t)->Limit)->Limit->StreamAgg->IndexReader(Index(t.c_d_e)[[NULL,+inf]]->Limit)->Limit->StreamAgg}->IndexReader(Index(t.f)[[NULL,+inf]]->Limit)->Limit->StreamAgg}"
},
{
"SQL": "select max(a), max(b) from t",
Expand All @@ -2305,7 +2305,7 @@
},
{
"SQL": "select max(a) from (select t1.a from t t1 join t t2 on t1.a=t2.a) t",
"Best": "MergeInnerJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.a)->Limit->StreamAgg"
"Best": "IndexJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.a)->Limit->StreamAgg"
}
]
},
Expand All @@ -2322,7 +2322,7 @@
},
{
"SQL": "SELECT COUNT(t) FROM (SELECT a, (SELECT t2.b FROM t t2, t t3 WHERE t2.a = t3.a AND t1.a = t2.a LIMIT 1) t FROM t t1) t",
"Best": "Apply{IndexReader(Index(t.f)[[NULL,+inf]])->MergeInnerJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.a)->Limit}->HashAgg"
"Best": "Apply{IndexReader(Index(t.f)[[NULL,+inf]])->IndexJoin{TableReader(Table(t)->Sel([eq(test.t.a, test.t.a)]))->TableReader(Table(t))}(test.t.a,test.t.a)->Limit}->HashAgg"
},
{
"SQL": "SELECT COUNT(a) FROM t t1 WHERE t1.a IN (SELECT t2.a FROM t t2, t t3 WHERE t2.b = t3.b)",
Expand Down Expand Up @@ -2402,9 +2402,9 @@
},
{
"SQL": "select /*+ TIDB_INLJ(t2) */ t1.b, t2.a from t2 t1, t2 t2 where t1.b=t2.b and t2.c=-1;",
"Best": "IndexJoin{TableReader(Table(t2)->Sel([eq(test.t2.c, -1)]))->IndexReader(Index(t2.b)[[NULL,NULL]])}(test.t2.b,test.t2.b)->Projection",
"Best": "IndexJoin{IndexReader(Index(t2.b_c)[[NULL,+inf]]->Sel([eq(test.t2.c, -1)]))->IndexReader(Index(t2.b)[[NULL,NULL]])}(test.t2.b,test.t2.b)->Projection",
"Warning": "[planner:1815]Optimizer Hint /*+ INL_JOIN(t2) */ or /*+ TIDB_INLJ(t2) */ is inapplicable",
"Hints": "inl_join(`test`.`t1`), use_index(@`sel_1` `test`.`t2` ), no_order_index(@`sel_1` `test`.`t2` `primary`), use_index(@`sel_1` `test`.`t1` `b`), no_order_index(@`sel_1` `test`.`t1` `b`)"
"Hints": "inl_join(`test`.`t1`), use_index(@`sel_1` `test`.`t2` `b_c`), no_order_index(@`sel_1` `test`.`t2` `b_c`), use_index(@`sel_1` `test`.`t1` `b`), no_order_index(@`sel_1` `test`.`t1` `b`)"
}
]
},
Expand Down
55 changes: 47 additions & 8 deletions pkg/planner/core/plan_cost_ver2.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/pingcap/tidb/pkg/expression/aggregation"
"github.com/pingcap/tidb/pkg/kv"
"github.com/pingcap/tidb/pkg/meta/model"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/planner/cardinality"
"github.com/pingcap/tidb/pkg/planner/core/base"
"github.com/pingcap/tidb/pkg/planner/property"
Expand All @@ -30,6 +31,7 @@ import (
"github.com/pingcap/tidb/pkg/planner/util/optimizetrace"
"github.com/pingcap/tidb/pkg/sessionctx/variable"
"github.com/pingcap/tidb/pkg/util/paging"
"github.com/pingcap/tidb/pkg/util/ranger"
"github.com/pingcap/tipb/go-tipb"
)

Expand Down Expand Up @@ -118,6 +120,15 @@ func (p *PhysicalIndexScan) GetPlanCostVer2(taskType property.TaskType, option *
return p.PlanCostVer2, nil
}

const (
	// MinRowSize is the lower bound applied to the average row size used by
	// the table-scan cost formula, so that very narrow rows are not
	// under-costed.
	MinRowSize = 2.0
	// TiFlashStartupRowPenalty is the number of extra rows charged to every
	// TiFlash table scan as a start-up cost, which steers small scans
	// towards TiKV.
	TiFlashStartupRowPenalty = 10000
	// MaxPenaltyRowCount is the upper bound on the number of extra rows
	// charged to a full table scan that is considered at high risk of
	// being underestimated.
	MaxPenaltyRowCount = 1000
)

// GetPlanCostVer2 returns the plan-cost of this sub-plan, which is:
// plan-cost = rows * log2(row-size) * scan-factor
// log2(row-size) is from experiments.
Expand All @@ -127,20 +138,48 @@ func (p *PhysicalTableScan) GetPlanCostVer2(taskType property.TaskType, option *
}

rows := getCardinality(p, option.CostFlag)
var rowSize float64
if p.StoreType == kv.TiKV {
rowSize = getAvgRowSize(p.StatsInfo(), p.tblCols) // consider all columns if TiKV

var columns []*expression.Column
if p.StoreType == kv.TiKV { // Assume all columns for TiKV
columns = p.tblCols
} else { // TiFlash
rowSize = getAvgRowSize(p.StatsInfo(), p.schema.Columns)
columns = p.schema.Columns
}
rowSize = math.Max(rowSize, 2.0)
scanFactor := getTaskScanFactorVer2(p, p.StoreType, taskType)
rowSize := getAvgRowSize(p.StatsInfo(), columns)
// Ensure rowSize has a reasonable minimum value to avoid underestimation
rowSize = math.Max(rowSize, MinRowSize)

scanFactor := getTaskScanFactorVer2(p, p.StoreType, taskType)
p.PlanCostVer2 = scanCostVer2(option, rows, rowSize, scanFactor)

// give TiFlash a start-up cost to let the optimizer prefers to use TiKV to process small table scans.
// Apply TiFlash startup cost to prefer TiKV for small table scans
if p.StoreType == kv.TiFlash {
p.PlanCostVer2 = costusage.SumCostVer2(p.PlanCostVer2, scanCostVer2(option, 10000, rowSize, scanFactor))
p.PlanCostVer2 = costusage.SumCostVer2(p.PlanCostVer2, scanCostVer2(option, TiFlashStartupRowPenalty, rowSize, scanFactor))
} else {
// Apply cost penalty for full scans that carry high risk of underestimation
sessionVars := p.SCtx().GetSessionVars()
allowPreferRangeScan := sessionVars.GetAllowPreferRangeScan()
tblColHists := p.tblColHists

// The preferRangeScan check here is the same as in skylinePruning
preferRangeScanCondition := allowPreferRangeScan && (tblColHists.Pseudo || tblColHists.RealtimeCount < 1)
// hasHighModifyCount tracks the high risk of a tablescan where auto-analyze had not yet updated the table row count
hasHighModifyCount := tblColHists.ModifyCount > tblColHists.RealtimeCount
// hasLowEstimate is a check to capture a unique customer case where modifyCount is used for the tablescan estimate (it is not yet adequately understood why)
hasLowEstimate := rows > 1 && int64(rows) < tblColHists.RealtimeCount && int64(rows) <= tblColHists.ModifyCount
var unsignedIntHandle bool
if p.Table.PKIsHandle {
if pkColInfo := p.Table.GetPkColInfo(); pkColInfo != nil {
unsignedIntHandle = mysql.HasUnsignedFlag(pkColInfo.GetFlag())
}
}
hasFullRangeScan := !p.isChildOfIndexLookUp && ranger.HasFullRange(p.Ranges, unsignedIntHandle)

shouldApplyPenalty := hasFullRangeScan && (preferRangeScanCondition || hasHighModifyCount || hasLowEstimate)
if shouldApplyPenalty {
newRowCount := math.Min(MaxPenaltyRowCount, math.Max(float64(tblColHists.ModifyCount), float64(tblColHists.RealtimeCount)))
p.PlanCostVer2 = costusage.SumCostVer2(p.PlanCostVer2, scanCostVer2(option, newRowCount, rowSize, scanFactor))
}
}

p.PlanCostInit = true
Expand Down
Loading

0 comments on commit a3a4511

Please sign in to comment.