Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: introduce new cost formula for Selection/TableScan/IndexScan #35378

Merged
merged 8 commits into from
Jun 15, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 80 additions & 20 deletions planner/core/plan_cost.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ const (
CostFlagUseTrueCardinality
)

// Cost model versions. The GetPlanCost implementations in this file switch on
// SessionVars().CostModelVersion and compare it against these values.
const (
// modelVer1 selects the original formulas, e.g. selection cost = rows * cpu-factor
// and scan cost = rows * row-size * scan-factor.
modelVer1 = 1
// modelVer2 selects the newer formulas, e.g. selection cost =
// rows * num-filters * cpu-factor and scan cost = rows * log2(row-size) * scan-factor.
modelVer2 = 2
)

// hasCostFlag reports whether costFlag and flag have at least one set bit in
// common. Both arguments are treated as bit masks.
func hasCostFlag(costFlag, flag uint64) bool {
	return costFlag&flag != 0
}
Expand Down Expand Up @@ -61,21 +66,40 @@ func (p *PhysicalSelection) GetPlanCost(taskType property.TaskType, costFlag uin
if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
return p.planCost, nil
}
var cpuFactor float64
switch taskType {
case property.RootTaskType, property.MppTaskType:
cpuFactor = p.ctx.GetSessionVars().GetCPUFactor()
case property.CopSingleReadTaskType, property.CopDoubleReadTaskType:
cpuFactor = p.ctx.GetSessionVars().GetCopCPUFactor()
default:
return 0, errors.Errorf("unknown task type %v", taskType)

var selfCost float64
switch p.ctx.GetSessionVars().CostModelVersion {
case modelVer1: // selection cost: rows * cpu-factor
var cpuFactor float64
switch taskType {
case property.RootTaskType, property.MppTaskType:
cpuFactor = p.ctx.GetSessionVars().GetCPUFactor()
case property.CopSingleReadTaskType, property.CopDoubleReadTaskType:
cpuFactor = p.ctx.GetSessionVars().GetCopCPUFactor()
default:
return 0, errors.Errorf("unknown task type %v", taskType)
}
selfCost = getCardinality(p.children[0], costFlag) * cpuFactor
case modelVer2: // selection cost: rows * num-filters * cpu-factor
var cpuFactor float64
switch taskType {
case property.RootTaskType:
cpuFactor = p.ctx.GetSessionVars().GetCPUFactor()
case property.MppTaskType: // use a dedicated cpu-factor for TiFlash
cpuFactor = p.ctx.GetSessionVars().GetTiFlashCPUFactor()
case property.CopSingleReadTaskType, property.CopDoubleReadTaskType:
cpuFactor = p.ctx.GetSessionVars().GetCopCPUFactor()
default:
return 0, errors.Errorf("unknown task type %v", taskType)
}
selfCost = getCardinality(p.children[0], costFlag) * float64(len(p.Conditions)) * cpuFactor
}

childCost, err := p.children[0].GetPlanCost(taskType, costFlag)
if err != nil {
return 0, err
}
p.planCost = childCost
p.planCost += getCardinality(p.children[0], costFlag) * cpuFactor // selection cost: rows * cpu-factor
p.planCost = childCost + selfCost
p.planCostInit = true
return p.planCost, nil
}
Expand Down Expand Up @@ -343,12 +367,34 @@ func (p *PhysicalTableScan) GetPlanCost(taskType property.TaskType, costFlag uin
if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
return p.planCost, nil
}
// scan cost: rows * row-size * scan-factor
scanFactor := p.ctx.GetSessionVars().GetScanFactor(p.Table)
if p.Desc {
scanFactor = p.ctx.GetSessionVars().GetDescScanFactor(p.Table)

var selfCost float64
switch p.ctx.GetSessionVars().CostModelVersion {
case modelVer1: // scan cost: rows * row-size * scan-factor
scanFactor := p.ctx.GetSessionVars().GetScanFactor(p.Table)
if p.Desc {
scanFactor = p.ctx.GetSessionVars().GetDescScanFactor(p.Table)
}
selfCost = getCardinality(p, costFlag) * p.getScanRowSize() * scanFactor
case modelVer2: // scan cost: rows * log2(row-size) * scan-factor
var scanFactor float64
switch taskType {
case property.MppTaskType: // use a dedicated scan-factor for TiFlash
// no need to distinguish `Scan` and `DescScan` for TiFlash for now
scanFactor = p.ctx.GetSessionVars().GetTiFlashScanFactor()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we assume that there is little performance difference between Scan and DescScan on TiFlash.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, no need to distinguish Scan and DescScan for TiFlash for now. I'll add some comments.

default: // for TiKV
scanFactor = p.ctx.GetSessionVars().GetScanFactor(p.Table)
if p.Desc {
scanFactor = p.ctx.GetSessionVars().GetDescScanFactor(p.Table)
}
}
// the formula `log(rowSize)` is based on experiment results
rowSize := math.Max(p.getScanRowSize(), 2.0) // to guarantee logRowSize >= 1
logRowSize := math.Log2(rowSize)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the Log2 in the formula based on experimental results?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I'll add some comments here.

selfCost = getCardinality(p, costFlag) * logRowSize * scanFactor
}
p.planCost = getCardinality(p, costFlag) * p.getScanRowSize() * scanFactor

p.planCost = selfCost
p.planCostInit = true
return p.planCost, nil
}
Expand All @@ -358,12 +404,26 @@ func (p *PhysicalIndexScan) GetPlanCost(taskType property.TaskType, costFlag uin
if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
return p.planCost, nil
}
// scan cost: rows * row-size * scan-factor
scanFactor := p.ctx.GetSessionVars().GetScanFactor(p.Table)
if p.Desc {
scanFactor = p.ctx.GetSessionVars().GetDescScanFactor(p.Table)

var selfCost float64
switch p.ctx.GetSessionVars().CostModelVersion {
case modelVer1: // scan cost: rows * row-size * scan-factor
scanFactor := p.ctx.GetSessionVars().GetScanFactor(p.Table)
if p.Desc {
scanFactor = p.ctx.GetSessionVars().GetDescScanFactor(p.Table)
}
selfCost = getCardinality(p, costFlag) * p.getScanRowSize() * scanFactor
case modelVer2:
scanFactor := p.ctx.GetSessionVars().GetScanFactor(p.Table)
if p.Desc {
scanFactor = p.ctx.GetSessionVars().GetDescScanFactor(p.Table)
}
rowSize := math.Max(p.getScanRowSize(), 2.0)
logRowSize := math.Log2(rowSize)
selfCost = getCardinality(p, costFlag) * logRowSize * scanFactor
}
p.planCost = getCardinality(p, costFlag) * p.getScanRowSize() * scanFactor

p.planCost = selfCost
p.planCostInit = true
return p.planCost, nil
}
Expand Down