Skip to content

Commit

Permalink
planner: support HashJoin cost detail (#37012)
Browse files Browse the repository at this point in the history
ref #36962
  • Loading branch information
Yisaer authored Aug 22, 2022
1 parent 4cf7eee commit e0da196
Show file tree
Hide file tree
Showing 6 changed files with 296 additions and 54 deletions.
25 changes: 18 additions & 7 deletions planner/core/plan_cost.go
Original file line number Diff line number Diff line change
Expand Up @@ -928,7 +928,7 @@ func (p *PhysicalMergeJoin) GetPlanCost(taskType property.TaskType, option *Plan
}

// GetCost computes cost of hash join operator itself.
func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint64) float64 {
func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint64, op *physicalOptimizeOp) float64 {
buildCnt, probeCnt := lCnt, rCnt
build := p.children[0]
// Taking the right as the inner for right join or using the outer to build a hash table.
Expand All @@ -946,9 +946,13 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint
if isMPP && p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
cpuFactor = sessVars.GetTiFlashCPUFactor() // use the dedicated TiFlash CPU Factor on modelVer2
}
diskFactor := sessVars.GetDiskFactor()
memoryFactor := sessVars.GetMemoryFactor()
concurrencyFactor := sessVars.GetConcurrencyFactor()

cpuCost := buildCnt * cpuFactor
memoryCost := buildCnt * sessVars.GetMemoryFactor()
diskCost := buildCnt * sessVars.GetDiskFactor() * rowSize
memoryCost := buildCnt * memoryFactor
diskCost := buildCnt * diskFactor * rowSize
// Number of matched row pairs regarding the equal join conditions.
helper := &fullJoinRowCountHelper{
cartesian: false,
Expand Down Expand Up @@ -982,7 +986,7 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint
// Cost of querying hash table is cheap actually, so we just compute the cost of
// evaluating `OtherConditions` and joining row pairs.
probeCost := numPairs * cpuFactor
probeDiskCost := numPairs * sessVars.GetDiskFactor() * rowSize
probeDiskCost := numPairs * diskFactor * rowSize
// Cost of evaluating outer filter.
if len(p.LeftConditions)+len(p.RightConditions) > 0 {
// Input outer count for the above computation should be adjusted by SelectionFactor.
Expand All @@ -993,7 +997,7 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint
diskCost += probeDiskCost
probeCost /= float64(p.Concurrency)
// Cost of additional concurrent goroutines.
cpuCost += probeCost + float64(p.Concurrency+1)*sessVars.GetConcurrencyFactor()
cpuCost += probeCost + float64(p.Concurrency+1)*concurrencyFactor
// Cost of traveling the hash table to resolve missing matched cases when building the hash table from the outer table
if p.UseOuterToBuild {
if spill {
Expand All @@ -1002,14 +1006,20 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint
} else {
cpuCost += buildCnt * cpuFactor / float64(p.Concurrency)
}
diskCost += buildCnt * sessVars.GetDiskFactor() * rowSize
diskCost += buildCnt * diskFactor * rowSize
}

if spill {
memoryCost *= float64(memQuota) / (rowSize * buildCnt)
} else {
diskCost = 0
}
if op != nil {
setPhysicalHashJoinCostDetail(p, op, spill, buildCnt, probeCnt, cpuFactor, rowSize, numPairs,
cpuCost, probeCost, memoryCost, diskCost, probeDiskCost,
diskFactor, memoryFactor, concurrencyFactor,
memQuota)
}
return cpuCost + memoryCost + diskCost
}

Expand All @@ -1027,7 +1037,8 @@ func (p *PhysicalHashJoin) GetPlanCost(taskType property.TaskType, option *PlanC
}
p.planCost += childCost
}
p.planCost += p.GetCost(getCardinality(p.children[0], costFlag), getCardinality(p.children[1], costFlag), taskType == property.MppTaskType, costFlag)
p.planCost += p.GetCost(getCardinality(p.children[0], costFlag), getCardinality(p.children[1], costFlag),
taskType == property.MppTaskType, costFlag, option.tracer)
p.planCostInit = true
return p.planCost, nil
}
Expand Down
215 changes: 215 additions & 0 deletions planner/core/plan_cost_detail.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,65 @@ const (
RowCountLbl = "rowCount"
// RowSizeLbl indicates rowSize
RowSizeLbl = "rowSize"
// BuildRowCountLbl indicates rowCount on build side
BuildRowCountLbl = "buildRowCount"
// ProbeRowCountLbl indicates rowCount on probe side
ProbeRowCountLbl = "probeRowCount"
// NumPairsLbl indicates numPairs
NumPairsLbl = "numPairs"

// NetworkFactorLbl indicates networkFactor
NetworkFactorLbl = "networkFactor"
// SeekFactorLbl indicates seekFactor
SeekFactorLbl = "seekFactor"
// ScanFactorLbl indicates scanFactor
ScanFactorLbl = "scanFactor"
// SelectionFactorLbl indicates selection factor
SelectionFactorLbl = "selectionFactor"
// CPUFactorLbl indicates cpu factor
CPUFactorLbl = "cpuFactor"
// MemoryFactorLbl indicates mem factor
MemoryFactorLbl = "memoryFactor"
// DiskFactorLbl indicates disk factor
DiskFactorLbl = "diskFactor"
// ConcurrencyFactorLbl indicates concurrency factor
ConcurrencyFactorLbl = "concurrencyFactor"

// ScanConcurrencyLbl indicates sql scan concurrency
ScanConcurrencyLbl = "scanConcurrency"
// HashJoinConcurrencyLbl indicates concurrency for hash join
HashJoinConcurrencyLbl = "hashJoinConcurrency"

// NetSeekCostLbl indicates netSeek cost
NetSeekCostLbl = "netSeekCost"
// TablePlanCostLbl indicates tablePlan cost
TablePlanCostLbl = "tablePlanCost"
// IndexPlanCostLbl indicates indexPlan cost
IndexPlanCostLbl = "indexPlanCost"

// ProbeCostDetailLbl indicates probeCost
ProbeCostDetailLbl = "probeCostDetail"
// ProbeCostDescLbl indicates description for probe cost
ProbeCostDescLbl = "probeCostDesc"
// CPUCostDetailLbl indicates cpuCost detail
CPUCostDetailLbl = "cpuCostDetail"
// CPUCostDescLbl indicates description for cpu cost
CPUCostDescLbl = "cpuCostDesc"
// MemCostDetailLbl indicates mem cost detail
MemCostDetailLbl = "memCostDetail"
// MemCostDescLbl indicates description for mem cost
MemCostDescLbl = "memCostDesc"
// DiskCostDetailLbl indicates disk cost detail
DiskCostDetailLbl = "diskCostDetail"
// DiskCostDescLbl indicates description for disk cost
DiskCostDescLbl = "diskCostDesc"
// ProbeDiskCostLbl indicates probe disk cost detail
ProbeDiskCostLbl = "probeDiskCostDetail"
// ProbeDiskCostDescLbl indicates description for probe disk cost
ProbeDiskCostDescLbl = "probeDiskCostDesc"

// MemQuotaLbl indicates memory quota
MemQuotaLbl = "memQuota"
)

func setPointGetPlanCostDetail(p *PointGetPlan, opt *physicalOptimizeOp,
Expand Down Expand Up @@ -134,3 +176,176 @@ func setPhysicalIndexReaderCostDetail(p *PhysicalIndexReader, opt *physicalOptim
RowCountLbl, RowSizeLbl, NetworkFactorLbl, NetSeekCostLbl, ScanConcurrencyLbl))
opt.appendPlanCostDetail(detail)
}

// setPhysicalHashJoinCostDetail records the cost breakdown of a hash join into
// the optimizer trace. It packages the already-computed CPU, memory and disk
// costs together with the factors and row counts they were derived from, attaches
// a human-readable formula for each part, and appends the result to opt.
// It is a no-op when opt is nil.
func setPhysicalHashJoinCostDetail(p *PhysicalHashJoin, opt *physicalOptimizeOp, spill bool,
	buildCnt, probeCnt, cpuFactor, rowSize, numPairs,
	cpuCost, probeCPUCost, memCost, diskCost, probeDiskCost,
	diskFactor, memoryFactor, concurrencyFactor float64,
	memQuota int64) {
	if opt == nil {
		return
	}
	detail := tracing.NewPhysicalPlanCostDetail(p.ID(), p.TP())

	// Both probe sub-details need to know whether any one-sided filter exists.
	hasConditions := len(p.LeftConditions)+len(p.RightConditions) > 0

	cpuPart := &HashJoinCPUCostDetail{
		BuildRowCount:       buildCnt,
		CPUFactor:           cpuFactor,
		ConcurrencyFactor:   concurrencyFactor,
		HashJoinConcurrency: p.Concurrency,
		Spill:               spill,
		UseOuterToBuild:     p.UseOuterToBuild,
		Cost:                cpuCost,
		ProbeCost: &HashJoinProbeCostDetail{
			NumPairs:        numPairs,
			HasConditions:   hasConditions,
			SelectionFactor: SelectionFactor,
			ProbeRowCount:   probeCnt,
			Cost:            probeCPUCost,
		},
	}
	memPart := &HashJoinMemoryCostDetail{
		Spill:         spill,
		MemQuota:      memQuota,
		RowSize:       rowSize,
		BuildRowCount: buildCnt,
		MemoryFactor:  memoryFactor,
		Cost:          memCost,
	}
	diskPart := &HashJoinDiskCostDetail{
		Spill:           spill,
		UseOuterToBuild: p.UseOuterToBuild,
		BuildRowCount:   buildCnt,
		DiskFactor:      diskFactor,
		RowSize:         rowSize,
		Cost:            diskCost,
		ProbeDiskCost: &HashJoinProbeDiskCostDetail{
			SelectionFactor: SelectionFactor,
			NumPairs:        numPairs,
			HasConditions:   hasConditions,
			Cost:            probeDiskCost,
		},
	}

	// CPU: structured detail plus the formulas for the total and the probe part.
	detail.AddParam(CPUCostDetailLbl, cpuPart).
		AddParam(CPUCostDescLbl, cpuPart.desc()).
		AddParam(ProbeCostDescLbl, cpuPart.probeCostDesc())
	// Memory: structured detail plus its formula.
	detail.AddParam(MemCostDetailLbl, memPart).
		AddParam(MemCostDescLbl, memPart.desc())
	// Disk: structured detail plus the formulas for the total and the probe part.
	detail.AddParam(DiskCostDetailLbl, diskPart).
		AddParam(DiskCostDescLbl, diskPart.desc()).
		AddParam(ProbeDiskCostDescLbl, diskPart.probeDesc())

	detail.SetDesc(fmt.Sprintf("%s+%s+%s+all children cost", CPUCostDetailLbl, MemCostDetailLbl, DiskCostDetailLbl))
	opt.appendPlanCostDetail(detail)
}

// HashJoinProbeCostDetail indicates probe cpu cost detail.
// It is filled in by setPhysicalHashJoinCostDetail and nested inside
// HashJoinCPUCostDetail; the json tags give the serialized field names.
type HashJoinProbeCostDetail struct {
	// NumPairs is the numPairs value handed to setPhysicalHashJoinCostDetail.
	NumPairs float64 `json:"numPairs"`
	// HasConditions is true when the join has at least one entry in
	// LeftConditions or RightConditions.
	HasConditions bool `json:"hasConditions"`
	// SelectionFactor is the package-level SelectionFactor value.
	SelectionFactor float64 `json:"selectionFactor"`
	// ProbeRowCount is the row count on the probe side.
	ProbeRowCount float64 `json:"probeRowCount"`
	// Cost is the probe CPU cost supplied by the caller.
	Cost float64 `json:"cost"`
}

// HashJoinCPUCostDetail indicates cpu cost detail.
// It is filled in by setPhysicalHashJoinCostDetail; its desc and probeCostDesc
// methods render the matching cost formulas as text.
type HashJoinCPUCostDetail struct {
	// BuildRowCount is the row count on the build side.
	BuildRowCount float64 `json:"buildRowCount"`
	// CPUFactor is the CPU factor used for the cost.
	CPUFactor float64 `json:"cpuFactor"`
	// ConcurrencyFactor is the concurrency factor used for the cost.
	ConcurrencyFactor float64 `json:"concurrencyFactor"`
	// ProbeCost holds the probe-side portion; probeCostDesc dereferences it,
	// so it must be non-nil before that method is called.
	ProbeCost *HashJoinProbeCostDetail `json:"probeCost"`
	// HashJoinConcurrency is the hash join's Concurrency setting.
	HashJoinConcurrency uint `json:"hashJoinConcurrency"`
	// Spill reports whether the cost was computed for the spill case; it
	// selects the formula variant in desc.
	Spill bool `json:"spill"`
	// Cost is the total CPU cost supplied by the caller.
	Cost float64 `json:"cost"`
	// UseOuterToBuild mirrors the join's UseOuterToBuild flag; when set, desc
	// appends an extra build-side term.
	UseOuterToBuild bool `json:"useOuterToBuild"`
}

// desc returns the symbolic formula of the hash join CPU cost, written in terms
// of the cost-detail labels.
//
// The base formula is
//
//	buildRowCount*cpuFactor + probeCostDetail + (hashJoinConcurrency+1)*concurrencyFactor
//
// When UseOuterToBuild is set, one more build-side term is appended: the plain
// build cost if Spill is set, otherwise the build cost divided by the hash join
// concurrency.
func (h *HashJoinCPUCostDetail) desc() string {
	buildCostDesc := fmt.Sprintf("%s*%s", BuildRowCountLbl, CPUFactorLbl)
	// The parentheses only wrap "(concurrency+1)"; there must be no trailing ")".
	cpuCostDesc := fmt.Sprintf("%s+%s+(%s+1)*%s",
		buildCostDesc, ProbeCostDetailLbl, HashJoinConcurrencyLbl, ConcurrencyFactorLbl)
	if !h.UseOuterToBuild {
		return cpuCostDesc
	}
	if h.Spill {
		return fmt.Sprintf("%s+%s", cpuCostDesc, buildCostDesc)
	}
	return fmt.Sprintf("%s+%s/%s", cpuCostDesc, buildCostDesc, HashJoinConcurrencyLbl)
}

// probeCostDesc returns the symbolic formula of the probe-side CPU cost.
// The numerator is numPairs*cpuFactor; when the probe detail reports conditions
// it becomes numPairs*cpuFactor*selectionFactor+probeRowCount*cpuFactor. The
// whole numerator is divided by the hash join concurrency.
// h.ProbeCost must be non-nil.
func (h *HashJoinCPUCostDetail) probeCostDesc() string {
	numerator := fmt.Sprintf("%s*%s", NumPairsLbl, CPUFactorLbl)
	if h.ProbeCost.HasConditions {
		numerator = fmt.Sprintf("%s*%s+%s*%s",
			numerator, SelectionFactorLbl, ProbeRowCountLbl, CPUFactorLbl)
	}
	return fmt.Sprintf("(%s)/%s", numerator, HashJoinConcurrencyLbl)
}

// HashJoinMemoryCostDetail indicates memory cost detail.
// It is filled in by setPhysicalHashJoinCostDetail; desc renders the matching
// formula as text.
type HashJoinMemoryCostDetail struct {
	// Spill reports whether the cost was computed for the spill case; it
	// selects the formula variant in desc.
	Spill bool `json:"spill"`
	// MemQuota is the memory quota used in the spill adjustment.
	MemQuota int64 `json:"memQuota"`
	// RowSize is the row size used in the spill adjustment.
	RowSize float64 `json:"rowSize"`
	// BuildRowCount is the row count on the build side.
	BuildRowCount float64 `json:"buildRowCount"`
	// MemoryFactor is the memory factor used for the cost.
	MemoryFactor float64 `json:"memoryFactor"`
	// Cost is the memory cost supplied by the caller.
	Cost float64 `json:"cost"`
}

// desc returns the symbolic formula of the hash join memory cost:
// buildRowCount*memoryFactor, additionally scaled by
// memQuota/(rowSize*buildRowCount) when Spill is set.
func (h *HashJoinMemoryCostDetail) desc() string {
	if !h.Spill {
		return fmt.Sprintf("%s*%s", BuildRowCountLbl, MemoryFactorLbl)
	}
	return fmt.Sprintf("%s*%s*%s/(%s*%s)",
		BuildRowCountLbl, MemoryFactorLbl, MemQuotaLbl, RowSizeLbl, BuildRowCountLbl)
}

// HashJoinProbeDiskCostDetail indicates probe disk cost detail.
// It is filled in by setPhysicalHashJoinCostDetail and nested inside
// HashJoinDiskCostDetail.
type HashJoinProbeDiskCostDetail struct {
	// SelectionFactor is the package-level SelectionFactor value.
	SelectionFactor float64 `json:"selectionFactor"`
	// NumPairs is the numPairs value handed to setPhysicalHashJoinCostDetail.
	NumPairs float64 `json:"numPairs"`
	// HasConditions is true when the join has at least one entry in
	// LeftConditions or RightConditions.
	HasConditions bool `json:"hasConditions"`
	// Cost is the probe disk cost supplied by the caller.
	Cost float64 `json:"cost"`
}

// HashJoinDiskCostDetail indicates disk cost detail.
// It is filled in by setPhysicalHashJoinCostDetail; desc and probeDesc render
// the matching formulas as text and return "" when Spill is false.
type HashJoinDiskCostDetail struct {
	// Spill reports whether the cost was computed for the spill case.
	Spill bool `json:"spill"`
	// UseOuterToBuild mirrors the join's UseOuterToBuild flag; when set, desc
	// appends a second build-side term.
	UseOuterToBuild bool `json:"useOuterToBuild"`
	// BuildRowCount is the row count on the build side.
	BuildRowCount float64 `json:"buildRowCount"`
	// DiskFactor is the disk factor used for the cost.
	DiskFactor float64 `json:"diskFactor"`
	// RowSize is the row size used for the cost.
	RowSize float64 `json:"rowSize"`
	// ProbeDiskCost holds the probe-side portion; probeDesc dereferences it
	// when Spill is true, so it must be non-nil in that case.
	ProbeDiskCost *HashJoinProbeDiskCostDetail `json:"probeDiskCost"`
	// Cost is the total disk cost supplied by the caller.
	Cost float64 `json:"cost"`
}

// desc returns the symbolic formula of the hash join disk cost, or "" when the
// join does not spill. The formula is the build-side disk cost plus the probe
// disk cost; when UseOuterToBuild is set the build-side term is added once more.
func (h *HashJoinDiskCostDetail) desc() string {
	if !h.Spill {
		return ""
	}
	buildTerm := fmt.Sprintf("%s*%s*%s", BuildRowCountLbl, DiskFactorLbl, RowSizeLbl)
	if h.UseOuterToBuild {
		return fmt.Sprintf("%s+%s+%s", buildTerm, ProbeDiskCostLbl, buildTerm)
	}
	return fmt.Sprintf("%s+%s", buildTerm, ProbeDiskCostLbl)
}

// probeDesc returns the symbolic formula of the probe-side disk cost, or "" when
// the join does not spill. The formula is numPairs*diskFactor*rowSize, further
// multiplied by selectionFactor when the probe detail reports conditions.
// h.ProbeDiskCost is only dereferenced when Spill is true.
func (h *HashJoinDiskCostDetail) probeDesc() string {
	switch {
	case !h.Spill:
		return ""
	case h.ProbeDiskCost.HasConditions:
		return fmt.Sprintf("%s*%s*%s*%s", NumPairsLbl, DiskFactorLbl, RowSizeLbl, SelectionFactorLbl)
	default:
		return fmt.Sprintf("%s*%s*%s", NumPairsLbl, DiskFactorLbl, RowSizeLbl)
	}
}
Loading

0 comments on commit e0da196

Please sign in to comment.