diff --git a/planner/core/plan_cost.go b/planner/core/plan_cost.go
index 60b440f80bbfd..3bfe1fddb1a97 100644
--- a/planner/core/plan_cost.go
+++ b/planner/core/plan_cost.go
@@ -569,6 +569,9 @@ func (p *PhysicalIndexJoin) GetPlanCost(taskType property.TaskType, option *Plan
 	if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
 		return p.planCost, nil
 	}
+	if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
+		return p.getPlanCostVer2(taskType, option)
+	}
 	outerChild, innerChild := p.children[1-p.InnerChildIdx], p.children[p.InnerChildIdx]
 	outerCost, err := outerChild.GetPlanCost(taskType, option)
 	if err != nil {
@@ -658,6 +661,9 @@ func (p *PhysicalIndexHashJoin) GetPlanCost(taskType property.TaskType, option *
 	if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
 		return p.planCost, nil
 	}
+	if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
+		return p.getPlanCostVer2(taskType, option)
+	}
 	outerChild, innerChild := p.children[1-p.InnerChildIdx], p.children[p.InnerChildIdx]
 	outerCost, err := outerChild.GetPlanCost(taskType, option)
 	if err != nil {
@@ -749,6 +755,9 @@ func (p *PhysicalIndexMergeJoin) GetPlanCost(taskType property.TaskType, option
 	if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
 		return p.planCost, nil
 	}
+	if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
+		return p.getPlanCostVer2(taskType, option)
+	}
 	outerChild, innerChild := p.children[1-p.InnerChildIdx], p.children[p.InnerChildIdx]
 	outerCost, err := outerChild.GetPlanCost(taskType, option)
 	if err != nil {
@@ -802,6 +811,9 @@ func (p *PhysicalApply) GetPlanCost(taskType property.TaskType, option *PlanCost
 	if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
 		return p.planCost, nil
 	}
+	if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
+		return p.getPlanCostVer2(taskType, option)
+	}
 	outerChild, innerChild := p.children[1-p.InnerChildIdx], p.children[p.InnerChildIdx]
 	outerCost, err := outerChild.GetPlanCost(taskType, option)
 	if err != nil {
@@ -876,6 +888,9 @@ func (p *PhysicalMergeJoin) GetPlanCost(taskType property.TaskType, option *Plan
 	if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
 		return p.planCost, nil
 	}
+	if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
+		return p.getPlanCostVer2(taskType, option)
+	}
 	p.planCost = 0
 	for _, child := range p.children {
 		childCost, err := child.GetPlanCost(taskType, option)
@@ -994,6 +1009,9 @@ func (p *PhysicalHashJoin) GetPlanCost(taskType property.TaskType, option *PlanC
 	if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
 		return p.planCost, nil
 	}
+	if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
+		return p.getPlanCostVer2(taskType, option)
+	}
 	p.planCost = 0
 	for _, child := range p.children {
 		childCost, err := child.GetPlanCost(taskType, option)
@@ -1036,6 +1054,9 @@ func (p *PhysicalStreamAgg) GetPlanCost(taskType property.TaskType, option *Plan
 	if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
 		return p.planCost, nil
 	}
+	if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
+		return p.getPlanCostVer2(taskType, option)
+	}
 	childCost, err := p.children[0].GetPlanCost(taskType, option)
 	if err != nil {
 		return 0, err
@@ -1084,6 +1105,9 @@ func (p *PhysicalHashAgg) GetPlanCost(taskType property.TaskType, option *PlanCo
 	if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
 		return p.planCost, nil
 	}
+	if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
+		return p.getPlanCostVer2(taskType, option)
+	}
 	childCost, err := p.children[0].GetPlanCost(taskType, option)
 	if err != nil {
 		return 0, err
@@ -1216,11 +1240,14 @@ func (p *BatchPointGetPlan) GetCost(opt *physicalOptimizeOp) float64 {
 }
 
 // GetPlanCost calculates the cost of the plan if it has not been calculated yet and returns the cost.
-func (p *BatchPointGetPlan) GetPlanCost(_ property.TaskType, option *PlanCostOption) (float64, error) {
+func (p *BatchPointGetPlan) GetPlanCost(taskType property.TaskType, option *PlanCostOption) (float64, error) {
 	costFlag := option.CostFlag
 	if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
 		return p.planCost, nil
 	}
+	if p.ctx != nil && p.ctx.GetSessionVars() != nil && p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
+		return p.getPlanCostVer2(taskType, option)
+	}
 	p.planCost = p.GetCost(option.tracer)
 	p.planCostInit = true
 	return p.planCost, nil
@@ -1264,11 +1291,14 @@ func (p *PointGetPlan) GetCost(opt *physicalOptimizeOp) float64 {
 }
 
 // GetPlanCost calculates the cost of the plan if it has not been calculated yet and returns the cost.
-func (p *PointGetPlan) GetPlanCost(_ property.TaskType, option *PlanCostOption) (float64, error) {
+func (p *PointGetPlan) GetPlanCost(taskType property.TaskType, option *PlanCostOption) (float64, error) {
 	costFlag := option.CostFlag
 	if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
 		return p.planCost, nil
 	}
+	if p.ctx != nil && p.ctx.GetSessionVars() != nil && p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
+		return p.getPlanCostVer2(taskType, option)
+	}
 	p.planCost = p.GetCost(option.tracer)
 	p.planCostInit = true
 	return p.planCost, nil
@@ -1311,18 +1341,16 @@ func (p *PhysicalExchangeReceiver) GetPlanCost(taskType property.TaskType, optio
 	if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
 		return p.planCost, nil
 	}
+	if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
+		return p.getPlanCostVer2(taskType, option)
+	}
 	childCost, err := p.children[0].GetPlanCost(taskType, option)
 	if err != nil {
 		return 0, err
 	}
 	p.planCost = childCost
 	// accumulate net cost
-	if p.ctx.GetSessionVars().CostModelVersion == modelVer1 {
-		p.planCost += getCardinality(p.children[0], costFlag) * p.ctx.GetSessionVars().GetNetworkFactor(nil)
-	} else { // to avoid regression, only consider row-size on model ver2
-		rowSize := getTblStats(p.children[0]).GetAvgRowSize(p.ctx, p.children[0].Schema().Columns, false, false)
-		p.planCost += getCardinality(p.children[0], costFlag) * rowSize * p.ctx.GetSessionVars().GetNetworkFactor(nil)
-	}
+	p.planCost += getCardinality(p.children[0], costFlag) * p.ctx.GetSessionVars().GetNetworkFactor(nil)
 	p.planCostInit = true
 	return p.planCost, nil
 }
diff --git a/planner/core/plan_cost_ver2.go b/planner/core/plan_cost_ver2.go
index 3ab15931d2b4c..47856d4293006 100644
--- a/planner/core/plan_cost_ver2.go
+++ b/planner/core/plan_cost_ver2.go
@@ -17,6 +17,8 @@ package core
 import (
 	"math"
 
+	"github.com/pingcap/tidb/expression"
+	"github.com/pingcap/tidb/expression/aggregation"
 	"github.com/pingcap/tidb/kv"
 	"github.com/pingcap/tidb/parser/model"
 	"github.com/pingcap/tidb/planner/property"
@@ -24,19 +26,19 @@ import (
 )
 
 // getPlanCostVer2 returns the plan-cost of this sub-plan, which is:
-// plan-cost = child-cost + sel-cost
-// sel-cost = input-rows * len(conditions) * cpu-factor
+// plan-cost = child-cost + filter-cost
 func (p *PhysicalSelection) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
 	inputRows := getCardinality(p.children[0], option.CostFlag)
 	cpuFactor := getTaskCPUFactor(p, taskType)
-	selCost := inputRows * float64(len(p.Conditions)) * cpuFactor
+
+	filterCost := filterCostVer2(inputRows, p.Conditions, cpuFactor)
 
 	childCost, err := p.children[0].GetPlanCost(taskType, option)
 	if err != nil {
 		return 0, err
 	}
 
-	p.planCost = selCost + childCost
+	p.planCost = filterCost + childCost
 	p.planCostInit = true
 	return p.planCost, nil
 }
@@ -66,11 +68,10 @@ func (p *PhysicalProjection) getPlanCostVer2(taskType property.TaskType, option
 // log2(row-size) is from experiments.
 func (p *PhysicalIndexScan) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
 	rows := getCardinality(p, option.CostFlag)
-	scanFactor := getTaskScanFactor(p, taskType)
 	rowSize := math.Max(p.getScanRowSize(), 2.0)
-	logRowSize := math.Log2(rowSize)
+	scanFactor := getTaskScanFactor(p, taskType)
 
-	p.planCost = rows * logRowSize * scanFactor
+	p.planCost = scanCostVer2(rows, rowSize, scanFactor)
 	p.planCostInit = true
 	return p.planCost, nil
 }
@@ -80,15 +81,14 @@ func (p *PhysicalIndexScan) getPlanCostVer2(taskType property.TaskType, option *
 // log2(row-size) is from experiments.
 func (p *PhysicalTableScan) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
 	rows := getCardinality(p, option.CostFlag)
-	scanFactor := getTaskScanFactor(p, taskType)
 	rowSize := math.Max(p.getScanRowSize(), 2.0)
-	logRowSize := math.Log2(rowSize)
+	scanFactor := getTaskScanFactor(p, taskType)
 
-	p.planCost = rows * logRowSize * scanFactor
+	p.planCost = scanCostVer2(rows, rowSize, scanFactor)
 
 	// give TiFlash a start-up cost to let the optimizer prefers to use TiKV to process small table scans.
 	if p.StoreType == kv.TiFlash {
-		p.planCost += 10000 * logRowSize * scanFactor
+		p.planCost += scanCostVer2(10000, rowSize, scanFactor)
 	}
 
 	p.planCostInit = true
@@ -105,7 +105,7 @@ func (p *PhysicalIndexReader) getPlanCostVer2(taskType property.TaskType, option
 	netFactor := getTaskNetFactor(p, taskType)
 	concurrency := float64(p.ctx.GetSessionVars().DistSQLScanConcurrency())
 
-	netCost := rows * rowSize * netFactor
+	netCost := netCostVer2(rows, rowSize, netFactor)
 	seekCost := estimateNetSeekCost(p.indexPlan)
 
 	childCost, err := p.indexPlan.GetPlanCost(property.CopSingleReadTaskType, option)
@@ -128,7 +128,7 @@ func (p *PhysicalTableReader) getPlanCostVer2(taskType property.TaskType, option
 	netFactor := getTaskNetFactor(p, taskType)
 	concurrency := float64(p.ctx.GetSessionVars().DistSQLScanConcurrency())
 
-	netCost := rows * rowSize * netFactor
+	netCost := netCostVer2(rows, rowSize, netFactor)
 	seekCost := estimateNetSeekCost(p.tablePlan)
 
 	childCost, err := p.tablePlan.GetPlanCost(property.CopSingleReadTaskType, option)
@@ -168,7 +168,7 @@ func (p *PhysicalIndexLookUpReader) getPlanCostVer2(taskType property.TaskType,
 	doubleReadConcurrency := float64(p.ctx.GetSessionVars().IndexLookupConcurrency())
 
 	// index-side
-	indexNetCost := indexRows * indexRowSize * netFactor
+	indexNetCost := netCostVer2(indexRows, indexRowSize, netFactor)
 	indexSeekCost := estimateNetSeekCost(p.indexPlan)
 	indexChildCost, err := p.indexPlan.GetPlanCost(property.CopDoubleReadTaskType, option)
 	if err != nil {
@@ -177,7 +177,7 @@ func (p *PhysicalIndexLookUpReader) getPlanCostVer2(taskType property.TaskType,
 	indexSideCost := (indexNetCost + indexSeekCost + indexChildCost) / distConcurrency
 
 	// table-side
-	tableNetCost := tableRows * tableRowSize * netFactor
+	tableNetCost := netCostVer2(tableRows, tableRowSize, netFactor)
 	tableSeekCost := estimateNetSeekCost(p.tablePlan)
 	tableChildCost, err := p.tablePlan.GetPlanCost(property.CopDoubleReadTaskType, option)
 	if err != nil {
@@ -211,7 +211,7 @@ func (p *PhysicalIndexMergeReader) getPlanCostVer2(taskType property.TaskType, o
 		rows := getCardinality(tablePath, option.CostFlag)
 		rowSize := getAvgRowSize(tablePath.Stats(), tablePath.Schema())
 
-		tableNetCost := rows * rowSize * netFactor
+		tableNetCost := netCostVer2(rows, rowSize, netFactor)
 		tableSeekCost := estimateNetSeekCost(tablePath)
 		tableChildCost, err := tablePath.GetPlanCost(taskType, option)
 		if err != nil {
@@ -225,7 +225,7 @@ func (p *PhysicalIndexMergeReader) getPlanCostVer2(taskType property.TaskType, o
 		rows := getCardinality(indexPath, option.CostFlag)
 		rowSize := getAvgRowSize(indexPath.Stats(), indexPath.Schema())
 
-		indexNetCost := rows * rowSize * netFactor
+		indexNetCost := netCostVer2(rows, rowSize, netFactor)
 		indexSeekCost := estimateNetSeekCost(indexPath)
 		indexChildCost, err := indexPath.GetPlanCost(taskType, option)
 		if err != nil {
@@ -306,6 +306,296 @@ func (p *PhysicalTopN) getPlanCostVer2(taskType property.TaskType, option *PlanC
 	return p.planCost, nil
 }
 
+// getPlanCostVer2 returns the plan-cost of this sub-plan, which is:
+// plan-cost = child-cost + agg-cost + group-cost
+func (p *PhysicalStreamAgg) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
+	rows := getCardinality(p.children[0], option.CostFlag)
+	cpuFactor := getTaskCPUFactor(p, taskType)
+
+	aggCost := aggCostVer2(rows, p.AggFuncs, cpuFactor)
+	groupCost := groupCostVer2(rows, p.GroupByItems, cpuFactor)
+
+	childCost, err := p.children[0].GetPlanCost(taskType, option)
+	if err != nil {
+		return 0, err
+	}
+
+	p.planCost = childCost + aggCost + groupCost
+	p.planCostInit = true
+	return p.planCost, nil
+}
+
+// getPlanCostVer2 returns the plan-cost of this sub-plan, which is:
+// plan-cost = child-cost + (agg-cost + group-cost + hash-build-cost + hash-probe-cost) / concurrency
+func (p *PhysicalHashAgg) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
+	inputRows := getCardinality(p.children[0], option.CostFlag)
+	outputRows := getCardinality(p, option.CostFlag)
+	outputRowSize := getAvgRowSize(p.Stats(), p.Schema())
+	cpuFactor := getTaskCPUFactor(p, taskType)
+	memFactor := getTaskMemFactor(p, taskType)
+	concurrency := float64(p.ctx.GetSessionVars().HashAggFinalConcurrency())
+
+	aggCost := aggCostVer2(inputRows, p.AggFuncs, cpuFactor)
+	groupCost := groupCostVer2(inputRows, p.GroupByItems, cpuFactor)
+	hashBuildCost := hashBuildCostVer2(outputRows, outputRowSize, p.GroupByItems, cpuFactor, memFactor)
+	hashProbeCost := hashProbeCostVer2(inputRows, p.GroupByItems, cpuFactor)
+
+	childCost, err := p.children[0].GetPlanCost(taskType, option)
+	if err != nil {
+		return 0, err
+	}
+
+	p.planCost = childCost + (aggCost+groupCost+hashBuildCost+hashProbeCost)/concurrency
+	p.planCostInit = true
+	return p.planCost, nil
+}
+
+// getPlanCostVer2 returns the plan-cost of this sub-plan, which is:
+// plan-cost = left-child-cost + right-child-cost + filter-cost + group-cost
+func (p *PhysicalMergeJoin) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
+	leftRows := getCardinality(p.children[0], option.CostFlag)
+	rightRows := getCardinality(p.children[1], option.CostFlag)
+	cpuFactor := getTaskCPUFactor(p, taskType)
+
+	filterCost := filterCostVer2(leftRows, p.LeftConditions, cpuFactor) +
+		filterCostVer2(rightRows, p.RightConditions, cpuFactor)
+	groupCost := groupCostVer2(leftRows, cols2Exprs(p.LeftJoinKeys), cpuFactor) +
+		groupCostVer2(rightRows, cols2Exprs(p.RightJoinKeys), cpuFactor)
+
+	leftChildCost, err := p.children[0].GetPlanCost(taskType, option)
+	if err != nil {
+		return 0, err
+	}
+	rightChildCost, err := p.children[1].GetPlanCost(taskType, option)
+	if err != nil {
+		return 0, err
+	}
+
+	p.planCost = leftChildCost + rightChildCost + filterCost + groupCost
+	p.planCostInit = true
+	return p.planCost, nil
+}
+
+// getPlanCostVer2 returns the plan-cost of this sub-plan, which is:
+// plan-cost = build-child-cost + probe-child-cost +
+// build-hash-cost + build-filter-cost +
+// (probe-filter-cost + probe-hash-cost) / concurrency
+func (p *PhysicalHashJoin) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
+	build, probe := p.children[0], p.children[1]
+	buildFilters, probeFilters := p.LeftConditions, p.RightConditions
+	buildKeys, probeKeys := p.LeftJoinKeys, p.RightJoinKeys
+	if (p.InnerChildIdx == 1 && !p.UseOuterToBuild) || (p.InnerChildIdx == 0 && p.UseOuterToBuild) {
+		build, probe = probe, build
+		buildFilters, probeFilters = probeFilters, buildFilters
+		buildKeys, probeKeys = probeKeys, buildKeys
+	}
+	buildRows := getCardinality(build, option.CostFlag)
+	probeRows := getCardinality(probe, option.CostFlag)
+	buildRowSize := getAvgRowSize(build.Stats(), build.Schema())
+	concurrency := float64(p.Concurrency)
+	cpuFactor := getTaskCPUFactor(p, taskType)
+	memFactor := getTaskMemFactor(p, taskType)
+
+	buildFilterCost := filterCostVer2(buildRows, buildFilters, cpuFactor)
+	buildHashCost := hashBuildCostVer2(buildRows, buildRowSize, cols2Exprs(buildKeys), cpuFactor, memFactor)
+
+	probeFilterCost := filterCostVer2(probeRows, probeFilters, cpuFactor)
+	probeHashCost := hashProbeCostVer2(probeRows, cols2Exprs(probeKeys), cpuFactor)
+
+	buildChildCost, err := build.GetPlanCost(taskType, option)
+	if err != nil {
+		return 0, err
+	}
+	probeChildCost, err := probe.GetPlanCost(taskType, option)
+	if err != nil {
+		return 0, err
+	}
+
+	p.planCost = buildChildCost + probeChildCost + buildHashCost + buildFilterCost +
+		(probeFilterCost+probeHashCost)/concurrency
+	p.planCostInit = true
+	return p.planCost, nil
+}
+
+// getPlanCostVer2 returns the plan-cost of this sub-plan, which is:
+// plan-cost = build-child-cost + build-filter-cost +
+// (probe-cost + probe-filter-cost) / concurrency
+// probe-cost = probe-child-cost * build-rows / batchRatio
+func (p *PhysicalIndexJoin) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
+	build, probe := p.children[1-p.InnerChildIdx], p.children[p.InnerChildIdx]
+	buildRows := getCardinality(build, option.CostFlag)
+	probeRowsOne := getCardinality(probe, option.CostFlag)
+	probeRowsTot := probeRowsOne * buildRows
+	buildFilters, probeFilters := p.LeftConditions, p.RightConditions
+	probeConcurrency := float64(p.ctx.GetSessionVars().IndexLookupJoinConcurrency())
+	cpuFactor := getTaskCPUFactor(p, taskType)
+
+	buildFilterCost := filterCostVer2(buildRows, buildFilters, cpuFactor)
+	buildChildCost, err := build.GetPlanCost(taskType, option)
+	if err != nil {
+		return 0, err
+	}
+
+	probeFilterCost := filterCostVer2(probeRowsTot, probeFilters, cpuFactor)
+	probeChildCost, err := probe.GetPlanCost(taskType, option)
+	if err != nil {
+		return 0, err
+	}
+	// IndexJoin executes a batch of rows at a time, so the actual cost of this part should be
+	// `innerCostPerBatch * numberOfBatches` instead of `innerCostPerRow * numberOfOuterRow`.
+	// Use an empirical value batchRatio to handle this now.
+	// TODO: remove this empirical value.
+	batchRatio := 30.0
+	probeCost := probeChildCost * buildRows / batchRatio
+
+	p.planCost = buildChildCost + buildFilterCost + (probeCost+probeFilterCost)/probeConcurrency
+	p.planCostInit = true
+	return p.planCost, nil
+}
+
+// getPlanCostVer2 returns the plan-cost of this sub-plan; it reuses the IndexJoin formula for now.
+func (p *PhysicalIndexHashJoin) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
+	// TODO: distinguish IndexHashJoin with IndexJoin
+	return p.PhysicalIndexJoin.getPlanCostVer2(taskType, option)
+}
+
+// getPlanCostVer2 returns the plan-cost of this sub-plan; it reuses the IndexJoin formula for now.
+func (p *PhysicalIndexMergeJoin) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
+	// TODO: distinguish IndexMergeJoin with IndexJoin
+	return p.PhysicalIndexJoin.getPlanCostVer2(taskType, option)
+}
+
+// getPlanCostVer2 returns the plan-cost of this sub-plan, which is:
+// plan-cost = build-child-cost + build-filter-cost + probe-cost + probe-filter-cost
+// probe-cost = probe-child-cost * build-rows
+func (p *PhysicalApply) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
+	buildRows := getCardinality(p.children[0], option.CostFlag)
+	probeRowsOne := getCardinality(p.children[1], option.CostFlag)
+	probeRowsTot := buildRows * probeRowsOne
+	cpuFactor := getTaskCPUFactor(p, taskType)
+
+	buildFilterCost := filterCostVer2(buildRows, p.LeftConditions, cpuFactor)
+	buildChildCost, err := p.children[0].GetPlanCost(taskType, option)
+	if err != nil {
+		return 0, err
+	}
+
+	probeFilterCost := filterCostVer2(probeRowsTot, p.RightConditions, cpuFactor)
+	probeChildCost, err := p.children[1].GetPlanCost(taskType, option)
+	if err != nil {
+		return 0, err
+	}
+	probeCost := probeChildCost * buildRows
+
+	p.planCost = buildChildCost + buildFilterCost + probeCost + probeFilterCost
+	p.planCostInit = true
+	return p.planCost, nil
+}
+
+// getPlanCostVer2 returns the plan-cost of this sub-plan, which is:
+// plan-cost = child-cost + net-cost
+func (p *PhysicalExchangeReceiver) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
+	rows := getCardinality(p, option.CostFlag)
+	rowSize := getAvgRowSize(p.stats, p.Schema())
+	netFactor := getTaskNetFactor(p, taskType)
+
+	netCost := netCostVer2(rows, rowSize, netFactor)
+	childCost, err := p.children[0].GetPlanCost(taskType, option)
+	if err != nil {
+		return 0, err
+	}
+
+	p.planCost = childCost + netCost
+	p.planCostInit = true
+	return p.planCost, nil
+}
+
+// getPlanCostVer2 returns the plan-cost of this sub-plan, which is:
+// plan-cost = seek-cost + net-cost
+func (p *PointGetPlan) getPlanCostVer2(taskType property.TaskType, _ *PlanCostOption) (float64, error) {
+	if p.accessCols == nil { // from fast plan code path
+		p.planCost = 0
+		p.planCostInit = true
+		return 0, nil
+	}
+	rowSize := getAvgRowSize(p.stats, p.schema)
+	netFactor := getTaskNetFactor(p, taskType)
+	seekFactor := getTaskSeekFactor(p, taskType)
+
+	netCost := netCostVer2(1, rowSize, netFactor)
+	seekCost := 1 * seekFactor / 20 // 20 times faster than general request
+
+	p.planCost = netCost + seekCost
+	p.planCostInit = true
+	return p.planCost, nil
+}
+
+// getPlanCostVer2 returns the plan-cost of this sub-plan, which is:
+// plan-cost = seek-cost + net-cost
+func (p *BatchPointGetPlan) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
+	if p.accessCols == nil { // from fast plan code path
+		p.planCost = 0
+		p.planCostInit = true
+		return 0, nil
+	}
+	rows := getCardinality(p, option.CostFlag)
+	rowSize := getAvgRowSize(p.stats, p.schema)
+	netFactor := getTaskNetFactor(p, taskType)
+	seekFactor := getTaskSeekFactor(p, taskType)
+
+	netCost := netCostVer2(rows, rowSize, netFactor)
+	seekCost := 1 * seekFactor / 20 // in one batch
+
+	p.planCost = netCost + seekCost
+	p.planCostInit = true
+	return p.planCost, nil
+}
+
+// scanCostVer2 returns scan-cost = rows * log2(max(1, row-size)) * scan-factor.
+func scanCostVer2(rows, rowSize, scanFactor float64) float64 {
+	// log2 from experiments
+	return rows * math.Log2(math.Max(1, rowSize)) * scanFactor
+}
+
+// netCostVer2 returns net-cost = rows * row-size * net-factor.
+func netCostVer2(rows, rowSize, netFactor float64) float64 {
+	return rows * rowSize * netFactor
+}
+
+// filterCostVer2 returns filter-cost = rows * len(filters) * cpu-factor.
+func filterCostVer2(rows float64, filters []expression.Expression, cpuFactor float64) float64 {
+	// TODO: consider types of filters
+	return rows * float64(len(filters)) * cpuFactor
+}
+
+// aggCostVer2 returns agg-cost = rows * len(agg-funcs) * cpu-factor.
+func aggCostVer2(rows float64, aggFuncs []*aggregation.AggFuncDesc, cpuFactor float64) float64 {
+	// TODO: consider types of agg-funcs
+	return rows * float64(len(aggFuncs)) * cpuFactor
+}
+
+// groupCostVer2 returns group-cost = rows * len(group-items) * cpu-factor.
+func groupCostVer2(rows float64, groupItems []expression.Expression, cpuFactor float64) float64 {
+	return rows * float64(len(groupItems)) * cpuFactor
+}
+
+// hashBuildCostVer2 returns the sum of the key-hashing, memory and table-building costs of the build side.
+func hashBuildCostVer2(buildRows, buildRowSize float64, keys []expression.Expression, cpuFactor, memFactor float64) float64 {
+	// TODO: 1) consider types of keys, 2) dedicated factor for build-probe hash table
+	hashKeyCost := buildRows * float64(len(keys)) * cpuFactor
+	hashMemCost := buildRows * buildRowSize * memFactor
+	hashBuildCost := buildRows * float64(len(keys)) * cpuFactor
+	return hashKeyCost + hashMemCost + hashBuildCost
+}
+
+// hashProbeCostVer2 returns the sum of the key-hashing and table-probing costs of the probe side.
+func hashProbeCostVer2(probeRows float64, keys []expression.Expression, cpuFactor float64) float64 {
+	// TODO: 1) consider types of keys, 2) dedicated factor for build-probe hash table
+	hashKeyCost := probeRows * float64(len(keys)) * cpuFactor
+	hashProbeCost := probeRows * float64(len(keys)) * cpuFactor
+	return hashKeyCost + hashProbeCost
+}
+
 func getTaskCPUFactor(p PhysicalPlan, taskType property.TaskType) float64 {
 	switch taskType {
 	case property.RootTaskType: // TiDB
@@ -372,3 +662,12 @@ func getTableInfo(p PhysicalPlan) *model.TableInfo {
 		return getTableInfo(x.Children()[0])
 	}
 }
+
+// cols2Exprs converts a slice of columns into a slice of expressions.
+func cols2Exprs(cols []*expression.Column) []expression.Expression {
+	exprs := make([]expression.Expression, 0, len(cols))
+	for _, c := range cols {
+		exprs = append(exprs, c)
+	}
+	return exprs
+}
diff --git a/planner/core/plan_cost_ver2_test.go b/planner/core/plan_cost_ver2_test.go
new file mode 100644
index 0000000000000..f006fe66978ef
--- /dev/null
+++ b/planner/core/plan_cost_ver2_test.go
@@ -0,0 +1,131 @@
+// Copyright 2022 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package core_test
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+	"testing"
+
+	"github.com/pingcap/tidb/testkit"
+	"github.com/stretchr/testify/require"
+)
+
+// testCostQueries checks that the estimated costs of the given queries are strictly increasing.
+func testCostQueries(t *testing.T, tk *testkit.TestKit, queries []string) {
+	t.Helper()
+	// costs of these queries are expected to be increasing
+	var lastCost float64
+	var lastPlan []string
+	var lastQuery string
+	for _, q := range queries {
+		rs := tk.MustQuery("explain format='verbose' " + q).Rows()
+		cost, err := strconv.ParseFloat(rs[0][2].(string), 64)
+		require.NoError(t, err)
+		var plan []string
+		for _, r := range rs {
+			plan = append(plan, fmt.Sprintf("%v", r))
+		}
+		require.True(t, cost > lastCost, fmt.Sprintf("cost of %v should be larger than\n%v\n%v\n%v\n",
+			q, lastQuery, strings.Join(plan, "\n"), strings.Join(lastPlan, "\n")))
+		lastCost = cost
+		lastPlan = plan
+		lastQuery = q
+	}
+}
+
+// TestCostModelVer2 checks that cost model ver2 ranks each series of queries by increasing cost.
+func TestCostModelVer2(t *testing.T) {
+	store := testkit.CreateMockStore(t)
+	tk := testkit.NewTestKit(t, store)
+	tk.MustExec("use test")
+	tk.MustExec(`create table t (a int primary key, b int, c int, key(b))`)
+	vals := make([]string, 0, 100)
+	for i := 0; i < 100; i++ {
+		vals = append(vals, fmt.Sprintf("(%v, %v, %v)", i, i, i))
+	}
+	tk.MustExec(fmt.Sprintf("insert into t values %v", strings.Join(vals, ", ")))
+	tk.MustExec("analyze table t")
+	for _, q := range []string{
+		"set @@tidb_distsql_scan_concurrency=1",
+		"set @@tidb_executor_concurrency=1",
+		"set @@tidb_opt_tiflash_concurrency_factor=1",
+		"set @@tidb_index_lookup_concurrency=1",
+		"set @@tidb_cost_model_version=2",
+	} {
+		tk.MustExec(q)
+	}
+
+	seriesCases := [][]string{
+		{ // table scan more rows
+			"select /*+ use_index(t, primary) */ a from t where a<1",
+			"select /*+ use_index(t, primary) */ a from t where a<10",
+			"select /*+ use_index(t, primary) */ a from t where a<100",
+		},
+		{ // index scan more rows
+			"select /*+ use_index(t, b) */ b from t where b<1",
+			"select /*+ use_index(t, b) */ b from t where b<10",
+			"select /*+ use_index(t, b) */ b from t where b<100",
+		},
+		{ // table scan more cols
+			"select /*+ use_index(t, primary) */ a from t",
+			"select /*+ use_index(t, primary) */ a, b from t",
+			"select /*+ use_index(t, primary) */ a, b, c from t",
+		},
+		{ // index lookup more rows
+			"select /*+ use_index(t, b) */ * from t where b<1",
+			"select /*+ use_index(t, b) */ * from t where b<10",
+			"select /*+ use_index(t, b) */ * from t where b<100",
+		},
+		{ // selection more filters
+			"select /*+ use_index(t, primary) */ a from t where mod(a, 20)<10",
+			"select /*+ use_index(t, primary) */ a from t where mod(a, 20)<10 and mod(a, 20)<11",
+			"select /*+ use_index(t, primary) */ a from t where mod(a, 20)<10 and mod(a, 20)<11 and mod(a, 20)<12",
+		},
+		{ // projection more exprs
+			"select /*+ use_index(t, primary) */ a+1 from t",
+			"select /*+ use_index(t, primary) */ a+1, a+2 from t",
+			"select /*+ use_index(t, primary) */ a+1, a+2, a+3 from t",
+		},
+		{ // hash agg more agg-funcs
+			"select /*+ use_index(t, primary), hash_agg() */ sum(a) from t group by b",
+			"select /*+ use_index(t, primary), hash_agg() */ sum(a), sum(a+2) from t group by b",
+			"select /*+ use_index(t, primary), hash_agg() */ sum(a), sum(a+2), sum(a+4) from t group by b",
+		},
+		{ // hash agg more group-items
+			"select /*+ use_index(t, primary), hash_agg() */ sum(a) from t group by b",
+			"select /*+ use_index(t, primary), hash_agg() */ sum(a) from t group by b, b+1",
+			"select /*+ use_index(t, primary), hash_agg() */ sum(a) from t group by b, b+1, b+2",
+		},
+		{ // stream agg more agg-funcs
+			"select /*+ use_index(t, primary), stream_agg() */ sum(a) from t group by b",
+			"select /*+ use_index(t, primary), stream_agg() */ sum(a), sum(a+2) from t group by b",
+			"select /*+ use_index(t, primary), stream_agg() */ sum(a), sum(a+2), sum(a+4) from t group by b",
+		},
+		{ // hash join uses the small table to build hash table
+			"select /*+ hash_join_build(t1) */ * from t t1, t t2 where t1.b=t2.b and t1.a<10",
+			"select /*+ hash_join_build(t2) */ * from t t1, t t2 where t1.b=t2.b and t1.a<10",
+		},
+		{ // hash join more join keys
+			"select /*+ hash_join_build(t1) */ * from t t1, t t2 where t1.b=t2.b",
+			"select /*+ hash_join_build(t1) */ * from t t1, t t2 where t1.a=t2.a and t1.b=t2.b",
+		},
+	}
+
+	for _, cases := range seriesCases {
+		testCostQueries(t, tk, cases)
+	}
+}