planner, statistics: use the correct column ID when recording stats loading status #52208

Merged · 13 commits · Apr 2, 2024
1 change: 1 addition & 0 deletions build/nogo_config.json
@@ -175,6 +175,7 @@
"fieldalignment": {
"exclude_files": {
"pkg/parser/parser.go": "parser/parser.go code",
"pkg/statistics/table.go": "disable this limitation that prevents us from splitting struct fields for clarity",
Member

I always want to do this, but I can never convince myself. 😃

Member Author

😂 I think the performance and memory-efficiency benefits of this rule are small, so it's totally OK to disable it when it causes problems or inconvenience.

"external/": "no need to vet third party code",
".*_generated\\.go$": "ignore generated code",
".*mock.go$": "ignore generated code",
8 changes: 4 additions & 4 deletions pkg/planner/cardinality/cross_estimation.go
@@ -139,7 +139,7 @@ func crossEstimateRowCount(sctx context.PlanContext,
if col == nil || len(path.AccessConds) > 0 {
return 0, false, corr
}
colID := col.UniqueID
colUniqueID := col.UniqueID
if corr < 0 {
desc = !desc
}
@@ -152,11 +152,11 @@ func crossEstimateRowCount(sctx context.PlanContext,
return 0, err == nil, corr
}
idxID := int64(-1)
idxIDs, idxExists := dsStatsInfo.HistColl.ColID2IdxIDs[colID]
Contributor

It is better to use the clearer name 'colUniqueID' instead of 'colID' to avoid it being used incorrectly.

idxIDs, idxExists := dsStatsInfo.HistColl.ColUniqueID2IdxIDs[colUniqueID]
if idxExists && len(idxIDs) > 0 {
idxID = idxIDs[0]
}
rangeCounts, ok := getColumnRangeCounts(sctx, colID, ranges, dsTableStats.HistColl, idxID)
rangeCounts, ok := getColumnRangeCounts(sctx, colUniqueID, ranges, dsTableStats.HistColl, idxID)
if !ok {
return 0, false, corr
}
@@ -168,7 +168,7 @@ func crossEstimateRowCount(sctx context.PlanContext,
if idxExists {
rangeCount, err = GetRowCountByIndexRanges(sctx, dsTableStats.HistColl, idxID, convertedRanges)
} else {
rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colID, convertedRanges)
rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colUniqueID, convertedRanges)
}
if err != nil {
return 0, false, corr
28 changes: 18 additions & 10 deletions pkg/planner/cardinality/row_count_column.go
@@ -33,23 +33,27 @@ func init() {
}

// GetRowCountByColumnRanges estimates the row count by a slice of Range.
func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colID int64, colRanges []*ranger.Range) (result float64, err error) {
func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colUniqueID int64, colRanges []*ranger.Range) (result float64, err error) {
var name string
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
debugtrace.EnterContextCommon(sctx)
debugTraceGetRowCountInput(sctx, colID, colRanges)
debugTraceGetRowCountInput(sctx, colUniqueID, colRanges)
defer func() {
debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result)
debugtrace.LeaveContextCommon(sctx)
}()
}
sc := sctx.GetSessionVars().StmtCtx
c, ok := coll.Columns[colID]
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID)
c, ok := coll.Columns[colUniqueID]
colInfoID := colUniqueID
if len(coll.UniqueID2colInfoID) > 0 {
colInfoID = coll.UniqueID2colInfoID[colUniqueID]
}
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID)
Contributor

Maybe we should distinguish colID and colUniqueID in EXPLAIN output, e.g. "colID: xxx" and "colUniqueID: xxx".

Member Author

We want the column name in the EXPLAIN result, which should be fetched using the column ID from the metadata instead of the UniqueID.

if c != nil && c.Info != nil {
name = c.Info.Name.O
}
if statistics.ColumnStatsIsInvalid(c, sctx, coll, colID) {
if statistics.ColumnStatsIsInvalid(c, sctx, coll, colUniqueID) {
result, err = getPseudoRowCountByColumnRanges(sc.TypeCtx(), float64(coll.RealtimeCount), colRanges, 0)
if err == nil && sc.EnableOptimizerCETrace && ok {
ceTraceRange(sctx, coll.PhysicalID, []string{c.Info.Name.O}, colRanges, "Column Stats-Pseudo", uint64(result))
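
Illustrating the reply above ("We want the column name in the EXPLAIN result, which should be fetched using the column ID from the metadata instead of the UniqueID"): the optimizer-internal unique ID is assigned per query, while the column info ID is the stable schema ID that metadata lookups expect. A minimal sketch with hypothetical types (not TiDB's real structs) of the translation the new code performs before recording stats-loading status:

package main

import "fmt"

// histColl stands in for the statistics collection; the real TiDB type is richer.
type histColl struct {
	// uniqueID2ColInfoID maps optimizer-internal unique IDs to schema column IDs.
	uniqueID2ColInfoID map[int64]int64
}

// columnName resolves the name to show in EXPLAIN: translate the unique ID to
// the schema column ID first, then look the name up in the (mock) metadata.
func columnName(coll histColl, colUniqueID int64, meta map[int64]string) string {
	colInfoID := colUniqueID
	if len(coll.uniqueID2ColInfoID) > 0 {
		colInfoID = coll.uniqueID2ColInfoID[colUniqueID]
	}
	return meta[colInfoID]
}

func main() {
	coll := histColl{uniqueID2ColInfoID: map[int64]int64{101: 1, 102: 2}}
	meta := map[int64]string{1: "a", 2: "b"}
	fmt.Println(columnName(coll, 102, meta)) // prints "b", not the unique ID 102
}
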
@@ -71,23 +75,27 @@ func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistCo
}

// GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange.
func GetRowCountByIntColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colID int64, intRanges []*ranger.Range) (result float64, err error) {
func GetRowCountByIntColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colUniqueID int64, intRanges []*ranger.Range) (result float64, err error) {
var name string
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
debugtrace.EnterContextCommon(sctx)
debugTraceGetRowCountInput(sctx, colID, intRanges)
debugTraceGetRowCountInput(sctx, colUniqueID, intRanges)
defer func() {
debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result)
debugtrace.LeaveContextCommon(sctx)
}()
}
sc := sctx.GetSessionVars().StmtCtx
c, ok := coll.Columns[colID]
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID)
c, ok := coll.Columns[colUniqueID]
colInfoID := colUniqueID
if len(coll.UniqueID2colInfoID) > 0 {
colInfoID = coll.UniqueID2colInfoID[colUniqueID]
}
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID)
if c != nil && c.Info != nil {
name = c.Info.Name.O
}
if statistics.ColumnStatsIsInvalid(c, sctx, coll, colID) {
if statistics.ColumnStatsIsInvalid(c, sctx, coll, colUniqueID) {
if len(intRanges) == 0 {
return 0, nil
}
18 changes: 9 additions & 9 deletions pkg/planner/cardinality/row_count_index.go
@@ -170,19 +170,19 @@ func getIndexRowCountForStatsV1(sctx context.PlanContext, coll *statistics.HistC
}
var count float64
var err error
colIDs := coll.Idx2ColumnIDs[idxID]
var colID int64
if rangePosition >= len(colIDs) {
colID = -1
colUniqueIDs := coll.Idx2ColUniqueIDs[idxID]
var colUniqueID int64
if rangePosition >= len(colUniqueIDs) {
colUniqueID = -1
} else {
colID = colIDs[rangePosition]
colUniqueID = colUniqueIDs[rangePosition]
}
// prefer index stats over column stats
if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && len(idxIDs) > 0 {
if idxIDs, ok := coll.ColUniqueID2IdxIDs[colUniqueID]; ok && len(idxIDs) > 0 {
idxID := idxIDs[0]
count, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang})
} else {
count, err = GetRowCountByColumnRanges(sctx, coll, colID, []*ranger.Range{&rang})
count, err = GetRowCountByColumnRanges(sctx, coll, colUniqueID, []*ranger.Range{&rang})
}
if err != nil {
return 0, errors.Trace(err)
@@ -422,7 +422,7 @@ func expBackoffEstimation(sctx context.PlanContext, idx *statistics.Index, coll
Collators: make([]collate.Collator, 1),
},
}
colsIDs := coll.Idx2ColumnIDs[idx.Histogram.ID]
colsIDs := coll.Idx2ColUniqueIDs[idx.Histogram.ID]
singleColumnEstResults := make([]float64, 0, len(indexRange.LowVal))
// The following code uses exponential backoff to reduce the impact of the independence assumption. It works like:
// 1. Calc the selectivity of each column.
Expand All @@ -449,7 +449,7 @@ func expBackoffEstimation(sctx context.PlanContext, idx *statistics.Index, coll
count, err = GetRowCountByColumnRanges(sctx, coll, colID, tmpRan)
selectivity = count / float64(coll.RealtimeCount)
}
if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 {
if idxIDs, ok := coll.ColUniqueID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 {
// Note the `len(indexRange.LowVal) > 1` condition here, it means we only recursively call
// `GetRowCountByIndexRanges()` when the input `indexRange` is a multi-column range. This
// check avoids infinite recursion.
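
The truncated comment in the hunk above describes how expBackoffEstimation combines per-column selectivities with exponential backoff. As a rough illustration only (my paraphrase, assuming the selectivities are sorted ascending and each successive factor is damped; not the exact TiDB implementation), a self-contained sketch of that combination step:

package main

import (
	"fmt"
	"math"
	"sort"
)

// expBackoffCombine softens the independence assumption: the most filtering
// column counts fully, and later columns contribute with exponents 1, 1/2, 1/4, 1/8.
func expBackoffCombine(sels []float64) float64 {
	sort.Float64s(sels) // smallest (most selective) first; mutates the input slice
	result := 1.0
	exponent := 1.0
	for i, s := range sels {
		if i >= 4 {
			break // only the four most filtering columns contribute
		}
		result *= math.Pow(s, exponent)
		exponent /= 2
	}
	return result
}

func main() {
	// Full independence would give 0.1*0.2*0.5 = 0.01; backoff is less aggressive.
	fmt.Println(expBackoffCombine([]float64{0.5, 0.1, 0.2})) // ≈ 0.0376
}
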
8 changes: 4 additions & 4 deletions pkg/planner/cardinality/selectivity.go
@@ -182,7 +182,7 @@ func Selectivity(
})
continue
}
idxCols := findPrefixOfIndexByCol(ctx, extractedCols, coll.Idx2ColumnIDs[id], id2Paths[idxStats.ID])
idxCols := findPrefixOfIndexByCol(ctx, extractedCols, coll.Idx2ColUniqueIDs[id], id2Paths[idxStats.ID])
if len(idxCols) > 0 {
lengths := make([]int, 0, len(idxCols))
for i := 0; i < len(idxCols) && i < len(idxStats.Info.Columns); i++ {
@@ -919,7 +919,7 @@ func findAvailableStatsForCol(sctx context.PlanContext, coll *statistics.HistCol
return false, uniqueID
}
// try to find available stats in single column index stats (except for prefix index)
for idxStatsIdx, cols := range coll.Idx2ColumnIDs {
for idxStatsIdx, cols := range coll.Idx2ColUniqueIDs {
if len(cols) == 1 && cols[0] == uniqueID {
idxStats := coll.Indices[idxStatsIdx]
if !statistics.IndexStatsIsInvalid(sctx, idxStats, coll, idxStatsIdx) &&
@@ -968,7 +968,7 @@ func getEqualCondSelectivity(sctx context.PlanContext, coll *statistics.HistColl
return outOfRangeEQSelectivity(sctx, idx.NDV, realtimeCnt, int64(idx.TotalRowCount())), nil
}
// The equal condition only uses prefix columns of the index.
colIDs := coll.Idx2ColumnIDs[idx.ID]
colIDs := coll.Idx2ColUniqueIDs[idx.ID]
var ndv int64
for i, colID := range colIDs {
if i >= usedColsLen {
@@ -1050,7 +1050,7 @@ func crossValidationSelectivity(
}()
}
minRowCount = math.MaxFloat64
cols := coll.Idx2ColumnIDs[idx.ID]
cols := coll.Idx2ColUniqueIDs[idx.ID]
crossValidationSelectivity = 1.0
totalRowCount := idx.TotalRowCount()
for i, colID := range cols {
4 changes: 2 additions & 2 deletions pkg/planner/cardinality/selectivity_test.go
@@ -893,8 +893,8 @@ func generateMapsForMockStatsTbl(statsTbl *statistics.Table) {
for _, idxIDs := range colID2IdxIDs {
slices.Sort(idxIDs)
}
statsTbl.Idx2ColumnIDs = idx2Columns
statsTbl.ColID2IdxIDs = colID2IdxIDs
statsTbl.Idx2ColUniqueIDs = idx2Columns
statsTbl.ColUniqueID2IdxIDs = colID2IdxIDs
}

func TestIssue39593(t *testing.T) {
2 changes: 1 addition & 1 deletion pkg/planner/core/casetest/planstats/BUILD.bazel
@@ -9,7 +9,7 @@ go_test(
],
data = glob(["testdata/**"]),
flaky = True,
shard_count = 4,
shard_count = 5,
deps = [
"//pkg/config",
"//pkg/domain",
45 changes: 45 additions & 0 deletions pkg/planner/core/casetest/planstats/plan_stats_test.go
@@ -405,3 +405,48 @@ func TestCollectDependingVirtualCols(t *testing.T) {
require.Equal(t, output[i].OutputColNames, cols)
}
}

func TestPartialStatsInExplain(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)
tk := testkit.NewTestKit(t, store)
tk.MustExec("use test")
tk.MustExec("create table t(a int, b int, c int, primary key(a), key idx(b))")
tk.MustExec("insert into t values (1,1,1),(2,2,2),(3,3,3)")
tk.MustExec("create table t2(a int, primary key(a))")
tk.MustExec("insert into t2 values (1),(2),(3)")
tk.MustExec(
"create table tp(a int, b int, c int, index ic(c)) partition by range(a)" +
"(partition p0 values less than (10)," +
"partition p1 values less than (20)," +
"partition p2 values less than maxvalue)",
)
tk.MustExec("insert into tp values (1,1,1),(2,2,2),(13,13,13),(14,14,14),(25,25,25),(36,36,36)")

oriLease := dom.StatsHandle().Lease()
dom.StatsHandle().SetLease(1)
defer func() {
dom.StatsHandle().SetLease(oriLease)
}()
tk.MustExec("analyze table t")
tk.MustExec("analyze table t2")
tk.MustExec("analyze table tp")
tk.RequireNoError(dom.StatsHandle().Update(dom.InfoSchema()))
tk.MustQuery("explain select * from tp where a = 1")
tk.MustExec("set @@tidb_stats_load_sync_wait = 0")
var (
input []string
output []struct {
Query string
Result []string
}
)
testData := GetPlanStatsData()
testData.LoadTestCases(t, &input, &output)
for i, sql := range input {
testdata.OnRecord(func() {
output[i].Query = input[i]
output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(sql).Rows())
})
tk.MustQuery(sql).Check(testkit.Rows(output[i].Result...))
}
}
@@ -62,5 +62,13 @@
]
}
]
},
{
"name": "TestPartialStatsInExplain",
"cases": [
"explain format = brief select * from tp where b = 10",
"explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c",
"explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c"
]
}
]
@@ -101,5 +101,47 @@
]
}
]
},
{
"Name": "TestPartialStatsInExplain",
"Cases": [
{
"Query": "explain format = brief select * from tp where b = 10",
"Result": [
"TableReader 0.01 root partition:all data:Selection",
"└─Selection 0.01 cop[tikv] eq(test.tp.b, 10)",
" └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[b:allEvicted]"
]
},
{
"Query": "explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c",
"Result": [
"Projection 0.00 root test.t.a, test.t.b, test.t.c, test.tp.a, test.tp.b, test.tp.c",
"└─HashJoin 0.00 root inner join, equal:[eq(test.tp.c, test.t.b)]",
" ├─TableReader(Build) 0.00 root partition:p1 data:Selection",
" │ └─Selection 0.00 cop[tikv] eq(test.tp.a, 10), not(isnull(test.tp.c))",
" │ └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]",
" └─TableReader(Probe) 3.00 root data:Selection",
" └─Selection 3.00 cop[tikv] not(isnull(test.t.b))",
" └─TableFullScan 3.00 cop[tikv] table:t keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]"
]
},
{
"Query": "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c",
"Result": [
"HashJoin 0.33 root inner join, equal:[eq(test.tp.c, test.t2.a)]",
"├─IndexJoin(Build) 0.33 root inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)",
"│ ├─TableReader(Build) 0.33 root data:Selection",
"│ │ └─Selection 0.33 cop[tikv] gt(test.t.b, 10), not(isnull(test.t.b))",
"│ │ └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]",
"│ └─IndexLookUp(Probe) 0.33 root partition:p0 ",
"│ ├─Selection(Build) 0.33 cop[tikv] gt(test.tp.c, 10), not(isnull(test.tp.c))",
"│ │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]",
"│ └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]",
"└─TableReader(Probe) 1.00 root data:TableRangeScan",
" └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]"
]
}
]
}
]
2 changes: 1 addition & 1 deletion pkg/planner/core/exhaust_physical_plans.go
@@ -1166,7 +1166,7 @@ func getColsNDVLowerBoundFromHistColl(colUIDs []int64, histColl *statistics.Hist
// 2. Try to get NDV from index stats.
// Note that we don't need to specially handle prefix index here, because the NDV of a prefix index is
// equal or less than the corresponding normal index, and that's safe here since we want a lower bound.
for idxID, idxCols := range histColl.Idx2ColumnIDs {
for idxID, idxCols := range histColl.Idx2ColUniqueIDs {
if len(idxCols) != len(colUIDs) {
continue
}
4 changes: 2 additions & 2 deletions pkg/planner/core/logical_plans.go
@@ -1798,8 +1798,8 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex
path.IdxCols = append(path.IdxCols, handleCol)
path.IdxColLens = append(path.IdxColLens, types.UnspecifiedLength)
// Also updates the map that maps the index id to its prefix column ids.
if len(ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID]) == len(path.Index.Columns) {
ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID] = append(ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID], handleCol.UniqueID)
if len(ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID]) == len(path.Index.Columns) {
ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID] = append(ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID], handleCol.UniqueID)
}
}
}
6 changes: 3 additions & 3 deletions pkg/planner/core/stats.go
@@ -176,8 +176,8 @@ func (ds *DataSource) getGroupNDVs(colGroups [][]*expression.Column) []property.
tbl := ds.tableStats.HistColl
ndvs := make([]property.GroupNDV, 0, len(colGroups))
for idxID, idx := range tbl.Indices {
colsLen := len(tbl.Idx2ColumnIDs[idxID])
// tbl.Idx2ColumnIDs may only contain the prefix of index columns.
colsLen := len(tbl.Idx2ColUniqueIDs[idxID])
// tbl.Idx2ColUniqueIDs may only contain the prefix of index columns.
// But it may exceed the number of index columns, since the index would contain the handle column if it's not a unique index.
// We append the handle at fillIndexPath.
if colsLen < len(idx.Info.Columns) {
@@ -186,7 +186,7 @@ func (ds *DataSource) getGroupNDVs(colGroups [][]*expression.Column) []property.
colsLen--
}
idxCols := make([]int64, colsLen)
copy(idxCols, tbl.Idx2ColumnIDs[idxID])
copy(idxCols, tbl.Idx2ColUniqueIDs[idxID])
slices.Sort(idxCols)
for _, g := range colGroups {
// We only want those exact matches.