pingcap · ti-chi-bot · Apr 2, 2024 · Mar 28, 2024 · Mar 28, 2024 · Mar 28, 2024
diff --git a/build/nogo_config.json b/build/nogo_config.json
@@ -175,6 +175,7 @@
   "fieldalignment": {
     "exclude_files": {
       "pkg/parser/parser.go": "parser/parser.go code",
+      "pkg/statistics/table.go": "disable this limitation that prevents us from splitting struct fields for clarity",
       "external/": "no need to vet third party code",
       ".*_generated\\.go$": "ignore generated code",
       ".*mock.go$": "ignore generated code",

diff --git a/pkg/planner/cardinality/cross_estimation.go b/pkg/planner/cardinality/cross_estimation.go
@@ -139,7 +139,7 @@ func crossEstimateRowCount(sctx context.PlanContext,
 	if col == nil || len(path.AccessConds) > 0 {
 		return 0, false, corr
 	}
-	colID := col.UniqueID
+	colUniqueID := col.UniqueID
 	if corr < 0 {
 		desc = !desc
 	}
@@ -152,11 +152,11 @@ func crossEstimateRowCount(sctx context.PlanContext,
 		return 0, err == nil, corr
 	}
 	idxID := int64(-1)
-	idxIDs, idxExists := dsStatsInfo.HistColl.ColID2IdxIDs[colID]
+	idxIDs, idxExists := dsStatsInfo.HistColl.ColUniqueID2IdxIDs[colUniqueID]
 	if idxExists && len(idxIDs) > 0 {
 		idxID = idxIDs[0]
 	}
-	rangeCounts, ok := getColumnRangeCounts(sctx, colID, ranges, dsTableStats.HistColl, idxID)
+	rangeCounts, ok := getColumnRangeCounts(sctx, colUniqueID, ranges, dsTableStats.HistColl, idxID)
 	if !ok {
 		return 0, false, corr
 	}
@@ -168,7 +168,7 @@ func crossEstimateRowCount(sctx context.PlanContext,
 	if idxExists {
 		rangeCount, err = GetRowCountByIndexRanges(sctx, dsTableStats.HistColl, idxID, convertedRanges)
 	} else {
-		rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colID, convertedRanges)
+		rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colUniqueID, convertedRanges)
 	}
 	if err != nil {
 		return 0, false, corr

diff --git a/pkg/planner/cardinality/row_count_column.go b/pkg/planner/cardinality/row_count_column.go
@@ -33,23 +33,27 @@ func init() {
 }
 
 // GetRowCountByColumnRanges estimates the row count by a slice of Range.
-func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colID int64, colRanges []*ranger.Range) (result float64, err error) {
+func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colUniqueID int64, colRanges []*ranger.Range) (result float64, err error) {
 	var name string
 	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
 		debugtrace.EnterContextCommon(sctx)
-		debugTraceGetRowCountInput(sctx, colID, colRanges)
+		debugTraceGetRowCountInput(sctx, colUniqueID, colRanges)
 		defer func() {
 			debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result)
 			debugtrace.LeaveContextCommon(sctx)
 		}()
 	}
 	sc := sctx.GetSessionVars().StmtCtx
-	c, ok := coll.Columns[colID]
-	recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID)
+	c, ok := coll.Columns[colUniqueID]
+	colInfoID := colUniqueID
+	if len(coll.UniqueID2colInfoID) > 0 {
+		colInfoID = coll.UniqueID2colInfoID[colUniqueID]
+	}
+	recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID)
 	if c != nil && c.Info != nil {
 		name = c.Info.Name.O
 	}
-	if statistics.ColumnStatsIsInvalid(c, sctx, coll, colID) {
+	if statistics.ColumnStatsIsInvalid(c, sctx, coll, colUniqueID) {
 		result, err = getPseudoRowCountByColumnRanges(sc.TypeCtx(), float64(coll.RealtimeCount), colRanges, 0)
 		if err == nil && sc.EnableOptimizerCETrace && ok {
 			ceTraceRange(sctx, coll.PhysicalID, []string{c.Info.Name.O}, colRanges, "Column Stats-Pseudo", uint64(result))
@@ -71,23 +75,27 @@ func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistCo
 }
 
 // GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange.
-func GetRowCountByIntColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colID int64, intRanges []*ranger.Range) (result float64, err error) {
+func GetRowCountByIntColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colUniqueID int64, intRanges []*ranger.Range) (result float64, err error) {
 	var name string
 	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
 		debugtrace.EnterContextCommon(sctx)
-		debugTraceGetRowCountInput(sctx, colID, intRanges)
+		debugTraceGetRowCountInput(sctx, colUniqueID, intRanges)
 		defer func() {
 			debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result)
 			debugtrace.LeaveContextCommon(sctx)
 		}()
 	}
 	sc := sctx.GetSessionVars().StmtCtx
-	c, ok := coll.Columns[colID]
-	recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID)
+	c, ok := coll.Columns[colUniqueID]
+	colInfoID := colUniqueID
+	if len(coll.UniqueID2colInfoID) > 0 {
+		colInfoID = coll.UniqueID2colInfoID[colUniqueID]
+	}
+	recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID)
 	if c != nil && c.Info != nil {
 		name = c.Info.Name.O
 	}
-	if statistics.ColumnStatsIsInvalid(c, sctx, coll, colID) {
+	if statistics.ColumnStatsIsInvalid(c, sctx, coll, colUniqueID) {
 		if len(intRanges) == 0 {
 			return 0, nil
 		}

diff --git a/pkg/planner/cardinality/row_count_index.go b/pkg/planner/cardinality/row_count_index.go
@@ -170,19 +170,19 @@ func getIndexRowCountForStatsV1(sctx context.PlanContext, coll *statistics.HistC
 			}
 			var count float64
 			var err error
-			colIDs := coll.Idx2ColumnIDs[idxID]
-			var colID int64
-			if rangePosition >= len(colIDs) {
-				colID = -1
+			colUniqueIDs := coll.Idx2ColUniqueIDs[idxID]
+			var colUniqueID int64
+			if rangePosition >= len(colUniqueIDs) {
+				colUniqueID = -1
 			} else {
-				colID = colIDs[rangePosition]
+				colUniqueID = colUniqueIDs[rangePosition]
 			}
 			// prefer index stats over column stats
-			if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && len(idxIDs) > 0 {
+			if idxIDs, ok := coll.ColUniqueID2IdxIDs[colUniqueID]; ok && len(idxIDs) > 0 {
 				idxID := idxIDs[0]
 				count, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang})
 			} else {
-				count, err = GetRowCountByColumnRanges(sctx, coll, colID, []*ranger.Range{&rang})
+				count, err = GetRowCountByColumnRanges(sctx, coll, colUniqueID, []*ranger.Range{&rang})
 			}
 			if err != nil {
 				return 0, errors.Trace(err)
@@ -422,7 +422,7 @@ func expBackoffEstimation(sctx context.PlanContext, idx *statistics.Index, coll
 			Collators: make([]collate.Collator, 1),
 		},
 	}
-	colsIDs := coll.Idx2ColumnIDs[idx.Histogram.ID]
+	colsIDs := coll.Idx2ColUniqueIDs[idx.Histogram.ID]
 	singleColumnEstResults := make([]float64, 0, len(indexRange.LowVal))
 	// The following codes uses Exponential Backoff to reduce the impact of independent assumption. It works like:
 	//   1. Calc the selectivity of each column.
@@ -449,7 +449,7 @@ func expBackoffEstimation(sctx context.PlanContext, idx *statistics.Index, coll
 			count, err = GetRowCountByColumnRanges(sctx, coll, colID, tmpRan)
 			selectivity = count / float64(coll.RealtimeCount)
 		}
-		if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 {
+		if idxIDs, ok := coll.ColUniqueID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 {
 			// Note the `len(indexRange.LowVal) > 1` condition here, it means we only recursively call
 			// `GetRowCountByIndexRanges()` when the input `indexRange` is a multi-column range. This
 			// check avoids infinite recursion.

diff --git a/pkg/planner/cardinality/selectivity.go b/pkg/planner/cardinality/selectivity.go
@@ -182,7 +182,7 @@ func Selectivity(
 			})
 			continue
 		}
-		idxCols := findPrefixOfIndexByCol(ctx, extractedCols, coll.Idx2ColumnIDs[id], id2Paths[idxStats.ID])
+		idxCols := findPrefixOfIndexByCol(ctx, extractedCols, coll.Idx2ColUniqueIDs[id], id2Paths[idxStats.ID])
 		if len(idxCols) > 0 {
 			lengths := make([]int, 0, len(idxCols))
 			for i := 0; i < len(idxCols) && i < len(idxStats.Info.Columns); i++ {
@@ -919,7 +919,7 @@ func findAvailableStatsForCol(sctx context.PlanContext, coll *statistics.HistCol
 		return false, uniqueID
 	}
 	// try to find available stats in single column index stats (except for prefix index)
-	for idxStatsIdx, cols := range coll.Idx2ColumnIDs {
+	for idxStatsIdx, cols := range coll.Idx2ColUniqueIDs {
 		if len(cols) == 1 && cols[0] == uniqueID {
 			idxStats := coll.Indices[idxStatsIdx]
 			if !statistics.IndexStatsIsInvalid(sctx, idxStats, coll, idxStatsIdx) &&
@@ -968,7 +968,7 @@ func getEqualCondSelectivity(sctx context.PlanContext, coll *statistics.HistColl
 			return outOfRangeEQSelectivity(sctx, idx.NDV, realtimeCnt, int64(idx.TotalRowCount())), nil
 		}
 		// The equal condition only uses prefix columns of the index.
-		colIDs := coll.Idx2ColumnIDs[idx.ID]
+		colIDs := coll.Idx2ColUniqueIDs[idx.ID]
 		var ndv int64
 		for i, colID := range colIDs {
 			if i >= usedColsLen {
@@ -1050,7 +1050,7 @@ func crossValidationSelectivity(
 		}()
 	}
 	minRowCount = math.MaxFloat64
-	cols := coll.Idx2ColumnIDs[idx.ID]
+	cols := coll.Idx2ColUniqueIDs[idx.ID]
 	crossValidationSelectivity = 1.0
 	totalRowCount := idx.TotalRowCount()
 	for i, colID := range cols {

diff --git a/pkg/planner/cardinality/selectivity_test.go b/pkg/planner/cardinality/selectivity_test.go
@@ -893,8 +893,8 @@ func generateMapsForMockStatsTbl(statsTbl *statistics.Table) {
 	for _, idxIDs := range colID2IdxIDs {
 		slices.Sort(idxIDs)
 	}
-	statsTbl.Idx2ColumnIDs = idx2Columns
-	statsTbl.ColID2IdxIDs = colID2IdxIDs
+	statsTbl.Idx2ColUniqueIDs = idx2Columns
+	statsTbl.ColUniqueID2IdxIDs = colID2IdxIDs
 }
 
 func TestIssue39593(t *testing.T) {

diff --git a/pkg/planner/core/casetest/planstats/BUILD.bazel b/pkg/planner/core/casetest/planstats/BUILD.bazel
@@ -9,7 +9,7 @@ go_test(
     ],
     data = glob(["testdata/**"]),
     flaky = True,
-    shard_count = 4,
+    shard_count = 5,
     deps = [
         "//pkg/config",
         "//pkg/domain",

diff --git a/pkg/planner/core/casetest/planstats/plan_stats_test.go b/pkg/planner/core/casetest/planstats/plan_stats_test.go
@@ -405,3 +405,48 @@ func TestCollectDependingVirtualCols(t *testing.T) {
 		require.Equal(t, output[i].OutputColNames, cols)
 	}
 }
+
+func TestPartialStatsInExplain(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+	tk := testkit.NewTestKit(t, store)
+	tk.MustExec("use test")
+	tk.MustExec("create table t(a int, b int, c int, primary key(a), key idx(b))")
+	tk.MustExec("insert into t values (1,1,1),(2,2,2),(3,3,3)")
+	tk.MustExec("create table t2(a int, primary key(a))")
+	tk.MustExec("insert into t2 values (1),(2),(3)")
+	tk.MustExec(
+		"create table tp(a int, b int, c int, index ic(c)) partition by range(a)" +
+			"(partition p0 values less than (10)," +
+			"partition p1 values less than (20)," +
+			"partition p2 values less than maxvalue)",
+	)
+	tk.MustExec("insert into tp values (1,1,1),(2,2,2),(13,13,13),(14,14,14),(25,25,25),(36,36,36)")
+
+	oriLease := dom.StatsHandle().Lease()
+	dom.StatsHandle().SetLease(1)
+	defer func() {
+		dom.StatsHandle().SetLease(oriLease)
+	}()
+	tk.MustExec("analyze table t")
+	tk.MustExec("analyze table t2")
+	tk.MustExec("analyze table tp")
+	tk.RequireNoError(dom.StatsHandle().Update(dom.InfoSchema()))
+	tk.MustQuery("explain select * from tp where a = 1")
+	tk.MustExec("set @@tidb_stats_load_sync_wait = 0")
+	var (
+		input  []string
+		output []struct {
+			Query  string
+			Result []string
+		}
+	)
+	testData := GetPlanStatsData()
+	testData.LoadTestCases(t, &input, &output)
+	for i, sql := range input {
+		testdata.OnRecord(func() {
+			output[i].Query = input[i]
+			output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(sql).Rows())
+		})
+		tk.MustQuery(sql).Check(testkit.Rows(output[i].Result...))
+	}
+}
diff --git a/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_in.json b/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_in.json
@@ -62,5 +62,13 @@
         ]
       }
     ]
+  },
+  {
+    "name": "TestPartialStatsInExplain",
+    "cases": [
+      "explain format = brief select * from tp where b = 10",
+      "explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c",
+      "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c"
+    ]
   }
 ]
diff --git a/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json b/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json
@@ -101,5 +101,47 @@
         ]
       }
     ]
+  },
+  {
+    "Name": "TestPartialStatsInExplain",
+    "Cases": [
+      {
+        "Query": "explain format = brief select * from tp where b = 10",
+        "Result": [
+          "TableReader 0.01 root partition:all data:Selection",
+          "└─Selection 0.01 cop[tikv]  eq(test.tp.b, 10)",
+          "  └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[b:allEvicted]"
+        ]
+      },
+      {
+        "Query": "explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c",
+        "Result": [
+          "Projection 0.00 root  test.t.a, test.t.b, test.t.c, test.tp.a, test.tp.b, test.tp.c",
+          "└─HashJoin 0.00 root  inner join, equal:[eq(test.tp.c, test.t.b)]",
+          "  ├─TableReader(Build) 0.00 root partition:p1 data:Selection",
+          "  │ └─Selection 0.00 cop[tikv]  eq(test.tp.a, 10), not(isnull(test.tp.c))",
+          "  │   └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]",
+          "  └─TableReader(Probe) 3.00 root  data:Selection",
+          "    └─Selection 3.00 cop[tikv]  not(isnull(test.t.b))",
+          "      └─TableFullScan 3.00 cop[tikv] table:t keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]"
+        ]
+      },
+      {
+        "Query": "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c",
+        "Result": [
+          "HashJoin 0.33 root  inner join, equal:[eq(test.tp.c, test.t2.a)]",
+          "├─IndexJoin(Build) 0.33 root  inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)",
+          "│ ├─TableReader(Build) 0.33 root  data:Selection",
+          "│ │ └─Selection 0.33 cop[tikv]  gt(test.t.b, 10), not(isnull(test.t.b))",
+          "│ │   └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]",
+          "│ └─IndexLookUp(Probe) 0.33 root partition:p0 ",
+          "│   ├─Selection(Build) 0.33 cop[tikv]  gt(test.tp.c, 10), not(isnull(test.tp.c))",
+          "│   │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]",
+          "│   └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]",
+          "└─TableReader(Probe) 1.00 root  data:TableRangeScan",
+          "  └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]"
+        ]
+      }
+    ]
   }
 ]
diff --git a/pkg/planner/core/exhaust_physical_plans.go b/pkg/planner/core/exhaust_physical_plans.go
@@ -1166,7 +1166,7 @@ func getColsNDVLowerBoundFromHistColl(colUIDs []int64, histColl *statistics.Hist
 	// 2. Try to get NDV from index stats.
 	// Note that we don't need to specially handle prefix index here, because the NDV of a prefix index is
 	// equal or less than the corresponding normal index, and that's safe here since we want a lower bound.
-	for idxID, idxCols := range histColl.Idx2ColumnIDs {
+	for idxID, idxCols := range histColl.Idx2ColUniqueIDs {
 		if len(idxCols) != len(colUIDs) {
 			continue
 		}

diff --git a/pkg/planner/core/logical_plans.go b/pkg/planner/core/logical_plans.go
@@ -1798,8 +1798,8 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex
 				path.IdxCols = append(path.IdxCols, handleCol)
 				path.IdxColLens = append(path.IdxColLens, types.UnspecifiedLength)
 				// Also updates the map that maps the index id to its prefix column ids.
-				if len(ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID]) == len(path.Index.Columns) {
-					ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID] = append(ds.tableStats.HistColl.Idx2ColumnIDs[path.Index.ID], handleCol.UniqueID)
+				if len(ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID]) == len(path.Index.Columns) {
+					ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID] = append(ds.tableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID], handleCol.UniqueID)
 				}
 			}
 		}

diff --git a/pkg/planner/core/stats.go b/pkg/planner/core/stats.go
@@ -176,8 +176,8 @@ func (ds *DataSource) getGroupNDVs(colGroups [][]*expression.Column) []property.
 	tbl := ds.tableStats.HistColl
 	ndvs := make([]property.GroupNDV, 0, len(colGroups))
 	for idxID, idx := range tbl.Indices {
-		colsLen := len(tbl.Idx2ColumnIDs[idxID])
-		// tbl.Idx2ColumnIDs may only contain the prefix of index columns.
+		colsLen := len(tbl.Idx2ColUniqueIDs[idxID])
+		// tbl.Idx2ColUniqueIDs may only contain the prefix of index columns.
 		// But it may exceeds the total index since the index would contain the handle column if it's not a unique index.
 		// We append the handle at fillIndexPath.
 		if colsLen < len(idx.Info.Columns) {
@@ -186,7 +186,7 @@ func (ds *DataSource) getGroupNDVs(colGroups [][]*expression.Column) []property.
 			colsLen--
 		}
 		idxCols := make([]int64, colsLen)
-		copy(idxCols, tbl.Idx2ColumnIDs[idxID])
+		copy(idxCols, tbl.Idx2ColUniqueIDs[idxID])
 		slices.Sort(idxCols)
 		for _, g := range colGroups {
 			// We only want those exact matches.