From ebaa6f19a4f6c28f2482f0c54b41b0d8c68ac941 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Fri, 14 May 2021 11:22:28 +0800 Subject: [PATCH 1/3] bugfi --- executor/builder.go | 33 ++++++++++++++--- executor/index_lookup_join.go | 10 +++--- executor/partition_table_test.go | 61 +++++++++++++++++++++++++++++++- 3 files changed, 95 insertions(+), 9 deletions(-) diff --git a/executor/builder.go b/executor/builder.go index 3324e52f894ff..73471566b3655 100644 --- a/executor/builder.go +++ b/executor/builder.go @@ -16,6 +16,7 @@ package executor import ( "bytes" "context" + "github.com/pingcap/tidb/util/dbterror" "sort" "strings" "sync" @@ -2476,11 +2477,14 @@ func (b *executorBuilder) buildIndexLookUpJoin(v *plannercore.PhysicalIndexJoin) outerKeyCols[i] = v.OuterJoinKeys[i].Index } innerKeyCols := make([]int, len(v.InnerJoinKeys)) + innerKeyColIDs := make([]int64, len(v.InnerJoinKeys)) for i := 0; i < len(v.InnerJoinKeys); i++ { innerKeyCols[i] = v.InnerJoinKeys[i].Index + innerKeyColIDs[i] = v.InnerJoinKeys[i].ID } e.outerCtx.keyCols = outerKeyCols e.innerCtx.keyCols = innerKeyCols + e.innerCtx.keyColIDs = innerKeyColIDs outerHashCols, innerHashCols := make([]int, len(v.OuterHashKeys)), make([]int, len(v.InnerHashKeys)) for i := 0; i < len(v.OuterHashKeys); i++ { @@ -2785,7 +2789,6 @@ func keyColumnsIncludeAllPartitionColumns(keyColumns []int, pe *tables.Partition func prunePartitionForInnerExecutor(ctx sessionctx.Context, tbl table.Table, schema *expression.Schema, partitionInfo *plannercore.PartitionInfo, lookUpContent []*indexJoinLookUpContent) (usedPartition []table.PhysicalTable, canPrune bool, contentPos []int64, err error) { partitionTbl := tbl.(table.PartitionedTable) - locateKey := make([]types.Datum, schema.Len()) // TODO: condition based pruning can be do in advance. condPruneResult, err := partitionPruning(ctx, partitionTbl, partitionInfo.PruningConds, partitionInfo.PartitionNames, partitionInfo.Columns, partitionInfo.ColumnNames) if err != nil { @@ -2800,22 +2803,44 @@ func prunePartitionForInnerExecutor(ctx sessionctx.Context, tbl table.Table, sch if err != nil { return nil, false, nil, err } + + if lookUpContent[0].keyColIDs == nil { + return nil, false, nil, + dbterror.ClassOptimizer.NewStd(mysql.ErrInternal).GenWithStack("cannot get column IDs when dynamic pruning") + } + + keyColOffsets := make([]int, len(lookUpContent[0].keyColIDs)) + for i, colID := range lookUpContent[0].keyColIDs { + offset := -1 + for j, col := range partitionTbl.Cols() { + if colID == col.ID { + offset = j + break + } + } + if offset == -1 { + return nil, false, nil, + dbterror.ClassOptimizer.NewStd(mysql.ErrInternal).GenWithStack("invalid column offset when dynamic pruning") + } + keyColOffsets[i] = offset + } + offsetMap := make(map[int]bool) - for _, offset := range lookUpContent[0].keyCols { + for _, offset := range keyColOffsets { offsetMap[offset] = true } for _, offset := range pe.ColumnOffset { if _, ok := offsetMap[offset]; !ok { - logutil.BgLogger().Warn("can not runtime prune in index join") return condPruneResult, false, nil, nil } } + locateKey := make([]types.Datum, len(partitionTbl.Cols())) partitions := make(map[int64]table.PhysicalTable) contentPos = make([]int64, len(lookUpContent)) for idx, content := range lookUpContent { for i, date := range content.keys { - locateKey[content.keyCols[i]] = date + locateKey[keyColOffsets[i]] = date } p, err := partitionTbl.GetPartitionByRow(ctx, locateKey) if err != nil { diff --git a/executor/index_lookup_join.go b/executor/index_lookup_join.go index 9bec8e118515a..0e31280b6632a 100644 --- a/executor/index_lookup_join.go +++ b/executor/index_lookup_join.go @@ -95,6 +95,7 @@ type innerCtx struct { readerBuilder *dataReaderBuilder rowTypes []*types.FieldType keyCols []int + keyColIDs []int64 // the original ID in its table, used by dynamic partition pruning hashCols []int colLens []int hasPrefixCol bool @@ -472,9 +473,10 @@ func (iw *innerWorker) run(ctx context.Context, wg *sync.WaitGroup) { } type indexJoinLookUpContent struct { - keys []types.Datum - row chunk.Row - keyCols []int + keys []types.Datum + row chunk.Row + keyCols []int + keyColIDs []int64 // the original ID in its table, used by dynamic partition pruning } func (iw *innerWorker) handleTask(ctx context.Context, task *lookUpJoinTask) error { @@ -545,7 +547,7 @@ func (iw *innerWorker) constructLookupContent(task *lookUpJoinTask) ([]*indexJoi // dLookUpKey is sorted and deduplicated at sortAndDedupLookUpContents. // So we don't need to do it here. } - lookUpContents = append(lookUpContents, &indexJoinLookUpContent{keys: dLookUpKey, row: chk.GetRow(rowIdx), keyCols: iw.keyCols}) + lookUpContents = append(lookUpContents, &indexJoinLookUpContent{keys: dLookUpKey, row: chk.GetRow(rowIdx), keyCols: iw.keyCols, keyColIDs: iw.keyColIDs}) } } diff --git a/executor/partition_table_test.go b/executor/partition_table_test.go index 5be39c3a04d54..af706b2461c14 100644 --- a/executor/partition_table_test.go +++ b/executor/partition_table_test.go @@ -356,6 +356,65 @@ func (s *partitionTableSuite) TestView(c *C) { } } +func (s *partitionTableSuite) TestDynamicPruningUnderIndexJoin(c *C) { + if israce.RaceEnabled { + c.Skip("exhaustive types test, skip race test") + } + + tk := testkit.NewTestKitWithInit(c, s.store) + tk.MustExec("create database pruing_under_index_join") + tk.MustExec("use pruing_under_index_join") + tk.MustExec("set @@tidb_partition_prune_mode = 'dynamic'") + + tk.MustExec(`create table tnormal (a int, b int, c int, primary key(a), index idx_b(b))`) + tk.MustExec(`create table thash (a int, b int, c int, primary key(a), index idx_b(b)) partition by hash(a) partitions 4`) + tk.MustExec(`create table touter (a int, b int, c int)`) + + vals := make([]string, 0, 2000) + for i := 0; i < 2000; i++ { + vals = append(vals, fmt.Sprintf("(%v, %v, %v)", i, rand.Intn(10000), rand.Intn(10000))) + } + tk.MustExec(`insert into tnormal values ` + strings.Join(vals, ", ")) + tk.MustExec(`insert into thash values ` + strings.Join(vals, ", ")) + tk.MustExec(`insert into touter values ` + strings.Join(vals, ", ")) + + // case 1: IndexReader in the inner side + tk.MustQuery(`explain format='brief' select /*+ INL_JOIN(touter, thash) */ thash.b from touter join thash use index(idx_b) on touter.b = thash.b`).Check(testkit.Rows( + `IndexJoin 12487.50 root inner join, inner:IndexReader, outer key:pruing_under_index_join.touter.b, inner key:pruing_under_index_join.thash.b, equal cond:eq(pruing_under_index_join.touter.b, pruing_under_index_join.thash.b)`, + `├─TableReader(Build) 9990.00 root data:Selection`, + `│ └─Selection 9990.00 cop[tikv] not(isnull(pruing_under_index_join.touter.b))`, + `│ └─TableFullScan 10000.00 cop[tikv] table:touter keep order:false, stats:pseudo`, + `└─IndexReader(Probe) 1.25 root partition:all index:Selection`, + ` └─Selection 1.25 cop[tikv] not(isnull(pruing_under_index_join.thash.b))`, + ` └─IndexRangeScan 1.25 cop[tikv] table:thash, index:idx_b(b) range: decided by [eq(pruing_under_index_join.thash.b, pruing_under_index_join.touter.b)], keep order:false, stats:pseudo`)) + tk.MustQuery(`select /*+ INL_JOIN(touter, thash) */ thash.b from touter join thash use index(idx_b) on touter.b = thash.b`).Sort().Check( + tk.MustQuery(`select /*+ INL_JOIN(touter, tnormal) */ tnormal.b from touter join tnormal use index(idx_b) on touter.b = tnormal.b`).Sort().Rows()) + + // case 2: TableReader in the inner side + tk.MustQuery(`explain format='brief' select /*+ INL_JOIN(touter, thash) */ thash.* from touter join thash use index(primary) on touter.b = thash.a`).Check(testkit.Rows( + `IndexJoin 12487.50 root inner join, inner:TableReader, outer key:pruing_under_index_join.touter.b, inner key:pruing_under_index_join.thash.a, equal cond:eq(pruing_under_index_join.touter.b, pruing_under_index_join.thash.a)`, + `├─TableReader(Build) 9990.00 root data:Selection`, + `│ └─Selection 9990.00 cop[tikv] not(isnull(pruing_under_index_join.touter.b))`, + `│ └─TableFullScan 10000.00 cop[tikv] table:touter keep order:false, stats:pseudo`, + `└─TableReader(Probe) 1.00 root partition:all data:TableRangeScan`, + ` └─TableRangeScan 1.00 cop[tikv] table:thash range: decided by [pruing_under_index_join.touter.b], keep order:false, stats:pseudo`)) + tk.MustQuery(`select /*+ INL_JOIN(touter, thash) */ thash.* from touter join thash use index(primary) on touter.b = thash.a`).Sort().Check( + tk.MustQuery(`select /*+ INL_JOIN(touter, tnormal) */ tnormal.* from touter join tnormal use index(primary) on touter.b = tnormal.a`).Sort().Rows()) + + // case 3: IndexLookUp in the inner side + read all inner columns + tk.MustQuery(`explain format='brief' select /*+ INL_JOIN(touter, thash) */ thash.* from touter join thash use index(idx_b) on touter.b = thash.b`).Check(testkit.Rows( + `IndexJoin 12487.50 root inner join, inner:IndexLookUp, outer key:pruing_under_index_join.touter.b, inner key:pruing_under_index_join.thash.b, equal cond:eq(pruing_under_index_join.touter.b, pruing_under_index_join.thash.b)`, + `├─TableReader(Build) 9990.00 root data:Selection`, + `│ └─Selection 9990.00 cop[tikv] not(isnull(pruing_under_index_join.touter.b))`, + `│ └─TableFullScan 10000.00 cop[tikv] table:touter keep order:false, stats:pseudo`, + `└─IndexLookUp(Probe) 1.25 root partition:all `, + ` ├─Selection(Build) 1.25 cop[tikv] not(isnull(pruing_under_index_join.thash.b))`, + ` │ └─IndexRangeScan 1.25 cop[tikv] table:thash, index:idx_b(b) range: decided by [eq(pruing_under_index_join.thash.b, pruing_under_index_join.touter.b)], keep order:false, stats:pseudo`, + ` └─TableRowIDScan(Probe) 1.25 cop[tikv] table:thash keep order:false, stats:pseudo`)) + tk.MustQuery(`select /*+ INL_JOIN(touter, thash) */ thash.* from touter join thash use index(idx_b) on touter.b = thash.b`).Sort().Check( + tk.MustQuery(`select /*+ INL_JOIN(touter, tnormal) */ tnormal.* from touter join tnormal use index(idx_b) on touter.b = tnormal.b`).Sort().Rows()) +} + func (s *partitionTableSuite) TestGlobalStatsAndSQLBinding(c *C) { if israce.RaceEnabled { c.Skip("exhaustive types test, skip race test") @@ -374,7 +433,7 @@ func (s *partitionTableSuite) TestGlobalStatsAndSQLBinding(c *C) { partition p2 values less than (600), partition p3 values less than (800), partition p4 values less than (1001))`) - tk.MustExec(`create table tlist(a int, b int, key(a)) partition by list (a) ( + tk.MustExec(`create table tlist(a int, b int, key(a)) partition by list (a) (7 partition p0 values in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9), partition p0 values in (10, 11, 12, 13, 14, 15, 16, 17, 18, 19), partition p0 values in (20, 21, 22, 23, 24, 25, 26, 27, 28, 29), From b1664ae5a517c86148c2cc762ad3acc6c9219607 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Fri, 14 May 2021 14:15:40 +0800 Subject: [PATCH 2/3] add some comments --- executor/builder.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/executor/builder.go b/executor/builder.go index 73471566b3655..54e2dfb93012a 100644 --- a/executor/builder.go +++ b/executor/builder.go @@ -16,7 +16,6 @@ package executor import ( "bytes" "context" - "github.com/pingcap/tidb/util/dbterror" "sort" "strings" "sync" @@ -50,6 +49,7 @@ import ( "github.com/pingcap/tidb/util" "github.com/pingcap/tidb/util/admin" "github.com/pingcap/tidb/util/chunk" + "github.com/pingcap/tidb/util/dbterror" "github.com/pingcap/tidb/util/execdetails" "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tidb/util/ranger" @@ -2804,11 +2804,11 @@ func prunePartitionForInnerExecutor(ctx sessionctx.Context, tbl table.Table, sch return nil, false, nil, err } + // recalculate key column offsets if lookUpContent[0].keyColIDs == nil { return nil, false, nil, dbterror.ClassOptimizer.NewStd(mysql.ErrInternal).GenWithStack("cannot get column IDs when dynamic pruning") } - keyColOffsets := make([]int, len(lookUpContent[0].keyColIDs)) for i, colID := range lookUpContent[0].keyColIDs { offset := -1 From 6bbd28c3a37bc10d561bf49b9f86b8455f95f52e Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Fri, 14 May 2021 15:52:20 +0800 Subject: [PATCH 3/3] Update executor/partition_table_test.go Co-authored-by: rebelice --- executor/partition_table_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/executor/partition_table_test.go b/executor/partition_table_test.go index af706b2461c14..0db3e6a4ef184 100644 --- a/executor/partition_table_test.go +++ b/executor/partition_table_test.go @@ -433,7 +433,7 @@ func (s *partitionTableSuite) TestGlobalStatsAndSQLBinding(c *C) { partition p2 values less than (600), partition p3 values less than (800), partition p4 values less than (1001))`) - tk.MustExec(`create table tlist(a int, b int, key(a)) partition by list (a) (7 + tk.MustExec(`create table tlist(a int, b int, key(a)) partition by list (a) ( partition p0 values in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9), partition p0 values in (10, 11, 12, 13, 14, 15, 16, 17, 18, 19), partition p0 values in (20, 21, 22, 23, 24, 25, 26, 27, 28, 29),