diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result index d934dbf9d1e6f..511aadf6dfacf 100644 --- a/cmd/explaintest/r/tpch.result +++ b/cmd/explaintest/r/tpch.result @@ -1223,31 +1223,30 @@ id count task operator info Projection_25 100.00 root tpch.supplier.s_name, 17_col_0 └─TopN_28 100.00 root 17_col_0:desc, tpch.supplier.s_name:asc, offset:0, count:100 └─HashAgg_31 320000.00 root group by:tpch.supplier.s_name, funcs:count(1), firstrow(tpch.supplier.s_name) - └─Selection_32 3786715.90 root not(16_aux_0) - └─IndexJoin_38 4733394.87 root left outer semi join, inner:IndexLookUp_37, outer key:tpch.l1.l_orderkey, inner key:tpch.l3.l_orderkey, other cond:ne(tpch.l3.l_suppkey, tpch.l1.l_suppkey) - ├─IndexJoin_82 4733394.87 root semi join, inner:IndexLookUp_81, outer key:tpch.l1.l_orderkey, inner key:tpch.l2.l_orderkey, other cond:ne(tpch.l2.l_suppkey, tpch.l1.l_suppkey), ne(tpch.l2.l_suppkey, tpch.supplier.s_suppkey) - │ ├─HashLeftJoin_88 5916743.59 root inner join, inner:TableReader_117, equal:[eq(tpch.supplier.s_nationkey, tpch.nation.n_nationkey)] - │ │ ├─HashLeftJoin_93 147918589.81 root inner join, inner:TableReader_114, equal:[eq(tpch.l1.l_suppkey, tpch.supplier.s_suppkey)] - │ │ │ ├─IndexJoin_100 147918589.81 root inner join, inner:IndexLookUp_99, outer key:tpch.orders.o_orderkey, inner key:tpch.l1.l_orderkey - │ │ │ │ ├─TableReader_109 36517371.00 root data:Selection_108 - │ │ │ │ │ └─Selection_108 36517371.00 cop eq(tpch.orders.o_orderstatus, "F") - │ │ │ │ │ └─TableScan_107 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false - │ │ │ │ └─IndexLookUp_99 240004648.80 root - │ │ │ │ ├─IndexScan_96 1.00 cop table:l1, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false - │ │ │ │ └─Selection_98 240004648.80 cop gt(tpch.l1.l_receiptdate, tpch.l1.l_commitdate) - │ │ │ │ └─TableScan_97 1.00 cop table:lineitem, keep order:false - │ │ │ └─TableReader_114 500000.00 root data:TableScan_113 - │ │ │ 
└─TableScan_113 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false - │ │ └─TableReader_117 1.00 root data:Selection_116 - │ │ └─Selection_116 1.00 cop eq(tpch.nation.n_name, "EGYPT") - │ │ └─TableScan_115 25.00 cop table:nation, range:[-inf,+inf], keep order:false - │ └─IndexLookUp_81 1.00 root - │ ├─IndexScan_79 1.00 cop table:l2, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.l1.l_orderkey], keep order:false - │ └─TableScan_80 1.00 cop table:lineitem, keep order:false - └─IndexLookUp_37 240004648.80 root - ├─IndexScan_34 1.00 cop table:l3, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.l1.l_orderkey], keep order:false - └─Selection_36 240004648.80 cop gt(tpch.l3.l_receiptdate, tpch.l3.l_commitdate) - └─TableScan_35 1.00 cop table:lineitem, keep order:false + └─IndexJoin_37 3786715.90 root anti semi join, inner:IndexLookUp_36, outer key:tpch.l1.l_orderkey, inner key:tpch.l3.l_orderkey, other cond:ne(tpch.l3.l_suppkey, tpch.l1.l_suppkey), ne(tpch.l3.l_suppkey, tpch.supplier.s_suppkey) + ├─IndexJoin_81 4733394.87 root semi join, inner:IndexLookUp_80, outer key:tpch.l1.l_orderkey, inner key:tpch.l2.l_orderkey, other cond:ne(tpch.l2.l_suppkey, tpch.l1.l_suppkey), ne(tpch.l2.l_suppkey, tpch.supplier.s_suppkey) + │ ├─HashLeftJoin_87 5916743.59 root inner join, inner:TableReader_116, equal:[eq(tpch.supplier.s_nationkey, tpch.nation.n_nationkey)] + │ │ ├─HashLeftJoin_92 147918589.81 root inner join, inner:TableReader_113, equal:[eq(tpch.l1.l_suppkey, tpch.supplier.s_suppkey)] + │ │ │ ├─IndexJoin_99 147918589.81 root inner join, inner:IndexLookUp_98, outer key:tpch.orders.o_orderkey, inner key:tpch.l1.l_orderkey + │ │ │ │ ├─TableReader_108 36517371.00 root data:Selection_107 + │ │ │ │ │ └─Selection_107 36517371.00 cop eq(tpch.orders.o_orderstatus, "F") + │ │ │ │ │ └─TableScan_106 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false + │ │ │ │ └─IndexLookUp_98 240004648.80 root + │ │ │ │ ├─IndexScan_95 1.00 cop table:l1, 
index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false + │ │ │ │ └─Selection_97 240004648.80 cop gt(tpch.l1.l_receiptdate, tpch.l1.l_commitdate) + │ │ │ │ └─TableScan_96 1.00 cop table:lineitem, keep order:false + │ │ │ └─TableReader_113 500000.00 root data:TableScan_112 + │ │ │ └─TableScan_112 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false + │ │ └─TableReader_116 1.00 root data:Selection_115 + │ │ └─Selection_115 1.00 cop eq(tpch.nation.n_name, "EGYPT") + │ │ └─TableScan_114 25.00 cop table:nation, range:[-inf,+inf], keep order:false + │ └─IndexLookUp_80 1.00 root + │ ├─IndexScan_78 1.00 cop table:l2, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.l1.l_orderkey], keep order:false + │ └─TableScan_79 1.00 cop table:lineitem, keep order:false + └─IndexLookUp_36 240004648.80 root + ├─IndexScan_33 1.00 cop table:l3, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.l1.l_orderkey], keep order:false + └─Selection_35 240004648.80 cop gt(tpch.l3.l_receiptdate, tpch.l3.l_commitdate) + └─TableScan_34 1.00 cop table:lineitem, keep order:false /* Q22 Global Sales Opportunity Query The Global Sales Opportunity Query identifies geographies where there are customers who may be likely to make a @@ -1299,11 +1298,10 @@ Sort_32 1.00 root custsale.cntrycode:asc └─Projection_34 1.00 root custsale.cntrycode, 28_col_0, 28_col_1 └─HashAgg_37 1.00 root group by:custsale.cntrycode, funcs:count(1), sum(tpch.custsale.c_acctbal), firstrow(custsale.cntrycode) └─Projection_38 0.00 root substring(tpch.customer.c_phone, 1, 2), tpch.customer.c_acctbal - └─Selection_39 0.00 root not(26_aux_0) - └─HashLeftJoin_40 0.00 root left outer semi join, inner:TableReader_46, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)] - ├─Selection_41 0.00 root in(substring(tpch.customer.c_phone, 1, 2), "20", "40", "22", "30", "39", "42", "21") - │ └─TableReader_44 0.00 root data:Selection_43 - │ └─Selection_43 0.00 cop 
gt(tpch.customer.c_acctbal, NULL) - │ └─TableScan_42 7500000.00 cop table:customer, range:[-inf,+inf], keep order:false - └─TableReader_46 75000000.00 root data:TableScan_45 - └─TableScan_45 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false + └─HashLeftJoin_39 0.00 root anti semi join, inner:TableReader_45, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)] + ├─Selection_40 0.00 root in(substring(tpch.customer.c_phone, 1, 2), "20", "40", "22", "30", "39", "42", "21") + │ └─TableReader_43 0.00 root data:Selection_42 + │ └─Selection_42 0.00 cop gt(tpch.customer.c_acctbal, NULL) + │ └─TableScan_41 7500000.00 cop table:customer, range:[-inf,+inf], keep order:false + └─TableReader_45 75000000.00 root data:TableScan_44 + └─TableScan_44 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false diff --git a/ddl/ddl_api.go b/ddl/ddl_api.go index 8ed18e44d3df5..d7648c1f63fde 100644 --- a/ddl/ddl_api.go +++ b/ddl/ddl_api.go @@ -1640,8 +1640,10 @@ func modifiableCharsetAndCollation(toCharset, toCollate, origCharset, origCollat if !charset.ValidCharsetAndCollation(toCharset, toCollate) { return ErrUnknownCharacterSet.GenWithStackByArgs(toCharset, toCollate) } - if toCharset == charset.CharsetUTF8MB4 && origCharset == charset.CharsetUTF8 { - // TiDB only allow utf8 to be changed to utf8mb4. + if (origCharset == charset.CharsetUTF8 && toCharset == charset.CharsetUTF8MB4) || + (origCharset == charset.CharsetUTF8 && toCharset == charset.CharsetUTF8) || + (origCharset == charset.CharsetUTF8MB4 && toCharset == charset.CharsetUTF8MB4) { + // TiDB only allow utf8 to be changed to utf8mb4, or changing the collation when the charset is utf8/utf8mb4. 
return nil } diff --git a/executor/builder.go b/executor/builder.go index e11e0e8fc2dda..77110d1c229a5 100644 --- a/executor/builder.go +++ b/executor/builder.go @@ -705,24 +705,43 @@ func (b *executorBuilder) buildExplain(v *plannercore.Explain) Executor { } func (b *executorBuilder) buildUnionScanExec(v *plannercore.PhysicalUnionScan) Executor { - src := b.build(v.Children()[0]) + reader := b.build(v.Children()[0]) if b.err != nil { b.err = errors.Trace(b.err) return nil } - us := &UnionScanExec{baseExecutor: newBaseExecutor(b.ctx, v.Schema(), v.ExplainID(), src)} + us, err := b.buildUnionScanFromReader(reader, v) + if err != nil { + b.err = err + return nil + } + return us +} + +// buildUnionScanFromReader builds union scan executor from child executor. +// Note that this function may be called by inner workers of index lookup join concurrently. +// Be careful to avoid data race. +func (b *executorBuilder) buildUnionScanFromReader(reader Executor, v *plannercore.PhysicalUnionScan) (Executor, error) { + var err error + us := &UnionScanExec{baseExecutor: newBaseExecutor(b.ctx, v.Schema(), v.ExplainID(), reader)} // Get the handle column index of the below plannercore. // We can guarantee that there must be only one col in the map. for _, cols := range v.Children()[0].Schema().TblID2Handle { us.belowHandleIndex = cols[0].Index } - switch x := src.(type) { + switch x := reader.(type) { case *TableReaderExecutor: us.desc = x.desc + // Union scan only appears in a write transaction, so DirtyDB should already be non-nil and + // GetDirtyDB() is safe here. If this table has been modified in the transaction, its non-nil + // DirtyTable is already in DirtyDB, so GetDirtyTable only reads it; otherwise an empty DirtyTable + // is inserted into DirtyDB, possibly by several index join inner workers at once, and no lock is + // taken for that. NOTE(review): this is only safe if DirtyDB tolerates concurrent inserts (a plain + // Go map does not); confirm against the DirtyDB implementation.
us.dirty = GetDirtyDB(b.ctx).GetDirtyTable(x.table.Meta().ID) us.conditions = v.Conditions us.columns = x.columns - b.err = us.buildAndSortAddedRows() + err = us.buildAndSortAddedRows() case *IndexReaderExecutor: us.desc = x.desc for _, ic := range x.index.Columns { @@ -736,7 +755,7 @@ func (b *executorBuilder) buildUnionScanExec(v *plannercore.PhysicalUnionScan) E us.dirty = GetDirtyDB(b.ctx).GetDirtyTable(x.table.Meta().ID) us.conditions = v.Conditions us.columns = x.columns - b.err = us.buildAndSortAddedRows() + err = us.buildAndSortAddedRows() case *IndexLookUpExecutor: us.desc = x.desc for _, ic := range x.index.Columns { @@ -750,16 +769,16 @@ func (b *executorBuilder) buildUnionScanExec(v *plannercore.PhysicalUnionScan) E us.dirty = GetDirtyDB(b.ctx).GetDirtyTable(x.table.Meta().ID) us.conditions = v.Conditions us.columns = x.columns - b.err = us.buildAndSortAddedRows() + err = us.buildAndSortAddedRows() default: // The mem table will not be written by sql directly, so we can omit the union scan to avoid err reporting. - return src + return reader, nil } - if b.err != nil { - b.err = errors.Trace(b.err) - return nil + if err != nil { + err = errors.Trace(err) + return nil, err } - return us + return us, nil } // buildMergeJoin builds MergeJoinExec executor. 
@@ -1864,10 +1883,28 @@ func (builder *dataReaderBuilder) buildExecutorForIndexJoin(ctx context.Context, return builder.buildIndexReaderForIndexJoin(ctx, v, datums, IndexRanges, keyOff2IdxOff) case *plannercore.PhysicalIndexLookUpReader: return builder.buildIndexLookUpReaderForIndexJoin(ctx, v, datums, IndexRanges, keyOff2IdxOff) + case *plannercore.PhysicalUnionScan: + return builder.buildUnionScanForIndexJoin(ctx, v, datums, IndexRanges, keyOff2IdxOff) } return nil, errors.New("Wrong plan type for dataReaderBuilder") } +func (builder *dataReaderBuilder) buildUnionScanForIndexJoin(ctx context.Context, v *plannercore.PhysicalUnionScan, + values [][]types.Datum, indexRanges []*ranger.Range, keyOff2IdxOff []int) (Executor, error) { + childBuilder := &dataReaderBuilder{Plan: v.Children()[0], executorBuilder: builder.executorBuilder} + reader, err := childBuilder.buildExecutorForIndexJoin(ctx, values, indexRanges, keyOff2IdxOff) + if err != nil { + return nil, err + } + e, err := builder.buildUnionScanFromReader(reader, v) + if err != nil { + return nil, err + } + us := e.(*UnionScanExec) + us.snapshotChunkBuffer = us.newFirstChunk() + return us, nil +} + func (builder *dataReaderBuilder) buildTableReaderForIndexJoin(ctx context.Context, v *plannercore.PhysicalTableReader, datums [][]types.Datum) (Executor, error) { e, err := buildNoRangeTableReader(builder.executorBuilder, v) if err != nil { diff --git a/executor/index_lookup_join_test.go b/executor/index_lookup_join_test.go index 30cc7b490768f..ca4e685df4d0c 100644 --- a/executor/index_lookup_join_test.go +++ b/executor/index_lookup_join_test.go @@ -65,3 +65,83 @@ func (s *testSuite) TestIndexJoinOverflow(c *C) { tk.MustExec(`create table t2(a int unsigned, index idx(a));`) tk.MustQuery(`select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a;`).Check(testkit.Rows()) } + +func (s *testSuite) TestIndexJoinUnionScan(c *C) { + tk := testkit.NewTestKitWithInit(c, s.store) + tk.MustExec("create table t1(id int 
primary key, a int)") + tk.MustExec("create table t2(id int primary key, a int, b int, key idx_a(a))") + tk.MustExec("insert into t2 values (1,1,1),(4,2,4)") + tk.MustExec("begin") + tk.MustExec("insert into t1 values(2,2)") + tk.MustExec("insert into t2 values(2,2,2), (3,3,3)") + // TableScan below UnionScan + tk.MustQuery("explain select /*+ TIDB_INLJ(t1, t2)*/ * from t1 join t2 on t1.a = t2.id").Check(testkit.Rows( + "IndexJoin_11 12500.00 root inner join, inner:UnionScan_10, outer key:test.t1.a, inner key:test.t2.id", + "├─UnionScan_12 10000.00 root ", + "│ └─TableReader_14 10000.00 root data:TableScan_13", + "│ └─TableScan_13 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", + "└─UnionScan_10 1.00 root ", + " └─TableReader_9 1.00 root data:TableScan_8", + " └─TableScan_8 1.00 cop table:t2, range: decided by [test.t1.a], keep order:false, stats:pseudo", + )) + tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ * from t1 join t2 on t1.a = t2.id").Check(testkit.Rows( + "2 2 2 2 2", + )) + // IndexLookUp below UnionScan + tk.MustQuery("explain select /*+ TIDB_INLJ(t1, t2)*/ * from t1 join t2 on t1.a = t2.a").Check(testkit.Rows( + "IndexJoin_12 12500.00 root inner join, inner:UnionScan_11, outer key:test.t1.a, inner key:test.t2.a", + "├─UnionScan_13 10000.00 root ", + "│ └─TableReader_15 10000.00 root data:TableScan_14", + "│ └─TableScan_14 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", + "└─UnionScan_11 10.00 root ", + " └─IndexLookUp_10 10.00 root ", + " ├─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo", + " └─TableScan_9 10.00 cop table:t2, keep order:false, stats:pseudo", + )) + tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ * from t1 join t2 on t1.a = t2.a").Check(testkit.Rows( + "2 2 2 2 2", + "2 2 4 2 4", + )) + // IndexScan below UnionScan + tk.MustQuery("explain select /*+ TIDB_INLJ(t1, t2)*/ t1.a, t2.a from t1 join t2 on t1.a = t2.a").Check(testkit.Rows( + 
"Projection_7 12500.00 root test.t1.a, test.t2.a", + "└─IndexJoin_11 12500.00 root inner join, inner:UnionScan_10, outer key:test.t1.a, inner key:test.t2.a", + " ├─UnionScan_12 10000.00 root ", + " │ └─TableReader_14 10000.00 root data:TableScan_13", + " │ └─TableScan_13 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", + " └─UnionScan_10 10.00 root ", + " └─IndexReader_9 10.00 root index:IndexScan_8", + " └─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo", + )) + tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ t1.a, t2.a from t1 join t2 on t1.a = t2.a").Check(testkit.Rows( + "2 2", + "2 2", + )) + tk.MustExec("rollback") +} + +func (s *testSuite) TestBatchIndexJoinUnionScan(c *C) { + tk := testkit.NewTestKitWithInit(c, s.store) + tk.MustExec("create table t1(id int primary key, a int)") + tk.MustExec("create table t2(id int primary key, a int, key idx_a(a))") + tk.MustExec("set @@session.tidb_max_chunk_size=1") + tk.MustExec("set @@session.tidb_index_join_batch_size=1") + tk.MustExec("set @@session.tidb_index_lookup_join_concurrency=4") + tk.MustExec("begin") + tk.MustExec("insert into t1 values(1,1),(2,1),(3,1),(4,1)") + tk.MustExec("insert into t2 values(1,1)") + tk.MustQuery("explain select /*+ TIDB_INLJ(t1, t2)*/ count(*) from t1 join t2 on t1.a = t2.a").Check(testkit.Rows( + "StreamAgg_13 1.00 root funcs:count(1)", + "└─IndexJoin_24 12500.00 root inner join, inner:UnionScan_23, outer key:test.t1.a, inner key:test.t2.a", + " ├─UnionScan_18 10000.00 root ", + " │ └─TableReader_20 10000.00 root data:TableScan_19", + " │ └─TableScan_19 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", + " └─UnionScan_23 10.00 root ", + " └─IndexReader_22 10.00 root index:IndexScan_21", + " └─IndexScan_21 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo", + )) + tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ count(*) from t1 join t2 on t1.a 
= t2.a").Check(testkit.Rows( + "4", + )) + tk.MustExec("rollback") +} diff --git a/executor/join_test.go b/executor/join_test.go index 60dbeda3df4b8..d0d3062563fe6 100644 --- a/executor/join_test.go +++ b/executor/join_test.go @@ -963,40 +963,6 @@ func (s *testSuite) TestHashJoin(c *C) { c.Assert(outerExecInfo[len(outerExecInfo)-1:], Equals, "1") innerExecInfo := row[3][4].(string) c.Assert(innerExecInfo[len(innerExecInfo)-1:], Equals, "0") - - tk.MustExec("insert into t2 select * from t1;") - tk.MustExec("delete from t1;") - tk.MustQuery("select count(*) from t1").Check(testkit.Rows("0")) - tk.MustQuery("select count(*) from t2").Check(testkit.Rows("5")) - result = tk.MustQuery("explain analyze select /*+ TIDB_HJ(t1, t2) */ * from t1 where not exists (select a from t2 where t1.a = t2.a);") - // id count task operator info execution info | - // Projection_8 4.00 root test.t1.a, test.t1.b time:193.08µs, loops:1, rows:0 | - // └─Selection_9 4.00 root not(6_aux_0) time:146.95µs, loops:1, rows:0 | - // └─HashLeftJoin_10 5.00 root left outer semi join, inner:TableReader_14, equal:[eq(test.t1.a, test.t2.a)] time:144.293µs, loops:1, rows:0 | - // ├─TableReader_12 5.00 root data:TableScan_11 time:26.27µs, loops:1, rows:0 | - // │ └─TableScan_11 5.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo | - // └─TableReader_14 5.00 root data:TableScan_13 time:0s, loops:0, rows:0 | - // └─TableScan_13 5.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo | - row = result.Rows() - c.Assert(len(row), Equals, 7) - outerExecInfo = row[3][4].(string) - c.Assert(outerExecInfo[len(outerExecInfo)-1:], Equals, "0") - innerExecInfo = row[5][4].(string) - c.Assert(innerExecInfo[len(innerExecInfo)-1:], LessEqual, "5") - - result = tk.MustQuery("explain analyze select /*+ TIDB_HJ(t1, t2) */ * from t1 left outer join t2 on t1.a = t2.a;") - // id count task operator info execution info - // HashLeftJoin_6 12500.00 root left outer join, inner:TableReader_10,
equal:[eq(test.t1.a, test.t2.a)] time:502.553µs, loops:1, rows:0 - // ├─TableReader_8 10000.00 root data:TableScan_7 time:27.302µs, loops:1, rows:0 - // │ └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo - // └─TableReader_10 10000.00 root data:TableScan_9 time:0s, loops:0, rows:0 - // └─TableScan_9 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo - row = result.Rows() - c.Assert(len(row), Equals, 5) - outerExecInfo = row[1][4].(string) - c.Assert(outerExecInfo[len(outerExecInfo)-1:], Equals, "0") - innerExecInfo = row[3][4].(string) - c.Assert(innerExecInfo[len(innerExecInfo)-1:], LessEqual, "5") } func (s *testSuite) TestJoinDifferentDecimals(c *C) { diff --git a/go.mod b/go.mod index 23c407f104f0a..9986917ef2327 100644 --- a/go.mod +++ b/go.mod @@ -48,7 +48,7 @@ require ( github.com/pingcap/goleveldb v0.0.0-20171020084629-8d44bfdf1030 github.com/pingcap/kvproto v0.0.0-20190226063853-f6c0b7ffff11 github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596 - github.com/pingcap/parser v0.0.0-20190505094039-595d728571a7 + github.com/pingcap/parser v0.0.0-20190718031118-20e37a65d718 github.com/pingcap/pd v2.1.0-rc.4+incompatible github.com/pingcap/tidb-tools v2.1.3-0.20190116051332-34c808eef588+incompatible github.com/pingcap/tipb v0.0.0-20180910045846-371b48b15d93 diff --git a/go.sum b/go.sum index 592bae0edeb24..d15277fa26449 100644 --- a/go.sum +++ b/go.sum @@ -34,7 +34,6 @@ github.com/dustin/go-humanize v0.0.0-20180421182945-02af3965c54e h1:Fw7ZmgiklsLh github.com/dustin/go-humanize v0.0.0-20180421182945-02af3965c54e/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385 h1:clC1lXBpe2kTj2VHdaIu9ajZQe4kcEY9j0NsnDDBZ3o= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= -github.com/etcd-io/gofail v0.0.0-20180808172546-51ce9a71510a h1:QNEenQIsGDEEfFNSnN+h6hE1OwnHqTg7Dl9gEk1Cko4= 
github.com/etcd-io/gofail v0.0.0-20180808172546-51ce9a71510a/go.mod h1:49H/RkXP8pKaZy4h0d+NW16rSLhyVBt4o6VLJbmOqDE= github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= @@ -101,8 +100,8 @@ github.com/pingcap/kvproto v0.0.0-20190226063853-f6c0b7ffff11 h1:e81flSfRbbMW5RU github.com/pingcap/kvproto v0.0.0-20190226063853-f6c0b7ffff11/go.mod h1:0gwbe1F2iBIjuQ9AH0DbQhL+Dpr5GofU8fgYyXk+ykk= github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596 h1:t2OQTpPJnrPDGlvA+3FwJptMTt6MEPdzK1Wt99oaefQ= github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596/go.mod h1:WpHUKhNZ18v116SvGrmjkA9CBhYmuUTKL+p8JC9ANEw= -github.com/pingcap/parser v0.0.0-20190505094039-595d728571a7 h1:cbTQGLE0X69qL2nrvtG9HP4u5sBdVGyoIJOhc+KtJXc= -github.com/pingcap/parser v0.0.0-20190505094039-595d728571a7/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA= +github.com/pingcap/parser v0.0.0-20190718031118-20e37a65d718 h1:raZFhem9Ga8BcuWhQ6daejp5E5rIeyET0oQddyWK2Q0= +github.com/pingcap/parser v0.0.0-20190718031118-20e37a65d718/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA= github.com/pingcap/pd v2.1.0-rc.4+incompatible h1:/buwGk04aHO5odk/+O8ZOXGs4qkUjYTJ2UpCJXna8NE= github.com/pingcap/pd v2.1.0-rc.4+incompatible/go.mod h1:nD3+EoYes4+aNNODO99ES59V83MZSI+dFbhyr667a0E= github.com/pingcap/tidb-tools v2.1.3-0.20190116051332-34c808eef588+incompatible h1:e9Gi/LP9181HT3gBfSOeSBA+5JfemuE4aEAhqNgoE4k= diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index 7039968647b35..31ce139cecbf8 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -411,20 +411,23 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *property.PhysicalProperty, ou innerJoinKeys = p.LeftJoinKeys outerJoinKeys = p.RightJoinKeys } - x, ok := innerChild.(*DataSource) - if !ok { + ds, isDataSource := innerChild.(*DataSource) + us, 
isUnionScan := innerChild.(*LogicalUnionScan) + if isUnionScan { + ds, isDataSource = us.Children()[0].(*DataSource) // the child is not guaranteed to be a DataSource; give up index join below instead of panicking + } + if !isDataSource { return nil } var tblPath *accessPath - for _, path := range x.possibleAccessPaths { + for _, path := range ds.possibleAccessPaths { if path.isTablePath { tblPath = path break } } - if pkCol := x.getPKIsHandleCol(); pkCol != nil && tblPath != nil { + if pkCol := ds.getPKIsHandleCol(); pkCol != nil && tblPath != nil { keyOff2IdxOff := make([]int, len(innerJoinKeys)) - pkCol := x.getPKIsHandleCol() pkMatched := false for i, key := range innerJoinKeys { if !key.Equal(nil, pkCol) { @@ -435,7 +438,7 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *property.PhysicalProperty, ou keyOff2IdxOff[i] = 0 } if pkMatched { - innerPlan := p.constructInnerTableScan(x, pkCol, outerJoinKeys) + innerPlan := p.constructInnerTableScan(ds, pkCol, outerJoinKeys, us) // Since the primary key means one value corresponding to exact one row, this will always be a no worse one // comparing to other index. return p.constructIndexJoin(prop, innerJoinKeys, outerJoinKeys, outerIdx, innerPlan, nil, keyOff2IdxOff) @@ -448,12 +451,12 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *property.PhysicalProperty, ou remainedOfBest []expression.Expression keyOff2IdxOff []int ) - for _, path := range x.possibleAccessPaths { + for _, path := range ds.possibleAccessPaths { if path.isTablePath { continue } indexInfo := path.index - ranges, remained, tmpKeyOff2IdxOff := p.buildRangeForIndexJoin(indexInfo, ds, innerJoinKeys) // We choose the index by the number of used columns of the range, the much the better. // Notice that there may be the cases like `t1.a=t2.a and b > 2 and b < 1`. So ranges can be nil though the conditions are valid. // But obviously when the range is nil, we don't need index join.
@@ -466,20 +469,15 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *property.PhysicalProperty, ou } } if bestIndexInfo != nil { - innerPlan := p.constructInnerIndexScan(x, bestIndexInfo, remainedOfBest, outerJoinKeys) + innerPlan := p.constructInnerIndexScan(ds, bestIndexInfo, remainedOfBest, outerJoinKeys, us) return p.constructIndexJoin(prop, innerJoinKeys, outerJoinKeys, outerIdx, innerPlan, rangesOfBest, keyOff2IdxOff) } return nil } // constructInnerTableScan is specially used to construct the inner plan for PhysicalIndexJoin. -func (p *LogicalJoin) constructInnerTableScan(ds *DataSource, pk *expression.Column, outerJoinKeys []*expression.Column) PhysicalPlan { - var ranges []*ranger.Range - if pk != nil { - ranges = ranger.FullIntRange(mysql.HasUnsignedFlag(pk.RetType.Flag)) - } else { - ranges = ranger.FullIntRange(false) - } +func (p *LogicalJoin) constructInnerTableScan(ds *DataSource, pk *expression.Column, outerJoinKeys []*expression.Column, us *LogicalUnionScan) PhysicalPlan { + ranges := ranger.FullIntRange(mysql.HasUnsignedFlag(pk.RetType.Flag)) ts := PhysicalTableScan{ Table: ds.tableInfo, Columns: ds.Columns, @@ -504,11 +502,23 @@ func (p *LogicalJoin) constructInnerTableScan(ds *DataSource, pk *expression.Col selStats := ts.stats.Scale(selectionFactor) ts.addPushedDownSelection(copTask, selStats) t := finishCopTask(ds.ctx, copTask) - return t.plan() + reader := t.plan() + return p.constructInnerUnionScan(us, reader) +} + +func (p *LogicalJoin) constructInnerUnionScan(us *LogicalUnionScan, reader PhysicalPlan) PhysicalPlan { + if us == nil { + return reader + } + // Use `reader.stats` instead of `us.stats` because it should be more accurate. No need to specify + // childrenReqProps now since we have got reader already. 
+ physicalUnionScan := PhysicalUnionScan{Conditions: us.conditions}.init(us.ctx, reader.statsInfo(), nil) + physicalUnionScan.SetChildren(reader) + return physicalUnionScan } // constructInnerIndexScan is specially used to construct the inner plan for PhysicalIndexJoin. -func (p *LogicalJoin) constructInnerIndexScan(ds *DataSource, idx *model.IndexInfo, remainedConds []expression.Expression, outerJoinKeys []*expression.Column) PhysicalPlan { +func (p *LogicalJoin) constructInnerIndexScan(ds *DataSource, idx *model.IndexInfo, remainedConds []expression.Expression, outerJoinKeys []*expression.Column, us *LogicalUnionScan) PhysicalPlan { is := PhysicalIndexScan{ Table: ds.tableInfo, TableAsName: ds.TableAsName, @@ -550,7 +560,8 @@ func (p *LogicalJoin) constructInnerIndexScan(ds *DataSource, idx *model.IndexIn path := &accessPath{indexFilters: indexConds, tableFilters: tblConds, countAfterIndex: math.MaxFloat64} is.addPushedDownSelection(cop, ds, math.MaxFloat64, path) t := finishCopTask(ds.ctx, cop) - return t.plan() + reader := t.plan() + return p.constructInnerUnionScan(us, reader) } // buildRangeForIndexJoin checks whether this index can be used for building index join and return the range if this index is ok. 
diff --git a/planner/core/expression_rewriter.go b/planner/core/expression_rewriter.go index 2392ade00e552..482ded33d09c7 100644 --- a/planner/core/expression_rewriter.go +++ b/planner/core/expression_rewriter.go @@ -571,7 +571,7 @@ func (er *expressionRewriter) handleExistSubquery(v *ast.ExistsSubqueryExpr) (as } np = er.popExistsSubPlan(np) if len(np.extractCorrelatedCols()) > 0 { - er.p, er.err = er.b.buildSemiApply(er.p, np, nil, er.asScalar, false) + er.p, er.err = er.b.buildSemiApply(er.p, np, nil, er.asScalar, v.Not) if er.err != nil || !er.asScalar { return v, true } @@ -587,7 +587,7 @@ func (er *expressionRewriter) handleExistSubquery(v *ast.ExistsSubqueryExpr) (as er.err = errors.Trace(err) return v, true } - if len(rows) > 0 { + if (len(rows) > 0 && !v.Not) || (len(rows) == 0 && v.Not) { er.ctxStack = append(er.ctxStack, expression.One.Clone()) } else { er.ctxStack = append(er.ctxStack, expression.Zero.Clone()) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 5c3c670b35279..55b13cefe185a 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -79,6 +79,11 @@ func (p *LogicalTableDual) findBestTask(prop *property.PhysicalProperty) (task, // findBestTask implements LogicalPlan interface. func (p *baseLogicalPlan) findBestTask(prop *property.PhysicalProperty) (bestTask task, err error) { + // If p is an inner plan in an IndexJoin, the IndexJoin will generate an inner plan by itself, + // and set inner child prop nil, so here we do nothing. + if prop == nil { + return nil, nil + } // Look up the task with this prop in the task map. // It's used to reduce double counting. bestTask = p.getTask(prop) @@ -329,10 +334,8 @@ func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candida // findBestTask implements the PhysicalPlan interface. // It will enumerate all the available indices and choose a plan with least cost. 
func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err error) { - // If ds is an inner plan in an IndexJoin, the IndexJoin will generate an inner plan by itself. - // So here we do nothing. - // TODO: Add a special prop to handle IndexJoin's inner plan. - // Then we can remove forceToTableScan and forceToIndexScan. + // If ds is an inner plan in an IndexJoin, the IndexJoin will generate an inner plan by itself, + // and set inner child prop nil, so here we do nothing. if prop == nil { return nil, nil } diff --git a/planner/core/physical_plan_test.go b/planner/core/physical_plan_test.go index 458fe62fba065..3d32b5c460a3b 100644 --- a/planner/core/physical_plan_test.go +++ b/planner/core/physical_plan_test.go @@ -1389,3 +1389,52 @@ func (s *testPlanSuite) TestUnmatchedTableInHint(c *C) { } } } + +func (s *testPlanSuite) TestIndexJoinUnionScan(c *C) { + defer testleak.AfterTest(c)() + store, dom, err := newStoreWithBootstrap() + c.Assert(err, IsNil) + defer func() { + dom.Close() + store.Close() + }() + se, err := session.CreateSession4Test(store) + c.Assert(err, IsNil) + _, err = se.Execute(context.Background(), "use test") + c.Assert(err, IsNil) + tests := []struct { + sql string + best string + }{ + // Test Index Join + UnionScan + TableScan. + { + sql: "select /*+ TIDB_INLJ(t1, t2) */ * from t t1, t t2 where t1.a = t2.a", + best: "IndexJoin{TableReader(Table(t))->UnionScan([])->TableReader(Table(t))->UnionScan([])}(test.t1.a,test.t2.a)", + }, + // Test Index Join + UnionScan + DoubleRead. + { + sql: "select /*+ TIDB_INLJ(t1, t2) */ * from t t1, t t2 where t1.a = t2.c", + best: "IndexJoin{TableReader(Table(t))->UnionScan([])->IndexLookUp(Index(t.c_d_e)[[NULL,+inf]], Table(t))->UnionScan([])}(test.t1.a,test.t2.c)", + }, + // Test Index Join + UnionScan + IndexScan. 
+ { + sql: "select /*+ TIDB_INLJ(t1, t2) */ t1.a , t2.c from t t1, t t2 where t1.a = t2.c", + best: "IndexJoin{TableReader(Table(t))->UnionScan([])->IndexReader(Index(t.c_d_e)[[NULL,+inf]])->UnionScan([])}(test.t1.a,test.t2.c)->Projection", + }, + } + for i, tt := range tests { + comment := Commentf("case:%v sql:%s", i, tt.sql) + stmt, err := s.ParseOneStmt(tt.sql, "", "") + c.Assert(err, IsNil, comment) + err = se.NewTxn() + c.Assert(err, IsNil) + // Make txn not read only. + txn, err := se.Txn(true) + c.Assert(err, IsNil) + txn.Set(kv.Key("AAA"), []byte("BBB")) + se.StmtCommit() + p, err := core.Optimize(se, stmt, s.is) + c.Assert(err, IsNil) + c.Assert(core.ToString(p), Equals, tt.best, comment) + } +}