From e6299846b87619b5b14068885235770e0443bd1f Mon Sep 17 00:00:00 2001 From: xuhuaiyu <391585975@qq.com> Date: Thu, 26 Mar 2020 21:28:25 +0800 Subject: [PATCH 1/2] util, executor: use UnionRanges when building kvRanges for IndexReader in INLJ --- executor/builder.go | 64 +++++++++++++++++------------------------ executor/join_test.go | 31 ++++++++++++++------ util/ranger/detacher.go | 2 +- util/ranger/ranger.go | 13 +++++---- 4 files changed, 56 insertions(+), 54 deletions(-) diff --git a/executor/builder.go b/executor/builder.go index 29fb65e3c4e48..a50850b71ccdf 100644 --- a/executor/builder.go +++ b/executor/builder.go @@ -2702,10 +2702,11 @@ func (builder *dataReaderBuilder) buildProjectionForIndexJoin(ctx context.Contex // buildKvRangesForIndexJoin builds kv ranges for index join when the inner plan is index scan plan. func buildKvRangesForIndexJoin(ctx sessionctx.Context, tableID, indexID int64, lookUpContents []*indexJoinLookUpContent, - ranges []*ranger.Range, keyOff2IdxOff []int, cwc *plannercore.ColWithCmpFuncManager) ([]kv.KeyRange, error) { + ranges []*ranger.Range, keyOff2IdxOff []int, cwc *plannercore.ColWithCmpFuncManager) (_ []kv.KeyRange, err error) { kvRanges := make([]kv.KeyRange, 0, len(ranges)*len(lookUpContents)) lastPos := len(ranges[0].LowVal) - 1 sc := ctx.GetSessionVars().StmtCtx + tmpDatumRanges := make([]*ranger.Range, 0, len(lookUpContents)) for _, content := range lookUpContents { for _, ran := range ranges { for keyOff, idxOff := range keyOff2IdxOff { @@ -2713,54 +2714,41 @@ func buildKvRangesForIndexJoin(ctx sessionctx.Context, tableID, indexID int64, l ran.HighVal[idxOff] = content.keys[keyOff] } } - if cwc != nil { - nextColRanges, err := cwc.BuildRangesByRow(ctx, content.row) + if cwc == nil { + tmpKvRanges, err := distsql.IndexRangesToKVRanges(sc, tableID, indexID, ranges, nil) if err != nil { return nil, err } - for _, nextColRan := range nextColRanges { - for _, ran := range ranges { - ran.LowVal[lastPos] = nextColRan.LowVal[0] 
- ran.HighVal[lastPos] = nextColRan.HighVal[0] - ran.LowExclude = nextColRan.LowExclude - ran.HighExclude = nextColRan.HighExclude - } - tmpKvRanges, err := distsql.IndexRangesToKVRanges(sc, tableID, indexID, ranges, nil) - if err != nil { - return nil, errors.Trace(err) - } - kvRanges = append(kvRanges, tmpKvRanges...) - } + kvRanges = append(kvRanges, tmpKvRanges...) continue } - - tmpKvRanges, err := distsql.IndexRangesToKVRanges(sc, tableID, indexID, ranges, nil) + nextColRanges, err := cwc.BuildRangesByRow(ctx, content.row) if err != nil { return nil, err } - kvRanges = append(kvRanges, tmpKvRanges...) - } - // Sort and merge the overlapped ranges. - sort.Slice(kvRanges, func(i, j int) bool { - return bytes.Compare(kvRanges[i].StartKey, kvRanges[j].StartKey) < 0 - }) - if cwc != nil { - // If cwc is not nil, we need to merge the overlapped ranges here. - mergedKeyRanges := make([]kv.KeyRange, 0, len(kvRanges)) - for i := range kvRanges { - if len(mergedKeyRanges) == 0 { - mergedKeyRanges = append(mergedKeyRanges, kvRanges[i]) - continue - } - if bytes.Compare(kvRanges[i].StartKey, mergedKeyRanges[len(mergedKeyRanges)-1].EndKey) <= 0 { - mergedKeyRanges[len(mergedKeyRanges)-1].EndKey = kvRanges[i].EndKey - } else { - mergedKeyRanges = append(mergedKeyRanges, kvRanges[i]) + for _, nextColRan := range nextColRanges { + for _, ran := range ranges { + ran.LowVal[lastPos] = nextColRan.LowVal[0] + ran.HighVal[lastPos] = nextColRan.HighVal[0] + ran.LowExclude = nextColRan.LowExclude + ran.HighExclude = nextColRan.HighExclude + tmpDatumRanges = append(tmpDatumRanges, ran.Clone()) } } - return mergedKeyRanges, nil } - return kvRanges, nil + + if cwc == nil { + sort.Slice(kvRanges, func(i, j int) bool { + return bytes.Compare(kvRanges[i].StartKey, kvRanges[j].StartKey) < 0 + }) + return kvRanges, nil + } + + tmpDatumRanges, err = ranger.UnionRanges(ctx.GetSessionVars().StmtCtx, tmpDatumRanges) + if err != nil { + return nil, err + } + return 
distsql.IndexRangesToKVRanges(ctx.GetSessionVars().StmtCtx, tableID, indexID, tmpDatumRanges, nil) } func (b *executorBuilder) buildWindow(v *plannercore.PhysicalWindow) *WindowExec { diff --git a/executor/join_test.go b/executor/join_test.go index 224bc394ce560..91b6e853b60ef 100644 --- a/executor/join_test.go +++ b/executor/join_test.go @@ -1222,6 +1222,19 @@ func (s *testSuiteJoin1) TestIndexNestedLoopHashJoin(c *C) { } } +func (s *testSuiteJoin3) TestIssue15686(c *C) { + tk := testkit.NewTestKit(c, s.store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t, k;") + tk.MustExec("create table k (a int, pk int primary key, index(a));") + tk.MustExec("create table t (a int, pk int primary key, index(a));") + tk.MustExec("insert into k values(0,8),(0,23),(1,21),(1,33),(1,52),(2,17),(2,34),(2,39),(2,40),(2,66),(2,67),(3,9),(3,25),(3,41),(3,48),(4,4),(4,11),(4,15),(4,26),(4,27),(4,31),(4,35),(4,45),(4,47),(4,49);") + tk.MustExec("insert into t values(3,4),(3,5),(3,27),(3,29),(3,57),(3,58),(3,79),(3,84),(3,92),(3,95);") + tk.MustQuery("select /*+ inl_join(t) */ count(*) from k left join t on k.a = t.a and k.pk > t.pk;").Check(testkit.Rows("33")) + tk.MustQuery("select /*+ inl_hash_join(t) */ count(*) from k left join t on k.a = t.a and k.pk > t.pk;").Check(testkit.Rows("33")) + tk.MustQuery("select /*+ inl_merge_join(t) */ count(*) from k left join t on k.a = t.a and k.pk > t.pk;").Check(testkit.Rows("33")) +} + func (s *testSuiteJoin3) TestIssue13449(c *C) { tk := testkit.NewTestKit(c, s.store) tk.MustExec("use test") @@ -1826,35 +1839,35 @@ func (s *testSuiteJoin1) TestIssue13177(c *C) { tk.MustExec("create table t2(a varchar(20), b int, c int, primary key(a, b))") tk.MustExec("insert into t1 values(\"abcd\", 1, 1), (\"bacd\", 2, 2), (\"cbad\", 3, 3)") tk.MustExec("insert into t2 values(\"bcd\", 1, 1), (\"acd\", 2, 2), (\"bad\", 3, 3)") - tk.MustQuery("select /*+ inl_join(t1, t2) */ * from t1 join t2 on substr(t1.a, 2, 4) = t2.a and t1.b = t2.b where 
t1.c between 1 and 5").Check(testkit.Rows( + tk.MustQuery("select /*+ inl_join(t1, t2) */ * from t1 join t2 on substr(t1.a, 2, 4) = t2.a and t1.b = t2.b where t1.c between 1 and 5").Sort().Check(testkit.Rows( "abcd 1 1 bcd 1 1", "bacd 2 2 acd 2 2", "cbad 3 3 bad 3 3", )) - tk.MustQuery("select /*+ inl_hash_join(t1, t2) */ * from t1 join t2 on substr(t1.a, 2, 4) = t2.a and t1.b = t2.b where t1.c between 1 and 5").Check(testkit.Rows( + tk.MustQuery("select /*+ inl_hash_join(t1, t2) */ * from t1 join t2 on substr(t1.a, 2, 4) = t2.a and t1.b = t2.b where t1.c between 1 and 5").Sort().Check(testkit.Rows( "abcd 1 1 bcd 1 1", "bacd 2 2 acd 2 2", "cbad 3 3 bad 3 3", )) - tk.MustQuery("select /*+ inl_merge_join(t1, t2) */ * from t1 join t2 on substr(t1.a, 2, 4) = t2.a and t1.b = t2.b where t1.c between 1 and 5").Check(testkit.Rows( + tk.MustQuery("select /*+ inl_merge_join(t1, t2) */ * from t1 join t2 on substr(t1.a, 2, 4) = t2.a and t1.b = t2.b where t1.c between 1 and 5").Sort().Check(testkit.Rows( + "abcd 1 1 bcd 1 1", "bacd 2 2 acd 2 2", "cbad 3 3 bad 3 3", - "abcd 1 1 bcd 1 1", )) - tk.MustQuery("select /*+ inl_join(t1, t2) */ t1.* from t1 join t2 on substr(t1.a, 2, 4) = t2.a and t1.b = t2.b where t1.c between 1 and 5").Check(testkit.Rows( + tk.MustQuery("select /*+ inl_join(t1, t2) */ t1.* from t1 join t2 on substr(t1.a, 2, 4) = t2.a and t1.b = t2.b where t1.c between 1 and 5").Sort().Check(testkit.Rows( "abcd 1 1", "bacd 2 2", "cbad 3 3", )) - tk.MustQuery("select /*+ inl_hash_join(t1, t2) */ t1.* from t1 join t2 on substr(t1.a, 2, 4) = t2.a and t1.b = t2.b where t1.c between 1 and 5").Check(testkit.Rows( + tk.MustQuery("select /*+ inl_hash_join(t1, t2) */ t1.* from t1 join t2 on substr(t1.a, 2, 4) = t2.a and t1.b = t2.b where t1.c between 1 and 5").Sort().Check(testkit.Rows( + "abcd 1 1", "bacd 2 2", "cbad 3 3", - "abcd 1 1", )) - tk.MustQuery("select /*+ inl_merge_join(t1, t2) */ t1.* from t1 join t2 on substr(t1.a, 2, 4) = t2.a and t1.b = t2.b where t1.c between 1 
and 5").Check(testkit.Rows( + tk.MustQuery("select /*+ inl_merge_join(t1, t2) */ t1.* from t1 join t2 on substr(t1.a, 2, 4) = t2.a and t1.b = t2.b where t1.c between 1 and 5").Sort().Check(testkit.Rows( + "abcd 1 1", "bacd 2 2", "cbad 3 3", - "abcd 1 1", )) } diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 8339d0d656c2d..fbad4d2ed3044 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -317,7 +317,7 @@ func detachDNFCondAndBuildRangeForIndex(sctx sessionctx.Context, condition *expr } } - totalRanges, err := unionRanges(sc, totalRanges) + totalRanges, err := UnionRanges(sc, totalRanges) if err != nil { return nil, nil, false, errors.Trace(err) } diff --git a/util/ranger/ranger.go b/util/ranger/ranger.go index 84732d1121302..0c5ebf53d52b7 100644 --- a/util/ranger/ranger.go +++ b/util/ranger/ranger.go @@ -270,7 +270,7 @@ func buildColumnRange(accessConditions []expression.Expression, sc *stmtctx.Stat ran.HighExclude = false } } - ranges, err = unionRanges(sc, ranges) + ranges, err = UnionRanges(sc, ranges) if err != nil { return nil, err } @@ -340,7 +340,7 @@ func buildCNFIndexRange(sc *stmtctx.StatementContext, cols []*expression.Column, // Take prefix index into consideration. if hasPrefix(lengths) { if fixPrefixColRange(ranges, lengths, newTp) { - ranges, err = unionRanges(sc, ranges) + ranges, err = UnionRanges(sc, ranges) if err != nil { return nil, errors.Trace(err) } @@ -356,7 +356,11 @@ type sortRange struct { encodedEnd []byte } -func unionRanges(sc *stmtctx.StatementContext, ranges []*Range) ([]*Range, error) { +// UnionRanges sorts `ranges`, union adjacent ones if possible. +// For two intervals [a, b], [c, d], we have guaranteed that a >= c. If b >= c. Then two intervals are overlapped. +// And this two can be merged as [a, max(b, d)]. +// Otherwise they aren't overlapped. 
+func UnionRanges(sc *stmtctx.StatementContext, ranges []*Range) ([]*Range, error) { if len(ranges) == 0 { return nil, nil } @@ -384,9 +388,6 @@ func unionRanges(sc *stmtctx.StatementContext, ranges []*Range) ([]*Range, error ranges = ranges[:0] lastRange := objects[0] for i := 1; i < len(objects); i++ { - // For two intervals [a, b], [c, d], we have guaranteed that a >= c. If b >= c. Then two intervals are overlapped. - // And this two can be merged as [a, max(b, d)]. - // Otherwise they aren't overlapped. if bytes.Compare(lastRange.encodedEnd, objects[i].encodedStart) >= 0 { if bytes.Compare(lastRange.encodedEnd, objects[i].encodedEnd) < 0 { lastRange.encodedEnd = objects[i].encodedEnd From ffeebf1fc77eac0845cb47948165f2cbdd7cdd6b Mon Sep 17 00:00:00 2001 From: HuaiyuXu Date: Thu, 26 Mar 2020 22:34:18 +0800 Subject: [PATCH 2/2] Update util/ranger/ranger.go Co-Authored-By: Kenan Yao --- util/ranger/ranger.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/ranger/ranger.go b/util/ranger/ranger.go index 0c5ebf53d52b7..fbe9963abfe87 100644 --- a/util/ranger/ranger.go +++ b/util/ranger/ranger.go @@ -357,7 +357,7 @@ type sortRange struct { } // UnionRanges sorts `ranges`, union adjacent ones if possible. -// For two intervals [a, b], [c, d], we have guaranteed that a >= c. If b >= c. Then two intervals are overlapped. +// For two intervals [a, b], [c, d], we have guaranteed that a <= c. If b >= c. Then two intervals are overlapped. // And this two can be merged as [a, max(b, d)]. // Otherwise they aren't overlapped. func UnionRanges(sc *stmtctx.StatementContext, ranges []*Range) ([]*Range, error) {