
util/ranger: support use like to build range for new collation columns | tidb-test=pr/2247 #48522

Merged 72 commits on Dec 5, 2023
Commits
6fd94d1
implement
time-and-fate Nov 10, 2023
9364a42
Merge remote-tracking branch 'upstream/master' into s20-issue48181
time-and-fate Nov 10, 2023
00f823e
bazel update
time-and-fate Nov 10, 2023
112b2e3
fix lint
time-and-fate Nov 10, 2023
afc2e46
tmp
time-and-fate Nov 13, 2023
0350d23
add
time-and-fate Nov 14, 2023
1eaa10e
fix
time-and-fate Nov 14, 2023
e55254b
Merge remote-tracking branch 'upstream/master' into s20-ranger-refact…
time-and-fate Nov 14, 2023
e892ae8
remove old code
time-and-fate Nov 14, 2023
40a00d7
fix one bug and update test result for other changes
time-and-fate Nov 14, 2023
abe70f7
Merge remote-tracking branch 'upstream/master' into s20-issue48181
time-and-fate Nov 14, 2023
df34ca2
Merge branch 's20-ranger-refactor-prefix-index' into s20-issue48181
time-and-fate Nov 14, 2023
287e116
fix
time-and-fate Nov 14, 2023
1fed3da
avoid unnecessary changes
time-and-fate Nov 14, 2023
aa53e1b
fix
time-and-fate Nov 14, 2023
5e5d68a
update
time-and-fate Nov 15, 2023
601b118
fix
time-and-fate Nov 15, 2023
9040424
update test result
time-and-fate Nov 15, 2023
0d83d06
change formatting
time-and-fate Nov 15, 2023
e08ab8a
update test result
time-and-fate Nov 16, 2023
fd700d2
update test result
time-and-fate Nov 16, 2023
9781a40
update test result
time-and-fate Nov 16, 2023
f99d0c3
fix
time-and-fate Nov 20, 2023
5022d9b
Merge remote-tracking branch 'upstream/master' into s20-issue48181
time-and-fate Nov 20, 2023
46c372e
fix
time-and-fate Nov 20, 2023
f26be31
fix
time-and-fate Nov 20, 2023
6138de9
Revert "fix"
time-and-fate Nov 20, 2023
a652cad
fix
time-and-fate Nov 20, 2023
44392bd
fix
time-and-fate Nov 20, 2023
d386d9f
update test result
time-and-fate Nov 20, 2023
42e8d9c
fix for pad space and update test result
time-and-fate Nov 21, 2023
cb7d605
Merge remote-tracking branch 'upstream/master' into s20-issue48181
time-and-fate Nov 21, 2023
0f04ca7
update test result
time-and-fate Nov 21, 2023
8c6bddb
allow prefix index
time-and-fate Nov 21, 2023
91c08f3
update test result
time-and-fate Nov 21, 2023
b5b13df
code cleanup
time-and-fate Nov 22, 2023
6de6b24
Merge remote-tracking branch 'upstream/master' into s20-issue48181
time-and-fate Nov 22, 2023
c849928
fix for typo in clean code
time-and-fate Nov 22, 2023
6d9d49b
code cleanup
time-and-fate Nov 22, 2023
3b0cda9
fix lint
time-and-fate Nov 22, 2023
4c63249
fix panic
time-and-fate Nov 22, 2023
f89b995
fix
time-and-fate Nov 22, 2023
1b4ffb8
improve code
time-and-fate Nov 22, 2023
8a561fe
Merge remote-tracking branch 'upstream/master' into s20-issue48181
time-and-fate Nov 23, 2023
5cdf2be
Merge remote-tracking branch 'upstream/master' into s20-issue48181
time-and-fate Nov 24, 2023
0b7f0b2
update test result
time-and-fate Nov 24, 2023
749ed95
Merge remote-tracking branch 'upstream/master' into s20-issue48181
time-and-fate Nov 24, 2023
fde1465
add test case and fix new bug
time-and-fate Nov 27, 2023
28ebb54
Merge remote-tracking branch 'upstream/master' into s20-issue48181
time-and-fate Nov 27, 2023
3a9b2ab
update test result
time-and-fate Nov 27, 2023
5152fb8
update test result
time-and-fate Nov 27, 2023
cc6d923
add test cases
time-and-fate Nov 27, 2023
30e8071
add more test cases
time-and-fate Nov 28, 2023
c21c8a3
try to remove unneeded code
time-and-fate Nov 28, 2023
768d88f
add comments
time-and-fate Nov 28, 2023
66a85fd
Merge remote-tracking branch 'upstream/master' into s20-issue48181
time-and-fate Nov 28, 2023
13ead7b
Merge remote-tracking branch 'upstream/master' into s20-issue48181
time-and-fate Nov 30, 2023
ba9a0dc
update test result
time-and-fate Nov 30, 2023
25b8345
add comments
time-and-fate Dec 1, 2023
73260b6
Merge remote-tracking branch 'upstream/master' into s20-issue48181
time-and-fate Dec 1, 2023
e31b9ad
update comments
time-and-fate Dec 1, 2023
e717176
add comments and test case
time-and-fate Dec 3, 2023
2ea7df5
Merge remote-tracking branch 'upstream/master' into s20-issue48181
time-and-fate Dec 3, 2023
21eaece
improve wording
time-and-fate Dec 4, 2023
3b3d7c1
improve comments
time-and-fate Dec 4, 2023
9b85170
rename and add tests
time-and-fate Dec 4, 2023
ab22b82
fix
time-and-fate Dec 4, 2023
17ad794
Merge remote-tracking branch 'upstream/master' into s20-issue48181
time-and-fate Dec 4, 2023
5c50b47
rename file
time-and-fate Dec 4, 2023
5fa4cc2
update test names
time-and-fate Dec 4, 2023
ceb4cea
add comments and small restructure code
time-and-fate Dec 5, 2023
06ee151
Merge remote-tracking branch 'upstream/master' into s20-issue48181
time-and-fate Dec 5, 2023
12 changes: 6 additions & 6 deletions pkg/planner/core/integration_test.go
@@ -1873,14 +1873,14 @@ func TestPlanCacheForIndexJoinRangeFallback(t *testing.T) {
tk.MustExec("drop table if exists t1, t2")
tk.MustExec("create table t1(a int, b varchar(10), c varchar(10), index idx_a_b(a, b))")
tk.MustExec("create table t2(d int)")
tk.MustExec("set @@tidb_opt_range_max_size=1275")
// 1275 is enough for [? a,? a], [? b,? b], [? c,? c] but is not enough for [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc].
tk.MustExec("set @@tidb_opt_range_max_size=1260")
// 1260 is enough for [? a,? a], [? b,? b], [? c,? c] but is not enough for [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc].
rows := tk.MustQuery("explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in ('a', 'b', 'c')").Rows()
require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, a, b, c)]"))
tk.MustQuery("show warnings").Check(testkit.Rows())
rows = tk.MustQuery("explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in ('aaaaaa', 'bbbbbb', 'cccccc');").Rows()
-require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d)]"))
-tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 1275 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen"))
+require.Contains(t, rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d)]")
+tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 1260 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen"))

tk.MustExec("prepare stmt1 from 'select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in (?, ?, ?)'")
tk.MustExec("set @a='a', @b='b', @c='c'")
@@ -1895,13 +1895,13 @@ func TestPlanCacheForIndexJoinRangeFallback(t *testing.T) {
tk.Session().SetSessionManager(&testkit.MockSessionManager{PS: ps})
rows = tk.MustQuery(fmt.Sprintf("explain for connection %d", tkProcess.ID)).Rows()
// We don't limit range mem usage when rebuilding index join ranges for the cached plan. So [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc] can be built.
-require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, aaaaaa, bbbbbb, cccccc)]"))
+require.Contains(t, rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, aaaaaa, bbbbbb, cccccc)]")

// Test the plan with range fallback would not be put into cache.
tk.MustExec("prepare stmt2 from 'select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in (?, ?, ?, ?, ?)'")
tk.MustExec("set @a='a', @b='b', @c='c', @d='d', @e='e'")
tk.MustExec("execute stmt2 using @a, @b, @c, @d, @e")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows("Warning 1105 Memory capacity of 1275 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen",
tk.MustQuery("show warnings").Sort().Check(testkit.Rows("Warning 1105 Memory capacity of 1260 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen",
"Warning 1105 skip prepared plan-cache: in-list is too long"))
tk.MustExec("execute stmt2 using @a, @b, @c, @d, @e")
tk.MustQuery("select @@last_plan_from_cache").Check(testkit.Rows("0"))
15 changes: 8 additions & 7 deletions pkg/planner/core/testdata/index_merge_suite_out.json
@@ -131,8 +131,8 @@
"IndexMerge 0.00 root type: intersection",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is1(s1) range:[\"Abc\",\"Abc\"], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t5, index:is2(s2) range:(\"zzz\",+inf], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 3323.33 cop[tikv] table:t5, index:is3(s3) range:[-inf,\"B啊a\"), keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is4(s4) range:[\"CcC\",\"CcC\"], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 3323.33 cop[tikv] table:t5, index:is3(s3) range:[-inf,\"\\x0eJ\\xfb@\\xd5J\\x0e3\"), keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is4(s4) range:[\"CCC\",\"CCC\"], keep order:false, stats:pseudo",
"└─TableRowIDScan(Probe) 0.00 cop[tikv] table:t5 keep order:false, stats:pseudo"
],
"Result": [
@@ -144,7 +144,7 @@
"Plan": [
"IndexMerge 0.03 root type: intersection",
"├─IndexRangeScan(Build) 33.33 cop[tikv] table:t6, index:PRIMARY(s1, s2) range:(\"Abc\" \"zzz\",\"Abc\" +inf], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t6, index:is3(s3) range:[\"A啊a\",\"A啊a\"], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t6, index:is3(s3) range:[\"\\x0e3\\xfb@\\xd5J\\x0e3\",\"\\x0e3\\xfb@\\xd5J\\x0e3\"], keep order:false, stats:pseudo",
"└─Selection(Probe) 0.03 cop[tikv] gt(test.t6.s2, \"zzz\"), not(like(test.t6.s4, \"Cd_\", 92))",
" └─TableRowIDScan 0.03 cop[tikv] table:t6 keep order:false, stats:pseudo"
],
@@ -172,13 +172,14 @@
{
"SQL": "select /*+ use_index_merge(t8, primary,is2,is3,is4,is5) */ * from t8 where s1 like '啊A%' and s2 > 'abc' and s3 > 'cba' and s4 in ('aA', '??') and s5 = 'test,2'",
"Plan": [
"Selection 1.42 root eq(test.t8.s5, \"test,2\")",
"└─IndexMerge 0.59 root type: intersection",
"Selection 0.04 root eq(test.t8.s5, \"test,2\")",
"└─IndexMerge 0.06 root type: intersection",
" ├─IndexRangeScan(Build) 250.00 cop[tikv] table:t8, index:PRIMARY(s1) range:[\"UJ\\x00A\",\"UJ\\x00B\"), keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is2(s2) range:(\"abc\",+inf], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is3(s3) range:(\"cba\",+inf], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 20.00 cop[tikv] table:t8, index:is4(s4) range:[\"aA\",\"aA\"], [\"??\",\"??\"], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.59 cop[tikv] gt(test.t8.s3, \"cba\"), like(test.t8.s1, \"啊A%\", 92)",
" └─TableRowIDScan 2.22 cop[tikv] table:t8 keep order:false, stats:pseudo"
" └─Selection(Probe) 0.06 cop[tikv] gt(test.t8.s3, \"cba\"), like(test.t8.s1, \"啊A%\", 92)",
" └─TableRowIDScan 0.06 cop[tikv] table:t8 keep order:false, stats:pseudo"
],
"Result": [
"啊aabbccdd abcc cccc aA tEsT,2"
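The new expected ranges print collation sort keys rather than the original literals. Assuming is3 uses utf8mb4_unicode_ci, the bytes decode consistently: 'a' and 'A' share the UCA primary weight 0x0E33, 'B' maps to 0x0E4A, and '啊' (U+554A), which has no explicit UCA weight, gets the implicit pair 0xFB40 0xD54A. So "A啊a" keys to "\x0e3\xfb@\xd5J\x0e3", and the upper bound for s3 < 'B啊a' starts with "\x0eJ". A hypothetical helper for the CJK implicit-weight rule that produces the middle bytes:

// implicitWeights reproduces the UCA implicit-weight rule for CJK unified
// ideographs (a hypothetical illustration, not TiDB's API): a rune with no
// explicit weight gets the 16-bit pair (0xFB40 + r>>15, r&0x7FFF | 0x8000).
func implicitWeights(r rune) (uint16, uint16) {
	return 0xFB40 + uint16(r>>15), uint16(r&0x7FFF) | 0x8000
}

// implicitWeights('啊') returns (0xFB40, 0xD54A), matching the "\xfb@\xd5J"
// bytes in the ranges above ('@' is 0x40, 'J' is 0x4A).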
1 change: 1 addition & 0 deletions pkg/util/ranger/BUILD.bazel
@@ -30,6 +30,7 @@ go_library(
"//pkg/util/codec",
"//pkg/util/collate",
"//pkg/util/dbterror",
"//pkg/util/hack",
"@com_github_pingcap_errors//:errors",
],
)
10 changes: 0 additions & 10 deletions pkg/util/ranger/checker.go
@@ -141,16 +141,6 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction

func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isAccessCond, shouldReserve bool) {
_, collation := scalar.CharsetAndCollation()
-if collate.NewCollationEnabled() && !collate.IsBinCollation(collation) {
-// The algorithm constructs the range in byte-level: for example, ab% is mapped to [ab, ac] by adding 1 to the last byte.
-// However, this is incorrect for non-binary collation strings because the sort key order is not the same as byte order.
-// For example, "`%" is mapped to the range [`, a](where ` is 0x60 and a is 0x61).
-// Because the collation utf8_general_ci is case-insensitive, a and A have the same sort key.
-// Finally, the range comes to be [`, A], which is actually an empty range.
-// See https://github.com/pingcap/tidb/issues/31174 for more details.
-// In short, when the column type is non-binary collation string, we cannot use `like` expressions to generate the range.
-return false, true
-}
if !collate.CompatibleCollate(scalar.GetArgs()[0].GetType().GetCollate(), collation) {
return false, true
}
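For context on the guard removed above: a minimal, self-contained Go sketch of the failure mode the deleted comment describes. strings.ToUpper is a toy stand-in for a case-insensitive collation's sort key; TiDB's real implementation lives in pkg/util/collate and is not reproduced here.

package main

import (
	"fmt"
	"strings"
)

// toySortKey stands in for a case-insensitive collation's sort key.
func toySortKey(s string) string { return strings.ToUpper(s) }

// naiveUpperBound applies the byte-level trick from the deleted comment:
// map LIKE 'p%' to the range [p, p'] where p' adds 1 to the last byte.
func naiveUpperBound(prefix string) string {
	b := []byte(prefix)
	b[len(b)-1]++ // the 0xFF carry case is ignored; enough for this sketch
	return string(b)
}

func main() {
	lo := "`"                 // 0x60, from LIKE '`%'
	hi := naiveUpperBound(lo) // "a" (0x61)
	fmt.Printf("byte range: [%q, %q]\n", lo, hi)
	// Under utf8_general_ci, 'a' and 'A' share a sort key, so in sort-key
	// order the range becomes [`, A], i.e. 0x60 > 0x41: an empty range,
	// which is the bug tracked in issue #31174.
	fmt.Println("empty in sort-key order:", toySortKey(lo) > toySortKey(hi))
}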
27 changes: 18 additions & 9 deletions pkg/util/ranger/detacher.go
@@ -241,7 +241,7 @@ func compareCNFItemRangeResult(curResult, bestResult *cnfItemRangeResult) (curIs
// e.g, for input CNF expressions ((a,b) in ((1,1),(2,2))) and a > 1 and ((a,b,c) in (1,1,1),(2,2,2))
// ((a,b,c) in (1,1,1),(2,2,2)) would be extracted.
func extractBestCNFItemRanges(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column,
-lengths []int, rangeMaxSize int64) (*cnfItemRangeResult, []*valueInfo, error) {
+lengths []int, rangeMaxSize int64, convertToSortKey bool) (*cnfItemRangeResult, []*valueInfo, error) {
if len(conds) < 2 {
return nil, nil, nil
}
@@ -260,7 +260,7 @@
// We build ranges for `(a,b) in ((1,1),(1,2))` and get `[1 1, 1 1] [1 2, 1 2]`, which are point ranges and we can
// append `c = 1` to the point ranges. However, if we choose to merge consecutive ranges here, we get `[1 1, 1 2]`,
// which are not point ranges, and we cannot append `c = 1` anymore.
-res, err := detachCondAndBuildRangeWithoutMerging(sctx, tmpConds, cols, lengths, rangeMaxSize)
+res, err := detachCondAndBuildRangeWithoutMerging(sctx, tmpConds, cols, lengths, rangeMaxSize, convertToSortKey)
if err != nil {
return nil, nil, err
}
@@ -376,7 +376,7 @@
ctx: d.sctx,
}
if considerDNF {
-bestCNFItemRes, columnValues, err := extractBestCNFItemRanges(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize)
+bestCNFItemRes, columnValues, err := extractBestCNFItemRanges(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize, d.convertToSortKey)
if err != nil {
return nil, err
}
@@ -627,12 +627,16 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
}
// Multiple Eq/In conditions for one column in CNF, apply intersection on them
// Lazily compute the points for the previously visited Eq/In
+newTp := newFieldType(cols[offset].GetType())
collator := collate.GetCollator(cols[offset].GetType().GetCollate())
if mergedAccesses[offset] == nil {
mergedAccesses[offset] = accesses[offset]
-points[offset] = rb.build(accesses[offset], collator, lengths[offset])
+// Note that this is a relatively special usage of build(). We will restore the points back to Expression for
+// later use and may build the Expression to points again.
+// We need to keep the original value here, which means we neither cut prefix nor convert to sort key.
+points[offset] = rb.build(accesses[offset], newTp, types.UnspecifiedLength, false)
}
-points[offset] = rb.intersection(points[offset], rb.build(cond, collator, lengths[offset]), collator)
+points[offset] = rb.intersection(points[offset], rb.build(cond, newTp, types.UnspecifiedLength, false), collator)
if len(points[offset]) == 0 { // Early termination if false expression found
if expression.MaybeOverOptimized4PlanCache(sctx, conditions) {
// `a>@x and a<@y` --> `invalid-range if @x>=@y`
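The hunk above builds points from the original values (no prefix cut, no sort-key conversion) so they can be restored to Expressions later; the intersection itself is an ordinary set operation. A rough sketch of that idea, using plain ints in place of ranger's typed points:

// intersectPoints mirrors the role of rb.intersection above: each Eq/In
// condition on a column yields a set of points, and CNF items on the same
// column keep only the points common to both sets. Real ranger points carry
// bounds and exclusiveness flags; plain values keep the illustration short.
func intersectPoints(a, b []int) []int {
	inB := make(map[int]struct{}, len(b))
	for _, v := range b {
		inB[v] = struct{}{}
	}
	out := make([]int, 0, len(a))
	for _, v := range a {
		if _, ok := inB[v]; ok {
			out = append(out, v)
		}
	}
	return out
}

// intersectPoints([]int{1, 2, 3}, []int{2, 3, 4}) yields [2, 3]; an empty
// result corresponds to the early-termination branch above.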
@@ -773,9 +777,10 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression
if shouldReserve {
hasResidual = true
}
-points := rb.build(item, collate.GetCollator(newTpSlice[0].GetCollate()), d.lengths[0])
+points := rb.build(item, newTpSlice[0], d.lengths[0], d.convertToSortKey)
+tmpNewTp := convertStringFTToBinaryCollate(newTpSlice[0])
// TODO: restrict the mem usage of ranges
-ranges, rangeFallback, err := points2Ranges(d.sctx, points, newTpSlice[0], d.rangeMaxSize)
+ranges, rangeFallback, err := points2Ranges(d.sctx, points, tmpNewTp, d.rangeMaxSize)
if err != nil {
return nil, nil, nil, false, errors.Trace(err)
}
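Once convertStringFTToBinaryCollate marks the endpoints' field type as binary-collated, range bounds compare as plain bytes, and a LIKE prefix can close its range with a byte successor, the same "add 1 to the last byte" idea quoted in the checker.go comment removed earlier. A carry-aware sketch of that construction (the exact helper ranger uses is not shown in this hunk):

// prefixSuccessor returns the shortest byte string strictly greater than
// every string starting with p, or ok=false when p is all 0xFF bytes and
// only +inf can serve as the upper bound.
func prefixSuccessor(p []byte) (succ []byte, ok bool) {
	s := append([]byte(nil), p...)
	for i := len(s) - 1; i >= 0; i-- {
		if s[i] < 0xFF {
			s[i]++
			return s[:i+1], true
		}
	}
	return nil, false
}

// prefixSuccessor([]byte("ab")) returns "ac", giving the range ["ab", "ac").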
@@ -867,6 +872,7 @@ func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expre
cols: cols,
lengths: lengths,
mergeConsecutive: true,
+convertToSortKey: true,
rangeMaxSize: rangeMaxSize,
}
return d.detachCondAndBuildRangeForCols()
@@ -875,13 +881,14 @@ func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions [expre
// detachCondAndBuildRangeWithoutMerging detaches the index filters from table filters and uses them to build ranges.
// When building ranges, it doesn't merge consecutive ranges.
func detachCondAndBuildRangeWithoutMerging(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column,
-lengths []int, rangeMaxSize int64) (*DetachRangeResult, error) {
+lengths []int, rangeMaxSize int64, convertToSortKey bool) (*DetachRangeResult, error) {
d := &rangeDetacher{
sctx: sctx,
allConds: conditions,
cols: cols,
lengths: lengths,
mergeConsecutive: false,
+convertToSortKey: convertToSortKey,
rangeMaxSize: rangeMaxSize,
}
return d.detachCondAndBuildRangeForCols()
@@ -893,7 +900,7 @@ func detachCondAndBuildRangeWithoutMerging(sctx sessionctx.Context, conditions [
// The returned values are encapsulated into a struct DetachRangeResult, see its comments for explanation.
func DetachCondAndBuildRangeForPartition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column,
lengths []int, rangeMaxSize int64) (*DetachRangeResult, error) {
-return detachCondAndBuildRangeWithoutMerging(sctx, conditions, cols, lengths, rangeMaxSize)
+return detachCondAndBuildRangeWithoutMerging(sctx, conditions, cols, lengths, rangeMaxSize, false)
}

type rangeDetacher struct {
@@ -902,6 +909,7 @@
cols []*expression.Column
lengths []int
mergeConsecutive bool
+convertToSortKey bool
rangeMaxSize int64
}

@@ -948,6 +956,7 @@ func DetachSimpleCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions [
cols: cols,
lengths: lengths,
mergeConsecutive: true,
+convertToSortKey: true,
rangeMaxSize: rangeMaxSize,
}
res, err := d.detachCNFCondAndBuildRangeForIndex(conditions, newTpSlice, false)