Skip to content

Commit

Permalink
util/ranger: support use like to build range for new collation columns
Browse files Browse the repository at this point in the history
  • Loading branch information
time-and-fate committed Feb 20, 2024
1 parent f7fae4b commit f8d69f6
Show file tree
Hide file tree
Showing 13 changed files with 763 additions and 495 deletions.
12 changes: 6 additions & 6 deletions pkg/planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2230,14 +2230,14 @@ func TestPlanCacheForIndexJoinRangeFallback(t *testing.T) {
tk.MustExec("drop table if exists t1, t2")
tk.MustExec("create table t1(a int, b varchar(10), c varchar(10), index idx_a_b(a, b))")
tk.MustExec("create table t2(d int)")
tk.MustExec("set @@tidb_opt_range_max_size=1275")
// 1275 is enough for [? a,? a], [? b,? b], [? c,? c] but is not enough for [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc].
tk.MustExec("set @@tidb_opt_range_max_size=1260")
// 1260 is enough for [? a,? a], [? b,? b], [? c,? c] but is not enough for [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc].
rows := tk.MustQuery("explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in ('a', 'b', 'c')").Rows()
require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, a, b, c)]"))
tk.MustQuery("show warnings").Check(testkit.Rows())
rows = tk.MustQuery("explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in ('aaaaaa', 'bbbbbb', 'cccccc');").Rows()
require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d)]"))
tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 1275 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen"))
require.Contains(t, rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d)]")
tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 1260 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen"))

tk.MustExec("prepare stmt1 from 'select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in (?, ?, ?)'")
tk.MustExec("set @a='a', @b='b', @c='c'")
Expand All @@ -2252,13 +2252,13 @@ func TestPlanCacheForIndexJoinRangeFallback(t *testing.T) {
tk.Session().SetSessionManager(&testkit.MockSessionManager{PS: ps})
rows = tk.MustQuery(fmt.Sprintf("explain for connection %d", tkProcess.ID)).Rows()
// We don't limit range mem usage when rebuilding index join ranges for the cached plan. So [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc] can be built.
require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, aaaaaa, bbbbbb, cccccc)]"))
require.Contains(t, rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, aaaaaa, bbbbbb, cccccc)]")

// Test the plan with range fallback would not be put into cache.
tk.MustExec("prepare stmt2 from 'select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in (?, ?, ?, ?, ?)'")
tk.MustExec("set @a='a', @b='b', @c='c', @d='d', @e='e'")
tk.MustExec("execute stmt2 using @a, @b, @c, @d, @e")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows("Warning 1105 Memory capacity of 1275 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen",
tk.MustQuery("show warnings").Sort().Check(testkit.Rows("Warning 1105 Memory capacity of 1260 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen",
"Warning 1105 skip prepared plan-cache: in-list is too long"))
tk.MustExec("execute stmt2 using @a, @b, @c, @d, @e")
tk.MustQuery("select @@last_plan_from_cache").Check(testkit.Rows("0"))
Expand Down
15 changes: 8 additions & 7 deletions pkg/planner/core/testdata/index_merge_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@
"IndexMerge 0.00 root type: intersection",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is1(s1) range:[\"Abc\",\"Abc\"], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t5, index:is2(s2) range:(\"zzz\",+inf], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 3323.33 cop[tikv] table:t5, index:is3(s3) range:[-inf,\"B啊a\"), keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is4(s4) range:[\"CcC\",\"CcC\"], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 3323.33 cop[tikv] table:t5, index:is3(s3) range:[-inf,\"\\x0eJ\\xfb@\\xd5J\\x0e3\"), keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is4(s4) range:[\"CCC\",\"CCC\"], keep order:false, stats:pseudo",
"└─TableRowIDScan(Probe) 0.00 cop[tikv] table:t5 keep order:false, stats:pseudo"
],
"Result": [
Expand All @@ -144,7 +144,7 @@
"Plan": [
"IndexMerge 0.03 root type: intersection",
"├─IndexRangeScan(Build) 33.33 cop[tikv] table:t6, index:PRIMARY(s1, s2) range:(\"Abc\" \"zzz\",\"Abc\" +inf], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t6, index:is3(s3) range:[\"A啊a\",\"A啊a\"], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t6, index:is3(s3) range:[\"\\x0e3\\xfb@\\xd5J\\x0e3\",\"\\x0e3\\xfb@\\xd5J\\x0e3\"], keep order:false, stats:pseudo",
"└─Selection(Probe) 0.03 cop[tikv] gt(test.t6.s2, \"zzz\"), not(like(test.t6.s4, \"Cd_\", 92))",
" └─TableRowIDScan 0.03 cop[tikv] table:t6 keep order:false, stats:pseudo"
],
Expand Down Expand Up @@ -172,13 +172,14 @@
{
"SQL": "select /*+ use_index_merge(t8, primary,is2,is3,is4,is5) */ * from t8 where s1 like '啊A%' and s2 > 'abc' and s3 > 'cba' and s4 in ('aA', '??') and s5 = 'test,2'",
"Plan": [
"Selection 1.42 root eq(test.t8.s5, \"test,2\")",
"└─IndexMerge 0.59 root type: intersection",
"Selection 0.04 root eq(test.t8.s5, \"test,2\")",
"└─IndexMerge 0.06 root type: intersection",
" ├─IndexRangeScan(Build) 250.00 cop[tikv] table:t8, index:PRIMARY(s1) range:[\"UJ\\x00A\",\"UJ\\x00B\"), keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is2(s2) range:(\"abc\",+inf], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is3(s3) range:(\"cba\",+inf], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 20.00 cop[tikv] table:t8, index:is4(s4) range:[\"aA\",\"aA\"], [\"??\",\"??\"], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.59 cop[tikv] gt(test.t8.s3, \"cba\"), like(test.t8.s1, \"啊A%\", 92)",
" └─TableRowIDScan 2.22 cop[tikv] table:t8 keep order:false, stats:pseudo"
" └─Selection(Probe) 0.06 cop[tikv] gt(test.t8.s3, \"cba\"), like(test.t8.s1, \"啊A%\", 92)",
" └─TableRowIDScan 0.06 cop[tikv] table:t8 keep order:false, stats:pseudo"
],
"Result": [
"啊aabbccdd abcc cccc aA tEsT,2"
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/ranger/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ go_library(
"//pkg/util/codec",
"//pkg/util/collate",
"//pkg/util/dbterror",
"//pkg/util/mathutil",
"//pkg/util/hack",
"@com_github_pingcap_errors//:errors",
],
)
Expand Down
10 changes: 0 additions & 10 deletions pkg/util/ranger/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,16 +141,6 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction

func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isAccessCond, shouldReserve bool) {
_, collation := scalar.CharsetAndCollation()
if collate.NewCollationEnabled() && !collate.IsBinCollation(collation) {
// The algorithm constructs the range in byte-level: for example, ab% is mapped to [ab, ac] by adding 1 to the last byte.
// However, this is incorrect for non-binary collation strings because the sort key order is not the same as byte order.
// For example, "`%" is mapped to the range [`, a](where ` is 0x60 and a is 0x61).
// Because the collation utf8_general_ci is case-insensitive, a and A have the same sort key.
// Finally, the range comes to be [`, A], which is actually an empty range.
// See https://github.com/pingcap/tidb/issues/31174 for more details.
// In short, when the column type is non-binary collation string, we cannot use `like` expressions to generate the range.
return false, true
}
if !collate.CompatibleCollate(scalar.GetArgs()[0].GetType().GetCollate(), collation) {
return false, true
}
Expand Down
27 changes: 18 additions & 9 deletions pkg/util/ranger/detacher.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ func compareCNFItemRangeResult(curResult, bestResult *cnfItemRangeResult) (curIs
// e.g, for input CNF expressions ((a,b) in ((1,1),(2,2))) and a > 1 and ((a,b,c) in (1,1,1),(2,2,2))
// ((a,b,c) in (1,1,1),(2,2,2)) would be extracted.
func extractBestCNFItemRanges(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column,
lengths []int, rangeMaxSize int64) (*cnfItemRangeResult, []*valueInfo, error) {
lengths []int, rangeMaxSize int64, convertToSortKey bool) (*cnfItemRangeResult, []*valueInfo, error) {
if len(conds) < 2 {
return nil, nil, nil
}
Expand All @@ -261,7 +261,7 @@ func extractBestCNFItemRanges(sctx sessionctx.Context, conds []expression.Expres
// We build ranges for `(a,b) in ((1,1),(1,2))` and get `[1 1, 1 1] [1 2, 1 2]`, which are point ranges and we can
// append `c = 1` to the point ranges. However, if we choose to merge consecutive ranges here, we get `[1 1, 1 2]`,
// which are not point ranges, and we cannot append `c = 1` anymore.
res, err := detachCondAndBuildRangeWithoutMerging(sctx, tmpConds, cols, lengths, rangeMaxSize)
res, err := detachCondAndBuildRangeWithoutMerging(sctx, tmpConds, cols, lengths, rangeMaxSize, convertToSortKey)
if err != nil {
return nil, nil, err
}
Expand Down Expand Up @@ -377,7 +377,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
optPrefixIndexSingleScan: d.sctx.GetSessionVars().OptPrefixIndexSingleScan,
}
if considerDNF {
bestCNFItemRes, columnValues, err := extractBestCNFItemRanges(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize)
bestCNFItemRes, columnValues, err := extractBestCNFItemRanges(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize, d.convertToSortKey)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -628,12 +628,16 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
}
// Multiple Eq/In conditions for one column in CNF, apply intersection on them
// Lazily compute the points for the previously visited Eq/In
newTp := newFieldType(cols[offset].GetType())
collator := collate.GetCollator(cols[offset].GetType().GetCollate())
if mergedAccesses[offset] == nil {
mergedAccesses[offset] = accesses[offset]
points[offset] = rb.build(accesses[offset], collator, lengths[offset])
// Note that this is a relatively special usage of build(). We will restore the points back to Expression for
// later use and may build the Expression to points again.
// We need to keep the original value here, which means we neither cut prefix nor convert to sort key.
points[offset] = rb.build(accesses[offset], newTp, types.UnspecifiedLength, false)
}
points[offset] = rb.intersection(points[offset], rb.build(cond, collator, lengths[offset]), collator)
points[offset] = rb.intersection(points[offset], rb.build(cond, newTp, types.UnspecifiedLength, false), collator)
if len(points[offset]) == 0 { // Early termination if false expression found
if expression.MaybeOverOptimized4PlanCache(sctx, conditions) {
// `a>@x and a<@y` --> `invalid-range if @x>=@y`
Expand Down Expand Up @@ -774,9 +778,10 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression
if shouldReserve {
hasResidual = true
}
points := rb.build(item, collate.GetCollator(newTpSlice[0].GetCollate()), d.lengths[0])
points := rb.build(item, newTpSlice[0], d.lengths[0], d.convertToSortKey)
tmpNewTp := convertStringFTToBinaryCollate(newTpSlice[0])
// TODO: restrict the mem usage of ranges
ranges, rangeFallback, err := points2Ranges(d.sctx, points, newTpSlice[0], d.rangeMaxSize)
ranges, rangeFallback, err := points2Ranges(d.sctx, points, tmpNewTp, d.rangeMaxSize)
if err != nil {
return nil, nil, nil, false, errors.Trace(err)
}
Expand Down Expand Up @@ -868,6 +873,7 @@ func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expre
cols: cols,
lengths: lengths,
mergeConsecutive: true,
convertToSortKey: true,
rangeMaxSize: rangeMaxSize,
}
return d.detachCondAndBuildRangeForCols()
Expand All @@ -876,13 +882,14 @@ func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expre
// detachCondAndBuildRangeWithoutMerging detaches the index filters from table filters and uses them to build ranges.
// When building ranges, it doesn't merge consecutive ranges.
func detachCondAndBuildRangeWithoutMerging(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column,
lengths []int, rangeMaxSize int64) (*DetachRangeResult, error) {
lengths []int, rangeMaxSize int64, convertToSortKey bool) (*DetachRangeResult, error) {
d := &rangeDetacher{
sctx: sctx,
allConds: conditions,
cols: cols,
lengths: lengths,
mergeConsecutive: false,
convertToSortKey: convertToSortKey,
rangeMaxSize: rangeMaxSize,
}
return d.detachCondAndBuildRangeForCols()
Expand All @@ -894,7 +901,7 @@ func detachCondAndBuildRangeWithoutMerging(sctx sessionctx.Context, conditions [
// The returned values are encapsulated into a struct DetachRangeResult, see its comments for explanation.
func DetachCondAndBuildRangeForPartition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column,
lengths []int, rangeMaxSize int64) (*DetachRangeResult, error) {
return detachCondAndBuildRangeWithoutMerging(sctx, conditions, cols, lengths, rangeMaxSize)
return detachCondAndBuildRangeWithoutMerging(sctx, conditions, cols, lengths, rangeMaxSize, false)
}

type rangeDetacher struct {
Expand All @@ -903,6 +910,7 @@ type rangeDetacher struct {
cols []*expression.Column
lengths []int
mergeConsecutive bool
convertToSortKey bool
rangeMaxSize int64
}

Expand Down Expand Up @@ -949,6 +957,7 @@ func DetachSimpleCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions [
cols: cols,
lengths: lengths,
mergeConsecutive: true,
convertToSortKey: true,
rangeMaxSize: rangeMaxSize,
}
res, err := d.detachCNFCondAndBuildRangeForIndex(conditions, newTpSlice, false)
Expand Down
Loading

0 comments on commit f8d69f6

Please sign in to comment.