Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

util/ranger: add missing Selection for range scan from like on PAD SPACE column | tidb-test=pr/2251 #48845

Merged
merged 4 commits into from
Nov 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2171,11 +2171,11 @@
},
{
"SQL": "select a from t where c_str like ''",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"\",\"\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"\",\"\"]]->Sel([like(test.t.c_str, , 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc'",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abc\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abc\"]]->Sel([like(test.t.c_str, abc, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str not like 'abc'",
Expand All @@ -2191,7 +2191,7 @@
},
{
"SQL": "select a from t where c_str like 'abc%'",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc%, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc_'",
Expand All @@ -2203,31 +2203,31 @@
},
{
"SQL": "select a from t where c_str like 'abc\\_' escape ''",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc\\_'",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc\\\\_'",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc\\_%'",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\_%, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc=_%' escape '='",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc=_%, 61)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc\\__'",
"Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 123",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"123\",\"123\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"123\",\"123\"]]->Sel([like(test.t.c_str, 123, 92)]))->Projection"
},
{
"SQL": "select a from t where c = 1.9 and d > 3",
Expand Down
16 changes: 14 additions & 2 deletions pkg/util/ranger/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package ranger
import (
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/charset"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/types"
Expand Down Expand Up @@ -168,11 +169,22 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isA
if err != nil {
return false, true
}
likeFuncReserve := !c.isFullLengthColumn()

// Different from `=`, trailing spaces are always significant, and can't be ignored in `like`.
// In tidb's implementation, for PAD SPACE collations, the trailing spaces are removed in the index key. So we are
// unable to distinguish 'xxx' from 'xxx ' by a single index range scan, and we may read more data than needed by
// the `like` function. Therefore, a Selection is needed to filter the data.
// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple
// collation != binary check here.
if collation != charset.CollationBin {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about making PAD SPACE a new attribute of the collation, so that we won't introduce bugs when a new NO PAD collation is added?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. But I think it's better for the SQL team to add this to the collation-related packages, especially when they start to implement NO PAD collations. I added an isPadSpaceCollation function in #48984 since we need the same check there.
This won't cause bugs. If we keep the current logic when a NO PAD collation is added, there will be an unnecessary Selection; that's not perfect, but it won't cause bugs.

likeFuncReserve = true
}

if len(patternStr) == 0 {
return true, !c.isFullLengthColumn()
return true, likeFuncReserve
}
escape := byte(scalar.GetArgs()[2].(*expression.Constant).Value.GetInt64())
likeFuncReserve := !c.isFullLengthColumn()
for i := 0; i < len(patternStr); i++ {
if patternStr[i] == escape {
i++
Expand Down
12 changes: 6 additions & 6 deletions pkg/util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1106,7 +1106,7 @@ create table t(
indexPos: 0,
exprStr: `a LIKE 'abc%'`,
accessConds: `[like(test.t.a, abc%, 92)]`,
filterConds: "[]",
filterConds: "[like(test.t.a, abc%, 92)]",
resultStr: "[[\"abc\",\"abd\")]",
},
{
Expand All @@ -1120,14 +1120,14 @@ create table t(
indexPos: 0,
exprStr: "a LIKE 'abc'",
accessConds: "[like(test.t.a, abc, 92)]",
filterConds: "[]",
filterConds: "[like(test.t.a, abc, 92)]",
resultStr: "[[\"abc\",\"abc\"]]",
},
{
indexPos: 0,
exprStr: `a LIKE "ab\_c"`,
accessConds: "[like(test.t.a, ab\\_c, 92)]",
filterConds: "[]",
filterConds: "[like(test.t.a, ab\\_c, 92)]",
resultStr: "[[\"ab_c\",\"ab_c\"]]",
},
{
Expand All @@ -1141,21 +1141,21 @@ create table t(
indexPos: 0,
exprStr: `a LIKE '\%a'`,
accessConds: "[like(test.t.a, \\%a, 92)]",
filterConds: "[]",
filterConds: "[like(test.t.a, \\%a, 92)]",
resultStr: `[["%a","%a"]]`,
},
{
indexPos: 0,
exprStr: `a LIKE "\\"`,
accessConds: "[like(test.t.a, \\, 92)]",
filterConds: "[]",
filterConds: "[like(test.t.a, \\, 92)]",
resultStr: "[[\"\\\\\",\"\\\\\"]]",
},
{
indexPos: 0,
exprStr: `a LIKE "\\\\a%"`,
accessConds: `[like(test.t.a, \\a%, 92)]`,
filterConds: "[]",
filterConds: "[like(test.t.a, \\\\a%, 92)]",
resultStr: "[[\"\\\\a\",\"\\\\b\")]",
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,8 @@ id estRows task access object operator info
StreamAgg 1.00 root funcs:count(Column#6)->Column#4
└─IndexReader 1.00 root index:StreamAgg
└─StreamAgg 1.00 cop[tikv] funcs:count(1)->Column#6
└─IndexRangeScan 250.00 cop[tikv] table:tbl1, index:expression_index(md5(`s`)) range:["02e74f10e0327ad868d138f2b4fdd6f","02e74f10e0327ad868d138f2b4fdd6g"), keep order:false, stats:pseudo
└─Selection 250.00 cop[tikv] like(md5(cast(explain_generate_column_substitute.tbl1.s, var_string(20))), "02e74f10e0327ad868d138f2b4fdd6f%", 92)
└─IndexRangeScan 250.00 cop[tikv] table:tbl1, index:expression_index(md5(`s`)) range:["02e74f10e0327ad868d138f2b4fdd6f","02e74f10e0327ad868d138f2b4fdd6g"), keep order:false, stats:pseudo
select count(*) from tbl1 use index() where md5(s) like '02e74f10e0327ad868d138f2b4fdd6f%';
count(*)
64
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3308,21 +3308,21 @@ Projection 249.75 root planner__core__casetest__physicalplantest__physical_plan
│ └─StreamAgg 249.75 root funcs:max(planner__core__casetest__physicalplantest__physical_plan.tc.id)->Column#14
│ └─TopN 62.38 root planner__core__casetest__physicalplantest__physical_plan.tc.id:desc, offset:0, count:1
│ └─IndexLookUp 62.38 root
│ ├─Selection(Build) 62.44 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.name, planner__core__casetest__physicalplantest__physical_plan.tc.name)
│ ├─Selection(Build) 62.38 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.name, planner__core__casetest__physicalplantest__physical_plan.tc.name), like(planner__core__casetest__physicalplantest__physical_plan.tc.name, "chad99%", 92)
│ │ └─IndexRangeScan 62437.50 cop[tikv] table:tc, index:idx_tc_name(name) range:["chad99","chad9:"), keep order:false, stats:pseudo
│ └─TopN(Probe) 62.38 cop[tikv] planner__core__casetest__physicalplantest__physical_plan.tc.id:desc, offset:0, count:1
│ └─Selection 62.38 cop[tikv] not(isnull(planner__core__casetest__physicalplantest__physical_plan.tc.id))
│ └─TableRowIDScan 62.44 cop[tikv] table:tc keep order:false, stats:pseudo
│ └─TableRowIDScan 62.38 cop[tikv] table:tc keep order:false, stats:pseudo
└─Selection(Probe) 199.80 root gt(Column#19, 100)
└─MaxOneRow 249.75 root
└─StreamAgg 249.75 root funcs:max(planner__core__casetest__physicalplantest__physical_plan.td.id)->Column#19
└─Limit 62.38 root offset:0, count:1
└─Projection 62.38 root planner__core__casetest__physicalplantest__physical_plan.td.id, planner__core__casetest__physicalplantest__physical_plan.td.name
└─IndexLookUp 62.38 root
├─Selection(Build) 2495.00 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.id, planner__core__casetest__physicalplantest__physical_plan.td.id)
│ └─IndexFullScan 2495002.50 cop[tikv] table:td, index:idx_tc_id(id) keep order:true, desc, stats:pseudo
└─Selection(Probe) 62.38 cop[tikv] like(planner__core__casetest__physicalplantest__physical_plan.td.name, "chad999%", 92)
└─TableRowIDScan 2495.00 cop[tikv] table:td keep order:false, stats:pseudo
└─TopN 62.38 root planner__core__casetest__physicalplantest__physical_plan.td.id:desc, offset:0, count:1
└─IndexLookUp 62.38 root
├─Selection(Build) 1560.94 cop[tikv] like(planner__core__casetest__physicalplantest__physical_plan.td.name, "chad999%", 92)
│ └─IndexRangeScan 62437.50 cop[tikv] table:td, index:idx_tc_name(name) range:["chad999","chad99:"), keep order:false, stats:pseudo
└─TopN(Probe) 62.38 cop[tikv] planner__core__casetest__physicalplantest__physical_plan.td.id:desc, offset:0, count:1
└─Selection 62.38 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.id, planner__core__casetest__physicalplantest__physical_plan.td.id), not(isnull(planner__core__casetest__physicalplantest__physical_plan.td.id))
└─TableRowIDScan 1560.94 cop[tikv] table:td keep order:false, stats:pseudo
SELECT ta.NAME FROM ta WHERE EXISTS (select /*+ semi_join_rewrite() */ 1 from tb where ta.code = tb.code and tb.NAME LIKE 'chad9%') AND (select /*+ no_decorrelate() */ max(id) from tc where ta.name=tc.name and tc.name like 'chad99%') > 100 and (select /*+ no_decorrelate() */ max(id) from td where ta.id=td.id and td.name like 'chad999%') > 100;
NAME
show warnings;
Expand All @@ -3335,29 +3335,31 @@ Projection 10000.00 root planner__core__casetest__physicalplantest__physical_pl
│ ├─Apply(Build) 10000.00 root CARTESIAN semi join
│ │ ├─TableReader(Build) 10000.00 root data:TableFullScan
│ │ │ └─TableFullScan 10000.00 cop[tikv] table:ta keep order:false, stats:pseudo
│ │ └─TableReader(Probe) 2500.00 root data:Selection
│ │ └─Selection 2500.00 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.code, planner__core__casetest__physicalplantest__physical_plan.tb.code), like(planner__core__casetest__physicalplantest__physical_plan.tb.name, "chad9%", 92)
│ │ └─TableFullScan 100000000.00 cop[tikv] table:tb keep order:false, stats:pseudo
│ │ └─IndexLookUp(Probe) 2500.00 root
│ │ ├─Selection(Build) 62500.00 cop[tikv] like(planner__core__casetest__physicalplantest__physical_plan.tb.name, "chad9%", 92)
│ │ │ └─IndexRangeScan 2500000.00 cop[tikv] table:tb, index:idx_tb_name(name) range:["chad9","chad:"), keep order:false, stats:pseudo
│ │ └─Selection(Probe) 2500.00 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.code, planner__core__casetest__physicalplantest__physical_plan.tb.code)
│ │ └─TableRowIDScan 62500.00 cop[tikv] table:tb keep order:false, stats:pseudo
│ └─Selection(Probe) 8000.00 root gt(Column#14, 100)
│ └─MaxOneRow 10000.00 root
│ └─StreamAgg 10000.00 root funcs:max(planner__core__casetest__physicalplantest__physical_plan.tc.id)->Column#14
│ └─TopN 2497.50 root planner__core__casetest__physicalplantest__physical_plan.tc.id:desc, offset:0, count:1
│ └─IndexLookUp 2497.50 root
│ ├─Selection(Build) 2500.00 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.name, planner__core__casetest__physicalplantest__physical_plan.tc.name)
│ ├─Selection(Build) 2497.50 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.name, planner__core__casetest__physicalplantest__physical_plan.tc.name), like(planner__core__casetest__physicalplantest__physical_plan.tc.name, "chad99%", 92)
│ │ └─IndexRangeScan 2500000.00 cop[tikv] table:tc, index:idx_tc_name(name) range:["chad99","chad9:"), keep order:false, stats:pseudo
│ └─TopN(Probe) 2497.50 cop[tikv] planner__core__casetest__physicalplantest__physical_plan.tc.id:desc, offset:0, count:1
│ └─Selection 2497.50 cop[tikv] not(isnull(planner__core__casetest__physicalplantest__physical_plan.tc.id))
│ └─TableRowIDScan 2500.00 cop[tikv] table:tc keep order:false, stats:pseudo
│ └─TableRowIDScan 2497.50 cop[tikv] table:tc keep order:false, stats:pseudo
└─Selection(Probe) 8000.00 root gt(Column#19, 100)
└─MaxOneRow 10000.00 root
└─StreamAgg 10000.00 root funcs:max(planner__core__casetest__physicalplantest__physical_plan.td.id)->Column#19
└─Limit 2497.50 root offset:0, count:1
└─Projection 2497.50 root planner__core__casetest__physicalplantest__physical_plan.td.id, planner__core__casetest__physicalplantest__physical_plan.td.name
└─IndexLookUp 2497.50 root
├─Selection(Build) 99900.00 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.id, planner__core__casetest__physicalplantest__physical_plan.td.id)
│ └─IndexFullScan 99900000.00 cop[tikv] table:td, index:idx_tc_id(id) keep order:true, desc, stats:pseudo
└─Selection(Probe) 2497.50 cop[tikv] like(planner__core__casetest__physicalplantest__physical_plan.td.name, "chad999%", 92)
└─TableRowIDScan 99900.00 cop[tikv] table:td keep order:false, stats:pseudo
└─TopN 2497.50 root planner__core__casetest__physicalplantest__physical_plan.td.id:desc, offset:0, count:1
└─IndexLookUp 2497.50 root
├─Selection(Build) 62500.00 cop[tikv] like(planner__core__casetest__physicalplantest__physical_plan.td.name, "chad999%", 92)
│ └─IndexRangeScan 2500000.00 cop[tikv] table:td, index:idx_tc_name(name) range:["chad999","chad99:"), keep order:false, stats:pseudo
└─TopN(Probe) 2497.50 cop[tikv] planner__core__casetest__physicalplantest__physical_plan.td.id:desc, offset:0, count:1
└─Selection 2497.50 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.id, planner__core__casetest__physicalplantest__physical_plan.td.id), not(isnull(planner__core__casetest__physicalplantest__physical_plan.td.id))
└─TableRowIDScan 62500.00 cop[tikv] table:td keep order:false, stats:pseudo
SELECT ta.NAME FROM ta WHERE EXISTS (select /*+ no_decorrelate() */ 1 from tb where ta.code = tb.code and tb.NAME LIKE 'chad9%') AND (select /*+ no_decorrelate() */ max(id) from tc where ta.name=tc.name and tc.name like 'chad99%') > 100 and (select /*+ no_decorrelate() */ max(id) from td where ta.id=td.id and td.name like 'chad999%') > 100;
NAME
show warnings;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,48 @@ LEFT JOIN tmp3 c3 ON c3.id = '1';
id id
1 1
1 1
drop table if exists t1, t2;
create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
insert into t1 value('测试'),('测试 ');
explain format = brief select *,length(a) from t1 where a like '测试 %';
id estRows task access object operator info
Projection 250.00 root planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3
└─UnionScan 250.00 root like(planner__core__issuetest__planner_issue.t1.a, "测试 %", 92)
└─IndexReader 250.00 root index:Selection
└─Selection 250.00 cop[tikv] like(planner__core__issuetest__planner_issue.t1.a, "测试 %", 92)
└─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["测试 ","测试!"), keep order:false, stats:pseudo
explain format = brief select *,length(a) from t1 where a like '测试';
id estRows task access object operator info
Projection 10.00 root planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3
└─UnionScan 10.00 root like(planner__core__issuetest__planner_issue.t1.a, "测试", 92)
└─IndexReader 10.00 root index:Selection
└─Selection 10.00 cop[tikv] like(planner__core__issuetest__planner_issue.t1.a, "测试", 92)
└─IndexRangeScan 10.00 cop[tikv] table:t1, index:ia(a) range:["测试","测试"], keep order:false, stats:pseudo
select *,length(a) from t1 where a like '测试 %';
a length(a)
测试 8
select *,length(a) from t1 where a like '测试';
a length(a)
测试 6
create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a));
insert into t2 value('测试'),('测试 ');
explain format = brief select *,length(a) from t2 where a like '测试 %';
id estRows task access object operator info
Projection 8000.00 root planner__core__issuetest__planner_issue.t2.a, length(to_binary(planner__core__issuetest__planner_issue.t2.a))->Column#3
└─UnionScan 8000.00 root like(planner__core__issuetest__planner_issue.t2.a, "测试 %", 92)
└─TableReader 8000.00 root data:Selection
└─Selection 8000.00 cop[tikv] like(planner__core__issuetest__planner_issue.t2.a, "测试 %", 92)
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
explain format = brief select *,length(a) from t2 where a like '测试';
id estRows task access object operator info
Projection 8000.00 root planner__core__issuetest__planner_issue.t2.a, length(to_binary(planner__core__issuetest__planner_issue.t2.a))->Column#3
└─UnionScan 8000.00 root like(planner__core__issuetest__planner_issue.t2.a, "测试", 92)
└─TableReader 8000.00 root data:Selection
└─Selection 8000.00 cop[tikv] like(planner__core__issuetest__planner_issue.t2.a, "测试", 92)
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
select *,length(a) from t2 where a like '测试 %';
a length(a)
测试 6
select *,length(a) from t2 where a like '测试';
a length(a)
测试 4
Loading