Skip to content

Commit

Permalink
util/ranger: add missing Selection for range scan from like on PA…
Browse files Browse the repository at this point in the history
…D SPACE column (#48845)

ref #48181, close #48821
  • Loading branch information
time-and-fate authored Nov 24, 2023
1 parent a0f5376 commit 27d2ba5
Show file tree
Hide file tree
Showing 8 changed files with 144 additions and 59 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2171,11 +2171,11 @@
},
{
"SQL": "select a from t where c_str like ''",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"\",\"\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"\",\"\"]]->Sel([like(test.t.c_str, , 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc'",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abc\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abc\"]]->Sel([like(test.t.c_str, abc, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str not like 'abc'",
Expand All @@ -2191,7 +2191,7 @@
},
{
"SQL": "select a from t where c_str like 'abc%'",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc%, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc_'",
Expand All @@ -2203,31 +2203,31 @@
},
{
"SQL": "select a from t where c_str like 'abc\\_' escape ''",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc\\_'",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc\\\\_'",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc\\_%'",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\_%, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc=_%' escape '='",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc=_%, 61)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc\\__'",
"Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 123",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"123\",\"123\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"123\",\"123\"]]->Sel([like(test.t.c_str, 123, 92)]))->Projection"
},
{
"SQL": "select a from t where c = 1.9 and d > 3",
Expand Down
16 changes: 14 additions & 2 deletions pkg/util/ranger/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package ranger
import (
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/charset"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/types"
Expand Down Expand Up @@ -168,11 +169,22 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isA
if err != nil {
return false, true
}
likeFuncReserve := !c.isFullLengthColumn()

// Different from `=`, trailing spaces are always significant, and can't be ignored in `like`.
// In tidb's implementation, for PAD SPACE collations, the trailing spaces are removed in the index key. So we are
// unable to distinguish 'xxx' from 'xxx ' by a single index range scan, and we may read more data than needed by
// the `like` function. Therefore, a Selection is needed to filter the data.
// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple
// collation != binary check here.
if collation != charset.CollationBin {
likeFuncReserve = true
}

if len(patternStr) == 0 {
return true, !c.isFullLengthColumn()
return true, likeFuncReserve
}
escape := byte(scalar.GetArgs()[2].(*expression.Constant).Value.GetInt64())
likeFuncReserve := !c.isFullLengthColumn()
for i := 0; i < len(patternStr); i++ {
if patternStr[i] == escape {
i++
Expand Down
12 changes: 6 additions & 6 deletions pkg/util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1106,7 +1106,7 @@ create table t(
indexPos: 0,
exprStr: `a LIKE 'abc%'`,
accessConds: `[like(test.t.a, abc%, 92)]`,
filterConds: "[]",
filterConds: "[like(test.t.a, abc%, 92)]",
resultStr: "[[\"abc\",\"abd\")]",
},
{
Expand All @@ -1120,14 +1120,14 @@ create table t(
indexPos: 0,
exprStr: "a LIKE 'abc'",
accessConds: "[like(test.t.a, abc, 92)]",
filterConds: "[]",
filterConds: "[like(test.t.a, abc, 92)]",
resultStr: "[[\"abc\",\"abc\"]]",
},
{
indexPos: 0,
exprStr: `a LIKE "ab\_c"`,
accessConds: "[like(test.t.a, ab\\_c, 92)]",
filterConds: "[]",
filterConds: "[like(test.t.a, ab\\_c, 92)]",
resultStr: "[[\"ab_c\",\"ab_c\"]]",
},
{
Expand All @@ -1141,21 +1141,21 @@ create table t(
indexPos: 0,
exprStr: `a LIKE '\%a'`,
accessConds: "[like(test.t.a, \\%a, 92)]",
filterConds: "[]",
filterConds: "[like(test.t.a, \\%a, 92)]",
resultStr: `[["%a","%a"]]`,
},
{
indexPos: 0,
exprStr: `a LIKE "\\"`,
accessConds: "[like(test.t.a, \\, 92)]",
filterConds: "[]",
filterConds: "[like(test.t.a, \\, 92)]",
resultStr: "[[\"\\\\\",\"\\\\\"]]",
},
{
indexPos: 0,
exprStr: `a LIKE "\\\\a%"`,
accessConds: `[like(test.t.a, \\a%, 92)]`,
filterConds: "[]",
filterConds: "[like(test.t.a, \\\\a%, 92)]",
resultStr: "[[\"\\\\a\",\"\\\\b\")]",
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,8 @@ id estRows task access object operator info
StreamAgg 1.00 root funcs:count(Column#6)->Column#4
└─IndexReader 1.00 root index:StreamAgg
└─StreamAgg 1.00 cop[tikv] funcs:count(1)->Column#6
└─IndexRangeScan 250.00 cop[tikv] table:tbl1, index:expression_index(md5(`s`)) range:["02e74f10e0327ad868d138f2b4fdd6f","02e74f10e0327ad868d138f2b4fdd6g"), keep order:false, stats:pseudo
└─Selection 250.00 cop[tikv] like(md5(cast(explain_generate_column_substitute.tbl1.s, var_string(20))), "02e74f10e0327ad868d138f2b4fdd6f%", 92)
└─IndexRangeScan 250.00 cop[tikv] table:tbl1, index:expression_index(md5(`s`)) range:["02e74f10e0327ad868d138f2b4fdd6f","02e74f10e0327ad868d138f2b4fdd6g"), keep order:false, stats:pseudo
select count(*) from tbl1 use index() where md5(s) like '02e74f10e0327ad868d138f2b4fdd6f%';
count(*)
64
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3308,21 +3308,21 @@ Projection 249.75 root planner__core__casetest__physicalplantest__physical_plan
│ └─StreamAgg 249.75 root funcs:max(planner__core__casetest__physicalplantest__physical_plan.tc.id)->Column#14
│ └─TopN 62.38 root planner__core__casetest__physicalplantest__physical_plan.tc.id:desc, offset:0, count:1
│ └─IndexLookUp 62.38 root
│ ├─Selection(Build) 62.44 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.name, planner__core__casetest__physicalplantest__physical_plan.tc.name)
│ ├─Selection(Build) 62.38 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.name, planner__core__casetest__physicalplantest__physical_plan.tc.name), like(planner__core__casetest__physicalplantest__physical_plan.tc.name, "chad99%", 92)
│ │ └─IndexRangeScan 62437.50 cop[tikv] table:tc, index:idx_tc_name(name) range:["chad99","chad9:"), keep order:false, stats:pseudo
│ └─TopN(Probe) 62.38 cop[tikv] planner__core__casetest__physicalplantest__physical_plan.tc.id:desc, offset:0, count:1
│ └─Selection 62.38 cop[tikv] not(isnull(planner__core__casetest__physicalplantest__physical_plan.tc.id))
│ └─TableRowIDScan 62.44 cop[tikv] table:tc keep order:false, stats:pseudo
│ └─TableRowIDScan 62.38 cop[tikv] table:tc keep order:false, stats:pseudo
└─Selection(Probe) 199.80 root gt(Column#19, 100)
└─MaxOneRow 249.75 root
└─StreamAgg 249.75 root funcs:max(planner__core__casetest__physicalplantest__physical_plan.td.id)->Column#19
└─Limit 62.38 root offset:0, count:1
└─Projection 62.38 root planner__core__casetest__physicalplantest__physical_plan.td.id, planner__core__casetest__physicalplantest__physical_plan.td.name
└─IndexLookUp 62.38 root
├─Selection(Build) 2495.00 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.id, planner__core__casetest__physicalplantest__physical_plan.td.id)
│ └─IndexFullScan 2495002.50 cop[tikv] table:td, index:idx_tc_id(id) keep order:true, desc, stats:pseudo
└─Selection(Probe) 62.38 cop[tikv] like(planner__core__casetest__physicalplantest__physical_plan.td.name, "chad999%", 92)
└─TableRowIDScan 2495.00 cop[tikv] table:td keep order:false, stats:pseudo
└─TopN 62.38 root planner__core__casetest__physicalplantest__physical_plan.td.id:desc, offset:0, count:1
└─IndexLookUp 62.38 root
├─Selection(Build) 1560.94 cop[tikv] like(planner__core__casetest__physicalplantest__physical_plan.td.name, "chad999%", 92)
│ └─IndexRangeScan 62437.50 cop[tikv] table:td, index:idx_tc_name(name) range:["chad999","chad99:"), keep order:false, stats:pseudo
└─TopN(Probe) 62.38 cop[tikv] planner__core__casetest__physicalplantest__physical_plan.td.id:desc, offset:0, count:1
└─Selection 62.38 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.id, planner__core__casetest__physicalplantest__physical_plan.td.id), not(isnull(planner__core__casetest__physicalplantest__physical_plan.td.id))
└─TableRowIDScan 1560.94 cop[tikv] table:td keep order:false, stats:pseudo
SELECT ta.NAME FROM ta WHERE EXISTS (select /*+ semi_join_rewrite() */ 1 from tb where ta.code = tb.code and tb.NAME LIKE 'chad9%') AND (select /*+ no_decorrelate() */ max(id) from tc where ta.name=tc.name and tc.name like 'chad99%') > 100 and (select /*+ no_decorrelate() */ max(id) from td where ta.id=td.id and td.name like 'chad999%') > 100;
NAME
show warnings;
Expand All @@ -3335,29 +3335,31 @@ Projection 10000.00 root planner__core__casetest__physicalplantest__physical_pl
│ ├─Apply(Build) 10000.00 root CARTESIAN semi join
│ │ ├─TableReader(Build) 10000.00 root data:TableFullScan
│ │ │ └─TableFullScan 10000.00 cop[tikv] table:ta keep order:false, stats:pseudo
│ │ └─TableReader(Probe) 2500.00 root data:Selection
│ │ └─Selection 2500.00 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.code, planner__core__casetest__physicalplantest__physical_plan.tb.code), like(planner__core__casetest__physicalplantest__physical_plan.tb.name, "chad9%", 92)
│ │ └─TableFullScan 100000000.00 cop[tikv] table:tb keep order:false, stats:pseudo
│ │ └─IndexLookUp(Probe) 2500.00 root
│ │ ├─Selection(Build) 62500.00 cop[tikv] like(planner__core__casetest__physicalplantest__physical_plan.tb.name, "chad9%", 92)
│ │ │ └─IndexRangeScan 2500000.00 cop[tikv] table:tb, index:idx_tb_name(name) range:["chad9","chad:"), keep order:false, stats:pseudo
│ │ └─Selection(Probe) 2500.00 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.code, planner__core__casetest__physicalplantest__physical_plan.tb.code)
│ │ └─TableRowIDScan 62500.00 cop[tikv] table:tb keep order:false, stats:pseudo
│ └─Selection(Probe) 8000.00 root gt(Column#14, 100)
│ └─MaxOneRow 10000.00 root
│ └─StreamAgg 10000.00 root funcs:max(planner__core__casetest__physicalplantest__physical_plan.tc.id)->Column#14
│ └─TopN 2497.50 root planner__core__casetest__physicalplantest__physical_plan.tc.id:desc, offset:0, count:1
│ └─IndexLookUp 2497.50 root
│ ├─Selection(Build) 2500.00 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.name, planner__core__casetest__physicalplantest__physical_plan.tc.name)
│ ├─Selection(Build) 2497.50 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.name, planner__core__casetest__physicalplantest__physical_plan.tc.name), like(planner__core__casetest__physicalplantest__physical_plan.tc.name, "chad99%", 92)
│ │ └─IndexRangeScan 2500000.00 cop[tikv] table:tc, index:idx_tc_name(name) range:["chad99","chad9:"), keep order:false, stats:pseudo
│ └─TopN(Probe) 2497.50 cop[tikv] planner__core__casetest__physicalplantest__physical_plan.tc.id:desc, offset:0, count:1
│ └─Selection 2497.50 cop[tikv] not(isnull(planner__core__casetest__physicalplantest__physical_plan.tc.id))
│ └─TableRowIDScan 2500.00 cop[tikv] table:tc keep order:false, stats:pseudo
│ └─TableRowIDScan 2497.50 cop[tikv] table:tc keep order:false, stats:pseudo
└─Selection(Probe) 8000.00 root gt(Column#19, 100)
└─MaxOneRow 10000.00 root
└─StreamAgg 10000.00 root funcs:max(planner__core__casetest__physicalplantest__physical_plan.td.id)->Column#19
└─Limit 2497.50 root offset:0, count:1
└─Projection 2497.50 root planner__core__casetest__physicalplantest__physical_plan.td.id, planner__core__casetest__physicalplantest__physical_plan.td.name
└─IndexLookUp 2497.50 root
├─Selection(Build) 99900.00 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.id, planner__core__casetest__physicalplantest__physical_plan.td.id)
│ └─IndexFullScan 99900000.00 cop[tikv] table:td, index:idx_tc_id(id) keep order:true, desc, stats:pseudo
└─Selection(Probe) 2497.50 cop[tikv] like(planner__core__casetest__physicalplantest__physical_plan.td.name, "chad999%", 92)
└─TableRowIDScan 99900.00 cop[tikv] table:td keep order:false, stats:pseudo
└─TopN 2497.50 root planner__core__casetest__physicalplantest__physical_plan.td.id:desc, offset:0, count:1
└─IndexLookUp 2497.50 root
├─Selection(Build) 62500.00 cop[tikv] like(planner__core__casetest__physicalplantest__physical_plan.td.name, "chad999%", 92)
│ └─IndexRangeScan 2500000.00 cop[tikv] table:td, index:idx_tc_name(name) range:["chad999","chad99:"), keep order:false, stats:pseudo
└─TopN(Probe) 2497.50 cop[tikv] planner__core__casetest__physicalplantest__physical_plan.td.id:desc, offset:0, count:1
└─Selection 2497.50 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.id, planner__core__casetest__physicalplantest__physical_plan.td.id), not(isnull(planner__core__casetest__physicalplantest__physical_plan.td.id))
└─TableRowIDScan 62500.00 cop[tikv] table:td keep order:false, stats:pseudo
SELECT ta.NAME FROM ta WHERE EXISTS (select /*+ no_decorrelate() */ 1 from tb where ta.code = tb.code and tb.NAME LIKE 'chad9%') AND (select /*+ no_decorrelate() */ max(id) from tc where ta.name=tc.name and tc.name like 'chad99%') > 100 and (select /*+ no_decorrelate() */ max(id) from td where ta.id=td.id and td.name like 'chad999%') > 100;
NAME
show warnings;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,48 @@ LEFT JOIN tmp3 c3 ON c3.id = '1';
id id
1 1
1 1
drop table if exists t1, t2;
create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
insert into t1 value('测试'),('测试 ');
explain format = brief select *,length(a) from t1 where a like '测试 %';
id estRows task access object operator info
Projection 250.00 root planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3
└─UnionScan 250.00 root like(planner__core__issuetest__planner_issue.t1.a, "测试 %", 92)
└─IndexReader 250.00 root index:Selection
└─Selection 250.00 cop[tikv] like(planner__core__issuetest__planner_issue.t1.a, "测试 %", 92)
└─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["测试 ","测试!"), keep order:false, stats:pseudo
explain format = brief select *,length(a) from t1 where a like '测试';
id estRows task access object operator info
Projection 10.00 root planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3
└─UnionScan 10.00 root like(planner__core__issuetest__planner_issue.t1.a, "测试", 92)
└─IndexReader 10.00 root index:Selection
└─Selection 10.00 cop[tikv] like(planner__core__issuetest__planner_issue.t1.a, "测试", 92)
└─IndexRangeScan 10.00 cop[tikv] table:t1, index:ia(a) range:["测试","测试"], keep order:false, stats:pseudo
select *,length(a) from t1 where a like '测试 %';
a length(a)
测试 8
select *,length(a) from t1 where a like '测试';
a length(a)
测试 6
create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a));
insert into t2 value('测试'),('测试 ');
explain format = brief select *,length(a) from t2 where a like '测试 %';
id estRows task access object operator info
Projection 8000.00 root planner__core__issuetest__planner_issue.t2.a, length(to_binary(planner__core__issuetest__planner_issue.t2.a))->Column#3
└─UnionScan 8000.00 root like(planner__core__issuetest__planner_issue.t2.a, "测试 %", 92)
└─TableReader 8000.00 root data:Selection
└─Selection 8000.00 cop[tikv] like(planner__core__issuetest__planner_issue.t2.a, "测试 %", 92)
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
explain format = brief select *,length(a) from t2 where a like '测试';
id estRows task access object operator info
Projection 8000.00 root planner__core__issuetest__planner_issue.t2.a, length(to_binary(planner__core__issuetest__planner_issue.t2.a))->Column#3
└─UnionScan 8000.00 root like(planner__core__issuetest__planner_issue.t2.a, "测试", 92)
└─TableReader 8000.00 root data:Selection
└─Selection 8000.00 cop[tikv] like(planner__core__issuetest__planner_issue.t2.a, "测试", 92)
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
select *,length(a) from t2 where a like '测试 %';
a length(a)
测试 6
select *,length(a) from t2 where a like '测试';
a length(a)
测试 4
Loading

0 comments on commit 27d2ba5

Please sign in to comment.