Skip to content

Commit

Permalink
This is an automated cherry-pick of #48845
Browse files Browse the repository at this point in the history
Signed-off-by: ti-chi-bot <ti-community-prow-bot@tidb.io>
  • Loading branch information
time-and-fate authored and ti-chi-bot committed Nov 24, 2023
1 parent 069631e commit 15e4ad1
Show file tree
Hide file tree
Showing 8 changed files with 3,876 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2171,11 +2171,11 @@
},
{
"SQL": "select a from t where c_str like ''",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"\",\"\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"\",\"\"]]->Sel([like(test.t.c_str, , 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc'",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abc\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abc\"]]->Sel([like(test.t.c_str, abc, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str not like 'abc'",
Expand All @@ -2191,7 +2191,7 @@
},
{
"SQL": "select a from t where c_str like 'abc%'",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc%, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc_'",
Expand All @@ -2203,31 +2203,31 @@
},
{
"SQL": "select a from t where c_str like 'abc\\_' escape ''",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc\\_'",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc\\\\_'",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc\\_%'",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\_%, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc=_%' escape '='",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc=_%, 61)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc\\__'",
"Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 123",
"Best": "IndexReader(Index(t.c_d_e_str)[[\"123\",\"123\"]])->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"123\",\"123\"]]->Sel([like(test.t.c_str, 123, 92)]))->Projection"
},
{
"SQL": "select a from t where c = 1.9 and d > 3",
Expand Down
16 changes: 14 additions & 2 deletions pkg/util/ranger/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package ranger
import (
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/charset"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/collate"
Expand Down Expand Up @@ -166,11 +167,22 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isA
if err != nil {
return false, true
}
likeFuncReserve := !c.isFullLengthColumn()

// Unlike `=`, `like` always treats trailing spaces as significant; they can't be ignored.
// In TiDB's implementation, for PAD SPACE collations, the trailing spaces are removed in the index key. So we are
// unable to distinguish 'xxx' from 'xxx ' by a single index range scan, and we may read more data than needed by
// the `like` function. Therefore, a Selection is needed to filter the data.
// Since all collations implemented in TiDB, except for binary, are PAD SPACE collations for now, we use a simple
// `collation != binary` check here.
if collation != charset.CollationBin {
likeFuncReserve = true
}

if len(patternStr) == 0 {
return true, !c.isFullLengthColumn()
return true, likeFuncReserve
}
escape := byte(scalar.GetArgs()[2].(*expression.Constant).Value.GetInt64())
likeFuncReserve := !c.isFullLengthColumn()
for i := 0; i < len(patternStr); i++ {
if patternStr[i] == escape {
i++
Expand Down
12 changes: 6 additions & 6 deletions pkg/util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1098,7 +1098,7 @@ create table t(
indexPos: 0,
exprStr: `a LIKE 'abc%'`,
accessConds: `[like(test.t.a, abc%, 92)]`,
filterConds: "[]",
filterConds: "[like(test.t.a, abc%, 92)]",
resultStr: "[[\"abc\",\"abd\")]",
},
{
Expand All @@ -1112,14 +1112,14 @@ create table t(
indexPos: 0,
exprStr: "a LIKE 'abc'",
accessConds: "[like(test.t.a, abc, 92)]",
filterConds: "[]",
filterConds: "[like(test.t.a, abc, 92)]",
resultStr: "[[\"abc\",\"abc\"]]",
},
{
indexPos: 0,
exprStr: `a LIKE "ab\_c"`,
accessConds: "[like(test.t.a, ab\\_c, 92)]",
filterConds: "[]",
filterConds: "[like(test.t.a, ab\\_c, 92)]",
resultStr: "[[\"ab_c\",\"ab_c\"]]",
},
{
Expand All @@ -1133,21 +1133,21 @@ create table t(
indexPos: 0,
exprStr: `a LIKE '\%a'`,
accessConds: "[like(test.t.a, \\%a, 92)]",
filterConds: "[]",
filterConds: "[like(test.t.a, \\%a, 92)]",
resultStr: `[["%a","%a"]]`,
},
{
indexPos: 0,
exprStr: `a LIKE "\\"`,
accessConds: "[like(test.t.a, \\, 92)]",
filterConds: "[]",
filterConds: "[like(test.t.a, \\, 92)]",
resultStr: "[[\"\\\\\",\"\\\\\"]]",
},
{
indexPos: 0,
exprStr: `a LIKE "\\\\a%"`,
accessConds: `[like(test.t.a, \\a%, 92)]`,
filterConds: "[]",
filterConds: "[like(test.t.a, \\\\a%, 92)]",
resultStr: "[[\"\\\\a\",\"\\\\b\")]",
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,8 @@ id estRows task access object operator info
StreamAgg 1.00 root funcs:count(Column#6)->Column#4
└─IndexReader 1.00 root index:StreamAgg
└─StreamAgg 1.00 cop[tikv] funcs:count(1)->Column#6
└─IndexRangeScan 250.00 cop[tikv] table:tbl1, index:expression_index(md5(`s`)) range:["02e74f10e0327ad868d138f2b4fdd6f","02e74f10e0327ad868d138f2b4fdd6g"), keep order:false, stats:pseudo
└─Selection 250.00 cop[tikv] like(md5(cast(explain_generate_column_substitute.tbl1.s, var_string(20))), "02e74f10e0327ad868d138f2b4fdd6f%", 92)
└─IndexRangeScan 250.00 cop[tikv] table:tbl1, index:expression_index(md5(`s`)) range:["02e74f10e0327ad868d138f2b4fdd6f","02e74f10e0327ad868d138f2b4fdd6g"), keep order:false, stats:pseudo
select count(*) from tbl1 use index() where md5(s) like '02e74f10e0327ad868d138f2b4fdd6f%';
count(*)
64
Expand Down
Loading

0 comments on commit 15e4ad1

Please sign in to comment.