Skip to content

Commit

Permalink
[fix](like_func) incorrect result of like with 'NO_BACKSLASH_ESCAPES'…
Browse files Browse the repository at this point in the history
  • Loading branch information
mrhhsg authored and gnehil committed Dec 4, 2023
1 parent ddc521f commit fc716b0
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 21 deletions.
43 changes: 25 additions & 18 deletions be/src/vec/functions/like.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -453,24 +453,30 @@ void FunctionLike::convert_like_pattern(LikeSearchState* state, const std::strin

bool is_escaped = false;
for (size_t i = 0; i < pattern.size(); ++i) {
if (!is_escaped && pattern[i] == '%') {
re_pattern->append(".*");
} else if (!is_escaped && pattern[i] == '_') {
re_pattern->append(".");
// check for escape char before checking for regex special chars, they might overlap
} else if (!is_escaped && pattern[i] == state->escape_char) {
is_escaped = true;
} else if (pattern[i] == '.' || pattern[i] == '[' || pattern[i] == ']' ||
pattern[i] == '{' || pattern[i] == '}' || pattern[i] == '(' ||
pattern[i] == ')' || pattern[i] == '\\' || pattern[i] == '*' ||
pattern[i] == '+' || pattern[i] == '?' || pattern[i] == '|' ||
pattern[i] == '^' || pattern[i] == '$') {
// escape all regex special characters; see list at
re_pattern->append("\\");
re_pattern->append(1, pattern[i]);
is_escaped = false;
if (!is_escaped) {
switch (pattern[i]) {
case '%':
re_pattern->append(".*");
break;
case '_':
re_pattern->append(".");
break;
default:
is_escaped = pattern[i] == state->escape_char;
if (!is_escaped) {
re_pattern->append(1, pattern[i]);
}
break;
}
} else {
// regular character or escaped special character
if (pattern[i] == '.' || pattern[i] == '[' || pattern[i] == ']' || pattern[i] == '{' ||
pattern[i] == '}' || pattern[i] == '(' || pattern[i] == ')' || pattern[i] == '\\' ||
pattern[i] == '*' || pattern[i] == '+' || pattern[i] == '?' || pattern[i] == '|' ||
pattern[i] == '^' || pattern[i] == '$') {
re_pattern->append("\\");
} else if (pattern[i] != '%' && pattern[i] != '_') {
re_pattern->append("\\\\");
}
re_pattern->append(1, pattern[i]);
is_escaped = false;
}
Expand Down Expand Up @@ -634,7 +640,8 @@ Status FunctionLike::open(FunctionContext* context, FunctionContext::FunctionSta
opts.set_dot_nl(true);
state->search_state.regex = std::make_unique<RE2>(re_pattern, opts);
if (!state->search_state.regex->ok()) {
return Status::InternalError("Invalid regex expression: {}", pattern_str);
return Status::InternalError("Invalid regex expression: {}(origin: {})",
re_pattern, pattern_str);
}
}

Expand Down
10 changes: 7 additions & 3 deletions fe/fe-core/src/main/jflex/sql_scanner.flex
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,11 @@ import org.apache.doris.qe.SqlModeHelper;
return new Symbol(id, yyline+1, yycolumn+1, value);
}

private static String escapeBackSlash(String str) {
private static String escapeBackSlash(String str, long sqlMode) {
if ((sqlMode & SqlModeHelper.MODE_NO_BACKSLASH_ESCAPES) != 0) {
return str;
}

StringWriter writer = new StringWriter();
int strLen = str.length();
for (int i = 0; i < strLen; ++i) {
Expand Down Expand Up @@ -732,12 +736,12 @@ EndOfLineComment = "--" !({HintContent}|{ContainsLineTerminator}) {LineTerminato

{SingleQuoteStringLiteral} {
return newToken(SqlParserSymbols.STRING_LITERAL,
escapeBackSlash(yytext().substring(1, yytext().length()-1)).replaceAll("''", "'"));
escapeBackSlash(yytext().substring(1, yytext().length()-1), sql_mode).replaceAll("''", "'"));
}

{DoubleQuoteStringLiteral} {
return newToken(SqlParserSymbols.STRING_LITERAL,
escapeBackSlash(yytext().substring(1, yytext().length()-1)).replaceAll("\"\"", "\""));
escapeBackSlash(yytext().substring(1, yytext().length()-1), sql_mode).replaceAll("\"\"", "\""));
}

{CommentedHintBegin} {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select1 --
1 TIN\\PEXNB601C6UUTAB

-- !select2 --
1 TIN\\PEXNB601C6UUTAB

Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

suite("test_like_no_backslash_escapes_mode") {

// Regression case: a value containing a backslash is inserted, then looked up
// with LIKE and with '=' while the session sql_mode is NO_BACKSLASH_ESCAPES.
def tableName = "test_like_no_backslash_escapes_mode_tbl"
// Groovy collapses "\\" into one backslash, so the SQL text carries a single '\'.
def storedValue = "TIN\\PEXNB601C6UUTAB"

// Switch the mode first so every later statement is parsed under it.
sql """ set sql_mode = "NO_BACKSLASH_ESCAPES"; """
sql "DROP TABLE IF EXISTS ${tableName}"

sql """
CREATE TABLE `${tableName}` (
`id` INT NULL,
`value` VARCHAR(100) NULL
) ENGINE=OLAP
DUPLICATE KEY(`id`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
"""

// Single row whose value holds the backslash under test.
sql """
INSERT INTO ${tableName} VALUES (1, "${storedValue}");
"""

// LIKE with a pattern that spans the backslash; checked against tag select1.
qt_select1 """
select * from ${tableName} where `value` like "%TIN\\PE%";
"""

// Exact-match comparison on the same value; checked against tag select2.
qt_select2 """
select * from ${tableName} where `value` = "${storedValue}";
"""

// The table is intentionally left in place after the run.
// sql "DROP TABLE ${tableName};"
}

0 comments on commit fc716b0

Please sign in to comment.