Skip to content

Commit

Permalink
[fix](like_func) incorrect result of like with 'NO_BACKSLASH_ESCAPES'…
Browse files Browse the repository at this point in the history
… mode(apache#27842)
  • Loading branch information
mrhhsg committed Dec 1, 2023
1 parent 5f544a1 commit 6d28d2f
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 18 deletions.
43 changes: 25 additions & 18 deletions be/src/vec/functions/like.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -453,24 +453,30 @@ void FunctionLike::convert_like_pattern(LikeSearchState* state, const std::strin

bool is_escaped = false;
for (size_t i = 0; i < pattern.size(); ++i) {
if (!is_escaped && pattern[i] == '%') {
re_pattern->append(".*");
} else if (!is_escaped && pattern[i] == '_') {
re_pattern->append(".");
// check for escape char before checking for regex special chars, they might overlap
} else if (!is_escaped && pattern[i] == state->escape_char) {
is_escaped = true;
} else if (pattern[i] == '.' || pattern[i] == '[' || pattern[i] == ']' ||
pattern[i] == '{' || pattern[i] == '}' || pattern[i] == '(' ||
pattern[i] == ')' || pattern[i] == '\\' || pattern[i] == '*' ||
pattern[i] == '+' || pattern[i] == '?' || pattern[i] == '|' ||
pattern[i] == '^' || pattern[i] == '$') {
// escape all regex special characters; see list at
re_pattern->append("\\");
re_pattern->append(1, pattern[i]);
is_escaped = false;
if (!is_escaped) {
switch (pattern[i]) {
case '%':
re_pattern->append(".*");
break;
case '_':
re_pattern->append(".");
break;
default:
is_escaped = pattern[i] == state->escape_char;
if (!is_escaped) {
re_pattern->append(1, pattern[i]);
}
break;
}
} else {
// regular character or escaped special character
if (pattern[i] == '.' || pattern[i] == '[' || pattern[i] == ']' || pattern[i] == '{' ||
pattern[i] == '}' || pattern[i] == '(' || pattern[i] == ')' || pattern[i] == '\\' ||
pattern[i] == '*' || pattern[i] == '+' || pattern[i] == '?' || pattern[i] == '|' ||
pattern[i] == '^' || pattern[i] == '$') {
re_pattern->append("\\");
} else if (pattern[i] != '%' && pattern[i] != '_') {
re_pattern->append("\\\\");
}
re_pattern->append(1, pattern[i]);
is_escaped = false;
}
Expand Down Expand Up @@ -634,7 +640,8 @@ Status FunctionLike::open(FunctionContext* context, FunctionContext::FunctionSta
opts.set_dot_nl(true);
state->search_state.regex = std::make_unique<RE2>(re_pattern, opts);
if (!state->search_state.regex->ok()) {
return Status::InternalError("Invalid regex expression: {}", pattern_str);
return Status::InternalError("Invalid regex expression: {}(origin: {})",
re_pattern, pattern_str);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select1 --
1 TIN\\PEXNB601C6UUTAB

-- !select2 --
1 TIN\\PEXNB601C6UUTAB

Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression suite: with sql_mode set to NO_BACKSLASH_ESCAPES, checks that a
// LIKE predicate and an equality predicate both work on a value whose literal
// contains a backslash.
suite("test_like_no_backslash_escapes_mode") {

// Switch the session to NO_BACKSLASH_ESCAPES before any literal with a backslash is sent.
sql """ set sql_mode = "NO_BACKSLASH_ESCAPES"; """
def tbName = "test_like_no_backslash_escapes_mode_tbl"
// Drop any existing table with this name so the CREATE below starts clean.
sql "DROP TABLE IF EXISTS ${tbName}"

// Two-column table (id, value) with a single bucket and one replica.
// NOTE(review): the PROPERTIES list ends with a trailing comma before ")" — confirm the SQL parser accepts it.
sql """
CREATE TABLE `${tbName}` (
`id` INT NULL,
`value` VARCHAR(100) NULL
) ENGINE=OLAP
DUPLICATE KEY(`id`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
);
"""

// Insert the single row under test; its value literal contains a backslash.
// NOTE(review): the expected output for this suite appears to show a doubled backslash — confirm the
// backslash count in this literal matches the checked-in file.
sql """
INSERT INTO ${tbName} VALUES (1, "TIN\PEXNB601C6UUTAB");
"""

// LIKE with a pattern that contains the backslash between two '%' wildcards.
// presumably the result is compared with the "!select1" section of the generated .out file — verify against the framework.
qt_select1 """
select * from ${tbName} where `value` like "%TIN\PE%";
"""

// Equality comparison against the same literal that was inserted.
// presumably the result is compared with the "!select2" section of the generated .out file — verify against the framework.
qt_select2 """
select * from ${tbName} where `value` = "TIN\PEXNB601C6UUTAB";
"""

// Cleanup is commented out, so the table is left in place after the suite runs.
// sql "DROP TABLE ${tbName};"
}

0 comments on commit 6d28d2f

Please sign in to comment.