From 6d28d2f579344594b7d3c5fc258f1c70265d806d Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Fri, 1 Dec 2023 00:09:09 +0800 Subject: [PATCH] [fix](like_func) incorrect result of like with 'NO_BACKSLASH_ESCAPES' mode(#27842) --- be/src/vec/functions/like.cpp | 43 +++++++++------- .../test_like_no_backslash_escapes_mode.out | 7 +++ ...test_like_no_backslash_escapes_mode.groovy | 50 +++++++++++++++++++ 3 files changed, 82 insertions(+), 18 deletions(-) create mode 100644 regression-test/data/query_p0/sql_functions/string_functions/test_like_no_backslash_escapes_mode.out create mode 100644 regression-test/suites/query_p0/sql_functions/string_functions/test_like_no_backslash_escapes_mode.groovy diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp index 8851b777fee6e6..add09f845a62aa 100644 --- a/be/src/vec/functions/like.cpp +++ b/be/src/vec/functions/like.cpp @@ -453,24 +453,30 @@ void FunctionLike::convert_like_pattern(LikeSearchState* state, const std::strin bool is_escaped = false; for (size_t i = 0; i < pattern.size(); ++i) { - if (!is_escaped && pattern[i] == '%') { - re_pattern->append(".*"); - } else if (!is_escaped && pattern[i] == '_') { - re_pattern->append("."); - // check for escape char before checking for regex special chars, they might overlap - } else if (!is_escaped && pattern[i] == state->escape_char) { - is_escaped = true; - } else if (pattern[i] == '.' || pattern[i] == '[' || pattern[i] == ']' || - pattern[i] == '{' || pattern[i] == '}' || pattern[i] == '(' || - pattern[i] == ')' || pattern[i] == '\\' || pattern[i] == '*' || - pattern[i] == '+' || pattern[i] == '?' 
|| pattern[i] == '|' || - pattern[i] == '^' || pattern[i] == '$') { - // escape all regex special characters; see list at - re_pattern->append("\\"); - re_pattern->append(1, pattern[i]); - is_escaped = false; + if (!is_escaped) { + switch (pattern[i]) { + case '%': + re_pattern->append(".*"); + break; + case '_': + re_pattern->append("."); + break; + default: + is_escaped = pattern[i] == state->escape_char; + if (!is_escaped) { + re_pattern->append(1, pattern[i]); + } + break; + } } else { - // regular character or escaped special character + if (pattern[i] == '.' || pattern[i] == '[' || pattern[i] == ']' || pattern[i] == '{' || + pattern[i] == '}' || pattern[i] == '(' || pattern[i] == ')' || pattern[i] == '\\' || + pattern[i] == '*' || pattern[i] == '+' || pattern[i] == '?' || pattern[i] == '|' || + pattern[i] == '^' || pattern[i] == '$') { + re_pattern->append("\\"); + } else if (pattern[i] != '%' && pattern[i] != '_') { + re_pattern->append("\\\\"); + } re_pattern->append(1, pattern[i]); is_escaped = false; } @@ -634,7 +640,8 @@ Status FunctionLike::open(FunctionContext* context, FunctionContext::FunctionSta opts.set_dot_nl(true); state->search_state.regex = std::make_unique<re2::RE2>(re_pattern, opts); if (!state->search_state.regex->ok()) { - return Status::InternalError("Invalid regex expression: {}", pattern_str); + return Status::InternalError("Invalid regex expression: {}(origin: {})", + re_pattern, pattern_str); } } diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_like_no_backslash_escapes_mode.out b/regression-test/data/query_p0/sql_functions/string_functions/test_like_no_backslash_escapes_mode.out new file mode 100644 index 00000000000000..13540d6ee2d122 --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/string_functions/test_like_no_backslash_escapes_mode.out @@ -0,0 +1,7 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !select1 -- +1 TIN\\PEXNB601C6UUTAB + +-- !select2 -- +1 TIN\\PEXNB601C6UUTAB + diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_like_no_backslash_escapes_mode.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_like_no_backslash_escapes_mode.groovy new file mode 100644 index 00000000000000..259aa95284a28d --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_like_no_backslash_escapes_mode.groovy @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_like_no_backslash_escapes_mode") { + + sql """ set sql_mode = "NO_BACKSLASH_ESCAPES"; """ + def tbName = "test_like_no_backslash_escapes_mode_tbl" + sql "DROP TABLE IF EXISTS ${tbName}" + + sql """ + CREATE TABLE `${tbName}` ( + `id` INT NULL, + `value` VARCHAR(100) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + ); + """ + + sql """ + INSERT INTO ${tbName} VALUES (1, "TIN\PEXNB601C6UUTAB"); + """ + + qt_select1 """ + select * from ${tbName} where `value` like "%TIN\PE%"; + """ + + qt_select2 """ + select * from ${tbName} where `value` = "TIN\PEXNB601C6UUTAB"; + """ + + // sql "DROP TABLE ${tbName};" +} \ No newline at end of file