diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp
index dcb4fc6dd0dbe3..4ed14280e4c51e 100644
--- a/be/src/vec/functions/like.cpp
+++ b/be/src/vec/functions/like.cpp
@@ -669,16 +669,32 @@ VPatternSearchStateSPtr FunctionLikeBase::pattern_type_recognition(const ColumnS
 Status FunctionLikeBase::vector_non_const(const ColumnString& values, const ColumnString& patterns,
                                           ColumnUInt8::Container& result, LikeState* state,
                                           size_t input_rows_count) const {
+    ColumnString::MutablePtr replaced_patterns;
     VPatternSearchStateSPtr vector_search_state;
     if (state->is_like_pattern) {
-        vector_search_state = pattern_type_recognition(patterns);
+        if (state->has_custom_escape) {
+            // Rewrite every row's pattern so the custom escape char is expressed with '\'.
+            replaced_patterns = ColumnString::create();
+            for (size_t i = 0; i < input_rows_count; ++i) {
+                std::string val =
+                        replace_pattern_by_escape(patterns.get_data_at(i), state->escape_char);
+                replaced_patterns->insert_data(val.c_str(), val.size());
+            }
+            vector_search_state = pattern_type_recognition(*replaced_patterns);
+        } else {
+            vector_search_state = pattern_type_recognition(patterns);
+        }
     } else {
         vector_search_state = pattern_type_recognition(patterns);
     }
+
+    // Match against the rewritten patterns whenever a custom escape char is in effect.
+    const ColumnString& real_pattern = state->has_custom_escape ? *replaced_patterns : patterns;
+
     if (vector_search_state == nullptr) {
         // pattern type recognition failed, use default case
         for (int i = 0; i < input_rows_count; ++i) {
-            const auto pattern_val = patterns.get_data_at(i);
+            const auto pattern_val = real_pattern.get_data_at(i);
             const auto value_val = values.get_data_at(i);
             RETURN_IF_ERROR((state->scalar_function)(&state->search_state, value_val, pattern_val,
                                                      &result[i]));
@@ -815,7 +831,12 @@ void verbose_log_match(const std::string& str, const std::string& pattern_name,
 Status FunctionLike::construct_like_const_state(FunctionContext* context, const StringRef& pattern,
                                                 std::shared_ptr& state,
                                                 bool try_hyperscan) {
-    std::string pattern_str = pattern.to_string();
+    std::string pattern_str;
+    if (state->has_custom_escape) {
+        pattern_str = replace_pattern_by_escape(pattern, state->escape_char);
+    } else {
+        pattern_str = pattern.to_string();
+    }
     state->search_state.pattern_str = pattern_str;
     std::string search_string;
 
@@ -920,6 +941,17 @@ Status FunctionLike::open(FunctionContext* context, FunctionContext::FunctionSta
         state->is_like_pattern = true;
         state->function = like_fn;
         state->scalar_function = like_fn_scalar;
+        // An optional third argument carries the ESCAPE character.
+        if (context->is_col_constant(2)) {
+            state->has_custom_escape = true;
+            const auto escape_col = context->get_constant_col(2)->column_ptr;
+            const auto& escape = escape_col->get_data_at(0);
+            if (escape.size != 1) {
+                return Status::InternalError("Escape character must be a single character, got: {}",
+                                             escape.to_string());
+            }
+            state->escape_char = escape.data[0];
+        }
         if (context->is_col_constant(1)) {
             const auto pattern_col = context->get_constant_col(1)->column_ptr;
             const auto& pattern = pattern_col->get_data_at(0);
diff --git a/be/src/vec/functions/like.h b/be/src/vec/functions/like.h
index 435e2742788497..d9f95123cead7d 100644
--- a/be/src/vec/functions/like.h
+++ b/be/src/vec/functions/like.h
@@ -51,6 +51,41 @@ class Block;
 
 namespace doris::vectorized {
 
+// Rewrites a LIKE pattern that uses a custom escape character into the equivalent
+// pattern that uses the default '\' escape. With E denoting `escape_char`:
+//   "E%" -> "\%"   (literal percent)
+//   "E_" -> "\_"   (literal underscore)
+//   "EE" -> "E"    (literal escape character; "\\" when E itself is '\')
+//   "\"  -> "\\"   (a backslash that is not part of an escape pair is literal)
+// Any other occurrence of `escape_char` is treated as an ordinary character.
+inline std::string replace_pattern_by_escape(const StringRef& pattern, char escape_char) {
+    std::string result;
+    result.reserve(pattern.size);
+    for (size_t i = 0; i < pattern.size; ++i) {
+        const char cur = pattern.data[i];
+        if (cur == escape_char && i + 1 < pattern.size) {
+            const char next = pattern.data[i + 1];
+            if (next == escape_char || next == '%' || next == '_') {
+                // '%', '_' and '\' are special in the rewritten pattern, so they need
+                // a '\' prefix to stay literal; any other escape char is emitted as is.
+                if (next == '%' || next == '_' || next == '\\') {
+                    result.push_back('\\');
+                }
+                result.push_back(next);
+                ++i; // skip next char
+                continue;
+            }
+        }
+        if (cur == '\\') {
+            // "\" -> "\\"
+            result.append("\\\\");
+        } else {
+            result.push_back(cur);
+        }
+    }
+    return result;
+}
+
 // TODO: replace with std::string_view when `LikeSearchState.substring_pattern` can
 // construct from std::string_view.
 struct LikeSearchState {
@@ -123,6 +158,8 @@ using VectorLikeFn = std::function;
 
 class FunctionLikeBase : public IFunction {
 public:
-    size_t get_number_of_arguments() const override { return 2; }
+    size_t get_number_of_arguments() const override { return 0; }
+    bool is_variadic() const override { return true; }
+
     DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
         return std::make_shared();
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
index 47a45b67aa7b36..0fe651d9675523 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
@@ -235,6 +235,7 @@ ENGINE: 'ENGINE';
 ENGINES: 'ENGINES';
 ENTER: 'ENTER';
 ERRORS: 'ERRORS';
+ESCAPE: 'ESCAPE';
 EVENTS: 'EVENTS';
 EVERY: 'EVERY';
 EXCEPT: 'EXCEPT';
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
index 7b3ecbafa7e27e..bc205f6686f817 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
@@ -1455,7 +1455,8 @@ rowConstructorItem
 
 predicate
     : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression
-    | NOT? kind=(LIKE | REGEXP | RLIKE) pattern=valueExpression
+    | NOT? kind=(REGEXP | RLIKE) pattern=valueExpression
+    | NOT? kind=LIKE pattern=valueExpression (ESCAPE escape=valueExpression)?
     | NOT? kind=(MATCH | MATCH_ANY | MATCH_ALL | MATCH_PHRASE | MATCH_PHRASE_PREFIX | MATCH_REGEXP | MATCH_PHRASE_EDGE) pattern=valueExpression
     | NOT? kind=IN LEFT_PAREN query RIGHT_PAREN
     | NOT? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN
@@ -1893,6 +1894,7 @@ nonReserved
     | ENGINE
     | ENGINES
     | ERRORS
+    | ESCAPE
     | EVENTS
     | EVERY
     | EXCLUDE
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index 6d21441c95f3a4..39dc52a03fd1f1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -3505,10 +3505,16 @@ private Expression withPredicate(Expression valueExpression, PredicateContext ct
                 }
                 break;
             case DorisParser.LIKE:
-                outExpression = new Like(
-                        valueExpression,
-                        getExpression(ctx.pattern)
-                );
+                if (ctx.ESCAPE() == null) {
+                    outExpression = new Like(
+                            valueExpression,
+                            getExpression(ctx.pattern));
+                } else {
+                    outExpression = new Like(
+                            valueExpression,
+                            getExpression(ctx.pattern),
+                            getExpression(ctx.escape));
+                }
                 break;
             case DorisParser.RLIKE:
             case DorisParser.REGEXP:
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/LikeToEqualRewrite.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/LikeToEqualRewrite.java
index e2836204cdc033..e532deb3901a31 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/LikeToEqualRewrite.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/LikeToEqualRewrite.java
@@ -51,6 +51,10 @@ private static Expression rewriteLikeToEqual(Like like) {
         StringBuilder sb = new StringBuilder();
         int len = str.length();
         char escapeChar = '\\';
+        if (like.arity() == 3) {
+            escapeChar = ((VarcharLiteral) like.child(2)).value.charAt(0);
+        }
+
         for (int i = 0; i < len;) {
             char c = str.charAt(i);
             if (c == escapeChar && (i + 1) < len
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Like.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Like.java
index 84b6ffa984fff4..10f25fb0ebca98 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Like.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Like.java
@@ -17,7 +17,12 @@
 
 package org.apache.doris.nereids.trees.expressions;
 
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.trees.expressions.literal.StringLikeLiteral;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.BooleanType;
+import org.apache.doris.nereids.types.VarcharType;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
@@ -28,10 +33,20 @@
  * like expression: a like 'xxx%'.
  */
 public class Like extends StringRegexPredicate {
+
+    private static final List SIGNATURES = ImmutableList.of(
+            FunctionSignature.ret(BooleanType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT),
+            FunctionSignature.ret(BooleanType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT,
+                    VarcharType.SYSTEM_DEFAULT));
+
     public Like(Expression left, Expression right) {
         this(ImmutableList.of(left, right));
     }
 
+    public Like(Expression left, Expression right, Expression escape) {
+        this(ImmutableList.of(left, right, escape));
+    }
+
     private Like(List children) {
         this(children, false);
     }
@@ -40,9 +55,34 @@ private Like(List children, boolean inferred) {
         super("like", children, inferred);
     }
 
+    @Override
+    public List getSignatures() {
+        return SIGNATURES;
+    }
+
+    @Override
+    public String computeToSql() {
+        // Without ESCAPE, keep the inherited SQL rendering.
+        if (arity() == 2) {
+            return super.computeToSql();
+        }
+        return '(' + left().toSql() + ' ' + getName() + ' ' + right().toSql() + " escape " + child(2).toSql()
+                + ')';
+    }
+
+    @Override
+    public String toString() {
+        // Without ESCAPE, keep the inherited toString() rendering (not the SQL one).
+        if (arity() == 2) {
+            return super.toString();
+        }
+        return "(" + left() + " " + getName() + " " + right() + " escape " + child(2)
+                + ")";
+    }
+
     @Override
     public Like withChildren(List children) {
-        Preconditions.checkArgument(children.size() == 2);
+        Preconditions.checkArgument(children.size() == 2 || children.size() == 3);
         return new Like(children);
     }
 
@@ -54,4 +94,20 @@ public R accept(ExpressionVisitor visitor, C context) {
     public Expression withInferred(boolean inferred) {
         return new Like(this.children, inferred);
     }
+
+    @Override
+    public void checkLegalityBeforeTypeCoercion() {
+        if (arity() == 3) {
+            // The escape must be a string literal holding exactly one ASCII character.
+            if (child(2) instanceof StringLikeLiteral) {
+                String escapeChar = ((StringLikeLiteral) child(2)).getStringValue();
+                if (escapeChar.length() != 1 || escapeChar.charAt(0) > 127) {
+                    throw new AnalysisException(
+                            "like escape character must be a single ascii character: " + escapeChar);
+                }
+            } else {
+                throw new AnalysisException("like escape character must be a string literal: " + this.toSql());
+            }
+        }
+    }
 }
diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_like_escape.out b/regression-test/data/query_p0/sql_functions/string_functions/test_like_escape.out
new file mode 100644
index 00000000000000..59f0007906ef3e
--- /dev/null
+++ b/regression-test/data/query_p0/sql_functions/string_functions/test_like_escape.out
@@ -0,0 +1,40 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !test --
+true
+
+-- !test --
+true
+
+-- !test --
+true
+
+-- !test --
+true
+
+-- !test --
+true
+
+-- !test --
+false
+
+-- !test --
+false
+
+-- !test --
+true
+
+-- !test --
+true
+
+-- !test --
+false
+
+-- !test --
+true
+
+-- !test --
+false
+
+-- !test --
+true
+
diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_like_escape.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_like_escape.groovy
new file mode 100644
index 00000000000000..106d2709a850b7
--- /dev/null
+++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_like_escape.groovy
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_like_escapes") {
+    qt_test """
+        select "%a" like "a%_" ESCAPE "a";
+    """
+    qt_test """
+        select "%_" like "a%_" ESCAPE "a";
+    """
+    qt_test """
+        select "a" like "a" ESCAPE "a";
+    """
+    qt_test """
+        select "a" like "aa" ESCAPE "a";
+    """
+    qt_test """
+        select "%a" like "a%a" ESCAPE "a";
+    """
+    qt_test """
+        select "%_" like "a%a" ESCAPE "a";
+    """
+    qt_test """
+        select "%a" like "a%a_" ESCAPE "a";
+    """
+    qt_test """
+        select "%_" like "a%a_" ESCAPE "a";
+    """
+
+    test {
+        sql """select "啊啊" like "啊啊" ESCAPE "啊";"""
+        exception "like escape character must be a single ascii character"
+    }
+    test {
+        sql """select "a" like "aa" ESCAPE "aa";"""
+        exception "like escape character must be a single ascii character"
+    }
+    test {
+        sql """select "a" like "aa" ESCAPE 1;"""
+        exception "like escape character must be a string literal"
+    }
+    qt_test """
+        select "啊%a" like "啊a%_" ESCAPE "a";
+    """
+    qt_test """
+        select "%a" like "a%_" ESCAPE "A";
+    """
+    qt_test """
+        select "\\\\" like "\\\\%" ESCAPE "A";
+    """
+    qt_test """
+        select "\\\\" like "\\\\A%" ESCAPE "A";
+    """
+    qt_test """
+        select "\\\\%" like "\\\\A%" ESCAPE "A";
+    """
+}
\ No newline at end of file