Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 32 additions & 3 deletions be/src/vec/functions/like.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -669,16 +669,30 @@ VPatternSearchStateSPtr FunctionLikeBase::pattern_type_recognition(const ColumnS
Status FunctionLikeBase::vector_non_const(const ColumnString& values, const ColumnString& patterns,
ColumnUInt8::Container& result, LikeState* state,
size_t input_rows_count) const {
ColumnString::MutablePtr replaced_patterns;
VPatternSearchStateSPtr vector_search_state;
if (state->is_like_pattern) {
vector_search_state = pattern_type_recognition<true>(patterns);
if (state->has_custom_escape) {
replaced_patterns = ColumnString::create();
for (int i = 0; i < input_rows_count; ++i) {
std::string val =
replace_pattern_by_escape(patterns.get_data_at(i), state->escape_char);
replaced_patterns->insert_data(val.c_str(), val.size());
}
vector_search_state = pattern_type_recognition<true>(*replaced_patterns);
} else {
vector_search_state = pattern_type_recognition<true>(patterns);
}
} else {
vector_search_state = pattern_type_recognition<false>(patterns);
}

const ColumnString& real_pattern = state->has_custom_escape ? *replaced_patterns : patterns;

if (vector_search_state == nullptr) {
// pattern type recognition failed, use default case
for (int i = 0; i < input_rows_count; ++i) {
const auto pattern_val = patterns.get_data_at(i);
const auto pattern_val = real_pattern.get_data_at(i);
const auto value_val = values.get_data_at(i);
RETURN_IF_ERROR((state->scalar_function)(&state->search_state, value_val, pattern_val,
&result[i]));
Expand Down Expand Up @@ -815,7 +829,12 @@ void verbose_log_match(const std::string& str, const std::string& pattern_name,
Status FunctionLike::construct_like_const_state(FunctionContext* context, const StringRef& pattern,
std::shared_ptr<LikeState>& state,
bool try_hyperscan) {
std::string pattern_str = pattern.to_string();
std::string pattern_str;
if (state->has_custom_escape) {
pattern_str = replace_pattern_by_escape(pattern, state->escape_char);
} else {
pattern_str = pattern.to_string();
}
state->search_state.pattern_str = pattern_str;
std::string search_string;

Expand Down Expand Up @@ -920,6 +939,16 @@ Status FunctionLike::open(FunctionContext* context, FunctionContext::FunctionSta
state->is_like_pattern = true;
state->function = like_fn;
state->scalar_function = like_fn_scalar;
if (context->is_col_constant(2)) {
state->has_custom_escape = true;
const auto escape_col = context->get_constant_col(2)->column_ptr;
const auto& escape = escape_col->get_data_at(0);
if (escape.size != 1) {
return Status::InternalError("Escape character must be a single character, got: {}",
escape.to_string());
}
state->escape_char = escape.data[0];
}
if (context->is_col_constant(1)) {
const auto pattern_col = context->get_constant_col(1)->column_ptr;
const auto& pattern = pattern_col->get_data_at(0);
Expand Down
30 changes: 29 additions & 1 deletion be/src/vec/functions/like.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,31 @@ class Block;

namespace doris::vectorized {

/// Rewrites a LIKE pattern that uses a custom escape character into an
/// equivalent pattern expressed with backslash escapes.
///
/// Rewrite rules (shown with '^' standing for `escape_char`):
///   "^^" -> "^"    a doubled escape char becomes one literal escape char
///   "^%" -> "\%"   an escaped '%' is re-escaped with a backslash
///   "^_" -> "\_"   an escaped '_' is re-escaped with a backslash
///   "\"  -> "\\"   a backslash in the input is doubled so it stays literal
/// An escape char followed by any other byte, or sitting at the very end of
/// the pattern, is copied through unchanged (subject to the backslash rule).
///
/// @param pattern      the raw pattern bytes as written by the user
/// @param escape_char  the single-byte custom escape character
/// @return the rewritten pattern
inline std::string replace_pattern_by_escape(const StringRef& pattern, char escape_char) {
    std::string result;
    // Lower bound only: doubled backslashes may grow the output past this.
    result.reserve(pattern.size);
    for (size_t i = 0; i < pattern.size; ++i) {
        const char cur = pattern.data[i];
        if (cur == escape_char && i + 1 < pattern.size) {
            const char next = pattern.data[i + 1];
            if (next == '%' || next == '_') {
                // Escaped wildcard: keep it literal by re-escaping with a backslash.
                result.push_back('\\');
                result.push_back(next);
                ++i; // the wildcard has been consumed
                continue;
            }
            if (next == escape_char) {
                // Doubled escape char is one literal escape char. When the escape
                // char is itself a backslash, the literal must be emitted doubled,
                // otherwise the lone backslash would escape whatever follows it.
                if (next == '\\') {
                    result.append("\\\\");
                } else {
                    result.push_back(next);
                }
                ++i; // the second escape char has been consumed
                continue;
            }
        }
        if (cur == '\\') {
            // A backslash that is not part of a handled escape sequence is literal.
            result.append("\\\\");
        } else {
            result.push_back(cur);
        }
    }
    return result;
}

// TODO: replace with std::string_view when `LikeSearchState.substring_pattern` can
// construct from std::string_view.
struct LikeSearchState {
Expand Down Expand Up @@ -123,6 +148,8 @@ using VectorLikeFn = std::function<doris::Status(const ColumnString&, const Colu

struct LikeState {
bool is_like_pattern;
bool has_custom_escape = false;
char escape_char = {};
LikeSearchState search_state;
LikeFn function;
ScalarLikeFn scalar_function;
Expand Down Expand Up @@ -150,7 +177,8 @@ using VPatternSearchStateSPtr = std::shared_ptr<VectorPatternSearchState>;

class FunctionLikeBase : public IFunction {
public:
size_t get_number_of_arguments() const override { return 2; }
size_t get_number_of_arguments() const override { return 0; }
bool is_variadic() const override { return true; }

DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
return std::make_shared<DataTypeUInt8>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ ENGINE: 'ENGINE';
ENGINES: 'ENGINES';
ENTER: 'ENTER';
ERRORS: 'ERRORS';
ESCAPE: 'ESCAPE';
EVENTS: 'EVENTS';
EVERY: 'EVERY';
EXCEPT: 'EXCEPT';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1455,7 +1455,8 @@ rowConstructorItem

predicate
: NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression
| NOT? kind=(LIKE | REGEXP | RLIKE) pattern=valueExpression
| NOT? kind=(REGEXP | RLIKE) pattern=valueExpression
| NOT? kind=LIKE pattern=valueExpression (ESCAPE escape=valueExpression)?
| NOT? kind=(MATCH | MATCH_ANY | MATCH_ALL | MATCH_PHRASE | MATCH_PHRASE_PREFIX | MATCH_REGEXP | MATCH_PHRASE_EDGE) pattern=valueExpression
| NOT? kind=IN LEFT_PAREN query RIGHT_PAREN
| NOT? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN
Expand Down Expand Up @@ -1893,6 +1894,7 @@ nonReserved
| ENGINE
| ENGINES
| ERRORS
| ESCAPE
| EVENTS
| EVERY
| EXCLUDE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3505,10 +3505,16 @@ private Expression withPredicate(Expression valueExpression, PredicateContext ct
}
break;
case DorisParser.LIKE:
outExpression = new Like(
valueExpression,
getExpression(ctx.pattern)
);
if (ctx.ESCAPE() == null) {
outExpression = new Like(
valueExpression,
getExpression(ctx.pattern));
} else {
outExpression = new Like(
valueExpression,
getExpression(ctx.pattern),
getExpression(ctx.escape));
}
break;
case DorisParser.RLIKE:
case DorisParser.REGEXP:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ private static Expression rewriteLikeToEqual(Like like) {
StringBuilder sb = new StringBuilder();
int len = str.length();
char escapeChar = '\\';
if (like.arity() == 3) {
escapeChar = ((VarcharLiteral) like.child(2)).value.charAt(0);
}

for (int i = 0; i < len;) {
char c = str.charAt(i);
if (c == escapeChar && (i + 1) < len
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@

package org.apache.doris.nereids.trees.expressions;

import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.trees.expressions.literal.StringLikeLiteral;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.BooleanType;
import org.apache.doris.nereids.types.VarcharType;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
Expand All @@ -28,10 +33,20 @@
* like expression: a like 'xxx%'.
*/
public class Like extends StringRegexPredicate {

private static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(BooleanType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT),
FunctionSignature.ret(BooleanType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT,
VarcharType.SYSTEM_DEFAULT));

public Like(Expression left, Expression right) {
this(ImmutableList.of(left, right));
}

public Like(Expression left, Expression right, Expression escape) {
this(ImmutableList.of(left, right, escape));
}

private Like(List<Expression> children) {
this(children, false);
}
Expand All @@ -40,9 +55,32 @@ private Like(List<Expression> children, boolean inferred) {
super("like", children, inferred);
}

@Override
public List<FunctionSignature> getSignatures() {
return SIGNATURES;
}

/**
 * Renders this predicate as SQL text. The two-argument form is delegated to the
 * superclass; the three-argument form appends the escape operand after the
 * pattern, separated by the keyword "escape".
 */
@Override
public String computeToSql() {
    if (arity() != 2) {
        StringBuilder sql = new StringBuilder();
        sql.append('(').append(left().toSql())
                .append(' ').append(getName())
                .append(' ').append(right().toSql())
                .append(" escape ").append(child(2).toSql())
                .append(')');
        return sql.toString();
    }
    return super.computeToSql();
}

/**
 * Returns a string form of this predicate. With three children the escape
 * operand is appended after the pattern, separated by the keyword "escape".
 */
@Override
public String toString() {
// NOTE(review): the two-argument branch delegates to super.computeToSql() rather than
// super.toString(), whereas the three-argument branch below concatenates the children's
// toString() output. Confirm this asymmetry is intended.
if (arity() == 2) {
return super.computeToSql();
}
// Three-argument form: "(<left> <name> <right> escape <escape>)".
return "(" + left() + " " + getName() + " " + right() + " escape " + child(2)
+ ")";
}

@Override
public Like withChildren(List<Expression> children) {
Preconditions.checkArgument(children.size() == 2);
Preconditions.checkArgument(children.size() == 2 || children.size() == 3);
return new Like(children);
}

Expand All @@ -54,4 +92,19 @@ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
public Expression withInferred(boolean inferred) {
return new Like(this.children, inferred);
}

/**
 * Validates the optional escape operand of a three-argument LIKE.
 *
 * <p>The escape operand must be a string literal whose UTF-8 encoding is exactly
 * one byte. The byte length is measured in UTF-8 explicitly so the outcome does
 * not depend on the JVM's default charset.
 *
 * @throws AnalysisException if the escape operand is not a string literal, or is
 *         not exactly one byte long in UTF-8
 */
@Override
public void checkLegalityBeforeTypeCoercion() {
    // The two-argument form has no escape operand to validate.
    if (arity() != 3) {
        return;
    }
    Expression escape = child(2);
    if (!(escape instanceof StringLikeLiteral)) {
        throw new AnalysisException("like escape character must be a string literal: " + this.toSql());
    }
    String escapeChar = ((StringLikeLiteral) escape).getStringValue();
    // A string that is exactly one byte in UTF-8 is necessarily a single ASCII character.
    if (escapeChar.getBytes(java.nio.charset.StandardCharsets.UTF_8).length != 1) {
        throw new AnalysisException(
                "like escape character must be a single ascii character: " + escapeChar);
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !test --
true

-- !test --
true

-- !test --
true

-- !test --
true

-- !test --
true

-- !test --
false

-- !test --
false

-- !test --
true

-- !test --
true

-- !test --
false

-- !test --
true

-- !test --
false

-- !test --
true

Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression suite for `LIKE ... ESCAPE <char>`.
// Every qt_test query uses the same tag, so the recorded results are matched by
// position; keep the order of the queries stable.
suite("test_like_escapes") {
// Escape char "a" before '%' makes the '%' literal; the trailing '_' stays a wildcard.
qt_test """
select "%a" like "a%_" ESCAPE "a";
"""
qt_test """
select "%_" like "a%_" ESCAPE "a";
"""
// The pattern consists of the escape char alone.
qt_test """
select "a" like "a" ESCAPE "a";
"""
// Doubled escape char in the pattern.
qt_test """
select "a" like "aa" ESCAPE "a";
"""
// Escaped '%' followed by a trailing escape char.
qt_test """
select "%a" like "a%a" ESCAPE "a";
"""
qt_test """
select "%_" like "a%a" ESCAPE "a";
"""
// Both '%' and '_' are preceded by the escape char.
qt_test """
select "%a" like "a%a_" ESCAPE "a";
"""
qt_test """
select "%_" like "a%a_" ESCAPE "a";
"""

// Invalid escape operands: each statement is expected to fail with an error
// containing the given message.
// Escape operand is a single non-ASCII character.
test {
sql """select "啊啊" like "啊啊" ESCAPE "啊";"""
exception "like escape character must be a single ascii character"
}
// Escape operand is longer than one character.
test {
sql """select "a" like "aa" ESCAPE "aa";"""
exception "like escape character must be a single ascii character"
}
// Escape operand is not a string literal.
test {
sql """select "a" like "aa" ESCAPE 1;"""
exception "like escape character must be a string literal"
}
// Non-ASCII characters in the value and pattern alongside an ASCII escape char.
qt_test """
select "啊%a" like "啊a%_" ESCAPE "a";
"""
// Escape char "A" differs in case from the "a" that appears in the pattern.
qt_test """
select "%a" like "a%_" ESCAPE "A";
"""
// Backslashes in the value and pattern combined with the custom escape char "A".
qt_test """
select "\\\\" like "\\\\%" ESCAPE "A";
"""
qt_test """
select "\\\\" like "\\\\A%" ESCAPE "A";
"""
qt_test """
select "\\\\%" like "\\\\A%" ESCAPE "A";
"""
}
Loading