Skip to content

Commit

Permalink
[feature](function) support new function replace_empty (#36283)
Browse files Browse the repository at this point in the history
## Proposed changes

Add new function `replace_empty()`.
It behaves exactly like `replace()`, except when the search string (the
second parameter) is empty.

If the search string is empty, it inserts the third string in front
of every character
of the first string, as well as at the end of the first string.

eg:

```
mysql> select replace("abc", '', 'xyz');
+---------------------------+
| replace('abc', '', 'xyz') |
+---------------------------+
| abc                       |
+---------------------------+
1 row in set (0.01 sec)

mysql> select replace_empty("abc", '', 'xyz');
+---------------------------------+
| replace_empty('abc', '', 'xyz') |
+---------------------------------+
| xyzaxyzbxyzcxyz                 |
+---------------------------------+
1 row in set (0.00 sec)
```

Doc: apache/doris-website#749

This function exists for compatibility with Presto/Trino: it behaves exactly
the same as the `replace()` function in Presto/Trino.
  • Loading branch information
morningman authored Jun 20, 2024
1 parent 2c795a9 commit a819ca4
Show file tree
Hide file tree
Showing 8 changed files with 520 additions and 100 deletions.
3 changes: 2 additions & 1 deletion be/src/vec/functions/function_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1017,7 +1017,8 @@ void register_function_string(SimpleFunctionFactory& factory) {
factory.register_function<FunctionStringDigestOneArg<MD5Sum>>();
factory.register_function<FunctionStringDigestSHA1>();
factory.register_function<FunctionStringDigestSHA2>();
factory.register_function<FunctionReplace>();
factory.register_function<FunctionReplace<ReplaceImpl, true>>();
factory.register_function<FunctionReplace<ReplaceEmptyImpl, false>>();
factory.register_function<FunctionMask>();
factory.register_function<FunctionMaskPartial<true>>();
factory.register_function<FunctionMaskPartial<false>>();
Expand Down
46 changes: 36 additions & 10 deletions be/src/vec/functions/function_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -2891,10 +2891,19 @@ class FunctionStringLocatePos : public IFunction {
}
};

// Name tag for the `replace` SQL function: supplies the `name` constant that
// FunctionReplace picks up through its `Impl` template parameter.
struct ReplaceImpl {
    static constexpr const char* name = "replace";
};

// Name tag for the `replace_empty` SQL function: supplies the `name` constant
// that FunctionReplace picks up through its `Impl` template parameter.
struct ReplaceEmptyImpl {
    static constexpr const char* name = "replace_empty";
};

template <typename Impl, bool empty>
class FunctionReplace : public IFunction {
public:
static constexpr auto name = "replace";
static FunctionPtr create() { return std::make_shared<FunctionReplace>(); }
static constexpr auto name = Impl::name;
// Factory: returns a shared pointer to a newly constructed
// FunctionReplace<Impl, empty> instance.
static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }
// Returns the class-level `name` constant as the function's name.
String get_name() const override { return name; }
// Reports that this function takes exactly three arguments.
size_t get_number_of_arguments() const override { return 3; }

Expand Down Expand Up @@ -2936,16 +2945,33 @@ class FunctionReplace : public IFunction {
private:
// Replaces every occurrence of `old_str` in `str` with `new_str` and returns
// the resulting string.
//
// When `old_str` is empty the result depends on the `empty` template flag:
//   * empty == true:  `str` is returned unchanged.
//   * empty == false: `new_str` is inserted in front of every character of
//     `str` and once more at the end, e.g. ("abc", "", "xyz") yields
//     "xyzaxyzbxyzcxyz". If `new_str` is empty too, `str` is returned as is.
//
// "Character" means a UTF-8 encoded code point, so the bytes of a multi-byte
// character are kept together instead of having `new_str` spliced between
// them. Bytes that do not form a well-formed sequence (stray continuation
// bytes, truncated sequences, invalid lead bytes) are each treated as a
// single character.
std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const {
    if (!old_str.empty()) {
        // Resume searching right behind the text just inserted, so that an
        // occurrence of `old_str` inside `new_str` is never replaced again.
        const std::string::size_type old_len = old_str.size();
        const std::string::size_type new_len = new_str.size();
        std::string::size_type pos = 0;
        while ((pos = str.find(old_str, pos)) != std::string::npos) {
            str.replace(pos, old_len, new_str);
            pos += new_len;
        }
        return str;
    }

    if constexpr (empty) {
        return str;
    } else {
        if (new_str.empty()) {
            return str;
        }

        const std::string::size_type total = str.size();
        std::string result;
        // Upper bound: one insertion per byte plus the trailing insertion.
        result.reserve(total * (new_str.length() + 1) + new_str.length());

        std::string::size_type i = 0;
        while (i < total) {
            // Derive the expected sequence length from the UTF-8 lead byte.
            const auto lead = static_cast<unsigned char>(str[i]);
            std::string::size_type expected = 1;
            if ((lead & 0xE0) == 0xC0) {
                expected = 2;
            } else if ((lead & 0xF0) == 0xE0) {
                expected = 3;
            } else if ((lead & 0xF8) == 0xF0) {
                expected = 4;
            }

            // Only swallow real continuation bytes (10xxxxxx); this also keeps
            // the scan inside the string for truncated sequences.
            std::string::size_type next = i + 1;
            while (next < total && next - i < expected &&
                   (static_cast<unsigned char>(str[next]) & 0xC0) == 0x80) {
                ++next;
            }

            result += new_str;
            result.append(str, i, next - i);
            i = next;
        }
        result += new_str;
        return result;
    }
}
};

Expand Down
282 changes: 282 additions & 0 deletions be/test/vec/function/function_string_test.cpp

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@
import org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpReplaceOne;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Repeat;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Replace;
import org.apache.doris.nereids.trees.expressions.functions.scalar.ReplaceEmpty;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Reverse;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Right;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Round;
Expand Down Expand Up @@ -794,6 +795,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
scalar(RegexpReplaceOne.class, "regexp_replace_one"),
scalar(Repeat.class, "repeat"),
scalar(Replace.class, "replace"),
scalar(ReplaceEmpty.class, "replace_empty"),
scalar(Reverse.class, "reverse"),
scalar(Right.class, "right"),
scalar(Round.class, "round"),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.trees.expressions.functions.scalar;

import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
import org.apache.doris.nereids.trees.expressions.shape.TernaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.StringType;
import org.apache.doris.nereids.types.VarcharType;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;

import java.util.List;

/**
 * ScalarFunction 'replace_empty'.
 *
 * <p>A three-argument scalar function declared with two signatures: one taking and
 * returning VARCHAR, and one taking and returning STRING.
 */
public class ReplaceEmpty extends ScalarFunction
        implements TernaryExpression, ExplicitlyCastableSignature, PropagateNullable {

    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
            FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT)
                    .args(VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT),
            FunctionSignature.ret(StringType.INSTANCE)
                    .args(StringType.INSTANCE, StringType.INSTANCE, StringType.INSTANCE)
    );

    /**
     * constructor with 3 arguments.
     */
    public ReplaceEmpty(Expression arg0, Expression arg1, Expression arg2) {
        super("replace_empty", arg0, arg1, arg2);
    }

    @Override
    public List<FunctionSignature> getSignatures() {
        return SIGNATURES;
    }

    /**
     * withChildren: rebuilds this function from exactly three child expressions.
     */
    @Override
    public ReplaceEmpty withChildren(List<Expression> children) {
        Preconditions.checkArgument(children.size() == 3);
        Expression first = children.get(0);
        Expression second = children.get(1);
        Expression third = children.get(2);
        return new ReplaceEmpty(first, second, third);
    }

    @Override
    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
        // Dispatch to the visitor hook dedicated to this function type.
        return visitor.visitReplaceEmpty(this, context);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,7 @@
import org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpReplaceOne;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Repeat;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Replace;
import org.apache.doris.nereids.trees.expressions.functions.scalar.ReplaceEmpty;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Reverse;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Right;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Round;
Expand Down Expand Up @@ -1703,6 +1704,10 @@ default R visitReplace(Replace replace, C context) {
return visitScalarFunction(replace, context);
}

/**
 * Visits a {@link ReplaceEmpty} node. The default implementation forwards the
 * node and context unchanged to {@code visitScalarFunction}.
 */
default R visitReplaceEmpty(ReplaceEmpty replaceEmpty, C context) {
return visitScalarFunction(replaceEmpty, context);
}

/**
 * Visits a {@link Reverse} node. The default implementation forwards the
 * node and context unchanged to {@code visitScalarFunction}.
 */
default R visitReverse(Reverse reverse, C context) {
return visitScalarFunction(reverse, context);
}
Expand Down
24 changes: 24 additions & 0 deletions regression-test/data/nereids_function_p0/scalar_function/R.out
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,30 @@ string3
string3
string3

-- !sql_relace_empty01 --
abcxabcyabczabc

-- !sql_relace_empty02 --
\N

-- !sql_relace_empty03 --
\N

-- !sql_relace_empty04 --
\N

-- !sql_relace_empty05 --
abcyz

-- !sql_relace_empty06 --
yz

-- !sql_relace_empty07 --
xyz

-- !sql_relace_empty08 --
abc

-- !sql_right_Varchar_Integer --
\N
1
Expand Down
Loading

0 comments on commit a819ca4

Please sign in to comment.