diff --git a/cpp/src/gandiva/regexp_matches_holder.cc b/cpp/src/gandiva/regexp_matches_holder.cc index 6b10678573dda..cde657debb351 100644 --- a/cpp/src/gandiva/regexp_matches_holder.cc +++ b/cpp/src/gandiva/regexp_matches_holder.cc @@ -25,6 +25,7 @@ namespace gandiva { RE2 RegexpMatchesHolder::starts_with_regex_(R"(\^([\w\s]+)(\.\*)?)"); RE2 RegexpMatchesHolder::ends_with_regex_(R"((\.\*)?([\w\s]+)\$)"); +RE2 RegexpMatchesHolder::is_substr_regex_(R"((\w|\s)*)"); // Short-circuit pattern matches for the two common sub cases : // - starts_with and ends_with. @@ -45,6 +46,11 @@ const FunctionNode RegexpMatchesHolder::TryOptimize(const FunctionNode& node) { std::make_shared(literal_type, LiteralHolder(substr), false); return FunctionNode("ends_with", {node.children().at(0), suffix_node}, node.return_type()); + } else if (RE2::FullMatch(pattern, is_substr_regex_)) { + auto substr_node = + std::make_shared(literal_type, LiteralHolder(pattern), false); + return FunctionNode("is_substr", {node.children().at(0), substr_node}, + node.return_type()); } } diff --git a/cpp/src/gandiva/regexp_matches_holder.h b/cpp/src/gandiva/regexp_matches_holder.h index d2ca892a17784..4a78e068426a5 100644 --- a/cpp/src/gandiva/regexp_matches_holder.h +++ b/cpp/src/gandiva/regexp_matches_holder.h @@ -55,6 +55,7 @@ class GANDIVA_EXPORT RegexpMatchesHolder : public LikeHolder { static RE2 starts_with_regex_; // pre-compiled pattern for matching starts_with static RE2 ends_with_regex_; // pre-compiled pattern for matching ends_with + static RE2 is_substr_regex_; // pre-compiled pattern for matching is_substr }; } // namespace gandiva diff --git a/cpp/src/gandiva/regexp_matches_holder_test.cc b/cpp/src/gandiva/regexp_matches_holder_test.cc index d2ece61a75463..2b3c1bc6871f2 100644 --- a/cpp/src/gandiva/regexp_matches_holder_test.cc +++ b/cpp/src/gandiva/regexp_matches_holder_test.cc @@ -172,6 +172,11 @@ TEST_F(TestRegexpMatchesHolder, TestOptimise) { EXPECT_EQ(fnode.descriptor()->name(), "ends_with"); EXPECT_EQ(fnode.ToString(), "bool ends_with((string) in, (const string) xyz)"); + // optimise for 'is_substr' + fnode = RegexpMatchesHolder::TryOptimize(BuildRegexpMatches("xyz")); + EXPECT_EQ(fnode.descriptor()->name(), "is_substr"); + EXPECT_EQ(fnode.ToString(), "bool is_substr((string) in, (const string) xyz)"); + // no optimisation for others. fnode = RegexpMatchesHolder::TryOptimize(BuildRegexpMatches("^xyz$")); EXPECT_EQ(fnode.descriptor()->name(), "regexp_matches"); diff --git a/cpp/src/gandiva/sql_like_holder.cc b/cpp/src/gandiva/sql_like_holder.cc index a6e0f0ccd0b2d..a5361d63a851e 100644 --- a/cpp/src/gandiva/sql_like_holder.cc +++ b/cpp/src/gandiva/sql_like_holder.cc @@ -46,8 +46,8 @@ const FunctionNode SQLLikeHolder::TryOptimize(const FunctionNode& node) { auto suffix = pattern.substr(2); // skip .* auto suffix_node = std::make_shared(literal_type, LiteralHolder(suffix), false); - return FunctionNode("ends_with", {node.children().at(0), suffix_node}, - node.return_type()); + return FunctionNode("ends_with", {node.children().at(0), suffix_node}, + node.return_type()); } else if (RE2::FullMatch(pattern, is_substr_regex_)) { auto substr = pattern.substr(2, pattern.length() - 4); // trim starting and ending .*