Skip to content

Commit

Permalink
Use utf8 as the type of escape char
Browse files Browse the repository at this point in the history
  • Loading branch information
Crystrix committed Apr 2, 2021
1 parent 5048d85 commit 8d59c42
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 19 deletions.
4 changes: 2 additions & 2 deletions cpp/src/gandiva/function_registry_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
kResultNullIfNull, "gdv_fn_like_utf8_utf8",
NativeFunction::kNeedsFunctionHolder),

NativeFunction("like", {}, DataTypeVector{utf8(), utf8(), int8()}, boolean(),
kResultNullIfNull, "gdv_fn_like_utf8_utf8_int8",
NativeFunction("like", {}, DataTypeVector{utf8(), utf8(), utf8()}, boolean(),
kResultNullIfNull, "gdv_fn_like_utf8_utf8_utf8",
NativeFunction::kNeedsFunctionHolder),

NativeFunction("ltrim", {}, DataTypeVector{utf8(), utf8()}, utf8(),
Expand Down
13 changes: 7 additions & 6 deletions cpp/src/gandiva/gdv_function_stubs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ bool gdv_fn_like_utf8_utf8(int64_t ptr, const char* data, int data_len,
return (*holder)(std::string(data, data_len));
}

bool gdv_fn_like_utf8_utf8_int8(int64_t ptr, const char* data, int data_len,
bool gdv_fn_like_utf8_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len,
int8_t escape_char) {
const char* escape_char, int escape_char_len) {
gandiva::LikeHolder* holder = reinterpret_cast<gandiva::LikeHolder*>(ptr);
return (*holder)(std::string(data, data_len));
}
Expand Down Expand Up @@ -236,17 +236,18 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
types->i1_type() /*return_type*/, args,
reinterpret_cast<void*>(gdv_fn_like_utf8_utf8));

// gdv_fn_like_utf8_utf8_int8
// gdv_fn_like_utf8_utf8_utf8
args = {types->i64_type(), // int64_t ptr
types->i8_ptr_type(), // const char* data
types->i32_type(), // int data_len
types->i8_ptr_type(), // const char* pattern
types->i32_type(), // int pattern_len
types->i8_type()}; // int8_t escape_char
types->i8_ptr_type(), // const char* escape_char
types->i32_type()}; // int escape_char_len

engine->AddGlobalMappingForFunc("gdv_fn_like_utf8_utf8_int8",
engine->AddGlobalMappingForFunc("gdv_fn_like_utf8_utf8_utf8",
types->i1_type() /*return_type*/, args,
reinterpret_cast<void*>(gdv_fn_like_utf8_utf8_int8));
reinterpret_cast<void*>(gdv_fn_like_utf8_utf8_utf8));

// gdv_fn_to_date_utf8_utf8
args = {types->i64_type(), // int64_t execution_context
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/gandiva/gdv_function_stubs.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ extern "C" {
bool gdv_fn_like_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len);

bool gdv_fn_like_utf8_utf8_int8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len, int8_t escape_char);
bool gdv_fn_like_utf8_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len,
const char* escape_char, int escape_char_len);

int64_t gdv_fn_to_date_utf8_utf8_int32(int64_t context, int64_t ptr, const char* data,
int data_len, bool in1_validity,
Expand Down
19 changes: 13 additions & 6 deletions cpp/src/gandiva/like_holder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,11 @@ Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* h

auto escape_char_type = escape_char->return_type()->id();
ARROW_RETURN_IF(
escape_char_type != arrow::Type::INT8,
!IsArrowStringLiteral(escape_char_type),
Status::Invalid(
"'like' function requires a int8 literal as the third parameter"));
"'like' function requires a string literal as the third parameter"));
return Make(arrow::util::get<std::string>(literal->holder()),
arrow::util::get<int8_t>(escape_char->holder()), holder);
arrow::util::get<std::string>(escape_char->holder()), holder);
}
}

Expand All @@ -111,11 +111,18 @@ Status LikeHolder::Make(const std::string& sql_pattern,
return Status::OK();
}

Status LikeHolder::Make(const std::string& sql_pattern, char escape_char,
Status LikeHolder::Make(const std::string& sql_pattern, const std::string& escape_char,
std::shared_ptr<LikeHolder>* holder) {
ARROW_RETURN_IF(escape_char.length() > 1,
Status::Invalid("The length of escape char ", escape_char,
" in 'like' function is greater than 1"));
std::string pcre_pattern;
ARROW_RETURN_NOT_OK(
RegexUtil::SqlLikePatternToPcre(sql_pattern, escape_char, pcre_pattern));
if (escape_char.length() == 1) {
ARROW_RETURN_NOT_OK(
RegexUtil::SqlLikePatternToPcre(sql_pattern, escape_char.at(0), pcre_pattern));
} else {
ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern));
}

auto lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern));
ARROW_RETURN_IF(!lholder->regex_.ok(),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/gandiva/like_holder.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class GANDIVA_EXPORT LikeHolder : public FunctionHolder {

static Status Make(const std::string& sql_pattern, std::shared_ptr<LikeHolder>* holder);

static Status Make(const std::string& sql_pattern, char escape_char,
static Status Make(const std::string& sql_pattern, const std::string& escape_char,
std::shared_ptr<LikeHolder>* holder);

// Try and optimise a function node with a "like" pattern.
Expand Down
39 changes: 37 additions & 2 deletions cpp/src/gandiva/like_holder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ TEST_F(TestLikeHolder, TestOptimise) {
TEST_F(TestLikeHolder, TestMatchOneEscape) {
std::shared_ptr<LikeHolder> like_holder;

auto status = LikeHolder::Make("ab\\_", '\\', &like_holder);
auto status = LikeHolder::Make("ab\\_", "\\", &like_holder);
EXPECT_EQ(status.ok(), true) << status.message();

auto& like = *like_holder;
Expand All @@ -163,7 +163,7 @@ TEST_F(TestLikeHolder, TestMatchOneEscape) {
TEST_F(TestLikeHolder, TestMatchManyEscape) {
std::shared_ptr<LikeHolder> like_holder;

auto status = LikeHolder::Make("ab\\%", '\\', &like_holder);
auto status = LikeHolder::Make("ab\\%", "\\", &like_holder);
EXPECT_EQ(status.ok(), true) << status.message();

auto& like = *like_holder;
Expand All @@ -176,4 +176,39 @@ TEST_F(TestLikeHolder, TestMatchManyEscape) {
EXPECT_FALSE(like("abcd"));
EXPECT_FALSE(like("dabc"));
}

// Checks that an escaped escape character in the pattern matches a single
// literal backslash in the input.
TEST_F(TestLikeHolder, TestMatchEscape) {
  std::shared_ptr<LikeHolder> holder;

  // The pattern is `ab` followed by two backslashes; the escape string is one
  // backslash.
  auto make_status = LikeHolder::Make("ab\\\\", "\\", &holder);
  // Fatal assertion: if Make failed, `holder` may be null and must not be
  // dereferenced below.
  ASSERT_TRUE(make_status.ok()) << make_status.message();

  auto& like = *holder;

  // `ab` followed by exactly one backslash is expected to match.
  EXPECT_TRUE(like("ab\\"));

  EXPECT_FALSE(like("abc"));
}

// Checks the behaviour when the escape string is empty: the expectations below
// treat the backslash in the pattern as an ordinary character and `_` as a
// single-character wildcard.
TEST_F(TestLikeHolder, TestEmptyEscapeChar) {
  std::shared_ptr<LikeHolder> holder;

  // The pattern is `ab`, a backslash, then `_`; no escape character is given.
  auto make_status = LikeHolder::Make("ab\\_", "", &holder);
  // Fatal assertion: if Make failed, `holder` may be null and must not be
  // dereferenced below.
  ASSERT_TRUE(make_status.ok()) << make_status.message();

  auto& like = *holder;

  // Any single character may follow the literal backslash.
  EXPECT_TRUE(like("ab\\c"));
  EXPECT_TRUE(like("ab\\_"));

  // Too many trailing characters, or no literal backslash in the third position.
  EXPECT_FALSE(like("ab\\_d"));
  EXPECT_FALSE(like("ab__"));
}

// Checks that an escape string of two characters (two backslashes) is
// rejected: Make is expected to return a non-OK status.
TEST_F(TestLikeHolder, TestMultipleEscapeChar) {
  std::shared_ptr<LikeHolder> holder;

  auto make_status = LikeHolder::Make("ab\\_", "\\\\", &holder);
  EXPECT_EQ(make_status.ok(), false) << make_status.message();
}
} // namespace gandiva

0 comments on commit 8d59c42

Please sign in to comment.