Skip to content

Commit

Permalink
implemented regexp_matches function
Browse files Browse the repository at this point in the history
fixed linting issues

fixed check style issues

Fixed some names

Remove extra whitespace

Add substr short-circuit for regexp_like

Refactor to share logic between like and rlike

Fix style issues

Fix segfault

Fix SqlLikePatternToPcre to use partial matching

Fix style issues and warning

Fix formatting; ran the docker image and used clang-format
  • Loading branch information
projjal authored and wjones127 committed Jul 25, 2021
1 parent 2d921dc commit a21742f
Show file tree
Hide file tree
Showing 11 changed files with 532 additions and 158 deletions.
5 changes: 4 additions & 1 deletion cpp/src/gandiva/expr_decomposer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@ Status ExprDecomposer::Visit(const FieldNode& node) {
// time.
const FunctionNode ExprDecomposer::TryOptimize(const FunctionNode& node) {
// Dispatches on the function's descriptor name and hands the node to the
// matching holder's TryOptimize; a node with any other name is returned as-is.
// NOTE(review): this text comes from a diff view without +/- markers, so the two
// consecutive returns in the "like" branch look like the removed (LikeHolder)
// and added (SQLLikeHolder) versions of one line -- confirm against the repo.
if (node.descriptor()->name() == "like") {
return LikeHolder::TryOptimize(node);
return SQLLikeHolder::TryOptimize(node);
// "regexp_matches" and "regexp_like" are routed to the same optimizer.
} else if (node.descriptor()->name() == "regexp_matches" ||
node.descriptor()->name() == "regexp_like") {
return RegexpMatchesHolder::TryOptimize(node);
} else {
// Only the names compared above are optimized here.
return node;
}
}
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/gandiva/function_holder_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#include <unordered_map>

#include "arrow/status.h"

#include "gandiva/function_holder.h"
#include "gandiva/like_holder.h"
#include "gandiva/node.h"
Expand Down Expand Up @@ -61,8 +60,10 @@ class FunctionHolderRegistry {
private:
static map_type& makers() {
static map_type maker_map = {
{"like", LAMBDA_MAKER(LikeHolder)},
{"ilike", LAMBDA_MAKER(LikeHolder)},
{"like", LAMBDA_MAKER(SQLLikeHolder)},
{"ilike", LAMBDA_MAKER(SQLLikeHolder)},
{"regexp_matches", LAMBDA_MAKER(RegexpMatchesHolder)},
{"regexp_like", LAMBDA_MAKER(RegexpMatchesHolder)},
{"to_date", LAMBDA_MAKER(ToDateHolder)},
{"random", LAMBDA_MAKER(RandomGeneratorHolder)},
{"rand", LAMBDA_MAKER(RandomGeneratorHolder)},
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/gandiva/function_registry_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
kResultNullIfNull, "gdv_fn_ilike_utf8_utf8",
NativeFunction::kNeedsFunctionHolder),

NativeFunction("regexp_matches", {"regexp_like"}, DataTypeVector{utf8(), utf8()},
boolean(), kResultNullIfNull, "gdv_fn_regexp_matches_utf8_utf8",
NativeFunction::kNeedsFunctionHolder),

NativeFunction("ltrim", {}, DataTypeVector{utf8(), utf8()}, utf8(),
kResultNullIfNull, "ltrim_utf8_utf8", NativeFunction::kNeedsContext),

Expand Down
25 changes: 20 additions & 5 deletions cpp/src/gandiva/gdv_function_stubs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,25 @@ extern "C" {

// Invokes the holder whose address is carried in `ptr` on the input string and
// returns the result of that call.
//   ptr               - holder address passed as an integer; reinterpret_cast below
//   data, data_len    - input bytes; copied into a std::string for the call
//   pattern, pattern_len - not referenced in this body
// NOTE(review): this text comes from a diff view without +/- markers; the
// LikeHolder declaration looks like the removed line and the
// RegexpMatchesHolder declaration like its replacement -- confirm.
// NOTE(review): the registry hunk in this commit maps "like"/"ilike" to
// SQLLikeHolder while this cast targets RegexpMatchesHolder; presumably
// SQLLikeHolder derives from RegexpMatchesHolder -- verify, since the cast is
// unchecked.
bool gdv_fn_like_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len) {
gandiva::LikeHolder* holder = reinterpret_cast<gandiva::LikeHolder*>(ptr);
gandiva::RegexpMatchesHolder* holder =
reinterpret_cast<gandiva::RegexpMatchesHolder*>(ptr);
return (*holder)(std::string(data, data_len));
}

// Three-string variant of the like stub: takes an additional escape character
// argument alongside the data and pattern.
//   escape_char, escape_char_len - not referenced in this body
//   pattern, pattern_len         - only passed through to gdv_fn_like_utf8_utf8
// NOTE(review): this text comes from a diff view without +/- markers; the
// LikeHolder cast and direct holder call look like the removed body, and the
// forwarding call to gdv_fn_like_utf8_utf8 like its replacement -- confirm.
bool gdv_fn_like_utf8_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len,
const char* escape_char, int escape_char_len) {
gandiva::LikeHolder* holder = reinterpret_cast<gandiva::LikeHolder*>(ptr);
return (*holder)(std::string(data, data_len));
return gdv_fn_like_utf8_utf8(ptr, data, data_len, pattern, pattern_len);
}

// Stub for "ilike": same parameter list as gdv_fn_like_utf8_utf8.
// Nothing in this body distinguishes case handling; presumably that is
// configured in the holder object behind `ptr` -- verify against the holder.
// NOTE(review): this text comes from a diff view without +/- markers; the
// LikeHolder cast and direct holder call look like the removed body, and the
// forwarding call to gdv_fn_like_utf8_utf8 like its replacement -- confirm.
bool gdv_fn_ilike_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len) {
gandiva::LikeHolder* holder = reinterpret_cast<gandiva::LikeHolder*>(ptr);
return (*holder)(std::string(data, data_len));
return gdv_fn_like_utf8_utf8(ptr, data, data_len, pattern, pattern_len);
}

// Stub for "regexp_matches" (registered with the alias "regexp_like" in the
// string function registry hunk of this commit).
// Forwards every argument unchanged to gdv_fn_like_utf8_utf8, so the result is
// whatever that stub returns for the holder behind `ptr`.
bool gdv_fn_regexp_matches_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len) {
return gdv_fn_like_utf8_utf8(ptr, data, data_len, pattern, pattern_len);
}

double gdv_fn_random(int64_t ptr) {
Expand Down Expand Up @@ -884,6 +888,17 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
types->i1_type() /*return_type*/, args,
reinterpret_cast<void*>(gdv_fn_ilike_utf8_utf8));

// gdv_fn_regexp_matches_utf8_utf8
args = {types->i64_type(), // int64_t ptr
types->i8_ptr_type(), // const char* data
types->i32_type(), // int data_len
types->i8_ptr_type(), // const char* pattern
types->i32_type()}; // int pattern_len

engine->AddGlobalMappingForFunc(
"gdv_fn_regexp_matches_utf8_utf8", types->i1_type() /*return_type*/, args,
reinterpret_cast<void*>(gdv_fn_regexp_matches_utf8_utf8));

// gdv_fn_to_date_utf8_utf8
args = {types->i64_type(), // int64_t execution_context
types->i64_type(), // int64_t holder_ptr
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/gandiva/gdv_function_stubs.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ bool gdv_fn_like_utf8_utf8_utf8(int64_t ptr, const char* data, int data_len,
// Declaration of the "ilike" stub.
//   ptr            - holder address carried as an integer
//   data, data_len - input bytes and their length
//   pattern, pattern_len - pattern bytes and their length
bool gdv_fn_ilike_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len);

// Declaration of the "regexp_matches" stub; its parameter list matches the
// like/ilike stubs declared alongside it.
//   ptr            - holder address carried as an integer
//   data, data_len - input bytes and their length
//   pattern, pattern_len - pattern bytes and their length
bool gdv_fn_regexp_matches_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len);

int64_t gdv_fn_to_date_utf8_utf8_int32(int64_t context, int64_t ptr, const char* data,
int data_len, bool in1_validity,
const char* pattern, int pattern_len,
Expand Down
Loading

0 comments on commit a21742f

Please sign in to comment.