Skip to content

Commit

Permalink
GDV-56: [C++] Add a helper library containing cpp stubs (apache#88)
Browse files Browse the repository at this point in the history
- To get around the java load issue, create a native library and load it in the LLVM module. 
   This module has the hooks for all the c++ function helpers.
- For files that are compiled into libgandiva_helpers, wrap their contents in the gandiva::helpers namespace.
- merged status.cc into status.h
  • Loading branch information
pravindra authored and praveenbingo committed Sep 10, 2018
1 parent e941e27 commit e1d1630
Show file tree
Hide file tree
Showing 17 changed files with 241 additions and 112 deletions.
15 changes: 13 additions & 2 deletions cpp/src/gandiva/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,22 @@ project(gandiva)
find_package(LLVM)

# Set the path where the byte-code files will be installed.
set(GANDIVA_BC_FILE_NAME irhelpers.bc)
set(GANDIVA_BC_INSTALL_DIR
${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/gandiva)

set(GANDIVA_BC_FILE_NAME irhelpers.bc)
set(GANDIVA_BC_INSTALL_PATH ${GANDIVA_BC_INSTALL_DIR}/${GANDIVA_BC_FILE_NAME})
set(GANDIVA_BC_OUTPUT_PATH ${CMAKE_BINARY_DIR}/irhelpers.bc)
set(GANDIVA_BC_OUTPUT_PATH ${CMAKE_BINARY_DIR}/${GANDIVA_BC_FILE_NAME})

# File name of the pre-compiled helper shared library.
# Built from the platform's shared-library prefix/suffix instead of a
# hard-coded APPLE/non-APPLE switch: this still yields
# libgandiva_helpers.dylib on macOS and libgandiva_helpers.so on Linux, and
# stays in sync with the default output name of the gandiva_helpers target on
# other platforms.
set(GANDIVA_HELPER_LIB_FILE_NAME
    ${CMAKE_SHARED_LIBRARY_PREFIX}gandiva_helpers${CMAKE_SHARED_LIBRARY_SUFFIX})

# Install location of the helper library (same directory as the byte-code file).
set(GANDIVA_HELPER_LIB_INSTALL_PATH ${GANDIVA_BC_INSTALL_DIR}/${GANDIVA_HELPER_LIB_FILE_NAME})
# Build-tree location of the helper library; substituted into bc_file_path.cc.in.
set(GANDIVA_HELPER_LIB_OUTPUT_PATH ${CMAKE_BINARY_DIR}/src/codegen/${GANDIVA_HELPER_LIB_FILE_NAME})

add_subdirectory(codegen)
add_subdirectory(jni)
Expand Down
44 changes: 34 additions & 10 deletions cpp/src/gandiva/codegen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ find_package(Boost COMPONENTS system regex filesystem REQUIRED)
set(BC_FILE_PATH_CC "${CMAKE_CURRENT_BINARY_DIR}/bc_file_path.cc")
configure_file(bc_file_path.cc.in ${BC_FILE_PATH_CC})

# Sources compiled into both libgandiva (through SRC_FILES) and
# libgandiva_helpers.
set(SHARED_HELPER_FILES
  like_holder.cc
  regex_util.cc
)

set(SRC_FILES annotator.cc
bitmap_accumulator.cc
configuration.cc
Expand All @@ -35,12 +40,10 @@ set(SRC_FILES annotator.cc
function_signature.cc
llvm_generator.cc
llvm_types.cc
like_holder.cc
projector.cc
selection_vector.cc
regex_util.cc
status.cc
tree_expr_builder.cc
${SHARED_HELPER_FILES}
${BC_FILE_PATH_CC})

add_library(gandiva_obj_lib OBJECT ${SRC_FILES})
Expand Down Expand Up @@ -84,18 +87,39 @@ install(
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)

# Pre-compiled shared library containing the C++ function helpers (the
# sources shared with libgandiva plus the extern "C" stubs).
add_library(gandiva_helpers SHARED
  ${SHARED_HELPER_FILES}
  function_holder_stubs.cc)

# GDV_HELPERS switches the shared sources into the gandiva::helpers namespace.
# target_compile_definitions() takes the bare macro name, not a -D flag.
target_compile_definitions(gandiva_helpers
  PRIVATE GDV_HELPERS
)

target_include_directories(gandiva_helpers
  PRIVATE
    ${CMAKE_SOURCE_DIR}/include
    ${CMAKE_SOURCE_DIR}/src
    $<TARGET_PROPERTY:ARROW::ARROW_SHARED,INTERFACE_INCLUDE_DIRECTORIES>
)

target_link_libraries(gandiva_helpers PRIVATE Boost::boost)
if(NOT APPLE)
  # Link the GCC C++/C runtimes statically into the helper library.
  # NOTE(review): presumably this avoids runtime-library mismatches when the
  # library is loaded from the JVM - confirm against the original load issue.
  # PRIVATE replaces the legacy LINK_PRIVATE keyword so that every
  # target_link_libraries() call on this target uses the same signature.
  target_link_libraries(gandiva_helpers PRIVATE -static-libstdc++ -static-libgcc)
endif()

#args: label test-file src-files
add_gandiva_unit_test(bitmap_accumulator_test.cc bitmap_accumulator.cc)
add_gandiva_unit_test(engine_llvm_test.cc engine.cc llvm_types.cc status.cc configuration.cc ${BC_FILE_PATH_CC})
add_gandiva_unit_test(engine_llvm_test.cc engine.cc llvm_types.cc configuration.cc ${BC_FILE_PATH_CC})
add_gandiva_unit_test(function_signature_test.cc function_signature.cc)
add_gandiva_unit_test(function_registry_test.cc function_registry.cc function_signature.cc)
add_gandiva_unit_test(llvm_types_test.cc llvm_types.cc)
add_gandiva_unit_test(llvm_generator_test.cc llvm_generator.cc regex_util.cc engine.cc llvm_types.cc expr_decomposer.cc function_registry.cc annotator.cc status.cc bitmap_accumulator.cc configuration.cc function_signature.cc like_holder.cc regex_util.cc ${BC_FILE_PATH_CC})
add_gandiva_unit_test(llvm_generator_test.cc llvm_generator.cc regex_util.cc engine.cc llvm_types.cc expr_decomposer.cc function_registry.cc annotator.cc bitmap_accumulator.cc configuration.cc function_signature.cc like_holder.cc regex_util.cc ${BC_FILE_PATH_CC})
add_gandiva_unit_test(annotator_test.cc annotator.cc function_signature.cc)
add_gandiva_unit_test(tree_expr_test.cc tree_expr_builder.cc expr_decomposer.cc annotator.cc function_registry.cc function_signature.cc like_holder.cc regex_util.cc status.cc)
add_gandiva_unit_test(expr_decomposer_test.cc expr_decomposer.cc tree_expr_builder.cc annotator.cc function_registry.cc function_signature.cc like_holder.cc regex_util.cc status.cc)
add_gandiva_unit_test(status_test.cc status.cc)
add_gandiva_unit_test(tree_expr_test.cc tree_expr_builder.cc expr_decomposer.cc annotator.cc function_registry.cc function_signature.cc like_holder.cc regex_util.cc)
add_gandiva_unit_test(expr_decomposer_test.cc expr_decomposer.cc tree_expr_builder.cc annotator.cc function_registry.cc function_signature.cc like_holder.cc regex_util.cc)
add_gandiva_unit_test(status_test.cc)
add_gandiva_unit_test(expression_registry_test.cc llvm_types.cc expression_registry.cc function_signature.cc function_registry.cc)
add_gandiva_unit_test(selection_vector_test.cc selection_vector.cc status.cc)
add_gandiva_unit_test(selection_vector_test.cc selection_vector.cc)
add_gandiva_unit_test(lru_cache_test.cc)
add_gandiva_unit_test(like_holder_test.cc like_holder.cc regex_util.cc status.cc)
add_gandiva_unit_test(like_holder_test.cc like_holder.cc regex_util.cc)
3 changes: 3 additions & 0 deletions cpp/src/gandiva/codegen/bc_file_path.cc.in
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,7 @@ namespace gandiva {
// Path to the byte-code file.
extern const char kByteCodeFilePath[] = "${GANDIVA_BC_OUTPUT_PATH}";

// Path to the pre-compiled solib file.
extern const char kHelperLibFilePath[] = "${GANDIVA_HELPER_LIB_OUTPUT_PATH}";

} // namespace gandiva
30 changes: 24 additions & 6 deletions cpp/src/gandiva/codegen/configuration.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
namespace gandiva {

extern const char kByteCodeFilePath[];
extern const char kHelperLibFilePath[];

class ConfigurationBuilder;
/// \brief runtime config for gandiva
Expand All @@ -32,17 +33,23 @@ class ConfigurationBuilder;
/// at run time.
class Configuration {
public:
const std::string &byte_code_file_path() const { return byte_code_file_path_; }
friend class ConfigurationBuilder;

const std::string &byte_code_file_path() const { return byte_code_file_path_; }
const std::string &helper_lib_file_path() const { return helper_lib_file_path_; }

std::size_t Hash() const;
bool operator==(const Configuration &other) const;
bool operator!=(const Configuration &other) const;

private:
explicit Configuration(const std::string byte_code_file_path)
: byte_code_file_path_(byte_code_file_path) {}
explicit Configuration(const std::string &byte_code_file_path,
const std::string &helper_lib_file_path)
: byte_code_file_path_(byte_code_file_path),
helper_lib_file_path_(helper_lib_file_path) {}

const std::string byte_code_file_path_;
const std::string helper_lib_file_path_;
};

/// \brief configuration builder for gandiva
Expand All @@ -51,15 +58,24 @@ class Configuration {
/// to override specific values and build a custom instance
class ConfigurationBuilder {
public:
ConfigurationBuilder() : byte_code_file_path_(kByteCodeFilePath) {}
ConfigurationBuilder()
: byte_code_file_path_(kByteCodeFilePath),
helper_lib_file_path_(kHelperLibFilePath) {}

ConfigurationBuilder &set_byte_code_file_path(const std::string &byte_code_file_path) {
byte_code_file_path_ = byte_code_file_path;
return *this;
}

ConfigurationBuilder &set_helper_lib_file_path(
const std::string &helper_lib_file_path) {
helper_lib_file_path_ = helper_lib_file_path;
return *this;
}

std::shared_ptr<Configuration> build() {
std::shared_ptr<Configuration> configuration(new Configuration(byte_code_file_path_));
std::shared_ptr<Configuration> configuration(
new Configuration(byte_code_file_path_, helper_lib_file_path_));
return configuration;
}

Expand All @@ -69,9 +85,11 @@ class ConfigurationBuilder {

private:
std::string byte_code_file_path_;
std::string helper_lib_file_path_;

static std::shared_ptr<Configuration> InitDefaultConfig() {
std::shared_ptr<Configuration> configuration(new Configuration(kByteCodeFilePath));
std::shared_ptr<Configuration> configuration(
new Configuration(kByteCodeFilePath, kHelperLibFilePath));
return configuration;
}

Expand Down
32 changes: 29 additions & 3 deletions cpp/src/gandiva/codegen/engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,12 @@

namespace gandiva {

bool Engine::init_once_done_ = false;
std::once_flag init_once_flag;

bool Engine::init_once_done_ = false;
std::set<std::string> Engine::loaded_libs_ = {};
std::mutex Engine::mtx_;

// One-time initializations.
void Engine::InitOnce() {
DCHECK_EQ(init_once_done_, false);
Expand Down Expand Up @@ -78,12 +81,35 @@ Status Engine::Make(std::shared_ptr<Configuration> config,
return Status::CodeGenError(engine_obj->llvm_error_);
}

Status result = engine_obj->LoadPreCompiledIRFiles(config->byte_code_file_path());
GANDIVA_RETURN_NOT_OK(result);
auto status = engine_obj->LoadPreCompiledHelperLibs(config->helper_lib_file_path());
GANDIVA_RETURN_NOT_OK(status);

status = engine_obj->LoadPreCompiledIRFiles(config->byte_code_file_path());
GANDIVA_RETURN_NOT_OK(status);

*engine = std::move(engine_obj);
return Status::OK();
}

/// Load the pre-compiled helper shared library at \p file_path into the
/// current process.
///
/// Each distinct path is loaded at most once; paths that were already loaded
/// successfully are remembered in loaded_libs_ and skipped on later calls.
///
/// \param file_path path to the shared library to load.
/// \return Status::OK() on success (or if already loaded), otherwise a
///         CodeGenError carrying the loader's error message.
Status Engine::LoadPreCompiledHelperLibs(const std::string &file_path) {
  // RAII lock: released on every exit path, including an exception thrown
  // while inserting into loaded_libs_.
  std::lock_guard<std::mutex> lock(mtx_);

  // Load each so lib only once.
  if (loaded_libs_.find(file_path) != loaded_libs_.end()) {
    return Status::OK();
  }

  // LoadLibraryPermanently() returns true on failure and describes the
  // failure in err_msg.
  std::string err_msg;
  if (llvm::sys::DynamicLibrary::LoadLibraryPermanently(file_path.c_str(), &err_msg)) {
    return Status::CodeGenError("loading precompiled native file " + file_path +
                                " failed with error " + err_msg);
  }

  loaded_libs_.insert(file_path);
  return Status::OK();
}

// Handling for pre-compiled IR libraries.
Status Engine::LoadPreCompiledIRFiles(const std::string &byte_code_file_path) {
/// Read from file into memory buffer.
Expand Down
9 changes: 8 additions & 1 deletion cpp/src/gandiva/codegen/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#define GANDIVA_ENGINE_H

#include <memory>
#include <set>
#include <string>
#include <vector>

Expand Down Expand Up @@ -67,7 +68,10 @@ class Engine {

llvm::ExecutionEngine &execution_engine() { return *execution_engine_.get(); }

/// load pre-compiled modules and merge them into the main module.
/// load the pre-compiled helper shared library (.so/.dylib) into the process, at most once per path.
Status LoadPreCompiledHelperLibs(const std::string &helper_lib_file_path);

/// load pre-compiled IR modules and merge them into the main module.
Status LoadPreCompiledIRFiles(const std::string &byte_code_file_path);

/// dump the IR code to stdout with the prefix string.
Expand All @@ -83,6 +87,9 @@ class Engine {

bool module_finalized_;
std::string llvm_error_;

static std::set<std::string> loaded_libs_;
static std::mutex mtx_;
};

} // namespace gandiva
Expand Down
23 changes: 23 additions & 0 deletions cpp/src/gandiva/codegen/function_holder_stubs.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright (C) 2017-2018 Dremio Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "codegen/like_holder.h"

// Wrapper C functions for "like" to be invoked from LLVM.
extern "C" bool like_utf8_utf8(int64_t ptr, const char *data, int data_len,
const char *pattern, int pattern_len) {
gandiva::helpers::LikeHolder *holder =
reinterpret_cast<gandiva::helpers::LikeHolder *>(ptr);
return (*holder)(std::string(data, data_len));
}
11 changes: 6 additions & 5 deletions cpp/src/gandiva/codegen/like_holder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@

namespace gandiva {

#ifdef GDV_HELPERS
namespace helpers {
#endif

Status LikeHolder::Make(const FunctionNode &node, std::shared_ptr<LikeHolder> *holder) {
if (node.children().size() != 2) {
return Status::Invalid("'like' function requires two parameters");
Expand Down Expand Up @@ -49,11 +53,8 @@ Status LikeHolder::Make(const std::string &sql_pattern,
return Status::OK();
}

// Wrapper C functions for "like" to be invoked from LLVM.
extern "C" bool like_utf8_utf8(int64_t ptr, const char *data, int data_len,
const char *pattern, int pattern_len) {
LikeHolder *holder = reinterpret_cast<LikeHolder *>(ptr);
return (*holder)(std::string(data, data_len));
#ifdef GDV_HELPERS
}
#endif

} // namespace gandiva
8 changes: 8 additions & 0 deletions cpp/src/gandiva/codegen/like_holder.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@

namespace gandiva {

#ifdef GDV_HELPERS
namespace helpers {
#endif

/// Function Holder for SQL 'like'
class LikeHolder : public FunctionHolder {
public:
Expand All @@ -41,6 +45,10 @@ class LikeHolder : public FunctionHolder {
std::regex regex_; // compiled regex for the pattern
};

#ifdef GDV_HELPERS
}
#endif

} // namespace gandiva

#endif // GANDIVA_LIKE_HOLDER_H
2 changes: 1 addition & 1 deletion cpp/src/gandiva/codegen/llvm_generator_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ TEST_F(TestLLVMGenerator, VerifyPCFunctions) {
std::unique_ptr<LLVMGenerator> generator;
Status status =
LLVMGenerator::Make(ConfigurationBuilder::DefaultConfiguration(), &generator);
EXPECT_TRUE(status.ok());
EXPECT_TRUE(status.ok()) << status.message();

llvm::Module *module = generator->module();
for (auto &iter : registry_) {
Expand Down
12 changes: 10 additions & 2 deletions cpp/src/gandiva/codegen/regex_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,17 @@

namespace gandiva {

/// Characters that are considered special by posix regex. These need to be
/// escaped with '\\'.
#ifdef GDV_HELPERS
namespace helpers {
#endif

const std::set<char> RegexUtil::posix_regex_specials_ = {
'[', ']', '(', ')', '|', '^', '-', '+', '*', '?', '{', '}', '$', '\\'};

Status RegexUtil::SqlLikePatternToPosix(const std::string &sql_pattern, char escape_char,
std::string &posix_pattern) {
/// Characters that are considered special by posix regex. These need to be
/// escaped with '\\'.
posix_pattern.clear();
for (size_t idx = 0; idx < sql_pattern.size(); ++idx) {
auto cur = sql_pattern.at(idx);
Expand Down Expand Up @@ -61,4 +65,8 @@ Status RegexUtil::SqlLikePatternToPosix(const std::string &sql_pattern, char esc
return Status::OK();
}

#ifdef GDV_HELPERS
} // namespace helpers
#endif

} // namespace gandiva
9 changes: 8 additions & 1 deletion cpp/src/gandiva/codegen/regex_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@

namespace gandiva {

#ifdef GDV_HELPERS
namespace helpers {
#endif

/// \brief Utility class for converting sql patterns to posix patterns.
class RegexUtil {
public:
Expand All @@ -33,10 +37,13 @@ class RegexUtil {
}

private:
// set of characters that std::regex treats as special.
static const std::set<char> posix_regex_specials_;
};

#ifdef GDV_HELPERS
} // namespace helpers
#endif

} // namespace gandiva

#endif // GANDIVA_REGEX_UTIL_H
Loading

0 comments on commit e1d1630

Please sign in to comment.