Skip to content

Commit

Permalink
GDV-68:[Java][C++]Dynamically load dependencies. (apache#49)
Browse files Browse the repository at this point in the history
Loading Gandiva dynamically in java bindings.
Packaging the dynamic library and byte code files in Gandiva JAR.
Introduced configuration object to customize Gandiva at runtime.
  • Loading branch information
praveenbingo authored Jul 2, 2018
1 parent 7676bdd commit 758772d
Show file tree
Hide file tree
Showing 17 changed files with 422 additions and 57 deletions.
80 changes: 80 additions & 0 deletions cpp/src/gandiva/include/gandiva/configuration.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Copyright (C) 2017-2018 Dremio Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef GANDIVA_CONFIGURATION_H
#define GANDIVA_CONFIGURATION_H

#include <string>
#include <memory>

#include "gandiva/status.h"

namespace gandiva {

extern const char kByteCodeFilePath[];

class ConfigurationBuilder;
/// \brief Runtime configuration for gandiva.
///
/// Holds the values that customize gandiva execution at run time; at present
/// that is only the path of the byte code file. The stored values are const,
/// and instances can only be created through ConfigurationBuilder because the
/// constructor is private.
class Configuration {
 public:
  /// \brief Path of the byte code file held by this configuration.
  ///
  /// \return reference to the stored path; it stays valid only while this
  ///         Configuration object is alive.
  const std::string &byte_code_file_path() const {
    return byte_code_file_path_;
  }

  // The builder is the only code allowed to call the private constructor.
  friend class ConfigurationBuilder;

 private:
  /// \param[in] : byte_code_file_path path of the byte code file; copied into
  ///              this object.
  ///
  /// Taken by const reference so the string is copied exactly once (into the
  /// member) instead of once for the parameter and once for the member.
  explicit Configuration(const std::string &byte_code_file_path)
      : byte_code_file_path_(byte_code_file_path) {}

  const std::string byte_code_file_path_;
};

/// \brief Builder for gandiva Configuration objects.
///
/// A freshly constructed builder holds the default values. Chainable setters
/// override individual values, and build() produces a Configuration from
/// whatever the builder currently holds.
class ConfigurationBuilder {
 public:
  /// Start from the default byte code file path (kByteCodeFilePath).
  ConfigurationBuilder() : byte_code_file_path_(kByteCodeFilePath) {}

  /// Override the byte code file path used by subsequently built configurations.
  ///
  /// \param[in] : byte_code_file_path the path to store in the builder.
  /// \return this builder, so that calls can be chained.
  ConfigurationBuilder &set_byte_code_file_path(const std::string &byte_code_file_path) {
    byte_code_file_path_ = byte_code_file_path;
    return *this;
  }

  /// Create a new Configuration from the values currently held by the builder.
  std::shared_ptr<Configuration> build() {
    // Plain 'new' is used because Configuration's constructor is private and
    // only this class is its friend.
    return std::shared_ptr<Configuration>(new Configuration(byte_code_file_path_));
  }

  /// Return the shared default configuration instance.
  static std::shared_ptr<Configuration> DefaultConfiguration() {
    return default_configuration_;
  }

 private:
  /// Create a configuration that uses kByteCodeFilePath; used to initialize
  /// default_configuration_.
  static std::shared_ptr<Configuration> InitDefaultConfig() {
    return std::shared_ptr<Configuration>(new Configuration(kByteCodeFilePath));
  }

  std::string byte_code_file_path_;

  static const std::shared_ptr<Configuration> default_configuration_;
};

} // namespace gandiva
#endif // GANDIVA_CONFIGURATION_H
21 changes: 20 additions & 1 deletion cpp/src/gandiva/include/gandiva/projector.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
#define GANDIVA_EXPR_PROJECTOR_H

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "gandiva/arrow.h"
#include "gandiva/expression.h"
#include "gandiva/configuration.h"
#include "gandiva/status.h"

namespace gandiva {
Expand All @@ -34,15 +36,30 @@ class LLVMGenerator;
/// Once the projector is built, it can be used to evaluate many row batches.
class Projector {
public:
/// Build a default projector for the given schema to evaluate
/// the vector of expressions.
///
/// NOTE(review): presumably this uses the default run time configuration
/// (ConfigurationBuilder::DefaultConfiguration()) - confirm in projector.cc.
///
/// \param[in] : schema schema for the record batches, and the expressions.
/// \param[in] : exprs vector of expressions.
/// \param[in] : pool memory pool used to allocate output arrays (if required).
/// \param[out]: projector the returned projector object
/// \return status of building the projector.
static Status Make(SchemaPtr schema,
const ExpressionVector &exprs,
arrow::MemoryPool *pool,
std::shared_ptr<Projector> *projector);

/// Build a projector for the given schema to evaluate the vector of expressions.
/// Customize the projector with runtime configuration.
///
/// \param[in] : schema schema for the record batches, and the expressions.
/// \param[in] : exprs vector of expressions.
/// \param[in] : pool memory pool used to allocate output arrays (if required).
/// \param[in] : configuration run time configuration used to customize the projector.
/// \param[out]: projector the returned projector object
/// \return status of building the projector.
static Status Make(SchemaPtr schema,
                   const ExpressionVector &exprs,
                   arrow::MemoryPool *pool,
                   std::shared_ptr<Configuration> configuration,
                   std::shared_ptr<Projector> *projector);

/// Evaluate the specified record batch, and return the allocated and populated output
Expand All @@ -67,7 +84,8 @@ class Projector {
Projector(std::unique_ptr<LLVMGenerator> llvm_generator,
SchemaPtr schema,
const FieldVector &output_fields,
arrow::MemoryPool *pool);
arrow::MemoryPool *pool,
std::shared_ptr<Configuration>);

/// Allocate an ArrowData of length 'length'.
Status AllocArrayData(const DataTypePtr &type,
Expand All @@ -86,6 +104,7 @@ class Projector {
const SchemaPtr schema_;
const FieldVector output_fields_;
arrow::MemoryPool *pool_;
const std::shared_ptr<Configuration> configuration_;
};

} // namespace gandiva
Expand Down
44 changes: 44 additions & 0 deletions cpp/src/gandiva/integ/projector_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,50 @@ TEST_F(TestProjector, TestIntSumSub) {
EXPECT_ARROW_ARRAY_EQUALS(exp_sub, outputs.at(1));
}

// Builds a projector through the Make() overload that takes an explicit
// Configuration and checks that add/subtract evaluate as expected.
// The configuration comes from a ConfigurationBuilder with no overrides, so it
// carries the builder's default byte code file path.
TEST_F(TestProjector, TestIntSumSubCustomConfig) {
  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f2", int32());
  auto schema = arrow::schema({field0, field1});

  // output fields
  auto field_sum = field("add", int32());
  auto field_sub = field("subtract", int32());

  // Build expression
  auto sum_expr = TreeExprBuilder::MakeExpression("add", {field0, field1}, field_sum);
  auto sub_expr = TreeExprBuilder::MakeExpression("subtract", {field0, field1},
                                                  field_sub);

  std::shared_ptr<Projector> projector;
  ConfigurationBuilder config_builder;
  std::shared_ptr<Configuration> config = config_builder.build();

  Status status = Projector::Make(schema,
                                  {sum_expr, sub_expr}, pool_, config, &projector);
  // Fatal assertion: if Make() failed, 'projector' is not usable and the
  // dereference below must not be reached.
  ASSERT_TRUE(status.ok());

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array0 = MakeArrowArrayInt32({ 1, 2, 3, 4 }, { true, true, true, false });
  auto array1 = MakeArrowArrayInt32({ 11, 13, 15, 17 }, { true, true, false, true });
  // expected output
  auto exp_sum = MakeArrowArrayInt32({ 12, 15, 0, 0 }, { true, true, false, false });
  auto exp_sub = MakeArrowArrayInt32({ -10, -11, 0, 0 }, { true, true, false, false });

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, &outputs);
  // Fatal assertion: the outputs are only indexed below if evaluation succeeded.
  ASSERT_TRUE(status.ok());

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_sum, outputs.at(0));
  EXPECT_ARROW_ARRAY_EQUALS(exp_sub, outputs.at(1));
}

template<typename TYPE, typename C_TYPE>
static void TestArithmeticOpsForType(arrow::MemoryPool *pool) {
auto atype = arrow::TypeTraits<TYPE>::type_singleton();
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/gandiva/src/codegen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ configure_file(bc_file_path.cc.in ${BC_FILE_PATH_CC})
add_library(gandiva SHARED
annotator.cc
bitmap_accumulator.cc
configuration.cc
engine.cc
expr_decomposer.cc
expr_validator.cc
Expand Down Expand Up @@ -89,11 +90,11 @@ install(
#args: label test-file src-files
add_gandiva_unit_test(bitmap_accumulator_test.cc bitmap_accumulator.cc)
add_gandiva_unit_test(dex_llvm_test.cc)
add_gandiva_unit_test(engine_llvm_test.cc engine.cc llvm_types.cc status.cc ${BC_FILE_PATH_CC})
add_gandiva_unit_test(engine_llvm_test.cc engine.cc llvm_types.cc status.cc configuration.cc ${BC_FILE_PATH_CC})
add_gandiva_unit_test(function_signature_test.cc)
add_gandiva_unit_test(function_registry_test.cc function_registry.cc)
add_gandiva_unit_test(llvm_types_test.cc llvm_types.cc)
add_gandiva_unit_test(llvm_generator_test.cc llvm_generator.cc engine.cc llvm_types.cc expr_decomposer.cc function_registry.cc annotator.cc status.cc bitmap_accumulator.cc ${BC_FILE_PATH_CC})
add_gandiva_unit_test(llvm_generator_test.cc llvm_generator.cc engine.cc llvm_types.cc expr_decomposer.cc function_registry.cc annotator.cc status.cc bitmap_accumulator.cc configuration.cc ${BC_FILE_PATH_CC})
add_gandiva_unit_test(annotator_test.cc annotator.cc)
add_gandiva_unit_test(tree_expr_test.cc tree_expr_builder.cc expr_decomposer.cc annotator.cc function_registry.cc)
add_gandiva_unit_test(expr_decomposer_test.cc expr_decomposer.cc tree_expr_builder.cc annotator.cc function_registry.cc)
Expand Down
20 changes: 20 additions & 0 deletions cpp/src/gandiva/src/codegen/configuration.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright (C) 2017-2018 Dremio Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "gandiva/configuration.h"

namespace gandiva {

// Out-of-line definition of the static member declared in ConfigurationBuilder.
// It is initialized with the result of InitDefaultConfig() and is the object
// handed out by ConfigurationBuilder::DefaultConfiguration().
const std::shared_ptr<Configuration> ConfigurationBuilder::default_configuration_ =
    ConfigurationBuilder::InitDefaultConfig();

}  // namespace gandiva
13 changes: 6 additions & 7 deletions cpp/src/gandiva/src/codegen/engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ namespace gandiva {
bool Engine::init_once_done_ = false;
std::once_flag init_once_flag;

extern const char kByteCodeFilePath[];

// One-time initializations.
void Engine::InitOnce() {
assert(!init_once_done_);
Expand All @@ -57,7 +55,8 @@ void Engine::InitOnce() {
}

/// factory method to construct the engine.
Status Engine::Make(std::unique_ptr<Engine> *engine) {
Status Engine::Make(std::shared_ptr<Configuration> config,
std::unique_ptr<Engine> *engine) {
std::unique_ptr<Engine> engine_obj(new Engine());

std::call_once(init_once_flag, [&engine_obj] {engine_obj->InitOnce();});
Expand All @@ -79,20 +78,20 @@ Status Engine::Make(std::unique_ptr<Engine> *engine) {
return Status::CodeGenError(engine_obj->llvm_error_);
}

Status result = engine_obj->LoadPreCompiledIRFiles();
Status result = engine_obj->LoadPreCompiledIRFiles(config->byte_code_file_path());
GANDIVA_RETURN_NOT_OK(result);
*engine = std::move(engine_obj);
return Status::OK();
}

// Handling for pre-compiled IR libraries.
Status Engine::LoadPreCompiledIRFiles() {
Status Engine::LoadPreCompiledIRFiles(const std::string &byte_code_file_path) {
/// Read from file into memory buffer.
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> buffer_or_error =
llvm::MemoryBuffer::getFile(kByteCodeFilePath);
llvm::MemoryBuffer::getFile(byte_code_file_path);
if (!buffer_or_error) {
std::stringstream ss;
ss << "Could not load module from IR " << kByteCodeFilePath << ": " <<
ss << "Could not load module from IR " << byte_code_file_path << ": " <<
buffer_or_error.getError().message();
return Status::CodeGenError(ss.str());
}
Expand Down
7 changes: 5 additions & 2 deletions cpp/src/gandiva/src/codegen/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include "gandiva/configuration.h"
#include "gandiva/logging.h"
#include "gandiva/status.h"


namespace gandiva {

/// \brief LLVM Execution engine wrapper.
Expand All @@ -39,7 +41,8 @@ class Engine {
/// factory method to create and initialize the engine object.
///
/// \param[out] engine the created engine.
static Status Make(std::unique_ptr<Engine> *engine);
static Status Make(std::shared_ptr<Configuration> config,
std::unique_ptr<Engine> *engine);

/// Add the function to the list of IR functions that need to be compiled.
/// Compiling only the functions that are used by the module saves time.
Expand All @@ -66,7 +69,7 @@ class Engine {
llvm::ExecutionEngine &execution_engine() { return *execution_engine_.get(); }

/// load pre-compiled modules and merge them into the main module.
Status LoadPreCompiledIRFiles();
Status LoadPreCompiledIRFiles(const std::string &byte_code_file_path);

/// dump the IR code to stdout with the prefix string.
void DumpIR(std::string prefix);
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/gandiva/src/codegen/engine_llvm_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ llvm::Function *TestEngine::BuildVecAdd(Engine *engine, LLVMTypes *types) {

TEST_F(TestEngine, TestAddUnoptimised) {
std::unique_ptr<Engine> engine;
Engine::Make(&engine);
Engine::Make(ConfigurationBuilder::DefaultConfiguration(),
&engine);
LLVMTypes types(*engine->context());
llvm::Function *ir_func = BuildVecAdd(engine.get(), &types);
engine->FinalizeModule(false, false);
Expand All @@ -116,7 +117,8 @@ TEST_F(TestEngine, TestAddUnoptimised) {

TEST_F(TestEngine, TestAddOptimised) {
std::unique_ptr<Engine> engine;
Engine::Make(&engine);
Engine::Make(ConfigurationBuilder::DefaultConfiguration(),
&engine);
LLVMTypes types(*engine->context());
llvm::Function *ir_func = BuildVecAdd(engine.get(), &types);
engine->FinalizeModule(true, false);
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/gandiva/src/codegen/llvm_generator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,10 @@ LLVMGenerator::LLVMGenerator() :
optimise_ir_(true),
enable_ir_traces_(false) {}

Status LLVMGenerator::Make(std::unique_ptr<LLVMGenerator> *llvm_generator) {
Status LLVMGenerator::Make(std::shared_ptr<Configuration> config,
std::unique_ptr<LLVMGenerator> *llvm_generator) {
std::unique_ptr<LLVMGenerator> llvmgen_obj(new LLVMGenerator());
Status status = Engine::Make(&(llvmgen_obj->engine_));
Status status = Engine::Make(config, &(llvmgen_obj->engine_));
GANDIVA_RETURN_NOT_OK(status);
llvmgen_obj->types_ = new LLVMTypes(*(llvmgen_obj->engine_)->context());
*llvm_generator = std::move(llvmgen_obj);
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/gandiva/src/codegen/llvm_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "codegen/llvm_types.h"
#include "codegen/lvalue.h"
#include "codegen/value_validity_pair.h"
#include "gandiva/configuration.h"
#include "gandiva/gandiva_aliases.h"

namespace gandiva {
Expand All @@ -39,7 +40,8 @@ class LLVMGenerator {
~LLVMGenerator();

/// \brief Factory method to initialize the generator.
static Status Make(std::unique_ptr<LLVMGenerator> *llvm_generator);
static Status Make(std::shared_ptr<Configuration> config,
std::unique_ptr<LLVMGenerator> *llvm_generator);

/// \brief Build the code for the expression trees. Each element in the vector
/// represents an expression tree
Expand Down
7 changes: 5 additions & 2 deletions cpp/src/gandiva/src/codegen/llvm_generator_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "codegen/func_descriptor.h"
#include "codegen/function_registry.h"
#include "gandiva/expression.h"
#include "gandiva/configuration.h"

namespace gandiva {

Expand All @@ -35,7 +36,8 @@ class TestLLVMGenerator : public ::testing::Test {
TEST_F(TestLLVMGenerator, TestAdd) {
// Setup LLVM generator to do an arithmetic add of two vectors
std::unique_ptr<LLVMGenerator> generator;
Status status = LLVMGenerator::Make(&generator);
Status status = LLVMGenerator::Make(ConfigurationBuilder::DefaultConfiguration(),
&generator);
EXPECT_TRUE(status.ok());
Annotator annotator;

Expand Down Expand Up @@ -100,7 +102,8 @@ TEST_F(TestLLVMGenerator, TestAdd) {
TEST_F(TestLLVMGenerator, TestNullInternal) {
// Setup LLVM generator to evaluate a NULL_INTERNAL type function.
std::unique_ptr<LLVMGenerator> generator;
Status status = LLVMGenerator::Make(&generator);
Status status = LLVMGenerator::Make(ConfigurationBuilder::DefaultConfiguration(),
&generator);
EXPECT_TRUE(status.ok());
Annotator annotator;

Expand Down
Loading

0 comments on commit 758772d

Please sign in to comment.