diff --git a/cpp/src/gandiva/codegen/CMakeLists.txt b/cpp/src/gandiva/codegen/CMakeLists.txt
index e25109f69e47a..d178d072ed7b2 100644
--- a/cpp/src/gandiva/codegen/CMakeLists.txt
+++ b/cpp/src/gandiva/codegen/CMakeLists.txt
@@ -23,17 +23,19 @@ set(BC_FILE_PATH_CC "${CMAKE_CURRENT_BINARY_DIR}/bc_file_path.cc")
 configure_file(bc_file_path.cc.in ${BC_FILE_PATH_CC})
 
 set(SRC_FILES annotator.cc
-    engine.cc
     bitmap_accumulator.cc
     configuration.cc
+    engine.cc
     expr_decomposer.cc
     expr_validator.cc
+    expression.cc
+    expression_registry.cc
     function_registry.cc
+    function_signature.cc
     llvm_generator.cc
     llvm_types.cc
     projector.cc
     status.cc
-    expression.cc
     tree_expr_builder.cc
     ${BC_FILE_PATH_CC})
 
@@ -81,11 +83,13 @@ install(
 add_gandiva_unit_test(bitmap_accumulator_test.cc bitmap_accumulator.cc)
 add_gandiva_unit_test(dex_llvm_test.cc)
 add_gandiva_unit_test(engine_llvm_test.cc engine.cc llvm_types.cc status.cc configuration.cc ${BC_FILE_PATH_CC})
-add_gandiva_unit_test(function_signature_test.cc)
-add_gandiva_unit_test(function_registry_test.cc function_registry.cc)
+# FunctionSignature's operator==, Hash and ToString are now defined in
+# function_signature.cc, so every test that uses signatures compiles it in.
+add_gandiva_unit_test(function_signature_test.cc function_signature.cc)
+add_gandiva_unit_test(function_registry_test.cc function_registry.cc function_signature.cc)
 add_gandiva_unit_test(llvm_types_test.cc llvm_types.cc)
-add_gandiva_unit_test(llvm_generator_test.cc llvm_generator.cc engine.cc llvm_types.cc expr_decomposer.cc function_registry.cc annotator.cc status.cc bitmap_accumulator.cc configuration.cc ${BC_FILE_PATH_CC})
-add_gandiva_unit_test(annotator_test.cc annotator.cc)
-add_gandiva_unit_test(tree_expr_test.cc tree_expr_builder.cc expr_decomposer.cc annotator.cc function_registry.cc)
-add_gandiva_unit_test(expr_decomposer_test.cc expr_decomposer.cc tree_expr_builder.cc annotator.cc function_registry.cc)
+add_gandiva_unit_test(llvm_generator_test.cc llvm_generator.cc engine.cc llvm_types.cc expr_decomposer.cc function_registry.cc annotator.cc status.cc bitmap_accumulator.cc configuration.cc function_signature.cc ${BC_FILE_PATH_CC})
+add_gandiva_unit_test(annotator_test.cc annotator.cc function_signature.cc)
+add_gandiva_unit_test(tree_expr_test.cc tree_expr_builder.cc expr_decomposer.cc annotator.cc function_registry.cc function_signature.cc)
+add_gandiva_unit_test(expr_decomposer_test.cc expr_decomposer.cc tree_expr_builder.cc annotator.cc function_registry.cc function_signature.cc)
 add_gandiva_unit_test(status_test.cc status.cc)
+# The registry test instantiates LLVMTypes and FunctionRegistry itself, so their
+# sources are compiled in next to expression_registry.cc.
+add_gandiva_unit_test(expression_registry_test.cc llvm_types.cc expression_registry.cc function_signature.cc function_registry.cc)
+
diff --git a/cpp/src/gandiva/codegen/expr_decomposer.cc b/cpp/src/gandiva/codegen/expr_decomposer.cc
index db182d9ce0cf1..905fef43dd5d0 100644
--- a/cpp/src/gandiva/codegen/expr_decomposer.cc
+++ b/cpp/src/gandiva/codegen/expr_decomposer.cc
@@ -22,8 +22,8 @@
 #include "codegen/annotator.h"
 #include "codegen/dex.h"
 #include "codegen/function_registry.h"
-#include "codegen/function_signature.h"
 #include "codegen/node.h"
+#include "gandiva/function_signature.h"
 
 namespace gandiva {
 
diff --git a/cpp/src/gandiva/codegen/expr_decomposer_test.cc b/cpp/src/gandiva/codegen/expr_decomposer_test.cc
index b100d66814936..463529e69278a 100644
--- a/cpp/src/gandiva/codegen/expr_decomposer_test.cc
+++ b/cpp/src/gandiva/codegen/expr_decomposer_test.cc
@@ -18,8 +18,8 @@
 #include "codegen/annotator.h"
 #include "codegen/dex.h"
 #include "codegen/function_registry.h"
-#include "codegen/function_signature.h"
 #include "codegen/node.h"
+#include "gandiva/function_signature.h"
 #include "gandiva/gandiva_aliases.h"
 #include "gandiva/tree_expr_builder.h"
 
diff --git a/cpp/src/gandiva/codegen/expression_registry.cc b/cpp/src/gandiva/codegen/expression_registry.cc
new file mode 100644
index 0000000000000..8791170476a95
--- /dev/null
+++ b/cpp/src/gandiva/codegen/expression_registry.cc
@@ -0,0 +1,151 @@
+// Copyright (C) 2017-2018 Dremio Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you
may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gandiva/expression_registry.h"
+
+#include "boost/iterator/transform_iterator.hpp"
+
+#include "codegen/function_registry.h"
+#include "codegen/llvm_types.h"
+
+namespace gandiva {
+
+// Creates the function registry that the signature iterators walk over.
+ExpressionRegistry::ExpressionRegistry() {
+  function_registry_.reset(new FunctionRegistry());
+}
+
+// Defined out of line: the header only forward-declares FunctionRegistry, and the
+// owning unique_ptr needs the complete type where it is destroyed.
+ExpressionRegistry::~ExpressionRegistry() {}
+
+// Returns an iterator positioned at the first entry of the function registry.
+const ExpressionRegistry::FunctionSignatureIterator
+ExpressionRegistry::function_signature_begin() {
+  return FunctionSignatureIterator(function_registry_->begin(),
+                                   function_registry_->end());
+}
+
+// Returns the past-the-end iterator; its position and its end bound are both the
+// registry's end().
+const ExpressionRegistry::FunctionSignatureIterator
+ExpressionRegistry::function_signature_end() const {
+  return FunctionSignatureIterator(function_registry_->end(), function_registry_->end());
+}
+
+// Two iterators differ when their current positions differ. The stored end bound
+// does not take part in the comparison.
+bool ExpressionRegistry::FunctionSignatureIterator::operator!=(
+    const FunctionSignatureIterator &func_sign_it) {
+  return func_sign_it.it != this->it;
+}
+
+// Returns (by value) the signature of the registry entry at the current position.
+FunctionSignature ExpressionRegistry::FunctionSignatureIterator::operator*() {
+  return it->signature();
+}
+
+// Post-increment: advances the position and returns the raw registry position
+// held before the increment (an ExpressionRegistry::iterator, not a
+// FunctionSignatureIterator).
+ExpressionRegistry::iterator ExpressionRegistry::FunctionSignatureIterator::operator++(
+    int) {
+  return it++;
+}
+
+// Filled once, when the static members of this translation unit are initialized.
+DataTypeVector ExpressionRegistry::supported_types_ =
+    ExpressionRegistry::InitSupportedTypes();
+
+// Expands every Arrow type id reported by LLVMTypes into concrete data types.
+DataTypeVector ExpressionRegistry::InitSupportedTypes() {
+  DataTypeVector data_type_vector;
+  llvm::LLVMContext llvm_context;
+  LLVMTypes llvm_types(llvm_context);
+  auto supported_arrow_types = llvm_types.GetSupportedArrowTypes();
+  for (auto &type_id : supported_arrow_types) {
+    AddArrowTypesToVector(type_id, data_type_vector);
+  }
+  return data_type_vector;
+}
+
+// Appends to `vector` the concrete data type(s) for the Arrow type id `type`.
+// Parameterized ids (TIMESTAMP, TIME32, TIME64) contribute one entry per time
+// unit listed below; every other handled id contributes exactly one entry.
+void ExpressionRegistry::AddArrowTypesToVector(arrow::Type::type &type,
+                                               DataTypeVector &vector) {
+  switch (type) {
+    case arrow::Type::type::BOOL:
+      vector.push_back(arrow::boolean());
+      break;
+    case arrow::Type::type::UINT8:
+      vector.push_back(arrow::uint8());
+      break;
+    case arrow::Type::type::INT8:
+      vector.push_back(arrow::int8());
+      break;
+    case arrow::Type::type::UINT16:
+      vector.push_back(arrow::uint16());
+      break;
+    case arrow::Type::type::INT16:
+      vector.push_back(arrow::int16());
+      break;
+    case arrow::Type::type::UINT32:
+      vector.push_back(arrow::uint32());
+      break;
+    case arrow::Type::type::INT32:
+      vector.push_back(arrow::int32());
+      break;
+    case arrow::Type::type::UINT64:
+      vector.push_back(arrow::uint64());
+      break;
+    case arrow::Type::type::INT64:
+      vector.push_back(arrow::int64());
+      break;
+    case arrow::Type::type::HALF_FLOAT:
+      vector.push_back(arrow::float16());
+      break;
+    case arrow::Type::type::FLOAT:
+      vector.push_back(arrow::float32());
+      break;
+    case arrow::Type::type::DOUBLE:
+      vector.push_back(arrow::float64());
+      break;
+    case arrow::Type::type::STRING:
+      vector.push_back(arrow::utf8());
+      break;
+    case arrow::Type::type::BINARY:
+      vector.push_back(arrow::binary());
+      break;
+    case arrow::Type::type::DATE32:
+      vector.push_back(arrow::date32());
+      break;
+    case arrow::Type::type::DATE64:
+      vector.push_back(arrow::date64());
+      break;
+    case arrow::Type::type::TIMESTAMP:
+      vector.push_back(arrow::timestamp(arrow::TimeUnit::SECOND));
+      vector.push_back(arrow::timestamp(arrow::TimeUnit::MILLI));
+      vector.push_back(arrow::timestamp(arrow::TimeUnit::NANO));
+      vector.push_back(arrow::timestamp(arrow::TimeUnit::MICRO));
+      break;
+    case arrow::Type::type::TIME32:
+      vector.push_back(arrow::time32(arrow::TimeUnit::SECOND));
+      vector.push_back(arrow::time32(arrow::TimeUnit::MILLI));
+      break;
+    case arrow::Type::type::TIME64:
+      vector.push_back(arrow::time64(arrow::TimeUnit::MICRO));
+      vector.push_back(arrow::time64(arrow::TimeUnit::NANO));
+      break;
+    case arrow::Type::type::NA:
+      vector.push_back(arrow::null());
+      break;
+    case arrow::Type::type::FIXED_SIZE_BINARY:
+    case arrow::Type::type::MAP:
+    case arrow::Type::type::INTERVAL:
+    case arrow::Type::type::DECIMAL:
+    case arrow::Type::type::LIST:
+    case arrow::Type::type::STRUCT:
+    case arrow::Type::type::UNION:
+    case arrow::Type::type::DICTIONARY:
+      // un-supported types. test ensures that
+      // when one of these is added, the build breaks.
+      DCHECK(false);
+  }
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/codegen/expression_registry.h b/cpp/src/gandiva/codegen/expression_registry.h
new file mode 100644
index 0000000000000..3de870f602046
--- /dev/null
+++ b/cpp/src/gandiva/codegen/expression_registry.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2017-2018 Dremio Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// NOTE(review): the guard name does not match this file's name
+// (expression_registry.h); consider renaming it together with the closing #endif.
+#ifndef GANDIVA_TYPES_H
+#define GANDIVA_TYPES_H
+
+#include <memory>
+#include <vector>
+
+#include "gandiva/arrow.h"
+#include "gandiva/function_signature.h"
+#include "gandiva/gandiva_aliases.h"
+
+namespace gandiva {
+
+class NativeFunction;
+class FunctionRegistry;
+/// \brief Exports types supported by Gandiva for processing.
+///
+/// Has helper methods for clients to programmatically discover
+/// data types and functions supported by Gandiva.
+class ExpressionRegistry {
+ public:
+  /// Raw position inside the function registry: a pointer to one of its entries.
+  using iterator = const NativeFunction *;
+  ExpressionRegistry();
+  ~ExpressionRegistry();
+  /// Returns a copy of the list of data types held in supported_types_.
+  static DataTypeVector supported_types() { return supported_types_; }
+  /// Walks the function registry and yields the signature of each entry.
+  ///
+  /// Only the operations needed by a hand-written loop are provided:
+  /// operator!=, operator* and post-increment.
+  class FunctionSignatureIterator {
+   public:
+    FunctionSignatureIterator(iterator begin, iterator end) : it(begin), end(end) {}
+
+    /// True when the two iterators are at different positions.
+    bool operator!=(const FunctionSignatureIterator &func_sign_it);
+
+    /// Signature of the entry at the current position, returned by value.
+    FunctionSignature operator*();
+
+    /// Advances by one entry and returns the raw position held before the move.
+    iterator operator++(int);
+
+   private:
+    iterator it;   // current position
+    iterator end;  // end bound captured at construction
+  };
+  /// Iterator at the first registered function.
+  const FunctionSignatureIterator function_signature_begin();
+  /// Past-the-end iterator to compare against while looping.
+  const FunctionSignatureIterator function_signature_end() const;
+
+ private:
+  static DataTypeVector supported_types_;
+  static DataTypeVector InitSupportedTypes();
+  static void AddArrowTypesToVector(arrow::Type::type &type, DataTypeVector &vector);
+  // FunctionRegistry is only forward-declared in this header, so it is held
+  // through a pointer.
+  std::unique_ptr<FunctionRegistry> function_registry_;
+};
+}  // namespace gandiva
+#endif  // GANDIVA_TYPES_H
diff --git a/cpp/src/gandiva/codegen/expression_registry_test.cc b/cpp/src/gandiva/codegen/expression_registry_test.cc
new file mode 100644
index 0000000000000..95b8fa732c9dd
--- /dev/null
+++ b/cpp/src/gandiva/codegen/expression_registry_test.cc
@@ -0,0 +1,64 @@
+// Copyright (C) 2017-2018 Dremio Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gandiva/expression_registry.h"
+
+#include <algorithm>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "codegen/function_registry.h"
+#include "codegen/llvm_types.h"
+#include "gandiva/function_signature.h"
+
+namespace gandiva {
+
+// Fixture holding an independently constructed FunctionRegistry that serves as
+// the reference list of functions.
+class TestExpressionRegistry : public ::testing::Test {
+ protected:
+  FunctionRegistry registry_;
+};
+
+// Verify all functions in registry are exported.
+TEST_F(TestExpressionRegistry, VerifySupportedFunctions) {
+  // Collect every signature the ExpressionRegistry iterators expose.
+  std::vector<FunctionSignature> functions;
+  ExpressionRegistry expr_registry;
+  for (auto iter = expr_registry.function_signature_begin();
+       iter != expr_registry.function_signature_end(); iter++) {
+    functions.push_back(*iter);
+  }
+  // Each entry of the reference registry must appear among them.
+  for (auto &native_func : registry_) {
+    auto function = native_func.signature();
+    auto element = std::find(functions.begin(), functions.end(), function);
+    EXPECT_NE(element, functions.end())
+        << "function " << native_func.pc_name()
+        << " missing in supported functions.\n";
+  }
+}
+
+// Verify all types are supported.
+TEST_F(TestExpressionRegistry, VerifyDataTypes) {
+  DataTypeVector data_types = ExpressionRegistry::supported_types();
+  llvm::LLVMContext llvm_context;
+  LLVMTypes llvm_types(llvm_context);
+  auto supported_arrow_types = llvm_types.GetSupportedArrowTypes();
+  // Every Arrow type id that LLVMTypes can generate code for must be represented
+  // by at least one data type exported by the ExpressionRegistry. The lookup has
+  // to run against data_types: searching supported_arrow_types for its own
+  // elements would always succeed and verify nothing.
+  for (auto &type_id : supported_arrow_types) {
+    const bool exported =
+        std::any_of(data_types.begin(), data_types.end(),
+                    [type_id](const DataTypePtr &data_type) {
+                      return data_type->id() == type_id;
+                    });
+    EXPECT_TRUE(exported) << "data type " << type_id
+                          << " missing in supported data types.\n";
+  }
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/codegen/function_registry.h b/cpp/src/gandiva/codegen/function_registry.h
index c8c7cc306d590..27f749da01883 100644
--- a/cpp/src/gandiva/codegen/function_registry.h
+++ b/cpp/src/gandiva/codegen/function_registry.h
@@ -18,6 +18,7 @@
 #include
 
 #include "codegen/native_function.h"
+#include "gandiva/gandiva_aliases.h"
 
 namespace gandiva {
 
diff --git a/cpp/src/gandiva/codegen/function_signature.cc b/cpp/src/gandiva/codegen/function_signature.cc
new file mode 100644
index 0000000000000..e6f92b8b71cd8
--- /dev/null
+++ b/cpp/src/gandiva/codegen/function_signature.cc
@@ -0,0 +1,63 @@
+// Copyright (C) 2017-2018 Dremio Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "gandiva/function_signature.h"
+
+#include "boost/functional/hash.hpp"
+
+namespace gandiva {
+
+/// Two signatures are equal when base name, return type and every parameter type
+/// (compared position by position with DataTypeEquals) all match.
+bool FunctionSignature::operator==(const FunctionSignature &other) const {
+  // Cheap checks first: arity, return type and name.
+  if (param_types_.size() != other.param_types_.size() ||
+      !DataTypeEquals(ret_type_, other.ret_type_) || base_name_ != other.base_name_) {
+    return false;
+  }
+
+  for (size_t idx = 0; idx < param_types_.size(); idx++) {
+    if (!DataTypeEquals(param_types_[idx], other.param_types_[idx])) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/// calculated based on base_name, datatype id of parameters and datatype id
+/// of return type.
+std::size_t FunctionSignature::Hash() const {
+  static const size_t kSeedValue = 17;
+  size_t result = kSeedValue;
+  boost::hash_combine(result, base_name_);
+  boost::hash_combine(result, ret_type_->id());
+  // not using hash_range since we only want to include the id from the data type
+  for (auto &param_type : param_types_) {
+    boost::hash_combine(result, param_type->id());
+  }
+  return result;
+}
+
+/// Renders the signature as "<return type> <base name>(<param>, <param>, ...)".
+std::string FunctionSignature::ToString() const {
+  std::stringstream s;
+
+  s << ret_type_->ToString() << " " << base_name_ << "(";
+  for (uint32_t i = 0; i < param_types_.size(); i++) {
+    if (i > 0) {
+      s << ", ";
+    }
+
+    s << param_types_[i]->ToString();
+  }
+
+  s << ")";
+  return s.str();
+}
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/codegen/function_signature.h b/cpp/src/gandiva/codegen/function_signature.h
index 7c6e19de0b738..76c9888128bc7 100644
--- a/cpp/src/gandiva/codegen/function_signature.h
+++ b/cpp/src/gandiva/codegen/function_signature.h
@@ -19,7 +19,6 @@
 #include
 #include
 
-#include "boost/functional/hash.hpp"
 #include "gandiva/arrow.h"
 #include "gandiva/logging.h"
 
@@ -40,51 +39,19 @@ class FunctionSignature {
     DCHECK(ret_type);
   }
 
-  bool operator==(const FunctionSignature &other) const {
-    if (param_types_.size() != other.param_types_.size() ||
-        !DataTypeEquals(ret_type_, other.ret_type_) || base_name_ != other.base_name_) {
-      return false;
-    }
-
-    for (size_t idx = 0; idx < param_types_.size(); idx++) {
-      if (!DataTypeEquals(param_types_[idx], other.param_types_[idx])) {
-        return false;
-      }
-    }
-    return true;
-  }
+  bool operator==(const FunctionSignature &other) const;
 
   /// calculated based on base_name, datatpype id of parameters and datatype id
   /// of return type.
-  std::size_t Hash() const {
-    static const size_t kSeedValue = 17;
-    size_t result = kSeedValue;
-    boost::hash_combine(result, base_name_);
-    boost::hash_combine(result, ret_type_->id());
-    // not using hash_range since we only want to include the id from the data type
-    for (auto &param_type : param_types_) {
-      boost::hash_combine(result, param_type->id());
-    }
-    return result;
-  }
+  std::size_t Hash() const;
 
   DataTypePtr ret_type() const { return ret_type_; }
 
-  std::string ToString() const {
-    std::stringstream s;
-
-    s << ret_type_->ToString() << " " << base_name_ << "(";
-    for (uint32_t i = 0; i < param_types_.size(); i++) {
-      if (i > 0) {
-        s << ", ";
-      }
+  /// Name of the function without any type information.
+  const std::string &base_name() const { return base_name_; }
 
-      s << param_types_[i]->ToString();
-    }
+  /// Parameter types, returned by value so that callers may safely iterate over
+  /// the result of a temporary signature.
+  DataTypeVector param_types() const { return param_types_; }
 
-    s << ")";
-    return s.str();
-  }
+  std::string ToString() const;
 
  private:
   // TODO : for some of the types, this shouldn't match type specific data. eg. for
diff --git a/cpp/src/gandiva/codegen/function_signature_test.cc b/cpp/src/gandiva/codegen/function_signature_test.cc
index 71ec622a2a10f..3316a2d927393 100644
--- a/cpp/src/gandiva/codegen/function_signature_test.cc
+++ b/cpp/src/gandiva/codegen/function_signature_test.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "codegen/function_signature.h"
+#include "gandiva/function_signature.h"
 
 #include
 
diff --git a/cpp/src/gandiva/codegen/gandiva_aliases.h b/cpp/src/gandiva/codegen/gandiva_aliases.h
index 9ab7b7444d4b6..735bdce643e8a 100644
--- a/cpp/src/gandiva/codegen/gandiva_aliases.h
+++ b/cpp/src/gandiva/codegen/gandiva_aliases.h
@@ -49,6 +49,10 @@ using NodeVector = std::vector>;
 class EvalBatch;
 using EvalBatchPtr = std::shared_ptr;
 
+// Forward declaration only; the aliases below do not need the full definition.
+class FunctionSignature;
+using FuncSignaturePtr = std::shared_ptr<FunctionSignature>;
+using FuncSignatureVector = std::vector<FuncSignaturePtr>;
+
 }  // namespace gandiva
 
 #endif  // GANDIVA_ALIASES_H
diff --git a/cpp/src/gandiva/codegen/llvm_types.h b/cpp/src/gandiva/codegen/llvm_types.h
index 93a4f5351133d..cf4603168d8c5 100644
--- a/cpp/src/gandiva/codegen/llvm_types.h
+++ b/cpp/src/gandiva/codegen/llvm_types.h
@@ -16,6 +16,7 @@
 #define GANDIVA_LLVM_TYPES_H
 
 #include
+#include <vector>
 
 #include
 #include
 
@@ -101,6 +102,14 @@ class LLVMTypes {
     return (found == arrow_id_to_llvm_type_map_.end()) ? NULL : found->second;
   }
 
+  /// Returns the Arrow type ids that have an entry in the arrow-id to LLVM-type
+  /// map, in the map's iteration order.
+  std::vector<arrow::Type::type> GetSupportedArrowTypes() const {
+    std::vector<arrow::Type::type> retval;
+    retval.reserve(arrow_id_to_llvm_type_map_.size());
+    for (auto const &element : arrow_id_to_llvm_type_map_) {
+      retval.push_back(element.first);
+    }
+    return retval;
+  }
+
  private:
   std::map arrow_id_to_llvm_type_map_;
 
diff --git a/cpp/src/gandiva/codegen/native_function.h b/cpp/src/gandiva/codegen/native_function.h
index 15d7d273c57ac..6db44d610db16 100644
--- a/cpp/src/gandiva/codegen/native_function.h
+++ b/cpp/src/gandiva/codegen/native_function.h
@@ -19,7 +19,7 @@
 #include
 #include
 
-#include "codegen/function_signature.h"
+#include "gandiva/function_signature.h"
 
 namespace gandiva {
 
diff --git a/cpp/src/gandiva/codegen/tree_expr_test.cc b/cpp/src/gandiva/codegen/tree_expr_test.cc
index cd4de2a09ada1..e9055281d27ca 100644
--- a/cpp/src/gandiva/codegen/tree_expr_test.cc
+++ b/cpp/src/gandiva/codegen/tree_expr_test.cc
@@ -19,8 +19,8 @@
 #include "codegen/dex.h"
 #include "codegen/expr_decomposer.h"
 #include "codegen/function_registry.h"
-#include "codegen/function_signature.h"
 #include "codegen/node.h"
+#include "gandiva/function_signature.h"
 #include "gandiva/gandiva_aliases.h"
 
 namespace gandiva {
diff --git a/cpp/src/gandiva/jni/CMakeLists.txt b/cpp/src/gandiva/jni/CMakeLists.txt
index 5ee5802d54336..297ba0fa16372 100644
--- a/cpp/src/gandiva/jni/CMakeLists.txt
+++ b/cpp/src/gandiva/jni/CMakeLists.txt
@@ -32,6 +32,7 @@ add_library(gandiva_jni SHARED
   native_builder.cc
   config_builder.cc
   config_holder.cc
+  expression_registry_helper.cc
   ${PROTO_SRCS}
   ${PROTO_HDRS})
 add_dependencies(gandiva_jni gandiva_java)
diff --git a/cpp/src/gandiva/jni/expression_registry_helper.cc b/cpp/src/gandiva/jni/expression_registry_helper.cc
new file mode 100644
index 0000000000000..9270f87eec2c5
--- /dev/null
+++ b/cpp/src/gandiva/jni/expression_registry_helper.cc
@@ -0,0 +1,178 @@
+// Copyright (C) 2017-2018 Dremio Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "jni/org_apache_arrow_gandiva_evaluator_ExpressionRegistryJniHelper.h"
+
+#include <memory>
+
+#include "Types.pb.h"
+#include "gandiva/arrow.h"
+#include "gandiva/expression_registry.h"
+
+using gandiva::DataTypePtr;
+using gandiva::ExpressionRegistry;
+
+/// Maps an Arrow time unit onto the corresponding protobuf TimeUnit value.
+types::TimeUnit MapTimeUnit(arrow::TimeUnit::type &unit) {
+  switch (unit) {
+    case arrow::TimeUnit::MILLI:
+      return types::TimeUnit::MILLISEC;
+    case arrow::TimeUnit::SECOND:
+      return types::TimeUnit::SEC;
+    case arrow::TimeUnit::MICRO:
+      return types::TimeUnit::MICROSEC;
+    case arrow::TimeUnit::NANO:
+      return types::TimeUnit::NANOSEC;
+  }
+  // satisfy gcc. should be unreachable.
+  return types::TimeUnit::SEC;
+}
+
+/// Fills `gandiva_data_type` with the protobuf description of the Arrow `type`.
+///
+/// The type field is always set for handled ids; TIMESTAMP, TIME32 and TIME64
+/// additionally record their time unit. Unsupported ids trip a DCHECK and leave
+/// the message untouched.
+void ArrowToProtobuf(DataTypePtr type, types::ExtGandivaType *gandiva_data_type) {
+  switch (type->id()) {
+    case arrow::Type::type::BOOL:
+      gandiva_data_type->set_type(types::GandivaType::BOOL);
+      break;
+    case arrow::Type::type::UINT8:
+      gandiva_data_type->set_type(types::GandivaType::UINT8);
+      break;
+    case arrow::Type::type::INT8:
+      gandiva_data_type->set_type(types::GandivaType::INT8);
+      break;
+    case arrow::Type::type::UINT16:
+      gandiva_data_type->set_type(types::GandivaType::UINT16);
+      break;
+    case arrow::Type::type::INT16:
+      gandiva_data_type->set_type(types::GandivaType::INT16);
+      break;
+    case arrow::Type::type::UINT32:
+      gandiva_data_type->set_type(types::GandivaType::UINT32);
+      break;
+    case arrow::Type::type::INT32:
+      gandiva_data_type->set_type(types::GandivaType::INT32);
+      break;
+    case arrow::Type::type::UINT64:
+      gandiva_data_type->set_type(types::GandivaType::UINT64);
+      break;
+    case arrow::Type::type::INT64:
+      gandiva_data_type->set_type(types::GandivaType::INT64);
+      break;
+    case arrow::Type::type::HALF_FLOAT:
+      gandiva_data_type->set_type(types::GandivaType::HALF_FLOAT);
+      break;
+    case arrow::Type::type::FLOAT:
+      gandiva_data_type->set_type(types::GandivaType::FLOAT);
+      break;
+    case arrow::Type::type::DOUBLE:
+      gandiva_data_type->set_type(types::GandivaType::DOUBLE);
+      break;
+    case arrow::Type::type::STRING:
+      gandiva_data_type->set_type(types::GandivaType::UTF8);
+      break;
+    case arrow::Type::type::BINARY:
+      gandiva_data_type->set_type(types::GandivaType::BINARY);
+      break;
+    case arrow::Type::type::DATE32:
+      gandiva_data_type->set_type(types::GandivaType::DATE32);
+      break;
+    case arrow::Type::type::DATE64:
+      gandiva_data_type->set_type(types::GandivaType::DATE64);
+      break;
+    case arrow::Type::type::TIMESTAMP: {
+      gandiva_data_type->set_type(types::GandivaType::TIMESTAMP);
+      std::shared_ptr<arrow::TimestampType> cast_time_stamp_type =
+          std::dynamic_pointer_cast<arrow::TimestampType>(type);
+      arrow::TimeUnit::type unit = cast_time_stamp_type->unit();
+      types::TimeUnit time_unit = MapTimeUnit(unit);
+      gandiva_data_type->set_timeunit(time_unit);
+      break;
+    }
+    case arrow::Type::type::TIME32: {
+      gandiva_data_type->set_type(types::GandivaType::TIME32);
+      std::shared_ptr<arrow::Time32Type> cast_time_32_type =
+          std::dynamic_pointer_cast<arrow::Time32Type>(type);
+      arrow::TimeUnit::type unit = cast_time_32_type->unit();
+      types::TimeUnit time_unit = MapTimeUnit(unit);
+      gandiva_data_type->set_timeunit(time_unit);
+      break;
+    }
+    case arrow::Type::type::TIME64: {
+      // Must be reported as TIME64; tagging it TIME32 would make 64-bit time
+      // types indistinguishable from 32-bit ones on the Java side.
+      gandiva_data_type->set_type(types::GandivaType::TIME64);
+      std::shared_ptr<arrow::Time64Type> cast_time_64_type =
+          std::dynamic_pointer_cast<arrow::Time64Type>(type);
+      arrow::TimeUnit::type unit = cast_time_64_type->unit();
+      types::TimeUnit time_unit = MapTimeUnit(unit);
+      gandiva_data_type->set_timeunit(time_unit);
+      break;
+    }
+    case arrow::Type::type::NA:
+      gandiva_data_type->set_type(types::GandivaType::NONE);
+      break;
+    case arrow::Type::type::FIXED_SIZE_BINARY:
+    case arrow::Type::type::MAP:
+    case arrow::Type::type::INTERVAL:
+    case arrow::Type::type::DECIMAL:
+    case arrow::Type::type::LIST:
+    case arrow::Type::type::STRUCT:
+    case arrow::Type::type::UNION:
+    case arrow::Type::type::DICTIONARY:
+      // un-supported types. test ensures that
+      // when one of these is added, the build breaks.
+      DCHECK(false);
+  }
+}
+
+/// Serializes the protobuf `message` and copies the bytes into a new Java byte
+/// array. Returns nullptr when the JVM could not construct the array.
+template <typename Message>
+static jbyteArray ToJavaByteArray(JNIEnv *env, const Message &message) {
+  const size_t size = message.ByteSizeLong();
+  std::unique_ptr<jbyte[]> buffer{new jbyte[size]};
+  message.SerializeToArray(static_cast<void *>(buffer.get()), static_cast<int>(size));
+  jbyteArray ret = env->NewByteArray(static_cast<jsize>(size));
+  if (ret == nullptr) {
+    // Array construction failed; do not touch the null reference.
+    return nullptr;
+  }
+  env->SetByteArrayRegion(ret, 0, static_cast<jsize>(size), buffer.get());
+  return ret;
+}
+
+/*
+ * Class:     org_apache_arrow_gandiva_evaluator_ExpressionRegistryJniHelper
+ * Method:    getGandivaSupportedDataTypes
+ * Signature: ()[B
+ *
+ * Returns a serialized types::GandivaDataTypes message with one entry per data
+ * type reported by ExpressionRegistry::supported_types().
+ */
+JNIEXPORT jbyteArray JNICALL
+Java_org_apache_arrow_gandiva_evaluator_ExpressionRegistryJniHelper_getGandivaSupportedDataTypes(
+    JNIEnv *env, jobject types_helper) {
+  types::GandivaDataTypes gandiva_data_types;
+  auto supported_types = ExpressionRegistry::supported_types();
+  for (auto const &type : supported_types) {
+    types::ExtGandivaType *gandiva_data_type = gandiva_data_types.add_datatype();
+    ArrowToProtobuf(type, gandiva_data_type);
+  }
+  return ToJavaByteArray(env, gandiva_data_types);
+}
+
+/*
+ * Class:     org_apache_arrow_gandiva_evaluator_ExpressionRegistryJniHelper
+ * Method:    getGandivaSupportedFunctions
+ * Signature: ()[B
+ *
+ * Returns a serialized types::GandivaFunctions message with the name, return
+ * type and parameter types of every function signature in the registry.
+ */
+JNIEXPORT jbyteArray JNICALL
+Java_org_apache_arrow_gandiva_evaluator_ExpressionRegistryJniHelper_getGandivaSupportedFunctions(
+    JNIEnv *env, jobject types_helper) {
+  ExpressionRegistry expr_registry;
+  types::GandivaFunctions gandiva_functions;
+  for (auto function = expr_registry.function_signature_begin();
+       function != expr_registry.function_signature_end(); function++) {
+    // Dereferencing yields a signature by value; take it once per entry.
+    const auto signature = *function;
+    types::FunctionSignature *function_signature = gandiva_functions.add_function();
+    function_signature->set_name(signature.base_name());
+    types::ExtGandivaType *return_type = function_signature->mutable_returntype();
+    ArrowToProtobuf(signature.ret_type(), return_type);
+    for (auto &param_type : signature.param_types()) {
+      types::ExtGandivaType *proto_param_type = function_signature->add_paramtypes();
+      ArrowToProtobuf(param_type, proto_param_type);
+    }
+  }
+  return ToJavaByteArray(env, gandiva_functions);
+}