diff --git a/cpp/src/gandiva/cmake/BuildUtils.cmake b/cpp/src/gandiva/cmake/BuildUtils.cmake index e8d01d0713a9b..eb135b6cf0efe 100644 --- a/cpp/src/gandiva/cmake/BuildUtils.cmake +++ b/cpp/src/gandiva/cmake/BuildUtils.cmake @@ -89,6 +89,8 @@ function(add_lint) file(GLOB_RECURSE LINT_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.h" "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/integ/*.h" + "${CMAKE_CURRENT_SOURCE_DIR}/integ/*.cc" ) find_program(CPPLINT_BIN NAMES cpplint cpplint.py HINTS ${BUILD_SUPPORT_DIR}) diff --git a/cpp/src/gandiva/integ/CMakeLists.txt b/cpp/src/gandiva/integ/CMakeLists.txt index 420c9ae76661e..db8875ff02ab8 100644 --- a/cpp/src/gandiva/integ/CMakeLists.txt +++ b/cpp/src/gandiva/integ/CMakeLists.txt @@ -18,4 +18,5 @@ add_gandiva_integ_test(projector_test.cc) add_gandiva_integ_test(if_expr_test.cc) add_gandiva_integ_test(boolean_expr_test.cc) add_gandiva_integ_test(literal_test.cc) +add_gandiva_integ_test(date_time_test.cc) add_gandiva_integ_test(projector_build_validation_test.cc) diff --git a/cpp/src/gandiva/integ/boolean_expr_test.cc b/cpp/src/gandiva/integ/boolean_expr_test.cc index ec1af40c3a507..d82d6413a4694 100644 --- a/cpp/src/gandiva/integ/boolean_expr_test.cc +++ b/cpp/src/gandiva/integ/boolean_expr_test.cc @@ -219,7 +219,7 @@ TEST_F(TestBooleanExpr, AndThree) { int num_records = 8; std::vector validity({true, true, true, true, true, true, true, true }); - auto arraya = MakeArrowArrayInt32({2, 2, 2, 0, 2, 0, 0, 0}, validity) ; + auto arraya = MakeArrowArrayInt32({2, 2, 2, 0, 2, 0, 0, 0}, validity); auto arrayb = MakeArrowArrayInt32({2, 2, 0, 2, 0, 2, 0, 0}, validity); auto arrayc = MakeArrowArrayInt32({2, 0, 2, 2, 0, 0, 2, 0}, validity); auto exp = MakeArrowArrayBool({true, false, false, false, false, false, false, false }, @@ -267,7 +267,7 @@ TEST_F(TestBooleanExpr, OrThree) { int num_records = 8; std::vector validity({true, true, true, true, true, true, true, true }); - auto arraya = MakeArrowArrayInt32({2, 2, 2, 0, 2, 0, 0, 0}, validity) ; + auto arraya = MakeArrowArrayInt32({2, 2, 2, 0, 2, 0, 0, 0}, validity); auto arrayb = MakeArrowArrayInt32({2, 2, 0, 2, 0, 2, 0, 0}, validity); auto arrayc = MakeArrowArrayInt32({2, 0, 2, 2, 0, 0, 2, 0}, validity); auto exp = MakeArrowArrayBool({true, true, true, true, true, true, true, false}, @@ -328,7 +328,7 @@ TEST_F(TestBooleanExpr, BooleanAndInsideIf) { int num_records = 4; std::vector validity({true, true, true, true }); - auto arraya = MakeArrowArrayInt32({4, 4, 2, 1}, validity) ; + auto arraya = MakeArrowArrayInt32({4, 4, 2, 1}, validity); auto arrayb = MakeArrowArrayInt32({5, 3, 3, 1}, validity); auto exp = MakeArrowArrayBool({true, false, true, false}, validity); @@ -380,7 +380,7 @@ TEST_F(TestBooleanExpr, IfInsideBooleanAnd) { int num_records = 4; std::vector validity({true, true, true, true }); - auto arraya = MakeArrowArrayInt32({4, 3, 3, 2}, validity) ; + auto arraya = MakeArrowArrayInt32({4, 3, 3, 2}, validity); auto arrayb = MakeArrowArrayInt32({3, 4, 2, 3}, validity); auto exp = MakeArrowArrayBool({true, true, false, false}, validity); diff --git a/cpp/src/gandiva/integ/date_time_test.cc b/cpp/src/gandiva/integ/date_time_test.cc new file mode 100644 index 0000000000000..b454c82331ec3 --- /dev/null +++ b/cpp/src/gandiva/integ/date_time_test.cc @@ -0,0 +1,166 @@ +// Copyright (C) 2017-2018 Dremio Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "arrow/memory_pool.h" +#include "integ/test_util.h" +#include "gandiva/projector.h" +#include "gandiva/tree_expr_builder.h" + +namespace gandiva { + +using arrow::int32; +using arrow::int64; +using arrow::float32; +using arrow::boolean; +using arrow::date64; + +class TestProjector : public ::testing::Test { + public: + void SetUp() { pool_ = arrow::default_memory_pool(); } + + protected: + arrow::MemoryPool* pool_; +}; + +int64_t MillisSince(time_t base_line, + int32_t yy, int32_t mm, int32_t dd, + int32_t hr, int32_t min, int32_t sec) { + struct tm given_ts = {0}; + given_ts.tm_year = (yy - 1900); + given_ts.tm_mon = (mm - 1); + given_ts.tm_mday = dd; + given_ts.tm_hour = hr; + given_ts.tm_min = min; + given_ts.tm_sec = sec; + + return (lround(difftime(mktime(&given_ts), base_line)) * 1000); +} + +TEST_F(TestProjector, TestTime) { + auto field0 = field("f0", date64()); + auto field1 = field("f1", time32(arrow::TimeUnit::MILLI)); + auto field2 = field("f2", timestamp(arrow::TimeUnit::MILLI)); + auto schema = arrow::schema({field0, field2}); + + // output fields + auto field_year = field("yy", int64()); + auto field_month = field("mm", int64()); + auto field_day = field("dd", int64()); + auto field_hour = field("hh", int64()); + + // extract year and month from date + auto date2year_expr = TreeExprBuilder::MakeExpression( + "extractYear", + {field0}, + field_year); + auto date2month_expr = TreeExprBuilder::MakeExpression( + "extractMonth", + {field0}, + field_month); + + // extract day and hour from time32 + auto time2day_expr = TreeExprBuilder::MakeExpression( + "extractDay", + {field1}, + field_day); + auto time2hour_expr = TreeExprBuilder::MakeExpression( + "extractHour", + {field1}, + field_hour); + + // extract month and day from timestamp + auto ts2month_expr = TreeExprBuilder::MakeExpression + ("extractMonth", + {field2}, + field_month); + auto ts2day_expr = TreeExprBuilder::MakeExpression("extractDay", {field2}, field_day); + + std::shared_ptr projector; + Status status = Projector::Make( + schema, + {date2year_expr, date2month_expr, ts2month_expr, ts2day_expr}, + pool_, + &projector); + ASSERT_TRUE(status.ok()); + + struct tm y1970 = {0}; + y1970.tm_year = 70; y1970.tm_mon = 0; y1970.tm_mday = 1; + y1970.tm_hour = 0; y1970.tm_min = 0; y1970.tm_sec = 0; + time_t epoch = mktime(&y1970); + + // Create a row-batch with some sample data + int num_records = 4; + auto validity = { true, true, true, true }; + std::vector field0_data = { + MillisSince(epoch, 2000, 1, 1, 5, 0, 0), + MillisSince(epoch, 1999, 12, 31, 5, 0, 0), + MillisSince(epoch, 2015, 6, 30, 20, 0, 0), + MillisSince(epoch, 2015, 7, 1, 20, 0, 0) + }; + auto array0 = MakeArrowTypeArray( + date64(), + field0_data, validity); + + std::vector field1_data = { + MillisSince(epoch, 2000, 1, 1, 5, 0, 0), + MillisSince(epoch, 1999, 12, 31, 4, 0, 0), + MillisSince(epoch, 2015, 6, 30, 20, 0, 0), + MillisSince(epoch, 2015, 7, 3, 3, 0, 0) + }; + + auto array1 = MakeArrowTypeArray( + time32(arrow::TimeUnit::MILLI), + field1_data, + validity); + + std::vector field2_data = { + MillisSince(epoch, 1999, 12, 31, 5, 0, 0), + MillisSince(epoch, 2000, 1, 2, 5, 0, 0), + MillisSince(epoch, 2015, 7, 1, 1, 0, 0), + MillisSince(epoch, 2015, 6, 29, 23, 0, 0) + }; + + auto array2 = MakeArrowTypeArray( + arrow::timestamp(arrow::TimeUnit::MILLI), + field2_data, + validity); + + // expected output + // date 2 year and date 2 month + auto exp_yy_from_date = MakeArrowArrayInt64({ 2000, 1999, 2015, 2015 }, validity); + auto exp_mm_from_date = MakeArrowArrayInt64({ 1, 12, 6, 7 }, validity); + + // ts 2 month and ts 2 day + auto exp_mm_from_ts = MakeArrowArrayInt64({12, 1, 7, 6}, validity); + auto exp_dd_from_ts = MakeArrowArrayInt64({31, 2, 1, 29}, validity); + + // prepare input record batch + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array2}); + + // Evaluate expression + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, &outputs); + EXPECT_TRUE(status.ok()); + + // Validate results + EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date, outputs.at(0)); + EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date, outputs.at(1)); + EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_ts, outputs.at(2)); + EXPECT_ARROW_ARRAY_EQUALS(exp_dd_from_ts, outputs.at(3)); +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/integ/literal_test.cc b/cpp/src/gandiva/integ/literal_test.cc index 08b21b225a154..ddcbba640c162 100644 --- a/cpp/src/gandiva/integ/literal_test.cc +++ b/cpp/src/gandiva/integ/literal_test.cc @@ -73,12 +73,12 @@ TEST_F(TestLiteral, TestSimpleArithmetic) { auto expr_c = TreeExprBuilder::MakeExpression(func_c, res_c); auto node_d = TreeExprBuilder::MakeField(field_d); - auto literal_d = TreeExprBuilder::MakeLiteral((float)1); + auto literal_d = TreeExprBuilder::MakeLiteral(static_cast(1)); auto func_d = TreeExprBuilder::MakeFunction("add", {node_d, literal_d}, float32()); auto expr_d = TreeExprBuilder::MakeExpression(func_d, res_d); auto node_e = TreeExprBuilder::MakeField(field_e); - auto literal_e = TreeExprBuilder::MakeLiteral((double)1); + auto literal_e = TreeExprBuilder::MakeLiteral(static_cast(1)); auto func_e = TreeExprBuilder::MakeFunction("add", {node_e, literal_e}, float64()); auto expr_e = TreeExprBuilder::MakeExpression(func_e, res_e); @@ -93,14 +93,14 @@ TEST_F(TestLiteral, TestSimpleArithmetic) { // Create a row-batch with some sample data int num_records = 4; - auto array_a = MakeArrowArrayBool({true, true, false, true},{true, true, true, false}); + auto array_a = MakeArrowArrayBool({true, true, false, true}, {true, true, true, false}); auto array_b = MakeArrowArrayInt32({5, 15, -15, 17}, {true, true, true, false}); auto array_c = MakeArrowArrayInt64({5, 15, -15, 17}, {true, true, true, false}); auto array_d = MakeArrowArrayFloat32({5.2, 15, -15.6, 17}, {true, true, true, false}); auto array_e = MakeArrowArrayFloat64({5.6, 15, -15.9, 17}, {true, true, true, false}); // expected output - auto exp_a = MakeArrowArrayBool({true, true, false, false},{true, true, true, false}); + auto exp_a = MakeArrowArrayBool({true, true, false, false}, {true, true, true, false}); auto exp_b = MakeArrowArrayInt32({6, 16, -14, 0}, {true, true, true, false}); auto exp_c = MakeArrowArrayInt64({6, 16, -14, 0}, {true, true, true, false}); auto exp_d = MakeArrowArrayFloat32({6.2, 16, -14.6, 0}, {true, true, true, false}); diff --git a/cpp/src/gandiva/integ/projector_build_validation_test.cc b/cpp/src/gandiva/integ/projector_build_validation_test.cc index bf9607894fe9d..53ed572430f3f 100644 --- a/cpp/src/gandiva/integ/projector_build_validation_test.cc +++ b/cpp/src/gandiva/integ/projector_build_validation_test.cc @@ -55,25 +55,25 @@ TEST_F(TestProjector, TestNonExistentFunction) { } TEST_F(TestProjector, TestNotMatchingDataType) { - // schema for input fields - auto field0 = field("f0", float32()); - auto schema = arrow::schema({field0}); - - // output fields - auto field_result = field("res", boolean()); - - // Build expression - auto node_f0 = TreeExprBuilder::MakeField(field0); - auto lt_expr = TreeExprBuilder::MakeExpression(node_f0, field_result); - - // Build a projector for the expressions. - std::shared_ptr projector; - Status status = Projector::Make(schema, {lt_expr}, pool_, &projector); - EXPECT_TRUE(status.IsExpressionValidationError()); - std::string expected_error = - "Return type of root node float does not match that of expression bool"; - EXPECT_TRUE(status.message().find(expected_error) != std::string::npos); - } + // schema for input fields + auto field0 = field("f0", float32()); + auto schema = arrow::schema({field0}); + + // output fields + auto field_result = field("res", boolean()); + + // Build expression + auto node_f0 = TreeExprBuilder::MakeField(field0); + auto lt_expr = TreeExprBuilder::MakeExpression(node_f0, field_result); + + // Build a projector for the expressions. + std::shared_ptr projector; + Status status = Projector::Make(schema, {lt_expr}, pool_, &projector); + EXPECT_TRUE(status.IsExpressionValidationError()); + std::string expected_error = + "Return type of root node float does not match that of expression bool"; + EXPECT_TRUE(status.message().find(expected_error) != std::string::npos); +} TEST_F(TestProjector, TestNotSupportedDataType) { // schema for input fields @@ -96,24 +96,24 @@ TEST_F(TestProjector, TestNotSupportedDataType) { } TEST_F(TestProjector, TestIncorrectSchemaMissingField) { - // schema for input fields - auto field0 = field("f0", float32()); - auto field1 = field("f2", float32()); - auto schema = arrow::schema({field0, field0}); + // schema for input fields + auto field0 = field("f0", float32()); + auto field1 = field("f2", float32()); + auto schema = arrow::schema({field0, field0}); - // output fields - auto field_result = field("res", boolean()); + // output fields + auto field_result = field("res", boolean()); - // Build expression - auto lt_expr = TreeExprBuilder::MakeExpression("less_than", + // Build expression + auto lt_expr = TreeExprBuilder::MakeExpression("less_than", {field0, field1}, field_result); - // Build a projector for the expressions. - std::shared_ptr projector; - Status status = Projector::Make(schema, {lt_expr}, pool_, &projector); - EXPECT_TRUE(status.IsExpressionValidationError()); - std::string expected_error = "Field f2 not in schema"; - EXPECT_TRUE(status.message().find(expected_error) != std::string::npos); + // Build a projector for the expressions. + std::shared_ptr projector; + Status status = Projector::Make(schema, {lt_expr}, pool_, &projector); + EXPECT_TRUE(status.IsExpressionValidationError()); + std::string expected_error = "Field f2 not in schema"; + EXPECT_TRUE(status.message().find(expected_error) != std::string::npos); } TEST_F(TestProjector, TestIncorrectSchemaTypeNotMatching) { diff --git a/cpp/src/gandiva/integ/projector_test.cc b/cpp/src/gandiva/integ/projector_test.cc index e53b7d3e6ba17..c325975caab04 100644 --- a/cpp/src/gandiva/integ/projector_test.cc +++ b/cpp/src/gandiva/integ/projector_test.cc @@ -346,18 +346,21 @@ TEST_F(TestProjector, TestZeroCopy) { // allocate output buffers int64_t bitmap_sz = arrow::BitUtil::BytesForBits(num_records); - std::unique_ptr bitmap(new uint8_t[bitmap_sz]); + std::unique_ptr bitmap(new uint8_t[bitmap_sz]); std::shared_ptr bitmap_buf = std::make_shared(bitmap.get(), bitmap_sz); int64_t data_sz = sizeof (float) * num_records; - std::unique_ptr data(new uint8_t[data_sz]); + std::unique_ptr data(new uint8_t[data_sz]); std::shared_ptr data_buf = std::make_shared(data.get(), data_sz); - auto array_data = arrow::ArrayData::Make(float32(), num_records, {bitmap_buf, data_buf}); + auto array_data = arrow::ArrayData::Make( + float32(), + num_records, + {bitmap_buf, data_buf}); - // Evaluate expression + // Evaluate expression status = projector->Evaluate(*in_batch, {array_data}); EXPECT_TRUE(status.ok()); @@ -391,16 +394,19 @@ TEST_F(TestProjector, TestZeroCopyNegative) { // allocate output buffers int64_t bitmap_sz = arrow::BitUtil::BytesForBits(num_records); - std::unique_ptr bitmap(new uint8_t[bitmap_sz]); + std::unique_ptr bitmap(new uint8_t[bitmap_sz]); std::shared_ptr bitmap_buf = std::make_shared(bitmap.get(), bitmap_sz); int64_t data_sz = sizeof (float) * num_records; - std::unique_ptr data(new uint8_t[data_sz]); + std::unique_ptr data(new uint8_t[data_sz]); std::shared_ptr data_buf = std::make_shared(data.get(), data_sz); - auto array_data = arrow::ArrayData::Make(float32(), num_records, {bitmap_buf, data_buf}); + auto array_data = arrow::ArrayData::Make( + float32(), + num_records, + {bitmap_buf, data_buf}); // the batch can't be empty. auto bad_batch = arrow::RecordBatch::Make(schema, 0 /*num_records*/, {array0}); @@ -433,4 +439,5 @@ TEST_F(TestProjector, TestZeroCopyNegative) { status = projector->Evaluate(*in_batch, {bad_array_data3}); EXPECT_EQ(status.code(), StatusCode::Invalid); } + } // namespace gandiva diff --git a/cpp/src/gandiva/integ/test_util.h b/cpp/src/gandiva/integ/test_util.h index d507aa932a407..bf8848a47e9ef 100644 --- a/cpp/src/gandiva/integ/test_util.h +++ b/cpp/src/gandiva/integ/test_util.h @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include "arrow/test-util.h" @@ -34,6 +35,16 @@ static ArrayPtr MakeArrowArray(std::vector values, arrow::ArrayFromVector(validity, values, &out); return out; } + +template +static ArrayPtr MakeArrowTypeArray(const std::shared_ptr& type, + const std::vector &values, + const std::vector &validity) { + ArrayPtr out; + arrow::ArrayFromVector(type, validity, values, &out); + return out; +} + #define MakeArrowArrayBool MakeArrowArray #define MakeArrowArrayInt8 MakeArrowArray #define MakeArrowArrayInt16 MakeArrowArray diff --git a/cpp/src/gandiva/src/jni/native_builder.cc b/cpp/src/gandiva/src/jni/native_builder.cc index d6c49234ae852..c07490002dfa7 100644 --- a/cpp/src/gandiva/src/jni/native_builder.cc +++ b/cpp/src/gandiva/src/jni/native_builder.cc @@ -92,6 +92,46 @@ std::shared_ptr MapLookup(jlong module_id) { return result; } +DataTypePtr ProtoTypeToTime32(const types::ExtGandivaType& ext_type) { + switch (ext_type.timeunit()) { + case types::SEC: + return arrow::time32(arrow::TimeUnit::SECOND); + case types::MILLISEC: + return arrow::time32(arrow::TimeUnit::MILLI); + default: + std::cerr << "Unknown time unit: " << ext_type.timeunit() << " for time32\n"; + return nullptr; + } +} + +DataTypePtr ProtoTypeToTime64(const types::ExtGandivaType& ext_type) { + switch (ext_type.timeunit()) { + case types::MICROSEC: + return arrow::time64(arrow::TimeUnit::MICRO); + case types::NANOSEC: + return arrow::time64(arrow::TimeUnit::NANO); + default: + std::cerr << "Unknown time unit: " << ext_type.timeunit() << " for time64\n"; + return nullptr; + } +} + +DataTypePtr ProtoTypeToTimestamp(const types::ExtGandivaType& ext_type) { + switch (ext_type.timeunit()) { + case types::SEC: + return arrow::timestamp(arrow::TimeUnit::SECOND); + case types::MILLISEC: + return arrow::timestamp(arrow::TimeUnit::MILLI); + case types::MICROSEC: + return arrow::timestamp(arrow::TimeUnit::MICRO); + case types::NANOSEC: + return arrow::timestamp(arrow::TimeUnit::NANO); + default: + std::cerr << "Unknown time unit: " << ext_type.timeunit() << " for timestamp\n"; + return nullptr; + } +} + DataTypePtr ProtoTypeToDataType(const types::ExtGandivaType& ext_type) { switch (ext_type.type()) { case types::NONE: @@ -131,11 +171,14 @@ DataTypePtr ProtoTypeToDataType(const types::ExtGandivaType& ext_type) { case types::DECIMAL: // TODO: error handling return arrow::decimal(ext_type.precision(), ext_type.scale()); - - case types::FIXED_SIZE_BINARY: - case types::TIMESTAMP: case types::TIME32: + return ProtoTypeToTime32(ext_type); case types::TIME64: + return ProtoTypeToTime64(ext_type); + case types::TIMESTAMP: + return ProtoTypeToTimestamp(ext_type); + + case types::FIXED_SIZE_BINARY: case types::INTERVAL: case types::LIST: case types::STRUCT: diff --git a/cpp/src/gandiva/src/precompiled/time.cc b/cpp/src/gandiva/src/precompiled/time.cc index dafc8e3738dd9..dd0b9d3aed9b5 100644 --- a/cpp/src/gandiva/src/precompiled/time.cc +++ b/cpp/src/gandiva/src/precompiled/time.cc @@ -23,7 +23,7 @@ extern "C" { // Expand inner macro for all date types. #define DATE_TYPES(INNER) \ - INNER(date) \ + INNER(date64) \ INNER(time64) \ INNER(timestamp) diff --git a/cpp/src/gandiva/src/precompiled/types.h b/cpp/src/gandiva/src/precompiled/types.h index 418cda06dbfef..ba44b66240d76 100644 --- a/cpp/src/gandiva/src/precompiled/types.h +++ b/cpp/src/gandiva/src/precompiled/types.h @@ -29,7 +29,7 @@ using uint32 = uint32_t; using uint64 = uint64_t; using float32 = float; using float64 = double; -using date = int64_t; +using date64 = int64_t; using time64 = int64_t; using timestamp = int64_t;