Skip to content

Commit

Permalink
GDV-21: Support date/time functions and datatypes (apache#45)
Browse files Browse the repository at this point in the history
Support date/time types in Java
Add cpp/Java tests for date/time types
  • Loading branch information
vvellanki authored Jun 29, 2018
1 parent 45ad498 commit 7676bdd
Show file tree
Hide file tree
Showing 11 changed files with 283 additions and 53 deletions.
2 changes: 2 additions & 0 deletions cpp/src/gandiva/cmake/BuildUtils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ function(add_lint)
file(GLOB_RECURSE LINT_FILES
"${CMAKE_CURRENT_SOURCE_DIR}/src/*.h"
"${CMAKE_CURRENT_SOURCE_DIR}/src/*.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/integ/*.h"
"${CMAKE_CURRENT_SOURCE_DIR}/integ/*.cc"
)

find_program(CPPLINT_BIN NAMES cpplint cpplint.py HINTS ${BUILD_SUPPORT_DIR})
Expand Down
1 change: 1 addition & 0 deletions cpp/src/gandiva/integ/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ add_gandiva_integ_test(projector_test.cc)
add_gandiva_integ_test(if_expr_test.cc)
add_gandiva_integ_test(boolean_expr_test.cc)
add_gandiva_integ_test(literal_test.cc)
add_gandiva_integ_test(date_time_test.cc)
add_gandiva_integ_test(projector_build_validation_test.cc)
8 changes: 4 additions & 4 deletions cpp/src/gandiva/integ/boolean_expr_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ TEST_F(TestBooleanExpr, AndThree) {

int num_records = 8;
std::vector<bool> validity({true, true, true, true, true, true, true, true });
auto arraya = MakeArrowArrayInt32({2, 2, 2, 0, 2, 0, 0, 0}, validity) ;
auto arraya = MakeArrowArrayInt32({2, 2, 2, 0, 2, 0, 0, 0}, validity);
auto arrayb = MakeArrowArrayInt32({2, 2, 0, 2, 0, 2, 0, 0}, validity);
auto arrayc = MakeArrowArrayInt32({2, 0, 2, 2, 0, 0, 2, 0}, validity);
auto exp = MakeArrowArrayBool({true, false, false, false, false, false, false, false },
Expand Down Expand Up @@ -267,7 +267,7 @@ TEST_F(TestBooleanExpr, OrThree) {

int num_records = 8;
std::vector<bool> validity({true, true, true, true, true, true, true, true });
auto arraya = MakeArrowArrayInt32({2, 2, 2, 0, 2, 0, 0, 0}, validity) ;
auto arraya = MakeArrowArrayInt32({2, 2, 2, 0, 2, 0, 0, 0}, validity);
auto arrayb = MakeArrowArrayInt32({2, 2, 0, 2, 0, 2, 0, 0}, validity);
auto arrayc = MakeArrowArrayInt32({2, 0, 2, 2, 0, 0, 2, 0}, validity);
auto exp = MakeArrowArrayBool({true, true, true, true, true, true, true, false},
Expand Down Expand Up @@ -328,7 +328,7 @@ TEST_F(TestBooleanExpr, BooleanAndInsideIf) {

int num_records = 4;
std::vector<bool> validity({true, true, true, true });
auto arraya = MakeArrowArrayInt32({4, 4, 2, 1}, validity) ;
auto arraya = MakeArrowArrayInt32({4, 4, 2, 1}, validity);
auto arrayb = MakeArrowArrayInt32({5, 3, 3, 1}, validity);
auto exp = MakeArrowArrayBool({true, false, true, false},
validity);
Expand Down Expand Up @@ -380,7 +380,7 @@ TEST_F(TestBooleanExpr, IfInsideBooleanAnd) {

int num_records = 4;
std::vector<bool> validity({true, true, true, true });
auto arraya = MakeArrowArrayInt32({4, 3, 3, 2}, validity) ;
auto arraya = MakeArrowArrayInt32({4, 3, 3, 2}, validity);
auto arrayb = MakeArrowArrayInt32({3, 4, 2, 3}, validity);
auto exp = MakeArrowArrayBool({true, true, false, false},
validity);
Expand Down
166 changes: 166 additions & 0 deletions cpp/src/gandiva/integ/date_time_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// Copyright (C) 2017-2018 Dremio Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <time.h>
#include <math.h>
#include <gtest/gtest.h>
#include "arrow/memory_pool.h"
#include "integ/test_util.h"
#include "gandiva/projector.h"
#include "gandiva/tree_expr_builder.h"

namespace gandiva {

using arrow::int32;
using arrow::int64;
using arrow::float32;
using arrow::boolean;
using arrow::date64;

// Fixture shared by the tests in this file. It only provides the memory pool
// that the tests hand to Projector::Make().
class TestProjector : public ::testing::Test {
 public:
  // Called by gtest before each test body: grab arrow's default memory pool.
  void SetUp() {
    pool_ = arrow::default_memory_pool();
  }

 protected:
  // Assigned in SetUp(); this fixture never allocates or frees it itself.
  arrow::MemoryPool* pool_;
};

// Returns the number of milliseconds between `base_line` and the given
// calendar time, rounded to a whole second.
//
// The calendar fields are converted with mktime(), i.e. they are interpreted
// as local time with tm_isdst left at 0. `base_line` should therefore be
// produced the same way (mktime() on a zero-initialised struct tm) for the
// difference to be meaningful.
//
// yy : full calendar year (e.g. 2015); converted to "years since 1900".
// mm : month in the range 1..12; converted to the 0-based tm_mon.
// dd, hr, min, sec : copied into the matching struct tm fields unchanged.
int64_t MillisSince(time_t base_line,
                    int32_t yy, int32_t mm, int32_t dd,
                    int32_t hr, int32_t min, int32_t sec) {
  struct tm given_ts = {};
  given_ts.tm_year = (yy - 1900);
  given_ts.tm_mon = (mm - 1);
  given_ts.tm_mday = dd;
  given_ts.tm_hour = hr;
  given_ts.tm_min = min;
  given_ts.tm_sec = sec;

  // Round with llround() and widen to int64_t *before* scaling to millis.
  // lround() returns long, which is only 32 bits wide on some platforms, and
  // seconds * 1000 does not fit in 32 bits for present-day dates.
  const int64_t seconds =
      static_cast<int64_t>(llround(difftime(mktime(&given_ts), base_line)));
  return seconds * 1000;
}

// Builds a projector with four expressions -- extractYear/extractMonth on a
// date64 column and extractMonth/extractDay on a millisecond timestamp
// column -- evaluates it on a four-row batch and compares every output
// column with the expected int64 values.
//
// TODO: time32 inputs are not covered by this test.
TEST_F(TestProjector, TestTime) {
  // schema for input fields
  auto field0 = field("f0", date64());
  auto field2 = field("f2", timestamp(arrow::TimeUnit::MILLI));
  auto schema = arrow::schema({field0, field2});

  // output fields
  auto field_year = field("yy", int64());
  auto field_month = field("mm", int64());
  auto field_day = field("dd", int64());

  // extract year and month from date
  auto date2year_expr =
      TreeExprBuilder::MakeExpression("extractYear", {field0}, field_year);
  auto date2month_expr =
      TreeExprBuilder::MakeExpression("extractMonth", {field0}, field_month);

  // extract month and day from timestamp
  auto ts2month_expr =
      TreeExprBuilder::MakeExpression("extractMonth", {field2}, field_month);
  auto ts2day_expr =
      TreeExprBuilder::MakeExpression("extractDay", {field2}, field_day);

  // Build a projector for the expressions. The outputs are validated below
  // in the same order the expressions are listed here.
  std::shared_ptr<Projector> projector;
  Status status = Projector::Make(
      schema,
      {date2year_expr, date2month_expr, ts2month_expr, ts2day_expr},
      pool_,
      &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Base line for MillisSince(): 1970-01-01 00:00:00 run through mktime(),
  // the same conversion MillisSince() applies to the sample values.
  struct tm y1970 = {};
  y1970.tm_year = 70;
  y1970.tm_mon = 0;
  y1970.tm_mday = 1;
  y1970.tm_hour = 0;
  y1970.tm_min = 0;
  y1970.tm_sec = 0;
  time_t epoch = mktime(&y1970);

  // Create a row-batch with some sample data
  int num_records = 4;
  auto validity = { true, true, true, true };

  // date64 column
  std::vector<int64_t> field0_data = {
    MillisSince(epoch, 2000, 1, 1, 5, 0, 0),
    MillisSince(epoch, 1999, 12, 31, 5, 0, 0),
    MillisSince(epoch, 2015, 6, 30, 20, 0, 0),
    MillisSince(epoch, 2015, 7, 1, 20, 0, 0)
  };
  auto array0 = MakeArrowTypeArray<arrow::Date64Type, int64_t>(
      date64(),
      field0_data,
      validity);

  // timestamp (millis) column
  std::vector<int64_t> field2_data = {
    MillisSince(epoch, 1999, 12, 31, 5, 0, 0),
    MillisSince(epoch, 2000, 1, 2, 5, 0, 0),
    MillisSince(epoch, 2015, 7, 1, 1, 0, 0),
    MillisSince(epoch, 2015, 6, 29, 23, 0, 0)
  };
  auto array2 = MakeArrowTypeArray<arrow::TimestampType, int64_t>(
      arrow::timestamp(arrow::TimeUnit::MILLI),
      field2_data,
      validity);

  // expected output
  // date 2 year and date 2 month
  auto exp_yy_from_date = MakeArrowArrayInt64({ 2000, 1999, 2015, 2015 }, validity);
  auto exp_mm_from_date = MakeArrowArrayInt64({ 1, 12, 6, 7 }, validity);

  // ts 2 month and ts 2 day
  auto exp_mm_from_ts = MakeArrowArrayInt64({12, 1, 7, 6}, validity);
  auto exp_dd_from_ts = MakeArrowArrayInt64({31, 2, 1, 29}, validity);

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array2});

  // Evaluate expression. This must be a fatal assertion: if evaluation
  // fails, `outputs` may be empty and the outputs.at() calls below would
  // throw instead of reporting a clean test failure.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date, outputs.at(0));
  EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date, outputs.at(1));
  EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_ts, outputs.at(2));
  EXPECT_ARROW_ARRAY_EQUALS(exp_dd_from_ts, outputs.at(3));
}

} // namespace gandiva
8 changes: 4 additions & 4 deletions cpp/src/gandiva/integ/literal_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,12 @@ TEST_F(TestLiteral, TestSimpleArithmetic) {
auto expr_c = TreeExprBuilder::MakeExpression(func_c, res_c);

auto node_d = TreeExprBuilder::MakeField(field_d);
auto literal_d = TreeExprBuilder::MakeLiteral((float)1);
auto literal_d = TreeExprBuilder::MakeLiteral(static_cast<float>(1));
auto func_d = TreeExprBuilder::MakeFunction("add", {node_d, literal_d}, float32());
auto expr_d = TreeExprBuilder::MakeExpression(func_d, res_d);

auto node_e = TreeExprBuilder::MakeField(field_e);
auto literal_e = TreeExprBuilder::MakeLiteral((double)1);
auto literal_e = TreeExprBuilder::MakeLiteral(static_cast<double>(1));
auto func_e = TreeExprBuilder::MakeFunction("add", {node_e, literal_e}, float64());
auto expr_e = TreeExprBuilder::MakeExpression(func_e, res_e);

Expand All @@ -93,14 +93,14 @@ TEST_F(TestLiteral, TestSimpleArithmetic) {

// Create a row-batch with some sample data
int num_records = 4;
auto array_a = MakeArrowArrayBool({true, true, false, true},{true, true, true, false});
auto array_a = MakeArrowArrayBool({true, true, false, true}, {true, true, true, false});
auto array_b = MakeArrowArrayInt32({5, 15, -15, 17}, {true, true, true, false});
auto array_c = MakeArrowArrayInt64({5, 15, -15, 17}, {true, true, true, false});
auto array_d = MakeArrowArrayFloat32({5.2, 15, -15.6, 17}, {true, true, true, false});
auto array_e = MakeArrowArrayFloat64({5.6, 15, -15.9, 17}, {true, true, true, false});

// expected output
auto exp_a = MakeArrowArrayBool({true, true, false, false},{true, true, true, false});
auto exp_a = MakeArrowArrayBool({true, true, false, false}, {true, true, true, false});
auto exp_b = MakeArrowArrayInt32({6, 16, -14, 0}, {true, true, true, false});
auto exp_c = MakeArrowArrayInt64({6, 16, -14, 0}, {true, true, true, false});
auto exp_d = MakeArrowArrayFloat32({6.2, 16, -14.6, 0}, {true, true, true, false});
Expand Down
66 changes: 33 additions & 33 deletions cpp/src/gandiva/integ/projector_build_validation_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,25 +55,25 @@ TEST_F(TestProjector, TestNonExistentFunction) {
}

TEST_F(TestProjector, TestNotMatchingDataType) {
// schema for input fields
auto field0 = field("f0", float32());
auto schema = arrow::schema({field0});

// output fields
auto field_result = field("res", boolean());

// Build expression
auto node_f0 = TreeExprBuilder::MakeField(field0);
auto lt_expr = TreeExprBuilder::MakeExpression(node_f0, field_result);

// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
Status status = Projector::Make(schema, {lt_expr}, pool_, &projector);
EXPECT_TRUE(status.IsExpressionValidationError());
std::string expected_error =
"Return type of root node float does not match that of expression bool";
EXPECT_TRUE(status.message().find(expected_error) != std::string::npos);
}
// schema for input fields
auto field0 = field("f0", float32());
auto schema = arrow::schema({field0});

// output fields
auto field_result = field("res", boolean());

// Build expression
auto node_f0 = TreeExprBuilder::MakeField(field0);
auto lt_expr = TreeExprBuilder::MakeExpression(node_f0, field_result);

// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
Status status = Projector::Make(schema, {lt_expr}, pool_, &projector);
EXPECT_TRUE(status.IsExpressionValidationError());
std::string expected_error =
"Return type of root node float does not match that of expression bool";
EXPECT_TRUE(status.message().find(expected_error) != std::string::npos);
}

TEST_F(TestProjector, TestNotSupportedDataType) {
// schema for input fields
Expand All @@ -96,24 +96,24 @@ TEST_F(TestProjector, TestNotSupportedDataType) {
}

TEST_F(TestProjector, TestIncorrectSchemaMissingField) {
// schema for input fields
auto field0 = field("f0", float32());
auto field1 = field("f2", float32());
auto schema = arrow::schema({field0, field0});
// schema for input fields
auto field0 = field("f0", float32());
auto field1 = field("f2", float32());
auto schema = arrow::schema({field0, field0});

// output fields
auto field_result = field("res", boolean());
// output fields
auto field_result = field("res", boolean());

// Build expression
auto lt_expr = TreeExprBuilder::MakeExpression("less_than",
// Build expression
auto lt_expr = TreeExprBuilder::MakeExpression("less_than",
{field0, field1}, field_result);

// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
Status status = Projector::Make(schema, {lt_expr}, pool_, &projector);
EXPECT_TRUE(status.IsExpressionValidationError());
std::string expected_error = "Field f2 not in schema";
EXPECT_TRUE(status.message().find(expected_error) != std::string::npos);
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
Status status = Projector::Make(schema, {lt_expr}, pool_, &projector);
EXPECT_TRUE(status.IsExpressionValidationError());
std::string expected_error = "Field f2 not in schema";
EXPECT_TRUE(status.message().find(expected_error) != std::string::npos);
}

TEST_F(TestProjector, TestIncorrectSchemaTypeNotMatching) {
Expand Down
21 changes: 14 additions & 7 deletions cpp/src/gandiva/integ/projector_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -346,18 +346,21 @@ TEST_F(TestProjector, TestZeroCopy) {

// allocate output buffers
int64_t bitmap_sz = arrow::BitUtil::BytesForBits(num_records);
std::unique_ptr<uint8_t []> bitmap(new uint8_t[bitmap_sz]);
std::unique_ptr<uint8_t[]> bitmap(new uint8_t[bitmap_sz]);
std::shared_ptr<arrow::MutableBuffer> bitmap_buf =
std::make_shared<arrow::MutableBuffer>(bitmap.get(), bitmap_sz);

int64_t data_sz = sizeof (float) * num_records;
std::unique_ptr<uint8_t []> data(new uint8_t[data_sz]);
std::unique_ptr<uint8_t[]> data(new uint8_t[data_sz]);
std::shared_ptr<arrow::MutableBuffer> data_buf =
std::make_shared<arrow::MutableBuffer>(data.get(), data_sz);

auto array_data = arrow::ArrayData::Make(float32(), num_records, {bitmap_buf, data_buf});
auto array_data = arrow::ArrayData::Make(
float32(),
num_records,
{bitmap_buf, data_buf});

// Evaluate expression
// Evaluate expression
status = projector->Evaluate(*in_batch, {array_data});
EXPECT_TRUE(status.ok());

Expand Down Expand Up @@ -391,16 +394,19 @@ TEST_F(TestProjector, TestZeroCopyNegative) {

// allocate output buffers
int64_t bitmap_sz = arrow::BitUtil::BytesForBits(num_records);
std::unique_ptr<uint8_t []> bitmap(new uint8_t[bitmap_sz]);
std::unique_ptr<uint8_t[]> bitmap(new uint8_t[bitmap_sz]);
std::shared_ptr<arrow::MutableBuffer> bitmap_buf =
std::make_shared<arrow::MutableBuffer>(bitmap.get(), bitmap_sz);

int64_t data_sz = sizeof (float) * num_records;
std::unique_ptr<uint8_t []> data(new uint8_t[data_sz]);
std::unique_ptr<uint8_t[]> data(new uint8_t[data_sz]);
std::shared_ptr<arrow::MutableBuffer> data_buf =
std::make_shared<arrow::MutableBuffer>(data.get(), data_sz);

auto array_data = arrow::ArrayData::Make(float32(), num_records, {bitmap_buf, data_buf});
auto array_data = arrow::ArrayData::Make(
float32(),
num_records,
{bitmap_buf, data_buf});

// the batch can't be empty.
auto bad_batch = arrow::RecordBatch::Make(schema, 0 /*num_records*/, {array0});
Expand Down Expand Up @@ -433,4 +439,5 @@ TEST_F(TestProjector, TestZeroCopyNegative) {
status = projector->Evaluate(*in_batch, {bad_array_data3});
EXPECT_EQ(status.code(), StatusCode::Invalid);
}

} // namespace gandiva
Loading

0 comments on commit 7676bdd

Please sign in to comment.