GDV-46: [C++] Add unit tests for bitmap/time fns

- Added definitions for other integer types (int8, int16).
- Added definitions for unsigned types.
- Added a test for arithmetic ops on all int types.
- The functions should be inlined in the pre-compiled library, but not in the unit tests. Added a compiler flag to control this.
praveenbingo · Sep 10, 2018 · 87468de · 87468de
1 parent 754b7b7
commit 87468de
Show file tree

Hide file tree

Showing 14 changed files with 279 additions and 22 deletions.
diff --git a/cpp/src/gandiva/codegen/function_registry.cc b/cpp/src/gandiva/codegen/function_registry.cc
@@ -18,8 +18,14 @@
 namespace gandiva {
 
 using std::vector;
+using arrow::int8;
+using arrow::int16;
 using arrow::int32;
 using arrow::int64;
+using arrow::uint8;
+using arrow::uint16;
+using arrow::uint32;
+using arrow::uint64;
 using arrow::float32;
 using arrow::float64;
 using arrow::boolean;
@@ -107,8 +113,14 @@ using arrow::date64;
 
 // Iterate the inner macro over all numeric types
 #define NUMERIC_TYPES(INNER, NAME) \
-  INNER(NAME, int32), \
-  INNER(NAME, int64), \
+  INNER(NAME, int8),    \
+  INNER(NAME, int16),   \
+  INNER(NAME, int32),   \
+  INNER(NAME, int64),   \
+  INNER(NAME, uint8),   \
+  INNER(NAME, uint16),  \
+  INNER(NAME, uint32),  \
+  INNER(NAME, uint64),  \
   INNER(NAME, float32), \
   INNER(NAME, float64)
 
@@ -121,7 +133,7 @@ using arrow::date64;
 #define DATE_TYPES(INNER, NAME) \
   INNER(NAME, date64), \
   INNER(NAME, time64), \
-  INNER(NAME, timestamp64)
+  INNER(NAME, timestamp)
 
 // list of registered native functions.
 NativeFunction FunctionRegistry::pc_registry_[] = {

diff --git a/cpp/src/gandiva/codegen/function_registry.h b/cpp/src/gandiva/codegen/function_registry.h
@@ -44,7 +44,7 @@ class FunctionRegistry {
     return arrow::time64(arrow::TimeUnit::MICRO);
   }
 
-  static DataTypePtr timestamp64() {
+  static DataTypePtr timestamp() {
     return arrow::timestamp(arrow::TimeUnit::MILLI);
   }
 

diff --git a/cpp/src/gandiva/codegen/llvm_types.cc b/cpp/src/gandiva/codegen/llvm_types.cc
@@ -16,13 +16,21 @@
 
 namespace gandiva {
 
+// LLVM doesn't distinguish between signed and unsigned types.
+
 LLVMTypes::LLVMTypes(llvm::LLVMContext &context)
     : context_(context) {
 
   arrow_id_to_llvm_type_map_ = {
       {arrow::Type::type::BOOL, i1_type()},
+      {arrow::Type::type::INT8, i8_type()},
+      {arrow::Type::type::INT16, i16_type()},
       {arrow::Type::type::INT32, i32_type()},
       {arrow::Type::type::INT64, i64_type()},
+      {arrow::Type::type::UINT8, i8_type()},
+      {arrow::Type::type::UINT16, i16_type()},
+      {arrow::Type::type::UINT32, i32_type()},
+      {arrow::Type::type::UINT64, i64_type()},
       {arrow::Type::type::FLOAT, float_type()},
       {arrow::Type::type::DOUBLE, double_type()},
       {arrow::Type::type::DATE64, i64_type()},

diff --git a/cpp/src/gandiva/codegen/llvm_types.h b/cpp/src/gandiva/codegen/llvm_types.h
@@ -35,6 +35,10 @@ class LLVMTypes {
     return llvm::Type::getInt8Ty(context_);
   }
 
+  llvm::Type *i16_type() {  // 16-bit integer LLVM type obtained from context_
+    return llvm::Type::getInt16Ty(context_);
+  }
+
   llvm::Type *i32_type() {
     return llvm::Type::getInt32Ty(context_);
   }

diff --git a/cpp/src/gandiva/integ/projector_test.cc b/cpp/src/gandiva/integ/projector_test.cc
@@ -72,6 +72,98 @@ TEST_F(TestProjector, TestIntSumSub) {
   EXPECT_ARROW_ARRAY_EQUALS(exp_sub, outputs.at(1));
 }
 
+// Builds a projector computing add/subtract/multiply/divide/equal/less_than over two
+// columns of Arrow type TYPE (C representation C_TYPE), evaluates it on a 4-row batch
+// and compares every output with the same operation performed natively in C++.
+template<typename TYPE, typename C_TYPE>
+static void TestArithmeticOpsForType(arrow::MemoryPool *pool) {
+  auto atype = arrow::TypeTraits<TYPE>::type_singleton();
+
+  // schema for input fields
+  auto field0 = field("f0", atype);
+  auto field1 = field("f1", atype);
+  auto schema = arrow::schema({field0, field1});
+
+  // output fields
+  auto field_sum = field("add", atype);
+  auto field_sub = field("subtract", atype);
+  auto field_mul = field("multiply", atype);
+  auto field_div = field("divide", atype);
+  auto field_eq = field("equal", arrow::boolean());
+  auto field_lt = field("less_than", arrow::boolean());
+
+  // Build expression
+  auto sum_expr = TreeExprBuilder::MakeExpression("add", {field0, field1}, field_sum);
+  auto sub_expr = TreeExprBuilder::MakeExpression("subtract", {field0, field1},
+                                                  field_sub);
+  auto mul_expr = TreeExprBuilder::MakeExpression("multiply", {field0, field1},
+                                                  field_mul);
+  auto div_expr = TreeExprBuilder::MakeExpression("divide", {field0, field1}, field_div);
+  auto eq_expr = TreeExprBuilder::MakeExpression("equal", {field0, field1}, field_eq);
+  auto lt_expr = TreeExprBuilder::MakeExpression("less_than", {field0, field1}, field_lt);
+
+  std::shared_ptr<Projector> projector;
+  Status status =
+      Projector::Make(schema, {sum_expr, sub_expr, mul_expr, div_expr, eq_expr, lt_expr},
+                      pool, &projector);
+  ASSERT_TRUE(status.ok());  // fatal: projector may be unset when Make() fails
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+  std::vector<C_TYPE> input0 = {1, 2, 53, 84};
+  std::vector<C_TYPE> input1 = {10, 15, 23, 84};
+  std::vector<bool> validity = {true, true, true, true};
+  auto array0 = MakeArrowArray<TYPE, C_TYPE>(input0, validity);
+  auto array1 = MakeArrowArray<TYPE, C_TYPE>(input1, validity);
+
+  // expected output: 8/16-bit operands promote to int, so cast back to C_TYPE
+  std::vector<C_TYPE> sum;
+  std::vector<C_TYPE> sub;
+  std::vector<C_TYPE> mul;
+  std::vector<C_TYPE> div;
+  std::vector<bool> eq;
+  std::vector<bool> lt;
+  for (int i = 0; i < num_records; i++) {
+    sum.push_back(static_cast<C_TYPE>(input0[i] + input1[i]));
+    sub.push_back(static_cast<C_TYPE>(input0[i] - input1[i]));
+    mul.push_back(static_cast<C_TYPE>(input0[i] * input1[i]));
+    div.push_back(static_cast<C_TYPE>(input0[i] / input1[i]));
+    eq.push_back(input0[i] == input1[i]);
+    lt.push_back(input0[i] < input1[i]);
+  }
+  auto exp_sum = MakeArrowArray<TYPE, C_TYPE>(sum, validity);
+  auto exp_sub = MakeArrowArray<TYPE, C_TYPE>(sub, validity);
+  auto exp_mul = MakeArrowArray<TYPE, C_TYPE>(mul, validity);
+  auto exp_div = MakeArrowArray<TYPE, C_TYPE>(div, validity);
+  auto exp_eq = MakeArrowArray<arrow::BooleanType, bool>(eq, validity);
+  auto exp_lt = MakeArrowArray<arrow::BooleanType, bool>(lt, validity);
+
+  // prepare input record batch and evaluate the expressions on it
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, &outputs);
+  ASSERT_TRUE(status.ok());  // fatal: outputs may be empty if Evaluate() failed
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_sum, outputs.at(0));
+  EXPECT_ARROW_ARRAY_EQUALS(exp_sub, outputs.at(1));
+  EXPECT_ARROW_ARRAY_EQUALS(exp_mul, outputs.at(2));
+  EXPECT_ARROW_ARRAY_EQUALS(exp_div, outputs.at(3));
+  EXPECT_ARROW_ARRAY_EQUALS(exp_eq, outputs.at(4));
+  EXPECT_ARROW_ARRAY_EQUALS(exp_lt, outputs.at(5));
+}
+
+TEST_F(TestProjector, TestAllIntTypes) {  // one helper run per integer type
+  TestArithmeticOpsForType<arrow::UInt8Type, uint8_t>(pool_);  // unsigned widths
+  TestArithmeticOpsForType<arrow::UInt16Type, uint16_t>(pool_);
+  TestArithmeticOpsForType<arrow::UInt32Type, uint32_t>(pool_);
+  TestArithmeticOpsForType<arrow::UInt64Type, uint64_t>(pool_);
+  TestArithmeticOpsForType<arrow::Int8Type, int8_t>(pool_);  // signed widths
+  TestArithmeticOpsForType<arrow::Int16Type, int16_t>(pool_);
+  TestArithmeticOpsForType<arrow::Int32Type, int32_t>(pool_);
+  TestArithmeticOpsForType<arrow::Int64Type, int64_t>(pool_);
+}
+
 TEST_F(TestProjector, TestFloatLessThan) {
   // schema for input fields
   auto field0 = field("f0", float32());

diff --git a/cpp/src/gandiva/integ/test_util.h b/cpp/src/gandiva/integ/test_util.h
@@ -35,8 +35,14 @@ static ArrayPtr MakeArrowArray(std::vector<C_TYPE> values,
   return out;
 }
 #define MakeArrowArrayBool MakeArrowArray<arrow::BooleanType, bool>
+#define MakeArrowArrayInt8 MakeArrowArray<arrow::Int8Type, int8_t>
+#define MakeArrowArrayInt16 MakeArrowArray<arrow::Int16Type, int16_t>
 #define MakeArrowArrayInt32 MakeArrowArray<arrow::Int32Type, int32_t>
 #define MakeArrowArrayInt64 MakeArrowArray<arrow::Int64Type, int64_t>
+#define MakeArrowArrayUint8 MakeArrowArray<arrow::UInt8Type, uint8_t>
+#define MakeArrowArrayUint16 MakeArrowArray<arrow::UInt16Type, uint16_t>
+#define MakeArrowArrayUint32 MakeArrowArray<arrow::UInt32Type, uint32_t>
+#define MakeArrowArrayUint64 MakeArrowArray<arrow::UInt64Type, uint64_t>
 #define MakeArrowArrayFloat32 MakeArrowArray<arrow::FloatType, float>
 #define MakeArrowArrayFloat64 MakeArrowArray<arrow::DoubleType, double>
 

diff --git a/cpp/src/gandiva/precompiled/CMakeLists.txt b/cpp/src/gandiva/precompiled/CMakeLists.txt
@@ -48,4 +48,6 @@ add_custom_command(
 add_custom_target(precompiled ALL DEPENDS ${GANDIVA_BC_OUTPUT_PATH})
 
 # testing
+add_precompiled_unit_test(bitmap_test.cc bitmap.cc)
+add_precompiled_unit_test(time_test.cc time.cc)
 add_precompiled_unit_test(sample_test.cc sample.cc)
diff --git a/cpp/src/gandiva/precompiled/arithmetic_ops.cc b/cpp/src/gandiva/precompiled/arithmetic_ops.cc
@@ -18,24 +18,30 @@ extern "C" {
 
 // Expand inner macro for all numeric types.
 #define NUMERIC_TYPES(INNER, NAME, OP) \
-  INNER(NAME, int32, OP) \
-  INNER(NAME, int64, OP) \
-  INNER(NAME, float32, OP) \
+  INNER(NAME, int8, OP)   \
+  INNER(NAME, int16, OP)  \
+  INNER(NAME, int32, OP)  \
+  INNER(NAME, int64, OP)  \
+  INNER(NAME, uint8, OP)  \
+  INNER(NAME, uint16, OP) \
+  INNER(NAME, uint32, OP) \
+  INNER(NAME, uint64, OP) \
+  INNER(NAME, float32, OP)\
   INNER(NAME, float64, OP)
 
 #define NUMERIC_AND_BOOL_TYPES(INNER, NAME, OP) \
   NUMERIC_TYPES(INNER, NAME, OP) \
   INNER(NAME, boolean, OP)
 
 #define BINARY_GENERIC_OP(NAME, IN_TYPE1, IN_TYPE2, OUT_TYPE, OP) \
-  __attribute__((always_inline)) \
+  FORCE_INLINE \
   OUT_TYPE NAME##_##IN_TYPE1##_##IN_TYPE2(IN_TYPE1 left, IN_TYPE2 right) { \
     return left OP right; \
   }
 
 // Symmetric binary fns : left, right params and return type are same.
 #define BINARY_SYMMETRIC(NAME, TYPE, OP) \
-  __attribute__((always_inline)) \
+  FORCE_INLINE \
   TYPE NAME##_##TYPE##_##TYPE(TYPE left, TYPE right) { \
     return left OP right; \
   }
@@ -51,7 +57,7 @@ BINARY_GENERIC_OP(mod, int64, int64, int64, %)
 
 // Relational binary fns : left, right params are same, return is bool.
 #define BINARY_RELATIONAL(NAME, TYPE, OP) \
-  __attribute__((always_inline)) \
+  FORCE_INLINE \
   bool NAME##_##TYPE##_##TYPE(TYPE left, TYPE right) { \
     return left OP right; \
   }
@@ -65,7 +71,7 @@ NUMERIC_TYPES(BINARY_RELATIONAL, greater_than_or_equal_to, >=)
 
 // cast fns : takes one param type, returns another type.
 #define CAST_UNARY(NAME, IN_TYPE, OUT_TYPE) \
-  __attribute__((always_inline)) \
+  FORCE_INLINE \
   OUT_TYPE NAME##_##IN_TYPE(IN_TYPE in) { \
     return (OUT_TYPE)in; \
   }
@@ -79,7 +85,7 @@ CAST_UNARY(castFLOAT8, float32, float64)
 
 // simple nullable functions, result value = fn(input validity)
 #define VALIDITY_OP(NAME, TYPE, OP) \
-  __attribute__((always_inline)) \
+  FORCE_INLINE \
   bool NAME##_##TYPE(TYPE in, boolean is_valid) { \
     return OP is_valid; \
   }

diff --git a/cpp/src/gandiva/precompiled/bitmap.cc b/cpp/src/gandiva/precompiled/bitmap.cc
@@ -24,14 +24,14 @@ extern "C" {
 #define POS_TO_BYTE_INDEX(p) (p / 8)
 #define POS_TO_BIT_INDEX(p) (p % 8)
 
-__attribute__((always_inline))
+FORCE_INLINE
 bool bitMapGetBit(const unsigned char *bmap, int position) {
   int byteIdx = POS_TO_BYTE_INDEX(position);
   int bitIdx = POS_TO_BIT_INDEX(position);
   return ((bmap[byteIdx] & (1 << bitIdx)) > 0);
 }
 
-__attribute__((always_inline))
+FORCE_INLINE
 void bitMapSetBit(unsigned char *bmap, int position, bool value) {
   int byteIdx = POS_TO_BYTE_INDEX(position);
   int bitIdx = POS_TO_BIT_INDEX(position);
@@ -43,7 +43,7 @@ void bitMapSetBit(unsigned char *bmap, int position, bool value) {
 }
 
 // Clear the bit if value = false. Does nothing if value = true.
-__attribute__((always_inline))
+FORCE_INLINE
 void bitMapClearBitIfFalse(unsigned char *bmap, int position, bool value) {
   if (!value) {
     int byteIdx = POS_TO_BYTE_INDEX(position);

diff --git a/cpp/src/gandiva/precompiled/bitmap_test.cc b/cpp/src/gandiva/precompiled/bitmap_test.cc
@@ -0,0 +1,60 @@
+// Copyright (C) 2017-2018 Dremio Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include "precompiled/types.h"
+
+namespace gandiva {
+
+// Checks that a bit set or cleared via bitMapSetBit is reported by bitMapGetBit.
+TEST(TestBitMap, TestSimple) {
+  static const int kNumBytes = 16;
+  uint8_t bit_map[kNumBytes] = {};  // zero-initialised: every bit starts clear
+
+  EXPECT_FALSE(bitMapGetBit(bit_map, 100));
+
+  // set 100th bit and verify
+  bitMapSetBit(bit_map, 100, true);
+  EXPECT_TRUE(bitMapGetBit(bit_map, 100));
+
+  // clear 100th bit and verify
+  bitMapSetBit(bit_map, 100, false);
+  EXPECT_FALSE(bitMapGetBit(bit_map, 100));
+}
+
+// Checks that bitMapClearBitIfFalse clears a bit only when called with value == false.
+TEST(TestBitMap, TestClearIfFalse) {
+  static const int kNumBytes = 32;
+  uint8_t bit_map[kNumBytes] = {};  // zero-initialised: every bit starts clear
+
+  bitMapSetBit(bit_map, 24, true);
+
+  // bit should remain unchanged.
+  bitMapClearBitIfFalse(bit_map, 24, true);
+  EXPECT_TRUE(bitMapGetBit(bit_map, 24));
+
+  // bit should be cleared.
+  bitMapClearBitIfFalse(bit_map, 24, false);
+  EXPECT_FALSE(bitMapGetBit(bit_map, 24));
+
+  // this function should have no impact if the bit is already clear.
+  bitMapClearBitIfFalse(bit_map, 24, true);
+  EXPECT_FALSE(bitMapGetBit(bit_map, 24));
+
+  bitMapClearBitIfFalse(bit_map, 24, false);
+  EXPECT_FALSE(bitMapGetBit(bit_map, 24));
+}
+
+
+} // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/sample.cc b/cpp/src/gandiva/precompiled/sample.cc
@@ -19,6 +19,7 @@ extern "C" {
 // Dummy function to test NULL_INTERNAL (most valid ones need varchar).
 
 // If input is valid and a multiple of 2, return half the value. else, null.
+FORCE_INLINE
 int half_or_null_int32(int32 val, bool in_valid, bool *out_valid) {
   if (in_valid && (val % 2 == 0)) {
     // output is valid.