From 6bafc5fa94e167bc5242840fab26ac71dc6262c9 Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Mon, 28 Mar 2016 18:37:30 +0200 Subject: [PATCH 1/2] ARROW-87: [C++] Add all four possible ways to encode Decimals in Parquet to schema conversion --- cpp/src/arrow/parquet/parquet-schema-test.cc | 36 ++++++++++++++++++++ cpp/src/arrow/parquet/schema.cc | 9 +++++ 2 files changed, 45 insertions(+) diff --git a/cpp/src/arrow/parquet/parquet-schema-test.cc b/cpp/src/arrow/parquet/parquet-schema-test.cc index 02a8caf03c9bd..67b43aa2437f9 100644 --- a/cpp/src/arrow/parquet/parquet-schema-test.cc +++ b/cpp/src/arrow/parquet/parquet-schema-test.cc @@ -22,6 +22,7 @@ #include "arrow/test-util.h" #include "arrow/type.h" +#include "arrow/types/decimal.h" #include "arrow/util/status.h" #include "arrow/parquet/schema.h" @@ -46,6 +47,7 @@ const auto DOUBLE = std::make_shared(); const auto UTF8 = std::make_shared(); const auto BINARY = std::make_shared( std::make_shared("", UINT8)); +const auto DECIMAL_8_4 = std::make_shared(8, 4); class TestConvertParquetSchema : public ::testing::Test { public: @@ -119,6 +121,40 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) { CheckFlatSchema(arrow_schema); } +TEST_F(TestConvertParquetSchema, ParquetFlatDecimals) { + std::vector parquet_fields; + std::vector> arrow_fields; + + parquet_fields.push_back( + PrimitiveNode::Make("flba-decimal", Repetition::OPTIONAL, + parquet_cpp::Type::FIXED_LEN_BYTE_ARRAY, + parquet_cpp::LogicalType::DECIMAL, 4, 8, 4)); + arrow_fields.push_back(std::make_shared("flba-decimal", DECIMAL_8_4)); + + parquet_fields.push_back( + PrimitiveNode::Make("binary-decimal", Repetition::OPTIONAL, + parquet_cpp::Type::BYTE_ARRAY, + parquet_cpp::LogicalType::DECIMAL, -1, 8, 4)); + arrow_fields.push_back(std::make_shared("binary-decimal", DECIMAL_8_4)); + + parquet_fields.push_back( + PrimitiveNode::Make("int32-decimal", Repetition::OPTIONAL, + parquet_cpp::Type::INT32, + parquet_cpp::LogicalType::DECIMAL, -1, 8, 4)); + arrow_fields.push_back(std::make_shared("int32-decimal", DECIMAL_8_4)); + + parquet_fields.push_back( + PrimitiveNode::Make("int64-decimal", Repetition::OPTIONAL, + parquet_cpp::Type::INT64, + parquet_cpp::LogicalType::DECIMAL, -1, 8, 4)); + arrow_fields.push_back(std::make_shared("int64-decimal", DECIMAL_8_4)); + + auto arrow_schema = std::make_shared(arrow_fields); + ASSERT_OK(ConvertSchema(parquet_fields)); + + CheckFlatSchema(arrow_schema); +} + TEST_F(TestConvertParquetSchema, UnsupportedThings) { std::vector unsupported_nodes; diff --git a/cpp/src/arrow/parquet/schema.cc b/cpp/src/arrow/parquet/schema.cc index d8eb2addb0ada..14f4f5be53ce9 100644 --- a/cpp/src/arrow/parquet/schema.cc +++ b/cpp/src/arrow/parquet/schema.cc @@ -57,6 +57,9 @@ static Status FromByteArray(const PrimitiveNode* node, TypePtr* out) { case LogicalType::UTF8: *out = UTF8; break; + case LogicalType::DECIMAL: + *out = MakeDecimalType(node); + break; default: // BINARY *out = BINARY; @@ -86,6 +89,9 @@ static Status FromInt32(const PrimitiveNode* node, TypePtr* out) { case LogicalType::NONE: *out = INT32; break; + case LogicalType::DECIMAL: + *out = MakeDecimalType(node); + break; default: return Status::NotImplemented("Unhandled logical type for int32"); break; @@ -98,6 +104,9 @@ static Status FromInt64(const PrimitiveNode* node, TypePtr* out) { case LogicalType::NONE: *out = INT64; break; + case LogicalType::DECIMAL: + *out = MakeDecimalType(node); + break; default: return Status::NotImplemented("Unhandled logical type for int64"); break; From 05ca3beee56404b8f43b8ba3b3357c5c30e412be Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Mon, 28 Mar 2016 19:42:56 +0200 Subject: [PATCH 2/2] Use parquet:: namespace instead of parquet_cpp --- cpp/src/arrow/parquet/parquet-schema-test.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/parquet/parquet-schema-test.cc b/cpp/src/arrow/parquet/parquet-schema-test.cc index 67b43aa2437f9..a289ddbfde6eb 100644 --- a/cpp/src/arrow/parquet/parquet-schema-test.cc +++ b/cpp/src/arrow/parquet/parquet-schema-test.cc @@ -127,26 +127,26 @@ TEST_F(TestConvertParquetSchema, ParquetFlatDecimals) { parquet_fields.push_back( PrimitiveNode::Make("flba-decimal", Repetition::OPTIONAL, - parquet_cpp::Type::FIXED_LEN_BYTE_ARRAY, - parquet_cpp::LogicalType::DECIMAL, 4, 8, 4)); + ParquetType::FIXED_LEN_BYTE_ARRAY, + LogicalType::DECIMAL, 4, 8, 4)); arrow_fields.push_back(std::make_shared("flba-decimal", DECIMAL_8_4)); parquet_fields.push_back( PrimitiveNode::Make("binary-decimal", Repetition::OPTIONAL, - parquet_cpp::Type::BYTE_ARRAY, - parquet_cpp::LogicalType::DECIMAL, -1, 8, 4)); + ParquetType::BYTE_ARRAY, + LogicalType::DECIMAL, -1, 8, 4)); arrow_fields.push_back(std::make_shared("binary-decimal", DECIMAL_8_4)); parquet_fields.push_back( PrimitiveNode::Make("int32-decimal", Repetition::OPTIONAL, - parquet_cpp::Type::INT32, - parquet_cpp::LogicalType::DECIMAL, -1, 8, 4)); + ParquetType::INT32, + LogicalType::DECIMAL, -1, 8, 4)); arrow_fields.push_back(std::make_shared("int32-decimal", DECIMAL_8_4)); parquet_fields.push_back( PrimitiveNode::Make("int64-decimal", Repetition::OPTIONAL, - parquet_cpp::Type::INT64, - parquet_cpp::LogicalType::DECIMAL, -1, 8, 4)); + ParquetType::INT64, + LogicalType::DECIMAL, -1, 8, 4)); arrow_fields.push_back(std::make_shared("int64-decimal", DECIMAL_8_4)); auto arrow_schema = std::make_shared(arrow_fields);