diff --git a/c++/src/Statistics.cc b/c++/src/Statistics.cc index 8ed29d0e7c..e8aeb51830 100644 --- a/c++/src/Statistics.cc +++ b/c++/src/Statistics.cc @@ -181,13 +181,13 @@ namespace orc { ColumnStatisticsImpl::ColumnStatisticsImpl(const proto::ColumnStatistics& pb) { _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + _stats.setHasNull(pb.has_has_null() ? pb.has_null() : true); } BinaryColumnStatisticsImpl::BinaryColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + _stats.setHasNull(pb.has_has_null() ? pb.has_null() : true); if (pb.has_binary_statistics() && statContext.correctStats) { _stats.setHasTotalLength(pb.binary_statistics().has_sum()); _stats.setTotalLength(static_cast(pb.binary_statistics().sum())); @@ -197,7 +197,7 @@ namespace orc { BooleanColumnStatisticsImpl::BooleanColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + _stats.setHasNull(pb.has_has_null() ? pb.has_null() : true); if (pb.has_bucket_statistics() && statContext.correctStats) { _hasCount = true; _trueCount = pb.bucket_statistics().count(0); @@ -210,7 +210,7 @@ namespace orc { DateColumnStatisticsImpl::DateColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + _stats.setHasNull(pb.has_has_null() ? pb.has_null() : true); if (!pb.has_date_statistics() || !statContext.correctStats) { // hasMinimum_ is false by default; // hasMaximum_ is false by default; @@ -227,7 +227,7 @@ namespace orc { DecimalColumnStatisticsImpl::DecimalColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + _stats.setHasNull(pb.has_has_null() ? pb.has_null() : true); if (pb.has_decimal_statistics() && statContext.correctStats) { const proto::DecimalStatistics& stats = pb.decimal_statistics(); _stats.setHasMinimum(stats.has_minimum()); @@ -242,7 +242,7 @@ namespace orc { DoubleColumnStatisticsImpl::DoubleColumnStatisticsImpl(const proto::ColumnStatistics& pb) { _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + _stats.setHasNull(pb.has_has_null() ? pb.has_null() : true); if (!pb.has_double_statistics()) { _stats.setMinimum(0); _stats.setMaximum(0); @@ -261,7 +261,7 @@ namespace orc { IntegerColumnStatisticsImpl::IntegerColumnStatisticsImpl(const proto::ColumnStatistics& pb) { _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + _stats.setHasNull(pb.has_has_null() ? pb.has_null() : true); if (!pb.has_int_statistics()) { _stats.setMinimum(0); _stats.setMaximum(0); @@ -281,7 +281,7 @@ namespace orc { StringColumnStatisticsImpl::StringColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + _stats.setHasNull(pb.has_has_null() ? pb.has_null() : true); if (!pb.has_string_statistics() || !statContext.correctStats) { _stats.setTotalLength(0); } else { @@ -299,7 +299,7 @@ namespace orc { TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + _stats.setHasNull(pb.has_has_null() ? pb.has_null() : true); if (!pb.has_timestamp_statistics() || !statContext.correctStats) { _stats.setMinimum(0); _stats.setMaximum(0); @@ -365,7 +365,7 @@ namespace orc { CollectionColumnStatisticsImpl::CollectionColumnStatisticsImpl( const proto::ColumnStatistics& pb) { _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + _stats.setHasNull(pb.has_has_null() ? pb.has_null() : true); if (!pb.has_collection_statistics()) { _stats.setMinimum(0); _stats.setMaximum(0); diff --git a/c++/test/TestStripeIndexStatistics.cc b/c++/test/TestStripeIndexStatistics.cc index 34a4649c35..85fdb80e4f 100644 --- a/c++/test/TestStripeIndexStatistics.cc +++ b/c++/test/TestStripeIndexStatistics.cc @@ -46,18 +46,19 @@ namespace orc { intColStats = reinterpret_cast( stripeStats->getRowIndexStatistics(1, 0)); EXPECT_EQ( - "Data type: Integer\nValues: 2000\nHas null: no\nMinimum: 1\nMaximum: 2000\nSum: 2001000\n", + "Data type: Integer\nValues: 2000\nHas null: yes\nMinimum: 1\nMaximum: 2000\nSum: " + "2001000\n", intColStats->toString()); intColStats = reinterpret_cast( stripeStats->getRowIndexStatistics(1, 1)); EXPECT_EQ( - "Data type: Integer\nValues: 2000\nHas null: no\nMinimum: 2001\nMaximum: 4000\nSum: " + "Data type: Integer\nValues: 2000\nHas null: yes\nMinimum: 2001\nMaximum: 4000\nSum: " "6001000\n", intColStats->toString()); intColStats = reinterpret_cast( stripeStats->getRowIndexStatistics(1, 2)); EXPECT_EQ( - "Data type: Integer\nValues: 2000\nHas null: no\nMinimum: 4001\nMaximum: 6000\nSum: " + "Data type: Integer\nValues: 2000\nHas null: yes\nMinimum: 4001\nMaximum: 6000\nSum: " "10001000\n", intColStats->toString()); @@ -65,20 +66,20 @@ namespace orc { stringColStats = reinterpret_cast( stripeStats->getRowIndexStatistics(2, 0)); EXPECT_EQ( - "Data type: String\nValues: 2000\nHas null: no\nMinimum: 1000\nMaximum: 9a\nTotal length: " + "Data type: String\nValues: 2000\nHas null: yes\nMinimum: 1000\nMaximum: 9a\nTotal length: " "7892\n", stringColStats->toString()); stringColStats = reinterpret_cast( stripeStats->getRowIndexStatistics(2, 1)); EXPECT_EQ( - "Data type: String\nValues: 2000\nHas null: no\nMinimum: 2001\nMaximum: 4000\nTotal " + "Data type: String\nValues: 2000\nHas null: yes\nMinimum: 2001\nMaximum: 4000\nTotal " "length: " "8000\n", stringColStats->toString()); stringColStats = reinterpret_cast( stripeStats->getRowIndexStatistics(2, 2)); EXPECT_EQ( - "Data type: String\nValues: 2000\nHas null: no\nMinimum: 4001\nMaximum: 6000\nTotal " + "Data type: String\nValues: 2000\nHas null: yes\nMinimum: 4001\nMaximum: 6000\nTotal " "length: " "8000\n", stringColStats->toString()); diff --git a/tools/test/TestFileStatistics.cc b/tools/test/TestFileStatistics.cc index 1b2a396dc4..051f2fb3f9 100644 --- a/tools/test/TestFileStatistics.cc +++ b/tools/test/TestFileStatistics.cc @@ -30,12 +30,12 @@ TEST(TestFileStatistics, testNormal) { const std::string expected = "File " + file + " has 3 columns\n" "*** Column 0 ***\n" - "Column has 6000 values and has null value: no\n" + "Column has 6000 values and has null value: yes\n" "\n" "*** Column 1 ***\n" "Data type: Integer\n" "Values: 6000\n" - "Has null: no\n" + "Has null: yes\n" "Minimum: 1\n" "Maximum: 6000\n" "Sum: 18003000\n" @@ -43,7 +43,7 @@ TEST(TestFileStatistics, testNormal) { "*** Column 2 ***\n" "Data type: String\n" "Values: 6000\n" - "Has null: no\n" + "Has null: yes\n" "Minimum: 1000\n" "Maximum: 9a\n" "Total length: 23892\n" @@ -54,12 +54,12 @@ TEST(TestFileStatistics, testNormal) { "*** Stripe 0 ***\n" "\n" "--- Column 0 ---\n" - "Column has 6000 values and has null value: no\n" + "Column has 6000 values and has null value: yes\n" "\n" "--- Column 1 ---\n" "Data type: Integer\n" "Values: 6000\n" - "Has null: no\n" + "Has null: yes\n" "Minimum: 1\n" "Maximum: 6000\n" "Sum: 18003000\n" @@ -67,7 +67,7 @@ TEST(TestFileStatistics, testNormal) { "--- Column 2 ---\n" "Data type: String\n" "Values: 6000\n" - "Has null: no\n" + "Has null: yes\n" "Minimum: 1000\n" "Maximum: 9a\n" "Total length: 23892\n\n"; @@ -86,12 +86,12 @@ TEST(TestFileStatistics, testOptions) { const std::string expected = "File " + file + " has 3 columns\n" "*** Column 0 ***\n" - "Column has 6000 values and has null value: no\n" + "Column has 6000 values and has null value: yes\n" "\n" "*** Column 1 ***\n" "Data type: Integer\n" "Values: 6000\n" - "Has null: no\n" + "Has null: yes\n" "Minimum: 1\n" "Maximum: 6000\n" "Sum: 18003000\n" @@ -99,7 +99,7 @@ TEST(TestFileStatistics, testOptions) { "*** Column 2 ***\n" "Data type: String\n" "Values: 6000\n" - "Has null: no\n" + "Has null: yes\n" "Minimum: 1000\n" "Maximum: 9a\n" "Total length: 23892\n" @@ -110,21 +110,21 @@ TEST(TestFileStatistics, testOptions) { "*** Stripe 0 ***\n" "\n" "--- Column 0 ---\n" - "Column has 6000 values and has null value: no\n" + "Column has 6000 values and has null value: yes\n" "\n" "--- RowIndex 0 ---\n" - "Column has 2000 values and has null value: no\n" + "Column has 2000 values and has null value: yes\n" "\n" "--- RowIndex 1 ---\n" - "Column has 2000 values and has null value: no\n" + "Column has 2000 values and has null value: yes\n" "\n" "--- RowIndex 2 ---\n" - "Column has 2000 values and has null value: no\n" + "Column has 2000 values and has null value: yes\n" "\n" "--- Column 1 ---\n" "Data type: Integer\n" "Values: 6000\n" - "Has null: no\n" + "Has null: yes\n" "Minimum: 1\n" "Maximum: 6000\n" "Sum: 18003000\n" @@ -132,7 +132,7 @@ TEST(TestFileStatistics, testOptions) { "--- RowIndex 0 ---\n" "Data type: Integer\n" "Values: 2000\n" - "Has null: no\n" + "Has null: yes\n" "Minimum: 1\n" "Maximum: 2000\n" "Sum: 2001000\n" @@ -140,7 +140,7 @@ TEST(TestFileStatistics, testOptions) { "--- RowIndex 1 ---\n" "Data type: Integer\n" "Values: 2000\n" - "Has null: no\n" + "Has null: yes\n" "Minimum: 2001\n" "Maximum: 4000\n" "Sum: 6001000\n" @@ -148,7 +148,7 @@ TEST(TestFileStatistics, testOptions) { "--- RowIndex 2 ---\n" "Data type: Integer\n" "Values: 2000\n" - "Has null: no\n" + "Has null: yes\n" "Minimum: 4001\n" "Maximum: 6000\n" "Sum: 10001000\n" @@ -156,7 +156,7 @@ TEST(TestFileStatistics, testOptions) { "--- Column 2 ---\n" "Data type: String\n" "Values: 6000\n" - "Has null: no\n" + "Has null: yes\n" "Minimum: 1000\n" "Maximum: 9a\n" "Total length: 23892\n" @@ -164,7 +164,7 @@ TEST(TestFileStatistics, testOptions) { "--- RowIndex 0 ---\n" "Data type: String\n" "Values: 2000\n" - "Has null: no\n" + "Has null: yes\n" "Minimum: 1000\n" "Maximum: 9a\n" "Total length: 7892\n" @@ -172,7 +172,7 @@ TEST(TestFileStatistics, testOptions) { "--- RowIndex 1 ---\n" "Data type: String\n" "Values: 2000\n" - "Has null: no\n" + "Has null: yes\n" "Minimum: 2001\n" "Maximum: 4000\n" "Total length: 8000\n" @@ -180,7 +180,7 @@ TEST(TestFileStatistics, testOptions) { "--- RowIndex 2 ---\n" "Data type: String\n" "Values: 2000\n" - "Has null: no\n" + "Has null: yes\n" "Minimum: 4001\n" "Maximum: 6000\n" "Total length: 8000\n\n";