Skip to content
This repository was archived by the owner on May 10, 2024. It is now read-only.

Commit 3cb8a3e

Browse files
author
Deepak Majeti
committed
Review comments
1 parent 17a79f3 commit 3cb8a3e

File tree

6 files changed

+21
-12
lines changed

6 files changed

+21
-12
lines changed

src/parquet/column_writer.cc

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,7 @@ int64_t ColumnWriter::Close() {
268268

269269
EncodedStatistics chunk_statistics = GetChunkStatistics();
270270
if (chunk_statistics.is_set())
271-
metadata_->SetStatistics(SortOrder::SIGNED == GetSortOrder(descr_->logical_type(),
272-
descr_->physical_type()),
271+
metadata_->SetStatistics(SortOrder::SIGNED == descr_->sort_order(),
273272
chunk_statistics);
274273
pager_->Close(has_dictionary_, fallback_);
275274
}
@@ -321,8 +320,7 @@ TypedColumnWriter<Type>::TypedColumnWriter(ColumnChunkMetaDataBuilder* metadata,
321320
}
322321

323322
if (properties->statistics_enabled(descr_->path()) &&
324-
(SortOrder::UNKNOWN !=
325-
GetSortOrder(descr_->logical_type(), descr_->physical_type()))) {
323+
(SortOrder::UNKNOWN != descr_->sort_order())) {
326324
page_statistics_ = std::unique_ptr<TypedStats>(new TypedStats(descr_, allocator_));
327325
chunk_statistics_ = std::unique_ptr<TypedStats>(new TypedStats(descr_, allocator_));
328326
}

src/parquet/file/metadata.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,7 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl {
117117
// Reports whether usable statistics are available for this column chunk.
// True only when column_->meta_data has its statistics field flagged as set
// (__isset.statistics) AND writer_version_->HasCorrectStatistics() accepts
// this column's physical type together with its sort order.
// Precondition (checked by DCHECK): writer_version_ is not null.
inline bool is_stats_set() const {
118118
DCHECK(writer_version_ != nullptr);
119119
return column_->meta_data.__isset.statistics &&
120-
writer_version_->HasCorrectStatistics(
121-
type(), GetSortOrder(descr_->logical_type(), descr_->physical_type()));
120+
writer_version_->HasCorrectStatistics(type(), descr_->sort_order());
122121
}
123122

124123
inline std::shared_ptr<RowGroupStatistics> statistics() const {

src/parquet/schema.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,9 @@ class PARQUET_EXPORT ColumnDescriptor {
332332

333333
LogicalType::type logical_type() const { return primitive_node_->logical_type(); }
334334

335+
// Sort order of this column, obtained by passing the column's logical type
// and physical type to GetSortOrder(). Computed on every call (not cached).
SortOrder::type sort_order() const { return GetSortOrder(logical_type(),
336+
physical_type()); }
337+
335338
const std::string& name() const { return primitive_node_->name(); }
336339

337340
const std::shared_ptr<schema::ColumnPath> path() const;

src/parquet/statistics.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ TypedRowGroupStatistics<DType>::TypedRowGroupStatistics(const ColumnDescriptor*
3434
: pool_(pool),
3535
min_buffer_(AllocateBuffer(pool_, 0)),
3636
max_buffer_(AllocateBuffer(pool_, 0)) {
37-
comparator_ = std::static_pointer_cast<CompareDefault<DType> >(Compare::Make(schema));
3837
SetDescr(schema);
3938
Reset();
4039
}
@@ -69,7 +68,6 @@ TypedRowGroupStatistics<DType>::TypedRowGroupStatistics(
6968
IncrementNullCount(null_count);
7069
IncrementDistinctCount(distinct_count);
7170

72-
comparator_ = std::static_pointer_cast<CompareDefault<DType> >(Compare::Make(schema));
7371
SetDescr(schema);
7472

7573
if (!encoded_min.empty()) {
@@ -86,6 +84,11 @@ bool TypedRowGroupStatistics<DType>::HasMinMax() const {
8684
return has_min_max_;
8785
}
8886

87+
// Builds the comparator for this statistics object from the current column
// descriptor (descr_) via Compare::Make() and stores it in comparator_.
// descr_ must already be assigned when this runs, since it is what
// Compare::Make() inspects.
// NOTE(review): static_pointer_cast is an unchecked downcast; this assumes
// Compare::Make() returns an object usable as CompareDefault<DType> for this
// DType -- confirm against the full Compare::Make() implementation.
template <typename DType>
88+
void TypedRowGroupStatistics<DType>::SetComparator() {
89+
comparator_ = std::static_pointer_cast<CompareDefault<DType> >(Compare::Make(descr_));
90+
}
91+
8992
template <typename DType>
9093
void TypedRowGroupStatistics<DType>::Reset() {
9194
ResetCounts();

src/parquet/statistics.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,13 +98,19 @@ class PARQUET_EXPORT RowGroupStatistics
9898

9999
virtual EncodedStatistics Encode() = 0;
100100

101+
// Set the comparator that corresponds to this column's descriptor
102+
virtual void SetComparator() = 0;
103+
101104
virtual ~RowGroupStatistics() {}
102105

103106
Type::type physical_type() const { return descr_->physical_type(); }
104107

105108
protected:
106109
const ColumnDescriptor* descr() const { return descr_; }
107-
void SetDescr(const ColumnDescriptor* schema) { descr_ = schema; }
110+
// Stores the column descriptor and then rebuilds the comparator through the
// virtual SetComparator() hook, so the two cannot drift apart.
// descr_ is assigned first because SetComparator() is invoked right after
// and is expected to read it.
// NOTE(review): SetComparator() is pure virtual in this class, so this must
// not be called from a RowGroupStatistics base-class constructor; the visible
// callers are derived-class constructor bodies -- verify any others.
void SetDescr(const ColumnDescriptor* schema) {
111+
descr_ = schema;
112+
SetComparator();
113+
}
108114

109115
void IncrementNullCount(int64_t n) { statistics_.null_count += n; }
110116

@@ -147,6 +153,7 @@ class TypedRowGroupStatistics : public RowGroupStatistics {
147153

148154
bool HasMinMax() const override;
149155
void Reset() override;
156+
void SetComparator() override;
150157
void Merge(const TypedRowGroupStatistics<DType>& other);
151158

152159
void Update(const T* values, int64_t num_not_null, int64_t num_null);

src/parquet/util/comparison.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
namespace parquet {
2626

2727
std::shared_ptr<Compare> Compare::Make(const ColumnDescriptor* descr) {
28-
if (SortOrder::SIGNED == GetSortOrder(descr->logical_type(), descr->physical_type())) {
28+
if (SortOrder::SIGNED == descr->sort_order()) {
2929
switch (descr->physical_type()) {
3030
case Type::BOOLEAN:
3131
return std::make_shared<CompareDefaultBoolean>();
@@ -46,8 +46,7 @@ std::shared_ptr<Compare> Compare::Make(const ColumnDescriptor* descr) {
4646
default:
4747
ParquetException::NYI("Signed Compare not implemented");
4848
}
49-
} else if (SortOrder::UNSIGNED ==
50-
GetSortOrder(descr->logical_type(), descr->physical_type())) {
49+
} else if (SortOrder::UNSIGNED == descr->sort_order()) {
5150
switch (descr->physical_type()) {
5251
case Type::INT32:
5352
return std::make_shared<CompareUnsignedInt32>();

0 commit comments

Comments
 (0)