Skip to content

Commit

Permalink
PARQUET-710: Remove unneeded private member variables from RowGroupRe…
Browse files Browse the repository at this point in the history
…ader ABI

The injection capability is retained.
 The private members of the `RowGroupReader` class are removed.

Author: Deepak Majeti <deepak.majeti@hpe.com>

Closes apache#154 from majetideepak/PARQUET-710 and squashes the following commits:

11bfaa6 [Deepak Majeti] Modify comments that mention PIMPL
6dfd40d [Deepak Majeti] clang-format
4d58dfa [Deepak Majeti] Removed private members in Rowgroup

Change-Id: I72e7cbe45fbda6a9c979e9145954e67834ede3d7
  • Loading branch information
Deepak Majeti authored and wesm committed Sep 6, 2016
1 parent 5b5cfcc commit fd38785
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 34 deletions.
24 changes: 17 additions & 7 deletions cpp/src/parquet/file/metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,9 @@ int64_t ColumnChunkMetaData::total_compressed_size() const {
// row-group metadata
class RowGroupMetaData::RowGroupMetaDataImpl {
public:
explicit RowGroupMetaDataImpl(const format::RowGroup* row_group)
: row_group_(row_group) {}
explicit RowGroupMetaDataImpl(
const format::RowGroup* row_group, const SchemaDescriptor* schema)
: row_group_(row_group), schema_(schema) {}
~RowGroupMetaDataImpl() {}

inline int num_columns() const { return row_group_->columns.size(); }
Expand All @@ -176,6 +177,8 @@ class RowGroupMetaData::RowGroupMetaDataImpl {

inline int64_t total_byte_size() const { return row_group_->total_byte_size; }

inline const SchemaDescriptor* schema() const { return schema_; }

std::unique_ptr<ColumnChunkMetaData> ColumnChunk(int i) {
DCHECK(i < num_columns()) << "The file only has " << num_columns()
<< " columns, requested metadata for column: " << i;
Expand All @@ -185,15 +188,18 @@ class RowGroupMetaData::RowGroupMetaDataImpl {

private:
const format::RowGroup* row_group_;
const SchemaDescriptor* schema_;
};

std::unique_ptr<RowGroupMetaData> RowGroupMetaData::Make(const uint8_t* metadata) {
return std::unique_ptr<RowGroupMetaData>(new RowGroupMetaData(metadata));
std::unique_ptr<RowGroupMetaData> RowGroupMetaData::Make(
const uint8_t* metadata, const SchemaDescriptor* schema) {
return std::unique_ptr<RowGroupMetaData>(new RowGroupMetaData(metadata, schema));
}

RowGroupMetaData::RowGroupMetaData(const uint8_t* metadata)
RowGroupMetaData::RowGroupMetaData(
const uint8_t* metadata, const SchemaDescriptor* schema)
: impl_{std::unique_ptr<RowGroupMetaDataImpl>(new RowGroupMetaDataImpl(
reinterpret_cast<const format::RowGroup*>(metadata)))} {}
reinterpret_cast<const format::RowGroup*>(metadata), schema))} {}
RowGroupMetaData::~RowGroupMetaData() {}

int RowGroupMetaData::num_columns() const {
Expand All @@ -208,6 +214,10 @@ int64_t RowGroupMetaData::total_byte_size() const {
return impl_->total_byte_size();
}

const SchemaDescriptor* RowGroupMetaData::schema() const {
return impl_->schema();
}

std::unique_ptr<ColumnChunkMetaData> RowGroupMetaData::ColumnChunk(int i) const {
return impl_->ColumnChunk(i);
}
Expand Down Expand Up @@ -238,7 +248,7 @@ class FileMetaData::FileMetaDataImpl {
<< "The file only has " << num_row_groups()
<< " row groups, requested metadata for row group: " << i;
return RowGroupMetaData::Make(
reinterpret_cast<const uint8_t*>(&metadata_->row_groups[i]));
reinterpret_cast<const uint8_t*>(&metadata_->row_groups[i]), &schema_);
}

const SchemaDescriptor* schema_descriptor() const { return &schema_; }
Expand Down
7 changes: 5 additions & 2 deletions cpp/src/parquet/file/metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,21 @@ class PARQUET_EXPORT ColumnChunkMetaData {
class PARQUET_EXPORT RowGroupMetaData {
public:
// API convenience to get a MetaData accessor
static std::unique_ptr<RowGroupMetaData> Make(const uint8_t* metadata);
static std::unique_ptr<RowGroupMetaData> Make(
const uint8_t* metadata, const SchemaDescriptor* schema);

~RowGroupMetaData();

// row-group metadata
int num_columns() const;
int64_t num_rows() const;
int64_t total_byte_size() const;
// Return const-pointer to make it clear that this object is not to be copied
const SchemaDescriptor* schema() const;
std::unique_ptr<ColumnChunkMetaData> ColumnChunk(int i) const;

private:
explicit RowGroupMetaData(const uint8_t* metadata);
explicit RowGroupMetaData(const uint8_t* metadata, const SchemaDescriptor* schema);
// PIMPL Idiom
class RowGroupMetaDataImpl;
std::unique_ptr<RowGroupMetaDataImpl> impl_;
Expand Down
7 changes: 5 additions & 2 deletions cpp/src/parquet/file/reader-internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ const RowGroupMetaData* SerializedRowGroup::metadata() const {
return row_group_metadata_.get();
}

const ReaderProperties* SerializedRowGroup::properties() const {
return &properties_;
}

std::unique_ptr<PageReader> SerializedRowGroup::GetColumnPageReader(int i) {
// Read column chunk from the file
auto col = row_group_metadata_->ColumnChunk(i);
Expand Down Expand Up @@ -195,8 +199,7 @@ std::shared_ptr<RowGroupReader> SerializedFile::GetRowGroup(int i) {
std::unique_ptr<SerializedRowGroup> contents(new SerializedRowGroup(
source_.get(), std::move(file_metadata_->RowGroup(i)), properties_));

return std::make_shared<RowGroupReader>(
file_metadata_->schema_descriptor(), std::move(contents), properties_.allocator());
return std::make_shared<RowGroupReader>(std::move(contents));
}

const FileMetaData* SerializedFile::metadata() const {
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/parquet/file/reader-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ class SerializedRowGroup : public RowGroupReader::Contents {

virtual const RowGroupMetaData* metadata() const;

virtual const ReaderProperties* properties() const;

virtual std::unique_ptr<PageReader> GetColumnPageReader(int i);

private:
Expand Down
15 changes: 8 additions & 7 deletions cpp/src/parquet/file/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,18 @@ namespace parquet {
// ----------------------------------------------------------------------
// RowGroupReader public API

RowGroupReader::RowGroupReader(const SchemaDescriptor* schema,
std::unique_ptr<Contents> contents, MemoryAllocator* allocator)
: schema_(schema), contents_(std::move(contents)), allocator_(allocator) {}
RowGroupReader::RowGroupReader(std::unique_ptr<Contents> contents)
: contents_(std::move(contents)) {}

std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) {
DCHECK(i < schema_->num_columns()) << "The RowGroup only has " << schema_->num_columns()
<< "columns, requested column: " << i;
const ColumnDescriptor* descr = schema_->Column(i);
DCHECK(i < metadata()->num_columns()) << "The RowGroup only has "
<< metadata()->num_columns()
<< "columns, requested column: " << i;
const ColumnDescriptor* descr = metadata()->schema()->Column(i);

std::unique_ptr<PageReader> page_reader = contents_->GetColumnPageReader(i);
return ColumnReader::Make(descr, std::move(page_reader), allocator_);
return ColumnReader::Make(descr, std::move(page_reader),
const_cast<ReaderProperties*>(contents_->properties())->allocator());
}

// Returns the rowgroup metadata
Expand Down
22 changes: 10 additions & 12 deletions cpp/src/parquet/file/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,17 @@ class RandomAccessSource;

class PARQUET_EXPORT RowGroupReader {
public:
// Forward declare the PIMPL
// Forward declare a virtual class 'Contents' to aid dependency injection and more
// easily create test fixtures
// An implementation of the Contents class is defined in the .cc file
struct Contents {
virtual ~Contents() {}
virtual std::unique_ptr<PageReader> GetColumnPageReader(int i) = 0;
virtual const RowGroupMetaData* metadata() const = 0;
virtual const ReaderProperties* properties() const = 0;
};

RowGroupReader(const SchemaDescriptor* schema, std::unique_ptr<Contents> contents,
MemoryAllocator* allocator);
explicit RowGroupReader(std::unique_ptr<Contents> contents);

// Returns the rowgroup metadata
const RowGroupMetaData* metadata() const;
Expand All @@ -56,17 +58,15 @@ class PARQUET_EXPORT RowGroupReader {
std::shared_ptr<ColumnReader> Column(int i);

private:
const SchemaDescriptor* schema_;
// PIMPL idiom
// This is declared in the .cc file so that we can hide compiled Thrift
// headers from the public API and also more easily create test fixtures.
// Holds a pointer to an instance of Contents implementation
std::unique_ptr<Contents> contents_;
MemoryAllocator* allocator_;
};

class PARQUET_EXPORT ParquetFileReader {
public:
// Forward declare the PIMPL
// Forward declare a virtual class 'Contents' to aid dependency injection and more
// easily create test fixtures
// An implementation of the Contents class is defined in the .cc file
struct Contents {
virtual ~Contents() {}
// Perform any cleanup associated with the file contents
Expand Down Expand Up @@ -99,9 +99,7 @@ class PARQUET_EXPORT ParquetFileReader {
std::ostream& stream, std::list<int> selected_columns, bool print_values = true);

private:
// PIMPL idiom
// This is declared in the .cc file so that we can hide compiled Thrift
// headers from the public API and also more easily create test fixtures.
// Holds a pointer to an instance of Contents implementation
std::unique_ptr<Contents> contents_;
};

Expand Down
12 changes: 8 additions & 4 deletions cpp/src/parquet/file/writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ class OutputStream;

class PARQUET_EXPORT RowGroupWriter {
public:
// Forward declare a virtual class 'Contents' to aid dependency injection and more
// easily create test fixtures
// An implementation of the Contents class is defined in the .cc file
struct Contents {
virtual int num_columns() const = 0;
virtual int64_t num_rows() const = 0;
Expand Down Expand Up @@ -75,15 +78,17 @@ class PARQUET_EXPORT RowGroupWriter {
// Owned by the parent ParquetFileWriter
const SchemaDescriptor* schema_;

// This is declared in the .cc file so that we can hide compiled Thrift
// headers from the public API and also more easily create test fixtures.
// Holds a pointer to an instance of Contents implementation
std::unique_ptr<Contents> contents_;

MemoryAllocator* allocator_;
};

class PARQUET_EXPORT ParquetFileWriter {
public:
// Forward declare a virtual class 'Contents' to aid dependency injection and more
// easily create test fixtures
// An implementation of the Contents class is defined in the .cc file
struct Contents {
virtual ~Contents() {}
// Perform any cleanup associated with the file contents
Expand Down Expand Up @@ -155,8 +160,7 @@ class PARQUET_EXPORT ParquetFileWriter {
const ColumnDescriptor* column_schema(int i) const { return schema_->Column(i); }

private:
// This is declared in the .cc file so that we can hide compiled Thrift
// headers from the public API and also more easily create test fixtures.
// Holds a pointer to an instance of Contents implementation
std::unique_ptr<Contents> contents_;

// The SchemaDescriptor is provided by the Contents impl
Expand Down

0 comments on commit fd38785

Please sign in to comment.