PARQUET-477: Add clang-format / clang-tidy checks to toolchain
I adapted @emkornfield's work from ARROW-71 (apache@5d12999). It's a large diff because it applies the first reformatting of the codebase. Per travis-ci/apt-source-safelist#199, we can switch Travis back to Ubuntu 14.04 and maybe upgrade to LLVM 3.8 at some point in the future.

Author: Wes McKinney <wesm@apache.org>

Closes apache#92 from wesm/PARQUET-477 and squashes the following commits:

8b6e8f0 [Wes McKinney] Statically-link zlib
503e793 [Wes McKinney] Boost 1.55
2c512dc [Wes McKinney] Install newer boost due to C++11 issues on ubuntu precise
514601c [Wes McKinney] Fix build dir in travis script
6c2e7cf [Wes McKinney] Adapt clang-tidy / clang-format tools from Apache Arrow C++ codebase

Change-Id: I5f7cbe5be7e898b5c522d0f67c8b934ce7d9dab1
wesm committed May 1, 2016
1 parent ff2017a commit 7a29f2f
Showing 90 changed files with 1,288 additions and 1,834 deletions.
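The bulk of the diff below is mechanical reformatting. As a quick orientation, here is a minimal C++ sketch of the formatting conventions the new clang-format setup appears to enforce; these rules are inferred from the hunks in this commit, not read from the actual .clang-format file, which is not shown in this excerpt.

// Illustration only: style rules inferred from the hunks in this commit, not
// taken from the .clang-format file itself.
#include <memory>
#include <vector>

struct Page {};

// Pointers bind to the type: "const ColumnDescriptor* d", not
// "const ColumnDescriptor *d".
void Execute(const std::vector<int>* levels) {
  // Short conditional bodies are collapsed onto a single line.
  if (levels == nullptr) { return; }
}

int main() {
  // C++11 makes the space between closing angle brackets unnecessary, so
  // "vector<shared_ptr<Page> >" becomes "vector<shared_ptr<Page>>".
  std::vector<std::shared_ptr<Page>> pages;

  // Long calls break after the opening parenthesis and continue on the next
  // line rather than aligning arguments under the first argument.
  pages.push_back(
      std::make_shared<Page>());
  return 0;  // Trailing comments get two spaces before the "//".
}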
2 changes: 1 addition & 1 deletion cpp/src/parquet/api/io.h
@@ -24,4 +24,4 @@
#include "parquet/util/mem-allocator.h"
#include "parquet/util/output.h"

#endif // PARQUET_API_IO_H
#endif // PARQUET_API_IO_H
2 changes: 1 addition & 1 deletion cpp/src/parquet/api/reader.h
@@ -29,4 +29,4 @@
// IO
#include "parquet/api/io.h"

#endif // PARQUET_API_READER_H
#endif // PARQUET_API_READER_H
2 changes: 1 addition & 1 deletion cpp/src/parquet/api/schema.h
@@ -23,4 +23,4 @@
#include "parquet/schema/printer.h"
#include "parquet/schema/types.h"

#endif // PARQUET_API_SCHEMA_H
#endif // PARQUET_API_SCHEMA_H
54 changes: 24 additions & 30 deletions cpp/src/parquet/column/column-reader-test.cc
@@ -76,19 +76,15 @@ class TestPrimitiveReader : public ::testing::Test {
ASSERT_EQ(num_levels_, batch_actual);
ASSERT_EQ(num_values_, total_values_read);
ASSERT_TRUE(vector_equal(values_, vresult));
if (max_def_level_ > 0) {
ASSERT_TRUE(vector_equal(def_levels_, dresult));
}
if (max_rep_level_ > 0) {
ASSERT_TRUE(vector_equal(rep_levels_, rresult));
}
if (max_def_level_ > 0) { ASSERT_TRUE(vector_equal(def_levels_, dresult)); }
if (max_rep_level_ > 0) { ASSERT_TRUE(vector_equal(rep_levels_, rresult)); }
// catch improper writes at EOS
batch_actual = reader->ReadBatch(5, nullptr, nullptr, nullptr, &values_read);
ASSERT_EQ(0, batch_actual);
ASSERT_EQ(0, values_read);
}

void ExecutePlain(int num_pages, int levels_per_page, const ColumnDescriptor *d) {
void ExecutePlain(int num_pages, int levels_per_page, const ColumnDescriptor* d) {
num_values_ = MakePages<Int32Type>(d, num_pages, levels_per_page, def_levels_,
rep_levels_, values_, data_buffer_, pages_, Encoding::PLAIN);
num_levels_ = num_pages * levels_per_page;
@@ -101,7 +97,7 @@ class TestPrimitiveReader : public ::testing::Test {
reader_.reset();
}

void ExecuteDict(int num_pages, int levels_per_page, const ColumnDescriptor *d) {
void ExecuteDict(int num_pages, int levels_per_page, const ColumnDescriptor* d) {
num_values_ = MakePages<Int32Type>(d, num_pages, levels_per_page, def_levels_,
rep_levels_, values_, data_buffer_, pages_, Encoding::RLE_DICTIONARY);
num_levels_ = num_pages * levels_per_page;
@@ -114,12 +110,12 @@ class TestPrimitiveReader : public ::testing::Test {
int num_values_;
int16_t max_def_level_;
int16_t max_rep_level_;
vector<shared_ptr<Page> > pages_;
vector<shared_ptr<Page>> pages_;
std::shared_ptr<ColumnReader> reader_;
vector<int32_t> values_;
vector<int16_t> def_levels_;
vector<int16_t> rep_levels_;
vector<uint8_t> data_buffer_; // For BA and FLBA
vector<uint8_t> data_buffer_; // For BA and FLBA
};

TEST_F(TestPrimitiveReader, TestInt32FlatRequired) {
@@ -162,63 +158,61 @@ TEST_F(TestPrimitiveReader, TestDictionaryEncodedPages) {
const ColumnDescriptor descr(type, max_def_level_, max_rep_level_);
shared_ptr<OwnedMutableBuffer> dummy = std::make_shared<OwnedMutableBuffer>();

shared_ptr<DictionaryPage> dict_page = std::make_shared<DictionaryPage>(dummy,
0, Encoding::PLAIN);
shared_ptr<DataPage> data_page = MakeDataPage<Int32Type>(&descr, {}, 0,
Encoding::RLE_DICTIONARY, {}, 0, {}, 0, {}, 0);
shared_ptr<DictionaryPage> dict_page =
std::make_shared<DictionaryPage>(dummy, 0, Encoding::PLAIN);
shared_ptr<DataPage> data_page = MakeDataPage<Int32Type>(
&descr, {}, 0, Encoding::RLE_DICTIONARY, {}, 0, {}, 0, {}, 0);
pages_.push_back(dict_page);
pages_.push_back(data_page);
InitReader(&descr);
// Tests Dict : PLAIN, Data : RLE_DICTIONARY
ASSERT_NO_THROW(reader_->HasNext());
pages_.clear();

dict_page = std::make_shared<DictionaryPage>(dummy,
0, Encoding::PLAIN_DICTIONARY);
data_page = MakeDataPage<Int32Type>(&descr, {}, 0,
Encoding::PLAIN_DICTIONARY, {}, 0, {}, 0, {}, 0);
dict_page = std::make_shared<DictionaryPage>(dummy, 0, Encoding::PLAIN_DICTIONARY);
data_page = MakeDataPage<Int32Type>(
&descr, {}, 0, Encoding::PLAIN_DICTIONARY, {}, 0, {}, 0, {}, 0);
pages_.push_back(dict_page);
pages_.push_back(data_page);
InitReader(&descr);
// Tests Dict : PLAIN_DICTIONARY, Data : PLAIN_DICTIONARY
ASSERT_NO_THROW(reader_->HasNext());
pages_.clear();

data_page = MakeDataPage<Int32Type>(&descr, {}, 0,
Encoding::RLE_DICTIONARY, {}, 0, {}, 0, {}, 0);
data_page = MakeDataPage<Int32Type>(
&descr, {}, 0, Encoding::RLE_DICTIONARY, {}, 0, {}, 0, {}, 0);
pages_.push_back(data_page);
InitReader(&descr);
// Tests dictionary page must occur before data page
ASSERT_THROW(reader_->HasNext(), ParquetException);
pages_.clear();

dict_page = std::make_shared<DictionaryPage>(dummy,
0, Encoding::DELTA_BYTE_ARRAY);
dict_page = std::make_shared<DictionaryPage>(dummy, 0, Encoding::DELTA_BYTE_ARRAY);
pages_.push_back(dict_page);
InitReader(&descr);
// Tests only RLE_DICTIONARY is supported
ASSERT_THROW(reader_->HasNext(), ParquetException);
pages_.clear();

shared_ptr<DictionaryPage> dict_page1 = std::make_shared<DictionaryPage>(dummy,
0, Encoding::PLAIN_DICTIONARY);
shared_ptr<DictionaryPage> dict_page2 = std::make_shared<DictionaryPage>(dummy,
0, Encoding::PLAIN);
shared_ptr<DictionaryPage> dict_page1 =
std::make_shared<DictionaryPage>(dummy, 0, Encoding::PLAIN_DICTIONARY);
shared_ptr<DictionaryPage> dict_page2 =
std::make_shared<DictionaryPage>(dummy, 0, Encoding::PLAIN);
pages_.push_back(dict_page1);
pages_.push_back(dict_page2);
InitReader(&descr);
// Column cannot have more than one dictionary
ASSERT_THROW(reader_->HasNext(), ParquetException);
pages_.clear();

data_page = MakeDataPage<Int32Type>(&descr, {}, 0,
Encoding::DELTA_BYTE_ARRAY, {}, 0, {}, 0, {}, 0);
data_page = MakeDataPage<Int32Type>(
&descr, {}, 0, Encoding::DELTA_BYTE_ARRAY, {}, 0, {}, 0, {}, 0);
pages_.push_back(data_page);
InitReader(&descr);
// unsupported encoding
ASSERT_THROW(reader_->HasNext(), ParquetException);
pages_.clear();
}

} // namespace test
} // namespace parquet
} // namespace test
} // namespace parquet
18 changes: 8 additions & 10 deletions cpp/src/parquet/column/column-writer-test.cc
@@ -70,8 +70,8 @@ class TestPrimitiveWriter : public ::testing::Test {
sink_.reset(new InMemoryOutputStream());
std::unique_ptr<SerializedPageWriter> pager(
new SerializedPageWriter(sink_.get(), Compression::UNCOMPRESSED, &metadata_));
return std::unique_ptr<Int64Writer>(new Int64Writer(schema_.get(), std::move(pager),
output_size));
return std::unique_ptr<Int64Writer>(
new Int64Writer(schema_.get(), std::move(pager), output_size));
}

void ReadColumn() {
@@ -138,8 +138,8 @@ TEST_F(TestPrimitiveWriter, OptionalRepeated) {
std::vector<int16_t> repetition_levels(100, 0);

auto writer = BuildWriter();
writer->WriteBatch(values.size(), definition_levels.data(),
repetition_levels.data(), values.data());
writer->WriteBatch(
values.size(), definition_levels.data(), repetition_levels.data(), values.data());
writer->Close();

ReadColumn();
@@ -176,8 +176,8 @@ TEST_F(TestPrimitiveWriter, OptionalRepeatedTooFewRows) {
repetition_levels[3] = 1;

auto writer = BuildWriter();
writer->WriteBatch(values.size(), definition_levels.data(),
repetition_levels.data(), values.data());
writer->WriteBatch(
values.size(), definition_levels.data(), repetition_levels.data(), values.data());
ASSERT_THROW(writer->Close(), ParquetException);
}

@@ -196,7 +196,5 @@ TEST_F(TestPrimitiveWriter, RequiredNonRepeatedLargeChunk) {
ASSERT_EQ(values, values_out_);
}

} // namespace test
} // namespace parquet


} // namespace test
} // namespace parquet
33 changes: 14 additions & 19 deletions cpp/src/parquet/column/levels-test.cc
@@ -28,8 +28,8 @@ using std::string;

namespace parquet {

void GenerateLevels(int min_repeat_factor, int max_repeat_factor,
int max_level, std::vector<int16_t>& input_levels) {
void GenerateLevels(int min_repeat_factor, int max_repeat_factor, int max_level,
std::vector<int16_t>& input_levels) {
// for each repetition count upto max_repeat_factor
for (int repeat = min_repeat_factor; repeat <= max_repeat_factor; repeat++) {
// repeat count increases by a factor of 2 for every iteration
@@ -56,14 +56,13 @@ void EncodeLevels(Encoding::type encoding, int max_level, int num_levels,
// encode levels
if (encoding == Encoding::RLE) {
// leave space to write the rle length value
encoder.Init(encoding, max_level, num_levels,
bytes.data() + sizeof(uint32_t), bytes.size());
encoder.Init(
encoding, max_level, num_levels, bytes.data() + sizeof(uint32_t), bytes.size());

levels_count = encoder.Encode(num_levels, input_levels);
(reinterpret_cast<uint32_t*>(bytes.data()))[0] = encoder.len();
} else {
encoder.Init(encoding, max_level, num_levels,
bytes.data(), bytes.size());
encoder.Init(encoding, max_level, num_levels, bytes.data(), bytes.size());
levels_count = encoder.Encode(num_levels, input_levels);
}
ASSERT_EQ(num_levels, levels_count);
@@ -94,15 +93,15 @@ void VerifyDecodingLevels(Encoding::type encoding, int max_level,
}
// check the remaining levels
int num_levels_completed = decode_count * (num_levels / decode_count);
int num_remaining_levels = num_levels - num_levels_completed;
int num_remaining_levels = num_levels - num_levels_completed;
if (num_remaining_levels > 0) {
levels_count = decoder.Decode(num_remaining_levels, output_levels.data());
ASSERT_EQ(num_remaining_levels, levels_count);
for (int i = 0; i < num_remaining_levels; i++) {
EXPECT_EQ(input_levels[i + num_levels_completed], output_levels[i]);
}
}
//Test zero Decode values
// Test zero Decode values
ASSERT_EQ(0, decoder.Decode(1, output_levels.data()));
}

@@ -133,12 +132,11 @@ void VerifyDecodingMultipleSetData(Encoding::type encoding, int max_level,
// increase the repetition count for each iteration by a factor of 2
TEST(TestLevels, TestLevelsDecodeMultipleBitWidth) {
int min_repeat_factor = 0;
int max_repeat_factor = 7; // 128
int max_repeat_factor = 7; // 128
int max_bit_width = 8;
std::vector<int16_t> input_levels;
std::vector<uint8_t> bytes;
Encoding::type encodings[2] = {Encoding::RLE,
Encoding::BIT_PACKED};
Encoding::type encodings[2] = {Encoding::RLE, Encoding::BIT_PACKED};

// for each encoding
for (int encode = 0; encode < 2; encode++) {
@@ -150,8 +148,7 @@ TEST(TestLevels, TestLevelsDecodeMultipleBitWidth) {
// find the maximum level for the current bit_width
int max_level = (1 << bit_width) - 1;
// Generate levels
GenerateLevels(min_repeat_factor, max_repeat_factor,
max_level, input_levels);
GenerateLevels(min_repeat_factor, max_repeat_factor, max_level, input_levels);
EncodeLevels(encoding, max_level, input_levels.size(), input_levels.data(), bytes);
VerifyDecodingLevels(encoding, max_level, input_levels, bytes);
input_levels.clear();
@@ -162,15 +159,13 @@
// Test multiple decoder SetData calls
TEST(TestLevels, TestLevelsDecodeMultipleSetData) {
int min_repeat_factor = 3;
int max_repeat_factor = 7; // 128
int max_repeat_factor = 7; // 128
int bit_width = 8;
int max_level = (1 << bit_width) - 1;
std::vector<int16_t> input_levels;
std::vector<std::vector<uint8_t>> bytes;
Encoding::type encodings[2] = {Encoding::RLE,
Encoding::BIT_PACKED};
GenerateLevels(min_repeat_factor, max_repeat_factor,
max_level, input_levels);
Encoding::type encodings[2] = {Encoding::RLE, Encoding::BIT_PACKED};
GenerateLevels(min_repeat_factor, max_repeat_factor, max_level, input_levels);
int num_levels = input_levels.size();
int setdata_factor = 8;
int split_level_size = num_levels / setdata_factor;
@@ -188,4 +183,4 @@
}
}

} // namespace parquet
} // namespace parquet
29 changes: 10 additions & 19 deletions cpp/src/parquet/column/levels.h
@@ -32,8 +32,8 @@ class LevelEncoder {
LevelEncoder() {}

// Initialize the LevelEncoder.
void Init(Encoding::type encoding, int16_t max_level,
int num_buffered_values, uint8_t* data, int data_size) {
void Init(Encoding::type encoding, int16_t max_level, int num_buffered_values,
uint8_t* data, int data_size) {
bit_width_ = BitUtil::Log2(max_level + 1);
encoding_ = encoding;
switch (encoding) {
@@ -60,18 +60,14 @@

if (encoding_ == Encoding::RLE) {
for (int i = 0; i < batch_size; ++i) {
if (!rle_encoder_->Put(*(levels + i))) {
break;
}
if (!rle_encoder_->Put(*(levels + i))) { break; }
++num_encoded;
}
rle_encoder_->Flush();
rle_length_ = rle_encoder_->len();
} else {
for (int i = 0; i < batch_size; ++i) {
if (!bit_packed_encoder_->PutValue(*(levels + i), bit_width_)) {
break;
}
if (!bit_packed_encoder_->PutValue(*(levels + i), bit_width_)) { break; }
++num_encoded;
}
bit_packed_encoder_->Flush();
@@ -94,15 +90,14 @@
std::unique_ptr<BitWriter> bit_packed_encoder_;
};


class LevelDecoder {
public:
LevelDecoder() : num_values_remaining_(0) {}

// Initialize the LevelDecoder state with new data
// and return the number of bytes consumed
int SetData(Encoding::type encoding, int16_t max_level,
int num_buffered_values, const uint8_t* data) {
int SetData(Encoding::type encoding, int16_t max_level, int num_buffered_values,
const uint8_t* data) {
uint32_t num_bytes = 0;
encoding_ = encoding;
num_values_remaining_ = num_buffered_values;
@@ -140,16 +135,12 @@
int num_values = std::min(num_values_remaining_, batch_size);
if (encoding_ == Encoding::RLE) {
for (int i = 0; i < num_values; ++i) {
if (!rle_decoder_->Get(levels + i)) {
break;
}
if (!rle_decoder_->Get(levels + i)) { break; }
++num_decoded;
}
} else {
for (int i = 0; i < num_values; ++i) {
if (!bit_packed_decoder_->GetValue(bit_width_, levels + i)) {
break;
}
if (!bit_packed_decoder_->GetValue(bit_width_, levels + i)) { break; }
++num_decoded;
}
}
@@ -165,5 +156,5 @@
std::unique_ptr<BitReader> bit_packed_decoder_;
};

} // namespace parquet
#endif // PARQUET_COLUMN_LEVELS_H
} // namespace parquet
#endif // PARQUET_COLUMN_LEVELS_H
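For context on the LevelEncoder / LevelDecoder API reformatted above, here is a hedged usage sketch modeled on the calls in levels-test.cc; the include path and the exact Encode/Decode signatures are assumptions taken from the calls visible in this diff rather than from the full headers.

// Usage sketch only: mirrors how levels-test.cc drives the API. Signatures
// and the include path are assumed from this diff, not verified against the
// full source tree.
#include <cstdint>
#include <vector>

#include "parquet/column/levels.h"

int main() {
  using parquet::Encoding;
  using parquet::LevelDecoder;
  using parquet::LevelEncoder;

  const int16_t max_level = 1;  // e.g. an optional, non-repeated column
  std::vector<int16_t> levels = {0, 1, 1, 0, 1};
  const int num_levels = static_cast<int>(levels.size());

  // Leave room for the RLE length prefix at the front of the buffer, as the
  // tests do, then write encoder.len() into that slot after encoding.
  std::vector<uint8_t> bytes(sizeof(uint32_t) + 256, 0);

  LevelEncoder encoder;
  encoder.Init(Encoding::RLE, max_level, num_levels,
      bytes.data() + sizeof(uint32_t), static_cast<int>(bytes.size()));
  int encoded = encoder.Encode(num_levels, levels.data());
  reinterpret_cast<uint32_t*>(bytes.data())[0] = encoder.len();

  // The decoder is handed the whole buffer, length prefix included, as in the
  // RLE path of the tests.
  LevelDecoder decoder;
  decoder.SetData(Encoding::RLE, max_level, num_levels, bytes.data());
  std::vector<int16_t> decoded(num_levels);
  int decoded_count = decoder.Decode(num_levels, decoded.data());

  return (encoded == num_levels && decoded_count == num_levels) ? 0 : 1;
}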