Skip to content

Commit

Permalink
PARQUET-512: Add Google benchmark for performance testing
Browse files Browse the repository at this point in the history
Based on @emkornfield 's work in apache#29

Author: Uwe L. Korn <uwelk@xhochy.com>

Closes apache#93 from xhochy/parquet-512 and squashes the following commits:

ebc10d2 [Uwe L. Korn] Fix signed/unsigned comparison
684dbc6 [Uwe L. Korn] Fix c&p bug
5a8e239 [Uwe L. Korn] Build benchmarks but don't run them in Travis
e7dc34c [Uwe L. Korn] Remove Arrow references
f6b02da [Uwe L. Korn] PARQUET-512: Add Google benchmark for performance testing

Change-Id: Icb0d5d7d3886503c74b89a5fc517932a84cfc1b9
  • Loading branch information
xhochy authored and wesm committed May 2, 2016
1 parent 7a29f2f commit ef51f32
Show file tree
Hide file tree
Showing 6 changed files with 247 additions and 0 deletions.
2 changes: 2 additions & 0 deletions cpp/src/parquet/column/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,5 @@ ADD_PARQUET_TEST(column-reader-test)
ADD_PARQUET_TEST(column-writer-test)
ADD_PARQUET_TEST(levels-test)
ADD_PARQUET_TEST(scanner-test)

ADD_PARQUET_BENCHMARK(column-io-benchmark)
119 changes: 119 additions & 0 deletions cpp/src/parquet/column/column-io-benchmark.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "benchmark/benchmark.h"

#include "parquet/file/reader-internal.h"
#include "parquet/file/writer-internal.h"
#include "parquet/column/reader.h"
#include "parquet/column/writer.h"
#include "parquet/util/input.h"

namespace parquet {

using format::ColumnChunk;
using schema::PrimitiveNode;

namespace benchmark {

std::unique_ptr<Int64Writer> BuildWriter(int64_t output_size, OutputStream* dst,
ColumnChunk* metadata, ColumnDescriptor* schema) {
std::unique_ptr<SerializedPageWriter> pager(
new SerializedPageWriter(dst, Compression::UNCOMPRESSED, metadata));
return std::unique_ptr<Int64Writer>(
new Int64Writer(schema, std::move(pager), output_size));
}

// Creates a ColumnDescriptor for a single INT64 primitive node named "int64"
// with the requested repetition.
//
// The second and third constructor arguments are derived from `repetition`:
// the first is set for anything that is not REQUIRED, the second only for
// REPEATED.
// NOTE(review): these presumably act as the maximum definition / repetition
// levels (0 or 1) -- confirm against the ColumnDescriptor constructor.
std::shared_ptr<ColumnDescriptor> Int64Schema(Repetition::type repetition) {
  const bool not_required = (repetition != Repetition::REQUIRED);
  const bool is_repeated = (repetition == Repetition::REPEATED);
  auto int64_node = PrimitiveNode::Make("int64", repetition, Type::INT64);
  return std::make_shared<ColumnDescriptor>(int64_node, not_required, is_repeated);
}

// Benchmark: write state.range_x() INT64 values as a single batch.
//
// Every value is 128, every definition level is 1 and every repetition level
// is 0. Each timed iteration creates a fresh in-memory output stream and a
// fresh writer, writes the whole batch and closes the writer, so stream and
// writer construction are part of the measured time.
template <Repetition::type repetition>
static void BM_WriteInt64Column(::benchmark::State& state) {
  const auto num_values = state.range_x();

  format::ColumnChunk chunk_metadata;
  std::vector<int64_t> data(num_values, 128);
  std::vector<int16_t> def_levels(num_values, 1);
  std::vector<int16_t> rep_levels(num_values, 0);
  std::shared_ptr<ColumnDescriptor> descr = Int64Schema(repetition);

  while (state.KeepRunning()) {
    InMemoryOutputStream sink;
    std::unique_ptr<Int64Writer> column_writer =
        BuildWriter(num_values, &sink, &chunk_metadata, descr.get());
    column_writer->WriteBatch(
        data.size(), def_levels.data(), rep_levels.data(), data.data());
    column_writer->Close();
  }
}

// Register the write benchmark for each repetition type, with column sizes
// ranging from 1024 to 65536 values.
BENCHMARK_TEMPLATE(BM_WriteInt64Column, Repetition::REQUIRED)->Range(1024, 65536);

BENCHMARK_TEMPLATE(BM_WriteInt64Column, Repetition::OPTIONAL)->Range(1024, 65536);

BENCHMARK_TEMPLATE(BM_WriteInt64Column, Repetition::REPEATED)->Range(1024, 65536);

std::unique_ptr<Int64Reader> BuildReader(
std::shared_ptr<Buffer>& buffer, ColumnDescriptor* schema) {
std::unique_ptr<InMemoryInputStream> source(new InMemoryInputStream(buffer));
std::unique_ptr<SerializedPageReader> page_reader(
new SerializedPageReader(std::move(source), Compression::UNCOMPRESSED));
return std::unique_ptr<Int64Reader>(new Int64Reader(schema, std::move(page_reader)));
}

// Benchmark: read back an INT64 column in fixed-size batches.
//
// Setup (not timed): state.range_x() values (all 128, definition level 1,
// repetition level 0) are written once into an in-memory stream and the
// resulting buffer is kept.
//
// Timed loop: each iteration builds a fresh reader over that buffer and calls
// ReadBatch with a batch size of state.range_y() until as many values as were
// written have been read.
template <Repetition::type repetition>
static void BM_ReadInt64Column(::benchmark::State& state) {
  format::ColumnChunk metadata;
  std::vector<int64_t> values(state.range_x(), 128);
  std::vector<int16_t> definition_levels(state.range_x(), 1);
  std::vector<int16_t> repetition_levels(state.range_x(), 0);
  std::shared_ptr<ColumnDescriptor> schema = Int64Schema(repetition);

  // Produce the serialized column chunk that every iteration will read.
  InMemoryOutputStream dst;
  std::unique_ptr<Int64Writer> writer =
      BuildWriter(state.range_x(), &dst, &metadata, schema.get());
  writer->WriteBatch(
      values.size(), definition_levels.data(), repetition_levels.data(), values.data());
  writer->Close();

  std::shared_ptr<Buffer> src = dst.GetBuffer();
  // Output buffers are sized to the batch size and reused across iterations.
  std::vector<int64_t> values_out(state.range_y());
  std::vector<int16_t> definition_levels_out(state.range_y());
  std::vector<int16_t> repetition_levels_out(state.range_y());
  while (state.KeepRunning()) {
    std::unique_ptr<Int64Reader> reader = BuildReader(src, schema.get());
    int64_t values_read = 0;
    for (size_t i = 0; i < values.size(); i += values_read) {
      reader->ReadBatch(values_out.size(), definition_levels_out.data(),
          repetition_levels_out.data(), values_out.data(), &values_read);
      // The loop only advances by the number of values just read. If a batch
      // makes no progress (e.g. the reader is exhausted early), stop instead
      // of spinning forever inside the timed region.
      if (values_read <= 0) { break; }
    }
  }
}

// Register the read benchmark for each repetition type: column sizes from
// 1024 to 65536 values (range_x) crossed with read batch sizes from 1 to 1024
// (range_y).
BENCHMARK_TEMPLATE(BM_ReadInt64Column, Repetition::REQUIRED)
    ->RangePair(1024, 65536, 1, 1024);

BENCHMARK_TEMPLATE(BM_ReadInt64Column, Repetition::OPTIONAL)
    ->RangePair(1024, 65536, 1, 1024);

BENCHMARK_TEMPLATE(BM_ReadInt64Column, Repetition::REPEATED)
    ->RangePair(1024, 65536, 1, 1024);

} // namespace benchmark

} // namespace parquet
1 change: 1 addition & 0 deletions cpp/src/parquet/encodings/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ install(FILES
DESTINATION include/parquet/encodings)

ADD_PARQUET_TEST(encoding-test)
ADD_PARQUET_BENCHMARK(encoding-benchmark)
87 changes: 87 additions & 0 deletions cpp/src/parquet/encodings/encoding-benchmark.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "benchmark/benchmark.h"

#include "parquet/encodings/plain-encoding.h"

namespace parquet {

namespace benchmark {

// Benchmark: plain-encode state.range_x() booleans into a fresh in-memory
// stream on every iteration. The encoder itself is constructed once, outside
// the timed loop.
static void BM_PlainEncodingBoolean(::benchmark::State& state) {
  // The fill value 64 converts to `true`, so every element is true.
  std::vector<bool> input(state.range_x(), 64);
  PlainEncoder<BooleanType> plain_encoder(nullptr);

  while (state.KeepRunning()) {
    InMemoryOutputStream sink;
    plain_encoder.Encode(input, input.size(), &sink);
  }
}

BENCHMARK(BM_PlainEncodingBoolean)->Range(1024, 65536);

// Benchmark: plain-decode state.range_x() booleans.
//
// Setup (not timed): the values are encoded once and the encoded buffer is
// kept. Timed loop: each iteration constructs a decoder, points it at the
// encoded buffer and decodes every value into `output`.
static void BM_PlainDecodingBoolean(::benchmark::State& state) {
  // The fill value 64 converts to `true`, so every element is true.
  std::vector<bool> values(state.range_x(), 64);
  // Owned by a smart pointer so the array is released even if encoding or
  // decoding throws (the previous raw new[]/delete[] pair would have leaked).
  std::unique_ptr<bool[]> output(new bool[state.range_x()]);
  PlainEncoder<BooleanType> encoder(nullptr);
  InMemoryOutputStream dst;
  encoder.Encode(values, values.size(), &dst);
  std::shared_ptr<Buffer> buf = dst.GetBuffer();

  while (state.KeepRunning()) {
    PlainDecoder<BooleanType> decoder(nullptr);
    decoder.SetData(values.size(), buf->data(), buf->size());
    decoder.Decode(output.get(), values.size());
  }
}

BENCHMARK(BM_PlainDecodingBoolean)->Range(1024, 65536);

// Benchmark: plain-encode state.range_x() int64 values (all 64) into a fresh
// in-memory stream on every iteration. The encoder itself is constructed
// once, outside the timed loop.
static void BM_PlainEncodingInt64(::benchmark::State& state) {
  std::vector<int64_t> input(state.range_x(), 64);
  PlainEncoder<Int64Type> plain_encoder(nullptr);

  while (state.KeepRunning()) {
    InMemoryOutputStream sink;
    plain_encoder.Encode(input.data(), input.size(), &sink);
  }
}

BENCHMARK(BM_PlainEncodingInt64)->Range(1024, 65536);

// Benchmark: plain-decode state.range_x() int64 values.
//
// Setup (not timed): the values (all 64) are encoded once and the encoded
// buffer is kept. Timed loop: each iteration constructs a decoder, points it
// at the encoded buffer and decodes every value. The decoded output is
// written back into the same vector that supplied the input.
static void BM_PlainDecodingInt64(::benchmark::State& state) {
  std::vector<int64_t> data(state.range_x(), 64);

  // One-time encode to produce the bytes the decoder will consume.
  InMemoryOutputStream sink;
  PlainEncoder<Int64Type> plain_encoder(nullptr);
  plain_encoder.Encode(data.data(), data.size(), &sink);
  std::shared_ptr<Buffer> encoded = sink.GetBuffer();

  while (state.KeepRunning()) {
    PlainDecoder<Int64Type> plain_decoder(nullptr);
    plain_decoder.SetData(data.size(), encoded->data(), encoded->size());
    plain_decoder.Decode(data.data(), data.size());
  }
}

BENCHMARK(BM_PlainDecodingInt64)->Range(1024, 65536);

} // namespace benchmark

} // namespace parquet
14 changes: 14 additions & 0 deletions cpp/src/parquet/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,20 @@ if(PARQUET_BUILD_TESTS)
endif()
endif()

# Shared entry point for all benchmark executables; only built when
# benchmarks are enabled.
if (PARQUET_BUILD_BENCHMARKS)
  add_library(parquet_benchmark_main benchmark_main.cc)

  # gbenchmark is linked on every platform.
  target_link_libraries(parquet_benchmark_main
    gbenchmark
  )

  # pthread is additionally linked everywhere except on Apple platforms.
  if (NOT APPLE)
    target_link_libraries(parquet_benchmark_main
      pthread
    )
  endif()
endif()

ADD_PARQUET_TEST(bit-util-test)
ADD_PARQUET_TEST(buffer-test)
ADD_PARQUET_TEST(input-output-test)
Expand Down
24 changes: 24 additions & 0 deletions cpp/src/parquet/util/benchmark_main.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "benchmark/benchmark.h"

int main(int argc, char** argv) {
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
return 0;
}

0 comments on commit ef51f32

Please sign in to comment.