Skip to content

Commit 5daa59e

Browse files
committed
Add cuda-benchmark module
Change-Id: Idd283920e38682a85f42621cc5123ad08837cd28
1 parent 1cf1196 commit 5daa59e

File tree

5 files changed

+112
-8
lines changed

5 files changed

+112
-8
lines changed

cpp/src/arrow/gpu/CMakeLists.txt

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,19 @@ install(
114114
FILES "${CMAKE_CURRENT_BINARY_DIR}/arrow-gpu.pc"
115115
DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/")
116116

117+
set(ARROW_GPU_TEST_LINK_LIBS
118+
arrow_gpu_shared
119+
${ARROW_TEST_LINK_LIBS})
120+
117121
if (ARROW_BUILD_TESTS)
118-
set(ARROW_GPU_TEST_LINK_LIBS
119-
arrow_gpu_shared
120-
${ARROW_TEST_LINK_LIBS})
121122
ADD_ARROW_CUDA_TEST(cuda-test
122123
STATIC_LINK_LIBS ${ARROW_GPU_TEST_LINK_LIBS})
123124
endif()
125+
126+
# Build the cuda-benchmark executable only when ARROW_BUILD_BENCHMARKS is set.
# It is linked against arrow_gpu_shared, gtest and ${ARROW_BENCHMARK_LINK_LIBS}.
if (ARROW_BUILD_BENCHMARKS)
127+
cuda_add_executable(cuda-benchmark cuda-benchmark.cc)
128+
target_link_libraries(cuda-benchmark
129+
arrow_gpu_shared
130+
gtest
131+
${ARROW_BENCHMARK_LINK_LIBS})
132+
endif()
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include "benchmark/benchmark.h"
19+
20+
#include <cstdint>
21+
#include <memory>
22+
#include <vector>
23+
24+
#include "arrow/array.h"
25+
#include "arrow/memory_pool.h"
26+
#include "arrow/test-util.h"
27+
28+
#include "arrow/gpu/cuda_memory.h"
29+
30+
namespace arrow {
31+
namespace gpu {
32+
33+
constexpr int64_t kGpuNumber = 0;
34+
35+
// Shared body of the buffered and unbuffered CudaBufferWriter benchmarks.
//
// Allocates a device buffer of `total_bytes` via
// AllocateCudaBuffer(kGpuNumber, ...), obtains a host buffer of the same size
// from test::MakeRandomBytePoolBuffer, and on every benchmark iteration seeks
// the writer back to offset 0 and writes the whole host buffer in pieces of
// at most `chunksize` bytes.
//
// \param state benchmark state that drives the timing loop
// \param total_bytes number of bytes written per iteration
// \param chunksize upper bound on the bytes passed to one Write() call;
//        must be positive or the inner loop never makes progress
// \param buffer_size forwarded to SetBufferSize() when positive; when it is
//        zero or negative SetBufferSize() is not called
static void CudaBufferWriterBenchmark(benchmark::State& state, const int64_t total_bytes,
                                      const int64_t chunksize,
                                      const int64_t buffer_size) {
  std::shared_ptr<CudaBuffer> device_buffer;
  ABORT_NOT_OK(AllocateCudaBuffer(kGpuNumber, total_bytes, &device_buffer));
  CudaBufferWriter writer(device_buffer);

  if (buffer_size > 0) {
    ABORT_NOT_OK(writer.SetBufferSize(buffer_size));
  }

  std::shared_ptr<PoolBuffer> buffer;
  // Abort on failure like every other fallible call in this function, rather
  // than using the test-assertion macro ASSERT_OK, which would merely return
  // from the benchmark and leave it without any measurement.
  ABORT_NOT_OK(
      test::MakeRandomBytePoolBuffer(total_bytes, default_memory_pool(), &buffer));

  const uint8_t* host_data = buffer->data();
  while (state.KeepRunning()) {
    int64_t bytes_written = 0;
    // Rewind so that each iteration overwrites the device buffer from the start.
    ABORT_NOT_OK(writer.Seek(0));
    while (bytes_written < total_bytes) {
      // The final chunk may be shorter than chunksize.
      int64_t bytes_to_write = std::min(chunksize, total_bytes - bytes_written);
      ABORT_NOT_OK(writer.Write(host_data + bytes_written, bytes_to_write));
      bytes_written += bytes_to_write;
    }
  }
  state.SetBytesProcessed(int64_t(state.iterations()) * total_bytes);
}
61+
62+
// Benchmark writing 128MB per iteration through a CudaBufferWriter whose
// buffer size is set to 8MB; the chunk size of each Write() call is taken
// from state.range(0).
static void BM_Writer_Buffered(benchmark::State& state) {
  const int64_t total_size = 1 << 27;         // 128MB
  const int64_t write_buffer_size = 1 << 23;  // 8MB
  const int64_t chunk = state.range(0);

  CudaBufferWriterBenchmark(state, total_size, chunk, write_buffer_size);
}
71+
72+
// Benchmark writing 128MB per iteration through a CudaBufferWriter without
// setting a buffer size (0 is passed as buffer_size); the chunk size of each
// Write() call is taken from state.range(0).
static void BM_Writer_Unbuffered(benchmark::State& state) {
  const int64_t total_size = 1 << 27;  // 128MB
  const int64_t chunk = state.range(0);

  CudaBufferWriterBenchmark(state, total_size, chunk, /*buffer_size=*/0);
}
77+
78+
// Vary chunk write size from 256 bytes to 64K
79+
BENCHMARK(BM_Writer_Buffered)
80+
->RangeMultiplier(16)
81+
->Range(1 << 8, 1 << 16)
82+
->MinTime(1.0)
83+
->UseRealTime();
84+
85+
BENCHMARK(BM_Writer_Unbuffered)
86+
->RangeMultiplier(4)
87+
->RangeMultiplier(16)
88+
->Range(1 << 8, 1 << 16)
89+
->MinTime(1.0)
90+
->UseRealTime();
91+
92+
} // namespace gpu
93+
} // namespace arrow

cpp/src/arrow/gpu/cuda-test.cc

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,8 @@ TEST_F(TestCudaBufferWriter, EdgeCases) {
157157
ASSERT_OK(writer_->Write(host_data + 510, 390));
158158
ASSERT_OK(writer_->Write(host_data + 900, 100));
159159

160-
// Seek to beginning flushes buffered bytes
161-
ASSERT_OK(writer_->Seek(0));
162-
ASSERT_OK(writer_->Tell(&position));
163-
ASSERT_EQ(0, position);
164-
ASSERT_EQ(0, writer_->num_bytes_buffered());
160+
// Close flushes
161+
ASSERT_OK(writer_->Close());
165162

166163
// Check that everything was written
167164
AssertCudaBufferEquals(*device_buffer_, host_data, 1000);

cpp/src/arrow/gpu/cuda_memory.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ CudaBufferWriter::CudaBufferWriter(const std::shared_ptr<CudaBuffer>& buffer)
100100

101101
// Destructor with an empty body: no Flush() or Close() call is made here.
// NOTE(review): confirm callers flush or close before destruction when bytes
// may still be buffered.
CudaBufferWriter::~CudaBufferWriter() {}
102102

103+
// Close the writer by delegating to Flush() and returning its Status.
// NOTE(review): the base-class Close() is not invoked here; confirm that is
// intended.
Status CudaBufferWriter::Close() { return Flush(); }
104+
103105
Status CudaBufferWriter::Flush() {
104106
if (buffer_size_ > 0 && buffer_position_ > 0) {
105107
// Only need to flush when the write has been buffered

cpp/src/arrow/gpu/cuda_memory.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ class ARROW_EXPORT CudaBufferWriter : public io::FixedSizeBufferWriter {
9494
explicit CudaBufferWriter(const std::shared_ptr<CudaBuffer>& buffer);
9595
~CudaBufferWriter();
9696

97+
/// \brief Close writer and flush buffered bytes to GPU
98+
Status Close() override;
99+
97100
/// \brief Flush buffered bytes to GPU
98101
Status Flush() override;
99102

0 commit comments

Comments
 (0)