Add cuda-benchmark module

wesm · wesm · commit 5daa59eb72eb · 2017-08-22T15:39:37.000-04:00
Change-Id: Idd283920e38682a85f42621cc5123ad08837cd28
diff --git a/cpp/src/arrow/gpu/CMakeLists.txt b/cpp/src/arrow/gpu/CMakeLists.txt
@@ -114,10 +114,19 @@ install(
   FILES "${CMAKE_CURRENT_BINARY_DIR}/arrow-gpu.pc"
   DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/")
 
+set(ARROW_GPU_TEST_LINK_LIBS
+  arrow_gpu_shared
+  ${ARROW_TEST_LINK_LIBS})
+
 if (ARROW_BUILD_TESTS)
-  set(ARROW_GPU_TEST_LINK_LIBS
-    arrow_gpu_shared
-    ${ARROW_TEST_LINK_LIBS})
   ADD_ARROW_CUDA_TEST(cuda-test
     STATIC_LINK_LIBS ${ARROW_GPU_TEST_LINK_LIBS})
 endif()
+
+if (ARROW_BUILD_BENCHMARKS)
+  cuda_add_executable(cuda-benchmark cuda-benchmark.cc)
+  target_link_libraries(cuda-benchmark
+    arrow_gpu_shared
+    gtest  # NOTE(review): presumably needed by arrow/test-util.h helpers - confirm
+    ${ARROW_BENCHMARK_LINK_LIBS})
+endif()
diff --git a/cpp/src/arrow/gpu/cuda-benchmark.cc b/cpp/src/arrow/gpu/cuda-benchmark.cc
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/gpu/cuda_memory.h"
+#include "arrow/memory_pool.h"
+#include "arrow/test-util.h"
+
+namespace arrow {
+namespace gpu {
+
+constexpr int64_t kGpuNumber = 0;
+
+// Time writing total_bytes of random host data to a CUDA buffer in chunksize pieces.
+// SetBufferSize(buffer_size) is only applied to the writer when buffer_size is positive.
+static void CudaBufferWriterBenchmark(benchmark::State& state, const int64_t total_bytes,
+                                      const int64_t chunksize,
+                                      const int64_t buffer_size) {
+  std::shared_ptr<CudaBuffer> device_buffer;
+  ABORT_NOT_OK(AllocateCudaBuffer(kGpuNumber, total_bytes, &device_buffer));
+  CudaBufferWriter writer(device_buffer);
+  if (buffer_size > 0) {
+    ABORT_NOT_OK(writer.SetBufferSize(buffer_size));
+  }
+
+  std::shared_ptr<PoolBuffer> buffer;
+  ABORT_NOT_OK(
+      test::MakeRandomBytePoolBuffer(total_bytes, default_memory_pool(), &buffer));
+  const uint8_t* host_data = buffer->data();
+  while (state.KeepRunning()) {
+    // Rewind so that every iteration starts writing from position 0
+    ABORT_NOT_OK(writer.Seek(0));
+    for (int64_t offset = 0; offset < total_bytes; offset += chunksize) {
+      const int64_t nbytes = std::min(chunksize, total_bytes - offset);
+      ABORT_NOT_OK(writer.Write(host_data + offset, nbytes));
+    }
+  }
+  state.SetBytesProcessed(int64_t(state.iterations()) * total_bytes);
+}
+
+static void BM_Writer_Buffered(benchmark::State& state) {
+  // Chunk size comes from the registered benchmark range argument
+  const int64_t chunk_bytes = state.range(0);
+
+  // Transfer 128MB in total, with the writer's buffer size set to 8MB
+  constexpr int64_t total_bytes = int64_t(1) << 27;
+  constexpr int64_t staging_bytes = int64_t(1) << 23;
+  CudaBufferWriterBenchmark(state, total_bytes, chunk_bytes, staging_bytes);
+}
+
+static void BM_Writer_Unbuffered(benchmark::State& state) {
+  // 128MB written in state.range(0)-byte chunks; buffer_size 0 skips SetBufferSize
+  constexpr int64_t total_bytes = int64_t(1) << 27;
+  CudaBufferWriterBenchmark(state, total_bytes, state.range(0), 0);
+}
+
+// Vary chunk write size from 256 bytes to 64K
+BENCHMARK(BM_Writer_Buffered)
+    ->RangeMultiplier(16)
+    ->Range(1 << 8, 1 << 16)
+    ->MinTime(1.0)
+    ->UseRealTime();
+
+// Same chunk sizes as the buffered case so that the two are directly comparable
+BENCHMARK(BM_Writer_Unbuffered)
+    ->RangeMultiplier(16)
+    ->Range(1 << 8, 1 << 16)
+    ->MinTime(1.0)
+    ->UseRealTime();
+
+}  // namespace gpu
+}  // namespace arrow
diff --git a/cpp/src/arrow/gpu/cuda-test.cc b/cpp/src/arrow/gpu/cuda-test.cc
@@ -157,11 +157,8 @@ TEST_F(TestCudaBufferWriter, EdgeCases) {
   ASSERT_OK(writer_->Write(host_data + 510, 390));
   ASSERT_OK(writer_->Write(host_data + 900, 100));
 
-  // Seek to beginning flushes buffered bytes
-  ASSERT_OK(writer_->Seek(0));
-  ASSERT_OK(writer_->Tell(&position));
-  ASSERT_EQ(0, position);
-  ASSERT_EQ(0, writer_->num_bytes_buffered());
+  // Close flushes
+  ASSERT_OK(writer_->Close());
 
   // Check that everything was written
   AssertCudaBufferEquals(*device_buffer_, host_data, 1000);
diff --git a/cpp/src/arrow/gpu/cuda_memory.cc b/cpp/src/arrow/gpu/cuda_memory.cc
@@ -100,6 +100,8 @@ CudaBufferWriter::CudaBufferWriter(const std::shared_ptr<CudaBuffer>& buffer)
 
 CudaBufferWriter::~CudaBufferWriter() {}
 
+Status CudaBufferWriter::Close() { return Flush(); }  // flush only, no base Close()
+
 Status CudaBufferWriter::Flush() {
   if (buffer_size_ > 0 && buffer_position_ > 0) {
     // Only need to flush when the write has been buffered
diff --git a/cpp/src/arrow/gpu/cuda_memory.h b/cpp/src/arrow/gpu/cuda_memory.h
@@ -94,6 +94,9 @@ class ARROW_EXPORT CudaBufferWriter : public io::FixedSizeBufferWriter {
   explicit CudaBufferWriter(const std::shared_ptr<CudaBuffer>& buffer);
   ~CudaBufferWriter();
 
+  /// \brief Close writer and flush buffered bytes to GPU
+  Status Close() override;
+
   /// \brief Flush buffered bytes to GPU
   Status Flush() override;