
Gradient based sampling for external memory mode on GPU #5093

Merged Feb 4, 2020 — 70 commits.

Changes shown below are from 13 commits.

Commits (70)
51b50da
add skeleton gradient-based sampler
rongou Nov 20, 2019
ebd67c1
change gpu_hist to use sampling in external memory mode
rongou Nov 21, 2019
a2c446c
add failing tests
rongou Nov 21, 2019
9e91871
wip: poisson sampling
rongou Nov 21, 2019
ed322cc
sample and scale gradient pairs
rongou Nov 23, 2019
9377014
calculate max number of sample rows
rongou Dec 3, 2019
f26415c
add sampler constructor
rongou Dec 4, 2019
f2dd928
collect all pages in memory if they fit
rongou Dec 4, 2019
2e5494c
optimize finalize position
rongou Dec 5, 2019
7fca606
done with sampling
rongou Dec 6, 2019
f661df0
add some docs
rongou Dec 6, 2019
6513f8a
Merge branch 'master' into gradient-based-sampler
rongou Dec 6, 2019
14361af
formatting
rongou Dec 6, 2019
70276f1
explicit constructor
rongou Dec 6, 2019
d7770b4
no need for gmock
rongou Dec 6, 2019
6a29c38
test ellpackpage copy and compact
rongou Dec 6, 2019
967ff16
use subsample to control gradient sampler
rongou Dec 6, 2019
494b179
Merge branch 'master' into gradient-based-sampler
rongou Dec 9, 2019
ecd2419
Merge branch 'master' into gradient-based-sampler
rongou Dec 10, 2019
73ba5af
implement sequential poisson sampling
rongou Dec 10, 2019
d2f2f69
fix compact bug
rongou Dec 11, 2019
827c988
fix cpp test
rongou Dec 11, 2019
9350ec5
Merge branch 'master' into gradient-based-sampler
rongou Dec 11, 2019
4482181
Merge branch 'master' into gradient-based-sampler
rongou Dec 11, 2019
857c9c7
finally working
rongou Dec 12, 2019
7de1620
add uniform sampling
rongou Dec 13, 2019
d3a3dbf
better estimate of sample rows
rongou Dec 14, 2019
780f5e6
Merge branch 'master' into gradient-based-sampler
rongou Dec 16, 2019
f728623
more agressive memory allocation
rongou Dec 16, 2019
c206880
add some documentation
rongou Dec 16, 2019
5652b3b
use mvs
rongou Dec 20, 2019
8ba43fb
Merge branch 'master' into gradient-based-sampler
rongou Dec 20, 2019
a5b57b1
fix windows
rongou Dec 20, 2019
62a9ead
address review comments
rongou Dec 20, 2019
ca64413
Merge branch 'master' into gradient-based-sampler
rongou Jan 7, 2020
2409ecb
add sampling method param
rongou Jan 8, 2020
3b16e66
gradient-based sampling in in-memory mode
rongou Jan 9, 2020
0745686
Merge branch 'master' into gradient-based-sampler
rongou Jan 9, 2020
e41496b
fix clang tidy warning
rongou Jan 9, 2020
fd458c6
add tests for in-core
rongou Jan 10, 2020
6e3a7fa
remove unused code
rongou Jan 10, 2020
cb608a5
Merge branch 'master' into gradient-based-sampler
rongou Jan 11, 2020
4daffbf
relax tests
rongou Jan 11, 2020
5dd01e7
Merge branch 'master' into gradient-based-sampler
rongou Jan 13, 2020
35ac5a7
Merge branch 'master' into gradient-based-sampler
rongou Jan 14, 2020
9aaa9ba
add test to verify sampling
rongou Jan 16, 2020
83fb791
Merge branch 'master' into gradient-based-sampler
rongou Jan 16, 2020
0eccff9
Merge branch 'master' into gradient-based-sampler
rongou Jan 16, 2020
10fce05
inverse probability weighting estimation
rongou Jan 18, 2020
55bbe74
combine weight calculation and gpair scaling
rongou Jan 21, 2020
85d2ec9
Merge branch 'master' into gradient-based-sampler
rongou Jan 21, 2020
be163b7
fix tests
rongou Jan 21, 2020
3a0fd99
review feedback
rongou Jan 21, 2020
c9eb5c9
Merge branch 'master' into gradient-based-sampler
rongou Jan 22, 2020
9680a69
calculate threshold
rongou Jan 23, 2020
c93f20d
tweak test
rongou Jan 23, 2020
d684148
Merge branch 'master' into gradient-based-sampler
rongou Jan 23, 2020
09864ed
more accurate threshold
rongou Jan 24, 2020
7675a9e
Merge branch 'master' into gradient-based-sampler
rongou Jan 24, 2020
f8b7dbf
tweak test tolerance
rongou Jan 24, 2020
3aaae89
wip: refactor the code to disintangle sampling methods
rongou Jan 24, 2020
a83f13e
Merge branch 'master' into gradient-based-sampler
rongou Jan 27, 2020
5cbca76
done with refactoring
rongou Jan 27, 2020
7cf9110
fix tests
rongou Jan 28, 2020
71b21c6
Merge branch 'master' into gradient-based-sampler
rongou Jan 28, 2020
3a734a9
release device memory
rongou Jan 28, 2020
55b36f2
remove scaling in uniform sampling
rongou Jan 28, 2020
97ae33d
Merge branch 'master' into gradient-based-sampler
rongou Jan 28, 2020
df96394
Merge branch 'master' into gradient-based-sampler
rongou Jan 31, 2020
7fd7c31
revert rabit
rongou Jan 31, 2020
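The commit log references minimal variance sampling ("use mvs"), threshold calculation, and inverse probability weighting. A rough CPU sketch of the idea (not the PR's actual implementation — the helper names, the regularization constant `lambda`, and passing the threshold in directly are illustrative assumptions):

```cpp
#include <algorithm>
#include <cmath>

// Combined gradient magnitude used for MVS-style sampling; lambda is a
// regularization constant (assumed form: sqrt(g^2 + lambda * h^2)).
double CombinedGradient(double grad, double hess, double lambda) {
  return std::sqrt(grad * grad + lambda * hess * hess);
}

// Keep probability for a row given a threshold: rows with large combined
// gradients are always kept (p = 1); the rest are kept proportionally.
double SamplingProbability(double combined, double threshold) {
  return std::min(1.0, combined / threshold);
}

// Gradients of kept rows are scaled by 1/p so the expected gradient sum
// over the sample is unbiased (inverse probability weighting).
double ReweightedGradient(double grad, double probability) {
  return grad / probability;
}
```

Judging from the "calculate threshold" and "use subsample to control gradient sampler" commits, the threshold in the PR is derived from the gradient distribution so that the expected sample size matches the `subsample` parameter.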
18 changes: 18 additions & 0 deletions include/xgboost/base.h
@@ -193,6 +193,24 @@ class GradientPairInternal {
return g;
}

XGBOOST_DEVICE GradientPairInternal<T> operator*(float multiplier) const {
GradientPairInternal<T> g;
g.grad_ = grad_ * multiplier;
g.hess_ = hess_ * multiplier;
return g;
}

XGBOOST_DEVICE GradientPairInternal<T> operator/(float divider) const {
GradientPairInternal<T> g;
g.grad_ = grad_ / divider;
[Member] Be careful for 0 division.

[Contributor, author] Turns out we don't really need these. Removed.

g.hess_ = hess_ / divider;
return g;
}

XGBOOST_DEVICE bool operator==(const GradientPairInternal<T> &rhs) const {
return grad_ == rhs.grad_ && hess_ == rhs.hess_;
}

XGBOOST_DEVICE explicit GradientPairInternal(int value) {
*this = GradientPairInternal<T>(static_cast<float>(value),
static_cast<float>(value));
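The `operator*` added above lets a gradient pair's grad and hess be scaled together, e.g. by the inverse of a row's sampling probability. A minimal sketch with an assumed stand-in struct (the review thread notes the division operator was later removed, so only multiplication is modeled):

```cpp
// Minimal stand-in for GradientPairInternal<float>; assumption: only the
// grad/hess fields and the scaling operator from the diff are modeled.
struct GradientPair {
  float grad;
  float hess;
  GradientPair operator*(float multiplier) const {
    return {grad * multiplier, hess * multiplier};
  }
};

// Scale a sampled pair by 1/p so the expected gradient sum over the
// sample matches the full data set.
GradientPair InverseProbabilityScale(const GradientPair& pair, float probability) {
  return pair * (1.0f / probability);
}
```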
25 changes: 22 additions & 3 deletions src/common/compressed_iterator.h
@@ -63,7 +63,7 @@ class CompressedBufferWriter {
* \fn static size_t CompressedBufferWriter::CalculateBufferSize(int
* num_elements, int num_symbols)
*
- * \brief Calculates number of bytes requiredm for a given number of elements
+ * \brief Calculates number of bytes required for a given number of elements
* and a symbol range.
*
* \author Rory
@@ -74,7 +74,6 @@ class CompressedBufferWriter {
*
* \return The calculated buffer size.
*/

static size_t CalculateBufferSize(size_t num_elements, size_t num_symbols) {
const int bits_per_byte = 8;
size_t compressed_size = static_cast<size_t>(std::ceil(
@@ -83,6 +82,26 @@
return compressed_size + detail::kPadding;
}

/**
* \brief Calculates maximum number of rows that can fit in a given number of bytes.
* \param num_bytes Number of bytes.
* \param num_symbols Max number of symbols (alphabet size).
* \param row_stride Number of features per row.
* \param extra_bytes_per_row Extra number of bytes needed per row.
* \return The calculated number of rows.
*/
static size_t CalculateMaxRows(size_t num_bytes,
size_t num_symbols,
size_t row_stride,
size_t extra_bytes_per_row) {
const int bits_per_byte = 8;
size_t usable_bits = (num_bytes - detail::kPadding) * bits_per_byte;
size_t extra_bits = extra_bytes_per_row * bits_per_byte;
size_t symbol_bits = row_stride * detail::SymbolBits(num_symbols);
size_t num_rows = static_cast<size_t>(std::floor(usable_bits / (extra_bits + symbol_bits)));
return num_rows;
}

template <typename T>
void WriteSymbol(CompressedByteT *buffer, T symbol, size_t offset) {
const int bits_per_byte = 8;
@@ -188,7 +207,7 @@ class CompressedIterator {

public:
CompressedIterator() : buffer_(nullptr), symbol_bits_(0), offset_(0) {}
-  CompressedIterator(CompressedByteT *buffer, int num_symbols)
+  CompressedIterator(CompressedByteT *buffer, size_t num_symbols)
: buffer_(buffer), offset_(0) {
symbol_bits_ = detail::SymbolBits(num_symbols);
}
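The buffer-size arithmetic in `CalculateBufferSize`/`CalculateMaxRows` can be checked on the CPU with a standalone sketch. Assumptions: the `kPadding` value and `SymbolBits = ceil(log2(num_symbols))` mirror the `detail` helpers, which are not shown in this diff:

```cpp
#include <cmath>
#include <cstddef>

constexpr std::size_t kPadding = 5;  // assumption: stands in for detail::kPadding

// Bits needed to encode one symbol out of num_symbols (assumed to match
// detail::SymbolBits: ceil(log2(num_symbols))).
std::size_t SymbolBits(std::size_t num_symbols) {
  return static_cast<std::size_t>(std::ceil(std::log2(num_symbols)));
}

// Mirrors CalculateMaxRows from the diff: how many rows fit in num_bytes,
// given the per-row symbol bits plus any extra per-row bytes.
std::size_t CalculateMaxRows(std::size_t num_bytes, std::size_t num_symbols,
                             std::size_t row_stride,
                             std::size_t extra_bytes_per_row) {
  const std::size_t bits_per_byte = 8;
  std::size_t usable_bits = (num_bytes - kPadding) * bits_per_byte;
  std::size_t bits_per_row =
      extra_bytes_per_row * bits_per_byte + row_stride * SymbolBits(num_symbols);
  return usable_bits / bits_per_row;  // integer division already floors
}
```

For example, with 256 symbols (8 bits each) and a row stride of 10, each row costs 80 bits, so 1000 usable bytes hold 100 rows.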
94 changes: 87 additions & 7 deletions src/data/ellpack_page.cu
@@ -64,6 +64,20 @@ __global__ void CompressBinEllpackKernel(
wr.AtomicWriteSymbol(buffer, bin, (irow + base_row) * row_stride + ifeature);
}

// Construct an ELLPACK matrix with the given number of empty rows.
EllpackPageImpl::EllpackPageImpl(int device, EllpackInfo info, size_t n_rows) {
monitor_.Init("ellpack_page");
dh::safe_cuda(cudaSetDevice(device));

matrix.info = info;
matrix.base_rowid = 0;
matrix.n_rows = n_rows;

monitor_.StartCuda("InitCompressedData");
InitCompressedData(device, n_rows);
monitor_.StopCuda("InitCompressedData");
}

// Construct an ELLPACK matrix in memory.
EllpackPageImpl::EllpackPageImpl(DMatrix* dmat, const BatchParam& param) {
monitor_.Init("ellpack_page");
@@ -96,6 +110,76 @@
monitor_.StopCuda("BinningCompression");
}

// A functor that copies the data from one EllpackPage to another.
struct CopyPageFunction {
common::CompressedBufferWriter cbw;
common::CompressedByteT* dst_data_d;
common::CompressedIterator<uint32_t> src_iterator_d;
// The number of elements to skip.
size_t offset;

CopyPageFunction(EllpackPageImpl* dst, EllpackPageImpl* src, size_t offset)
: cbw{dst->matrix.info.NumSymbols()},
dst_data_d{dst->gidx_buffer.data()},
src_iterator_d{src->gidx_buffer.data(), src->matrix.info.NumSymbols()},
offset(offset) {}

__device__ void operator()(size_t i) {
cbw.AtomicWriteSymbol(dst_data_d, src_iterator_d[i], i + offset);
}
};

// Copy the data from the given EllpackPage to the current page.
size_t EllpackPageImpl::Copy(int device, EllpackPageImpl* page, size_t offset) {
monitor_.StartCuda("Copy");
size_t num_elements = page->matrix.n_rows * page->matrix.info.row_stride;
CHECK_EQ(matrix.info.row_stride, page->matrix.info.row_stride);
CHECK_EQ(matrix.info.NumSymbols(), page->matrix.info.NumSymbols());
CHECK_GE(matrix.n_rows * matrix.info.row_stride, offset + num_elements);
dh::LaunchN(device, num_elements, CopyPageFunction(this, page, offset));
monitor_.StopCuda("Copy");
return num_elements;
}

// A functor that compacts the rows from one EllpackPage into another.
struct CompactPageFunction {
common::CompressedBufferWriter cbw;
common::CompressedByteT* dst_data_d;
common::CompressedIterator<uint32_t> src_iterator_d;
common::Span<size_t> row_indexes;
size_t base_rowid;
size_t row_stride;

CompactPageFunction(EllpackPageImpl* dst, EllpackPageImpl* src, common::Span<size_t> row_indexes)
: cbw{dst->matrix.info.NumSymbols()},
dst_data_d{dst->gidx_buffer.data()},
src_iterator_d{src->gidx_buffer.data(), src->matrix.info.NumSymbols()},
row_indexes(row_indexes),
base_rowid{src->matrix.base_rowid},
row_stride{src->matrix.info.row_stride} {}

__device__ void operator()(size_t i) {
size_t row = base_rowid + i;
size_t row_index = row_indexes[row];
if (row_index == SIZE_MAX) return;
size_t dst_offset = row_index * row_stride;
size_t src_offset = i * row_stride;
for (size_t j = 0; j < row_stride; j++) {
cbw.AtomicWriteSymbol(dst_data_d, src_iterator_d[src_offset + j], dst_offset + j);
}
}
};

// Compacts the data from the given EllpackPage into the current page.
void EllpackPageImpl::Compact(int device, EllpackPageImpl* page, common::Span<size_t> row_indexes) {
[RAMitchell marked this conversation as resolved.]
monitor_.StartCuda("Compact");
CHECK_EQ(matrix.info.row_stride, page->matrix.info.row_stride);
CHECK_EQ(matrix.info.NumSymbols(), page->matrix.info.NumSymbols());
CHECK_LE(page->matrix.base_rowid + page->matrix.n_rows, row_indexes.size());
dh::LaunchN(device, page->matrix.n_rows, CompactPageFunction(this, page, row_indexes));
monitor_.StopCuda("Compact");
}

// Construct an EllpackInfo based on histogram cuts of features.
EllpackInfo::EllpackInfo(int device,
bool is_dense,
@@ -123,7 +207,7 @@ void EllpackPageImpl::InitInfo(int device,

// Initialize the buffer to stored compressed features.
void EllpackPageImpl::InitCompressedData(int device, size_t num_rows) {
-  size_t num_symbols = matrix.info.n_bins + 1;
+  size_t num_symbols = matrix.info.NumSymbols();

// Required buffer size for storing data matrix in ELLPack format.
size_t compressed_size_bytes = common::CompressedBufferWriter::CalculateBufferSize(
@@ -149,7 +233,6 @@ void EllpackPageImpl::CreateHistIndices(int device,

const auto& offset_vec = row_batch.offset.ConstHostVector();

-  int num_symbols = matrix.info.n_bins + 1;
// bin and compress entries in batches of rows
size_t gpu_batch_nrows = std::min(
dh::TotalMemory(device) / (16 * row_stride * sizeof(Entry)),
@@ -193,7 +276,7 @@
1);
dh::LaunchKernel {grid3, block3} (
CompressBinEllpackKernel,
-      common::CompressedBufferWriter(num_symbols),
+      common::CompressedBufferWriter(matrix.info.NumSymbols()),
gidx_buffer.data(),
row_ptrs.data().get(),
entries_d.data().get(),
@@ -254,11 +337,9 @@ void EllpackPageImpl::CompressSparsePage(int device) {

// Return the memory cost for storing the compressed features.
size_t EllpackPageImpl::MemCostBytes() const {
-  size_t num_symbols = matrix.info.n_bins + 1;

// Required buffer size for storing data matrix in ELLPack format.
size_t compressed_size_bytes = common::CompressedBufferWriter::CalculateBufferSize(
-      matrix.info.row_stride * matrix.n_rows, num_symbols);
+      matrix.info.row_stride * matrix.n_rows, matrix.info.NumSymbols());
return compressed_size_bytes;
}

@@ -280,5 +361,4 @@ void EllpackPageImpl::InitDevice(int device, EllpackInfo info) {

device_initialized_ = true;
}

} // namespace xgboost
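The `Copy`/`Compact` kernels above operate on the compressed ELLPACK buffer. Their row-mapping logic can be illustrated with a plain CPU version — a sketch under the assumption of a dense row-major `int` buffer instead of the bit-packed one, with `SIZE_MAX` marking rows that were sampled out:

```cpp
#include <cstdint>
#include <vector>

// CPU sketch of EllpackPageImpl::Compact's row mapping: row i of src is
// written to row row_indexes[i] of dst, or skipped when marked SIZE_MAX.
std::vector<int> CompactRows(const std::vector<int>& src,
                             const std::vector<std::size_t>& row_indexes,
                             std::size_t row_stride,
                             std::size_t num_dst_rows) {
  std::vector<int> dst(num_dst_rows * row_stride, 0);
  for (std::size_t i = 0; i < row_indexes.size(); ++i) {
    std::size_t row_index = row_indexes[i];
    if (row_index == SIZE_MAX) continue;  // row was sampled out
    for (std::size_t j = 0; j < row_stride; ++j) {
      dst[row_index * row_stride + j] = src[i * row_stride + j];
    }
  }
  return dst;
}
```

In the CUDA version each source element is handled by a thread rather than a loop over rows, but the index computation is the same.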
29 changes: 29 additions & 0 deletions src/data/ellpack_page.cuh
@@ -71,6 +71,10 @@ struct EllpackInfo {
size_t row_stride,
const common::HistogramCuts& hmat,
dh::BulkAllocator* ba);

inline size_t NumSymbols() const {
[Member] No need for inline.

[Contributor, author] Done.

return n_bins + 1;
}
};

/** \brief Struct for accessing and manipulating an ellpack matrix on the
@@ -200,6 +204,14 @@ class EllpackPageImpl {
*/
EllpackPageImpl() = default;

/*!
* \brief Constructor from an existing EllpackInfo.
*
* This is used in the sampling case. The ELLPACK page is constructed from an existing EllpackInfo
* and the given number of rows.
*/
explicit EllpackPageImpl(int device, EllpackInfo info, size_t n_rows);

/*!
* \brief Constructor from an existing DMatrix.
*
@@ -208,6 +220,23 @@ class EllpackPageImpl {
*/
explicit EllpackPageImpl(DMatrix* dmat, const BatchParam& parm);

/*! \brief Copy the elements of the given ELLPACK page into this page.
*
* @param device The GPU device to use.
* @param page The ELLPACK page to copy from.
* @param offset The number of elements to skip before copying.
* @returns The number of elements copied.
*/
size_t Copy(int device, EllpackPageImpl* page, size_t offset);

/*! \brief Compact the given ELLPACK page into the current page.
*
* @param device The GPU device to use.
* @param page The ELLPACK page to compact from.
* @param row_indexes Row indexes for the compacted page.
*/
void Compact(int device, EllpackPageImpl* page, common::Span<size_t> row_indexes);

/*!
* \brief Initialize the EllpackInfo contained in the EllpackMatrix.
*