From b1573d4828600a185e6b8025d693c824e6d08326 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 9 Jun 2023 11:51:35 +0800 Subject: [PATCH 01/51] use mmap for external memory. --- src/data/sparse_page_source.h | 90 ++++++++++++++++++++++++----------- 1 file changed, 63 insertions(+), 27 deletions(-) diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index 088f1e98c3d6..69ebd3770057 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -5,24 +5,27 @@ #ifndef XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_ #define XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_ +#include // for open, O_RDONLY +#include // for mmap, munmap +#include // for close + #include // std::min -#include -#include -#include #include -#include #include #include - -#include "xgboost/base.h" -#include "xgboost/data.h" - -#include "adapter.h" -#include "sparse_page_writer.h" -#include "proxy_dmatrix.h" +#include +#include +#include +#include #include "../common/common.h" +#include "../common/io.h" #include "../common/timer.h" +#include "adapter.h" +#include "proxy_dmatrix.h" +#include "sparse_page_writer.h" +#include "xgboost/base.h" +#include "xgboost/data.h" namespace xgboost { namespace data { @@ -40,6 +43,7 @@ struct Cache { std::string format; // offset into binary cache file. std::vector offset; + std::vector bytes; Cache(bool w, std::string n, std::string fmt) : written{w}, name{std::move(n)}, format{std::move(fmt)} { @@ -54,6 +58,10 @@ struct Cache { std::string ShardName() { return ShardName(this->name, this->format); } + void Push(std::size_t n_bytes) { + bytes.push_back(n_bytes); + offset.push_back(n_bytes); + } // The write is completed. void Commit() { @@ -95,7 +103,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { uint32_t n_batches_ {0}; std::shared_ptr cache_info_; - std::unique_ptr fo_; + // std::unique_ptr fo_; using Ring = std::vector>>; // A ring storing futures to data. Since the DMatrix iterator is forward only, so we @@ -107,8 +115,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { if (!cache_info_->written) { return false; } - if (fo_) { - fo_.reset(); // flush the data to disk. + if (ring_->empty()) { ring_->resize(n_batches_); } // An heuristic for number of pre-fetched batches. We can make it part of BatchParam @@ -126,20 +133,39 @@ class SparsePageSourceImpl : public BatchIteratorImpl { } auto const *self = this; // make sure it's const CHECK_LT(fetch_it, cache_info_->offset.size()); - ring_->at(fetch_it) = std::async(std::launch::async, [fetch_it, self]() { + dmlc::OMPException exec; + ring_->at(fetch_it) = std::async(std::launch::async, [&exec, fetch_it, self]() { + auto page = std::make_shared(); + common::Timer timer; timer.Start(); std::unique_ptr> fmt{CreatePageFormat("raw")}; auto n = self->cache_info_->ShardName(); - size_t offset = self->cache_info_->offset.at(fetch_it); - std::unique_ptr fi{dmlc::SeekStream::CreateForRead(n.c_str())}; - fi->Seek(offset); - CHECK_EQ(fi->Tell(), offset); - auto page = std::make_shared(); - CHECK(fmt->Read(page.get(), fi.get())); + + std::uint64_t offset = self->cache_info_->offset.at(fetch_it); + std::uint64_t length = self->cache_info_->bytes.at(fetch_it); + + // mmap + auto fd = open(n.c_str(), O_RDONLY); + CHECK_GE(fd, 0) << "Failed to open:" << n << ". " << strerror(errno); + auto ptr = mmap64(nullptr, length, PROT_READ, MAP_PRIVATE, fd, offset); + if (ptr == MAP_FAILED) { + LOG(FATAL) << "Failed to map: " << n << ". " << strerror(errno) << ". " + << "len:" << length << " off:" << offset << " it:" << fetch_it << std::endl; + } + + // read page + auto fi = common::MemoryFixSizeBuffer(ptr, length); + CHECK(fmt->Read(page.get(), &fi)); LOG(INFO) << "Read a page in " << timer.ElapsedSeconds() << " seconds."; + + // cleanup + CHECK_NE(close(fd), -1) << "Faled to close: " << n << ". " << strerror(errno); + CHECK_NE(munmap(ptr, length), -1) << "Faled to munmap: " << n << ". " << strerror(errno); + return page; }); + exec.Rethrow(); } CHECK_EQ(std::count_if(ring_->cbegin(), ring_->cend(), [](auto const& f) { return f.valid(); }), n_prefetch_batches) @@ -153,16 +179,26 @@ class SparsePageSourceImpl : public BatchIteratorImpl { common::Timer timer; timer.Start(); std::unique_ptr> fmt{CreatePageFormat("raw")}; - if (!fo_) { - auto n = cache_info_->ShardName(); - fo_.reset(dmlc::Stream::Create(n.c_str(), "w")); - } - auto bytes = fmt->Write(*page_, fo_.get()); + + auto name = cache_info_->ShardName(); + std::unique_ptr fo{dmlc::Stream::Create(name.c_str(), "a")}; + + auto bytes = fmt->Write(*page_, fo.get()); + + // align for mmap + auto page_size = getpagesize(); + CHECK(page_size != 0 && page_size % 2 == 0) << "Failed to get page size on the current system."; + auto n = bytes / page_size; + auto padded = (n + 1) * page_size; + auto padding = padded - bytes; + std::vector padding_bytes(padding, 0); + fo->Write(padding_bytes.data(), padding_bytes.size()); + timer.Stop(); LOG(INFO) << static_cast(bytes) / 1024.0 / 1024.0 << " MB written in " << timer.ElapsedSeconds() << " seconds."; - cache_info_->offset.push_back(bytes); + cache_info_->Push(padded); } virtual void Fetch() = 0; From 23a89b0150c453832e9e27d962892cc2c4ca1c46 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 9 Jun 2023 12:25:08 +0800 Subject: [PATCH 02/51] reduce size. --- src/data/sparse_page_source.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index 69ebd3770057..53b27c049662 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -103,7 +103,6 @@ class SparsePageSourceImpl : public BatchIteratorImpl { uint32_t n_batches_ {0}; std::shared_ptr cache_info_; - // std::unique_ptr fo_; using Ring = std::vector>>; // A ring storing futures to data. Since the DMatrix iterator is forward only, so we @@ -120,7 +119,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { } // An heuristic for number of pre-fetched batches. We can make it part of BatchParam // to let user adjust number of pre-fetched batches when needed. - uint32_t constexpr kPreFetch = 4; + uint32_t constexpr kPreFetch = 3; size_t n_prefetch_batches = std::min(kPreFetch, n_batches_); CHECK_GT(n_prefetch_batches, 0) << "total batches:" << n_batches_; @@ -149,10 +148,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { auto fd = open(n.c_str(), O_RDONLY); CHECK_GE(fd, 0) << "Failed to open:" << n << ". " << strerror(errno); auto ptr = mmap64(nullptr, length, PROT_READ, MAP_PRIVATE, fd, offset); - if (ptr == MAP_FAILED) { - LOG(FATAL) << "Failed to map: " << n << ". " << strerror(errno) << ". " - << "len:" << length << " off:" << offset << " it:" << fetch_it << std::endl; - } + CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << n << ". " << strerror(errno); // read page auto fi = common::MemoryFixSizeBuffer(ptr, length); From fa5d4602344941bca0210e1d605c059b8f45d688 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 9 Jun 2023 12:42:47 +0800 Subject: [PATCH 03/51] cleanup. --- src/data/sparse_page_source.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index 53b27c049662..f0a1a3f0f7f5 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -132,8 +132,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { } auto const *self = this; // make sure it's const CHECK_LT(fetch_it, cache_info_->offset.size()); - dmlc::OMPException exec; - ring_->at(fetch_it) = std::async(std::launch::async, [&exec, fetch_it, self]() { + ring_->at(fetch_it) = std::async(std::launch::async, [fetch_it, self]() { auto page = std::make_shared(); common::Timer timer; @@ -156,12 +155,11 @@ class SparsePageSourceImpl : public BatchIteratorImpl { LOG(INFO) << "Read a page in " << timer.ElapsedSeconds() << " seconds."; // cleanup - CHECK_NE(close(fd), -1) << "Faled to close: " << n << ". " << strerror(errno); CHECK_NE(munmap(ptr, length), -1) << "Faled to munmap: " << n << ". " << strerror(errno); + CHECK_NE(close(fd), -1) << "Faled to close: " << n << ". " << strerror(errno); return page; }); - exec.Rethrow(); } CHECK_EQ(std::count_if(ring_->cbegin(), ring_->cend(), [](auto const& f) { return f.valid(); }), n_prefetch_batches) From 9ebd4ef26d34353443dd8f06d8e9572070a8e629 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 9 Jun 2023 22:51:24 +0800 Subject: [PATCH 04/51] abstract into a dmlc stream. --- doc/tutorials/external_memory.rst | 12 ++++++++ rabit/include/rabit/internal/io.h | 2 +- src/common/io.h | 49 ++++++++++++++++++++++++++++++- src/data/sparse_page_source.h | 20 +++---------- 4 files changed, 65 insertions(+), 18 deletions(-) diff --git a/doc/tutorials/external_memory.rst b/doc/tutorials/external_memory.rst index 006d63b43975..5001bbfea0af 100644 --- a/doc/tutorials/external_memory.rst +++ b/doc/tutorials/external_memory.rst @@ -8,6 +8,18 @@ The feature is still experimental and not yet ready for production use. In this we will introduce both methods. Please note that training on data from external memory is not supported by ``exact`` tree method. +.. warning:: + + The implementation of external memory uses ``mmap`` and is not tested against errors + like disconnected network devices. (`SIGBUS`) + +.. note:: + + When externel memory is used, the CPU training performance is IO bounded. Meaning, the + training speed almost exclusively determined by the disk IO speed. For GPU, please read + on and see the gradient-based sampling with external memory. During benchmark, we used + a NVME connected to a PCIE slot, the performance is "usable" with ``hist`` on CPU. + ************* Data Iterator ************* diff --git a/rabit/include/rabit/internal/io.h b/rabit/include/rabit/internal/io.h index 978eebd8a126..64633b2155aa 100644 --- a/rabit/include/rabit/internal/io.h +++ b/rabit/include/rabit/internal/io.h @@ -60,7 +60,7 @@ struct MemoryFixSizeBuffer : public SeekStream { return curr_ptr_ == buffer_size_; } - private: + protected: /*! \brief in memory buffer */ char *p_buffer_; /*! \brief current pointer */ diff --git a/src/common/io.h b/src/common/io.h index 2dd593c60b6f..f359d2781677 100644 --- a/src/common/io.h +++ b/src/common/io.h @@ -9,10 +9,15 @@ #define XGBOOST_COMMON_IO_H_ #include +#include // for open, O_RDONLY #include -#include +#include // for mmap, munmap +#include // for close +#include + #include #include +#include #include "common.h" @@ -127,6 +132,48 @@ inline std::string ReadAll(std::string const &path) { return content; } +/** + * \brief Private mmap file, copy-on-write + */ +class PrivateMmapStream : public MemoryFixSizeBuffer { + std::int32_t fd_; + std::string path_; + + void* Open(StringView path, bool read_only, std::size_t offset, std::size_t length) { + fd_ = open(path.c_str(), O_RDONLY); + CHECK_GE(fd_, 0) << "Failed to open:" << path << ". " << strerror(errno); + + char* ptr{nullptr}; + int prot{PROT_READ}; + if (!read_only) { + prot |= PROT_WRITE; + } +#if defined(__linux__) + ptr = reinterpret_cast(mmap64(nullptr, length, prot, MAP_PRIVATE, fd_, offset)); +#elif defined(__APPLE__) + CHECK_LE(offset, std::numeric_limits::max()) + << "File size has exceeded the limit on macos."; + ptr = reinterpret_cast(mmap(nullptr, length, prot, MAP_PRIVATE, fd_, offset)); +#else + // fixme: not yet implemented + ptr = reinterpret_cast(mmap(nullptr, length, prot, MAP_PRIVATE, fd_, offset)); +#endif // defined(__linux__) + CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << strerror(errno); + return ptr; + } + + public: + explicit PrivateMmapStream(std::string path, bool read_only, std::size_t offset, + std::size_t length) + : MemoryFixSizeBuffer{Open(StringView{path}, read_only, offset, length), length}, + path_{path} {} + + ~PrivateMmapStream() override { + CHECK_NE(munmap(p_buffer_, buffer_size_), -1) + << "Faled to munmap." << path_ << ". " << strerror(errno); + CHECK_NE(close(fd_), -1) << "Faled to close: " << path_ << ". " << strerror(errno); + } +}; } // namespace common } // namespace xgboost #endif // XGBOOST_COMMON_IO_H_ diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index f0a1a3f0f7f5..938ce130ca05 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -123,9 +123,9 @@ class SparsePageSourceImpl : public BatchIteratorImpl { size_t n_prefetch_batches = std::min(kPreFetch, n_batches_); CHECK_GT(n_prefetch_batches, 0) << "total batches:" << n_batches_; - size_t fetch_it = count_; + std::size_t fetch_it = count_; - for (size_t i = 0; i < n_prefetch_batches; ++i, ++fetch_it) { + for (std::size_t i = 0; i < n_prefetch_batches; ++i, ++fetch_it) { fetch_it %= n_batches_; // ring if (ring_->at(fetch_it).valid()) { continue; @@ -143,21 +143,9 @@ class SparsePageSourceImpl : public BatchIteratorImpl { std::uint64_t offset = self->cache_info_->offset.at(fetch_it); std::uint64_t length = self->cache_info_->bytes.at(fetch_it); - // mmap - auto fd = open(n.c_str(), O_RDONLY); - CHECK_GE(fd, 0) << "Failed to open:" << n << ". " << strerror(errno); - auto ptr = mmap64(nullptr, length, PROT_READ, MAP_PRIVATE, fd, offset); - CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << n << ". " << strerror(errno); - - // read page - auto fi = common::MemoryFixSizeBuffer(ptr, length); - CHECK(fmt->Read(page.get(), &fi)); + auto fi = std::make_unique(n, true, offset, length); + CHECK(fmt->Read(page.get(), fi.get())); LOG(INFO) << "Read a page in " << timer.ElapsedSeconds() << " seconds."; - - // cleanup - CHECK_NE(munmap(ptr, length), -1) << "Faled to munmap: " << n << ". " << strerror(errno); - CHECK_NE(close(fd), -1) << "Faled to close: " << n << ". " << strerror(errno); - return page; }); } From 3832fd58717276b2387e7912319a60a5d5e4d947 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 9 Jun 2023 23:02:47 +0800 Subject: [PATCH 05/51] cleanup. --- src/data/sparse_page_source.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index 938ce130ca05..b81320176a58 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -1,14 +1,10 @@ -/*! - * Copyright 2014-2022 by XGBoost Contributors +/** + * Copyright 2014-2023, XGBoost Contributors * \file sparse_page_source.h */ #ifndef XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_ #define XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_ -#include // for open, O_RDONLY -#include // for mmap, munmap -#include // for close - #include // std::min #include #include @@ -19,7 +15,7 @@ #include #include "../common/common.h" -#include "../common/io.h" +#include "../common/io.h" // for PrivateMmapStream #include "../common/timer.h" #include "adapter.h" #include "proxy_dmatrix.h" From 18c35440241366f77e6401ff171227e586cd2cc0 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 10 Jun 2023 00:08:43 +0800 Subject: [PATCH 06/51] Cleanup. --- doc/tutorials/external_memory.rst | 6 ++-- src/common/io.cc | 49 +++++++++++++++++++++++++------ src/common/io.h | 46 ++++++++--------------------- src/data/sparse_page_source.h | 6 ++-- 4 files changed, 60 insertions(+), 47 deletions(-) diff --git a/doc/tutorials/external_memory.rst b/doc/tutorials/external_memory.rst index 5001bbfea0af..bfa173384c8d 100644 --- a/doc/tutorials/external_memory.rst +++ b/doc/tutorials/external_memory.rst @@ -16,9 +16,9 @@ not supported by ``exact`` tree method. .. note:: When externel memory is used, the CPU training performance is IO bounded. Meaning, the - training speed almost exclusively determined by the disk IO speed. For GPU, please read - on and see the gradient-based sampling with external memory. During benchmark, we used - a NVME connected to a PCIE slot, the performance is "usable" with ``hist`` on CPU. + training speed is almost exclusively determined by the disk IO speed. For GPU, please + read on and see the gradient-based sampling with external memory. During benchmark, we + used a NVME connected to a PCIE slot, the performance is "usable" with ``hist`` on CPU. ************* Data Iterator diff --git a/src/common/io.cc b/src/common/io.cc index da3a75d6512b..2986864d56ee 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -1,24 +1,25 @@ -/*! - * Copyright (c) by XGBoost Contributors 2019-2022 +/** + * Copyright 2019-2023, by XGBoost Contributors */ #if defined(__unix__) +#include // for open, O_RDONLY +#include // for mmap, mmap64, munmap #include -#include -#include -#endif // defined(__unix__) +#include // for close +#endif // defined(__unix__) #include +#include // for errno +#include #include -#include #include +#include #include -#include -#include "xgboost/logging.h" #include "io.h" +#include "xgboost/logging.h" namespace xgboost { namespace common { - size_t PeekableInStream::Read(void* dptr, size_t size) { size_t nbuffer = buffer_.length() - buffer_ptr_; if (nbuffer == 0) return strm_->Read(dptr, size); @@ -155,5 +156,35 @@ std::string FileExtension(std::string fname, bool lower) { return ""; } } + +void* PrivateMmapStream::Open(StringView path, bool read_only, std::size_t offset, + std::size_t length) { + fd_ = open(path.c_str(), O_RDONLY); + CHECK_GE(fd_, 0) << "Failed to open:" << path << ". " << strerror(errno); + + char* ptr{nullptr}; + int prot{PROT_READ}; + if (!read_only) { + prot |= PROT_WRITE; + } +#if defined(__linux__) || defined(__GLIBC__) + ptr = reinterpret_cast(mmap64(nullptr, length, prot, MAP_PRIVATE, fd_, offset)); +#elif defined(_MSC_VER) + // fixme: not yet implemented + ptr = reinterpret_cast(mmap(nullptr, length, prot, MAP_PRIVATE, fd_, offset)); +#else + CHECK_LE(offset, std::numeric_limits::max()) + << "File size has exceeded the limit on the current system."; + ptr = reinterpret_cast(mmap(nullptr, length, prot, MAP_PRIVATE, fd_, offset)); +#endif // defined(__linux__) + CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << strerror(errno); + return ptr; +} + +PrivateMmapStream::~PrivateMmapStream() { + CHECK_NE(munmap(p_buffer_, buffer_size_), -1) + << "Faled to munmap." << path_ << ". " << strerror(errno); + CHECK_NE(close(fd_), -1) << "Faled to close: " << path_ << ". " << strerror(errno); +} } // namespace common } // namespace xgboost diff --git a/src/common/io.h b/src/common/io.h index f359d2781677..b3bf0cfe3973 100644 --- a/src/common/io.h +++ b/src/common/io.h @@ -1,5 +1,5 @@ -/*! - * Copyright by XGBoost Contributors 2014-2022 +/** + * Copyright 2014-2023, XGBoost Contributors * \file io.h * \brief general stream interface for serialization, I/O * \author Tianqi Chen @@ -9,15 +9,12 @@ #define XGBOOST_COMMON_IO_H_ #include -#include // for open, O_RDONLY #include -#include // for mmap, munmap -#include // for close #include #include #include -#include +#include // for string #include "common.h" @@ -139,40 +136,23 @@ class PrivateMmapStream : public MemoryFixSizeBuffer { std::int32_t fd_; std::string path_; - void* Open(StringView path, bool read_only, std::size_t offset, std::size_t length) { - fd_ = open(path.c_str(), O_RDONLY); - CHECK_GE(fd_, 0) << "Failed to open:" << path << ". " << strerror(errno); - - char* ptr{nullptr}; - int prot{PROT_READ}; - if (!read_only) { - prot |= PROT_WRITE; - } -#if defined(__linux__) - ptr = reinterpret_cast(mmap64(nullptr, length, prot, MAP_PRIVATE, fd_, offset)); -#elif defined(__APPLE__) - CHECK_LE(offset, std::numeric_limits::max()) - << "File size has exceeded the limit on macos."; - ptr = reinterpret_cast(mmap(nullptr, length, prot, MAP_PRIVATE, fd_, offset)); -#else - // fixme: not yet implemented - ptr = reinterpret_cast(mmap(nullptr, length, prot, MAP_PRIVATE, fd_, offset)); -#endif // defined(__linux__) - CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << strerror(errno); - return ptr; - } + void* Open(StringView path, bool read_only, std::size_t offset, std::size_t length); public: + /** + * @brief Construct a private mmap stream. + * + * @param path File path. + * @param read_only See the `prot` parameter of `mmap` for details. + * @param offset See the `offset` parameter of `mmap` for details. + * @param length See the `length` parameter of `mmap` for details. + */ explicit PrivateMmapStream(std::string path, bool read_only, std::size_t offset, std::size_t length) : MemoryFixSizeBuffer{Open(StringView{path}, read_only, offset, length), length}, path_{path} {} - ~PrivateMmapStream() override { - CHECK_NE(munmap(p_buffer_, buffer_size_), -1) - << "Faled to munmap." << path_ << ". " << strerror(errno); - CHECK_NE(close(fd_), -1) << "Faled to close: " << path_ << ". " << strerror(errno); - } + ~PrivateMmapStream() override; }; } // namespace common } // namespace xgboost diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index b81320176a58..bd331625474c 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -5,7 +5,9 @@ #ifndef XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_ #define XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_ -#include // std::min +#include // for getpagesize + +#include // for min #include #include #include @@ -164,7 +166,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { auto bytes = fmt->Write(*page_, fo.get()); // align for mmap - auto page_size = getpagesize(); + decltype(bytes) page_size = getpagesize(); CHECK(page_size != 0 && page_size % 2 == 0) << "Failed to get page size on the current system."; auto n = bytes / page_size; auto padded = (n + 1) * page_size; From a04dc395b778085172ff079003b61078e635728f Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 10 Jun 2023 00:30:18 +0800 Subject: [PATCH 07/51] macos. --- src/common/io.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/io.cc b/src/common/io.cc index 2986864d56ee..a560c94a1d89 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -1,7 +1,7 @@ /** * Copyright 2019-2023, by XGBoost Contributors */ -#if defined(__unix__) +#if defined(__unix__) || defined(__APPLE__) #include // for open, O_RDONLY #include // for mmap, mmap64, munmap #include From 1e0405eed6bebdc903789fc126cdf2899b26988d Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 12 Jun 2023 02:29:48 +0800 Subject: [PATCH 08/51] debug. --- demo/guide-python/external_memory.py | 7 +++---- src/data/ellpack_page_raw_format.cu | 2 ++ src/data/ellpack_page_source.cu | 1 + src/data/sparse_page_source.h | 3 ++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/demo/guide-python/external_memory.py b/demo/guide-python/external_memory.py index cc5527611717..2f79111867f8 100644 --- a/demo/guide-python/external_memory.py +++ b/demo/guide-python/external_memory.py @@ -75,7 +75,7 @@ def reset(self) -> None: def main(tmpdir: str) -> xgboost.Booster: # generate some random data for demo - files = make_batches(1024, 17, 31, tmpdir) + files = make_batches(2 ** 16, 17, 31, tmpdir) it = Iterator(files) # For non-data arguments, specify it here once instead of passing them by the `next` # method. @@ -85,12 +85,11 @@ def main(tmpdir: str) -> xgboost.Booster: # Other tree methods including ``hist`` and ``gpu_hist`` also work, see tutorial in # doc for details. booster = xgboost.train( - {"tree_method": "approx", "max_depth": 2}, + {"tree_method": "gpu_hist", "max_depth": 6, "sampling_method": "gradient_based", "subsample": 0.5}, Xy, evals=[(Xy, "Train")], - num_boost_round=10, + num_boost_round=2, ) - return booster if __name__ == "__main__": diff --git a/src/data/ellpack_page_raw_format.cu b/src/data/ellpack_page_raw_format.cu index 2f54b91c9bbc..445b08699523 100644 --- a/src/data/ellpack_page_raw_format.cu +++ b/src/data/ellpack_page_raw_format.cu @@ -28,6 +28,7 @@ class EllpackPageRawFormat : public SparsePageFormat { if (!fi->Read(&impl->base_rowid)) { return false; } + std::cout << "impl brd:" << impl->base_rowid << std::endl; return true; } @@ -41,6 +42,7 @@ class EllpackPageRawFormat : public SparsePageFormat { bytes += sizeof(impl->is_dense); fo->Write(impl->row_stride); bytes += sizeof(impl->row_stride); + std::cout << "write brd:" << impl->base_rowid << std::endl; CHECK(!impl->gidx_buffer.ConstHostVector().empty()); fo->Write(impl->gidx_buffer.HostVector()); bytes += impl->gidx_buffer.ConstHostSpan().size_bytes() + sizeof(uint64_t); diff --git a/src/data/ellpack_page_source.cu b/src/data/ellpack_page_source.cu index fb414f4aef79..d5ffdc4aa377 100644 --- a/src/data/ellpack_page_source.cu +++ b/src/data/ellpack_page_source.cu @@ -24,6 +24,7 @@ void EllpackPageSource::Fetch() { auto *impl = this->page_->Impl(); *impl = EllpackPageImpl(device_, *cuts_, *csr, is_dense_, row_stride_, feature_types_); page_->SetBaseRowId(csr->base_rowid); + std::cout << "csr br:" << csr->base_rowid << std::endl; this->WriteCache(); } } diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index bd331625474c..398958391cf8 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -117,7 +117,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { } // An heuristic for number of pre-fetched batches. We can make it part of BatchParam // to let user adjust number of pre-fetched batches when needed. - uint32_t constexpr kPreFetch = 3; + uint32_t constexpr kPreFetch = 1; size_t n_prefetch_batches = std::min(kPreFetch, n_batches_); CHECK_GT(n_prefetch_batches, 0) << "total batches:" << n_batches_; @@ -140,6 +140,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { std::uint64_t offset = self->cache_info_->offset.at(fetch_it); std::uint64_t length = self->cache_info_->bytes.at(fetch_it); + // std::cout << typeid(S).name() << " offset:" << offset << " length:" << length << std::endl; auto fi = std::make_unique(n, true, offset, length); CHECK(fmt->Read(page.get(), fi.get())); From ba358afcb25b6477215310a31b26e295a9035e4a Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 12 Jun 2023 02:45:50 +0800 Subject: [PATCH 09/51] Fix. --- src/data/sparse_page_source.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index 398958391cf8..690ef4f864b4 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -162,7 +162,12 @@ class SparsePageSourceImpl : public BatchIteratorImpl { std::unique_ptr> fmt{CreatePageFormat("raw")}; auto name = cache_info_->ShardName(); - std::unique_ptr fo{dmlc::Stream::Create(name.c_str(), "a")}; + std::unique_ptr fo; + if (this->Iter() == 0) { + fo.reset(dmlc::Stream::Create(name.c_str(), "w")); + } else { + fo.reset(dmlc::Stream::Create(name.c_str(), "a")); + } auto bytes = fmt->Write(*page_, fo.get()); From a6202d0de002f6f7a47e31767df701a385a414d6 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 12 Jun 2023 02:46:18 +0800 Subject: [PATCH 10/51] cleanup. --- src/data/ellpack_page_raw_format.cu | 2 -- src/data/ellpack_page_source.cu | 1 - src/data/sparse_page_source.h | 3 +-- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/data/ellpack_page_raw_format.cu b/src/data/ellpack_page_raw_format.cu index 445b08699523..2f54b91c9bbc 100644 --- a/src/data/ellpack_page_raw_format.cu +++ b/src/data/ellpack_page_raw_format.cu @@ -28,7 +28,6 @@ class EllpackPageRawFormat : public SparsePageFormat { if (!fi->Read(&impl->base_rowid)) { return false; } - std::cout << "impl brd:" << impl->base_rowid << std::endl; return true; } @@ -42,7 +41,6 @@ class EllpackPageRawFormat : public SparsePageFormat { bytes += sizeof(impl->is_dense); fo->Write(impl->row_stride); bytes += sizeof(impl->row_stride); - std::cout << "write brd:" << impl->base_rowid << std::endl; CHECK(!impl->gidx_buffer.ConstHostVector().empty()); fo->Write(impl->gidx_buffer.HostVector()); bytes += impl->gidx_buffer.ConstHostSpan().size_bytes() + sizeof(uint64_t); diff --git a/src/data/ellpack_page_source.cu b/src/data/ellpack_page_source.cu index d5ffdc4aa377..fb414f4aef79 100644 --- a/src/data/ellpack_page_source.cu +++ b/src/data/ellpack_page_source.cu @@ -24,7 +24,6 @@ void EllpackPageSource::Fetch() { auto *impl = this->page_->Impl(); *impl = EllpackPageImpl(device_, *cuts_, *csr, is_dense_, row_stride_, feature_types_); page_->SetBaseRowId(csr->base_rowid); - std::cout << "csr br:" << csr->base_rowid << std::endl; this->WriteCache(); } } diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index 690ef4f864b4..b07178c496f3 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -117,7 +117,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { } // An heuristic for number of pre-fetched batches. We can make it part of BatchParam // to let user adjust number of pre-fetched batches when needed. - uint32_t constexpr kPreFetch = 1; + uint32_t constexpr kPreFetch = 4; size_t n_prefetch_batches = std::min(kPreFetch, n_batches_); CHECK_GT(n_prefetch_batches, 0) << "total batches:" << n_batches_; @@ -140,7 +140,6 @@ class SparsePageSourceImpl : public BatchIteratorImpl { std::uint64_t offset = self->cache_info_->offset.at(fetch_it); std::uint64_t length = self->cache_info_->bytes.at(fetch_it); - // std::cout << typeid(S).name() << " offset:" << offset << " length:" << length << std::endl; auto fi = std::make_unique(n, true, offset, length); CHECK(fmt->Read(page.get(), fi.get())); From 117fb97e663532108f93dc0affb706c75a5043aa Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 12 Jun 2023 21:45:27 +0800 Subject: [PATCH 11/51] Cleanup. --- demo/guide-python/external_memory.py | 11 ++++++----- doc/tutorials/external_memory.rst | 5 +++-- src/common/io.cc | 3 +-- src/data/sparse_page_source.cc | 20 +++++++++++++++++++ src/data/sparse_page_source.h | 29 ++++++++++++++-------------- 5 files changed, 44 insertions(+), 24 deletions(-) create mode 100644 src/data/sparse_page_source.cc diff --git a/demo/guide-python/external_memory.py b/demo/guide-python/external_memory.py index 2f79111867f8..fa54d184814a 100644 --- a/demo/guide-python/external_memory.py +++ b/demo/guide-python/external_memory.py @@ -75,21 +75,22 @@ def reset(self) -> None: def main(tmpdir: str) -> xgboost.Booster: # generate some random data for demo - files = make_batches(2 ** 16, 17, 31, tmpdir) + files = make_batches(1024, 17, 31, tmpdir) it = Iterator(files) # For non-data arguments, specify it here once instead of passing them by the `next` # method. missing = np.NaN Xy = xgboost.DMatrix(it, missing=missing, enable_categorical=False) - # Other tree methods including ``hist`` and ``gpu_hist`` also work, see tutorial in - # doc for details. + # Other tree methods including ``approx``, ``hist``, and ``gpu_hist`` are supported, + # see tutorial in doc for details. booster = xgboost.train( - {"tree_method": "gpu_hist", "max_depth": 6, "sampling_method": "gradient_based", "subsample": 0.5}, + {"tree_method": "hist", "max_depth": 4}, Xy, evals=[(Xy, "Train")], - num_boost_round=2, + num_boost_round=10, ) + return booster if __name__ == "__main__": diff --git a/doc/tutorials/external_memory.rst b/doc/tutorials/external_memory.rst index bfa173384c8d..31d2cf8657ac 100644 --- a/doc/tutorials/external_memory.rst +++ b/doc/tutorials/external_memory.rst @@ -10,8 +10,9 @@ not supported by ``exact`` tree method. .. warning:: - The implementation of external memory uses ``mmap`` and is not tested against errors - like disconnected network devices. (`SIGBUS`) + The implementation of external memory uses ``mmap`` and is not tested against system + errors like disconnected network devices (`SIGBUS`). In addition, Windows is not yet + supported. .. note:: diff --git a/src/common/io.cc b/src/common/io.cc index a560c94a1d89..f9756c63c42d 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -170,8 +170,7 @@ void* PrivateMmapStream::Open(StringView path, bool read_only, std::size_t offse #if defined(__linux__) || defined(__GLIBC__) ptr = reinterpret_cast(mmap64(nullptr, length, prot, MAP_PRIVATE, fd_, offset)); #elif defined(_MSC_VER) - // fixme: not yet implemented - ptr = reinterpret_cast(mmap(nullptr, length, prot, MAP_PRIVATE, fd_, offset)); + LOG(FATAL) << "External memory is not implemented for Windows."; #else CHECK_LE(offset, std::numeric_limits::max()) << "File size has exceeded the limit on the current system."; diff --git a/src/data/sparse_page_source.cc b/src/data/sparse_page_source.cc new file mode 100644 index 000000000000..0cc34900f067 --- /dev/null +++ b/src/data/sparse_page_source.cc @@ -0,0 +1,20 @@ +/** + * Copyright 2023, XGBoost Contributors + */ +#include "sparse_page_source.h" + +#include // for getpagesize + +namespace xgboost::data { +std::size_t PadPageForMMAP(std::size_t file_bytes, dmlc::Stream* fo) { + decltype(file_bytes) page_size = getpagesize(); + CHECK(page_size != 0 && page_size % 2 == 0) << "Failed to get page size on the current system."; + CHECK_NE(file_bytes, 0) << "Empty page encountered."; + auto n = file_bytes / page_size; + auto padded = (n + !!(file_bytes % page_size != 0)) * page_size; + auto padding = padded - file_bytes; + std::vector padding_bytes(padding, 0); + fo->Write(padding_bytes.data(), padding_bytes.size()); + return padded; +} +} // namespace xgboost::data diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index b07178c496f3..e27c9e918bdc 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -5,8 +5,6 @@ #ifndef XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_ #define XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_ -#include // for getpagesize - #include // for min #include #include @@ -34,6 +32,16 @@ inline void TryDeleteCacheFile(const std::string& file) { } } +/** + * @brief Pad the output file for a page to make it mmap compatible. + * + * @param file_bytes The size of the output file + * @param fo Stream used to write the file. + * + * @return The file size after being padded. + */ +std::size_t PadPageForMMAP(std::size_t file_bytes, dmlc::Stream* fo); + struct Cache { // whether the write to the cache is complete bool written; @@ -41,7 +49,6 @@ struct Cache { std::string format; // offset into binary cache file. std::vector offset; - std::vector bytes; Cache(bool w, std::string n, std::string fmt) : written{w}, name{std::move(n)}, format{std::move(fmt)} { @@ -57,7 +64,6 @@ struct Cache { return ShardName(this->name, this->format); } void Push(std::size_t n_bytes) { - bytes.push_back(n_bytes); offset.push_back(n_bytes); } @@ -139,7 +145,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { auto n = self->cache_info_->ShardName(); std::uint64_t offset = self->cache_info_->offset.at(fetch_it); - std::uint64_t length = self->cache_info_->bytes.at(fetch_it); + std::uint64_t length = self->cache_info_->offset.at(fetch_it + 1) - offset; auto fi = std::make_unique(n, true, offset, length); CHECK(fmt->Read(page.get(), fi.get())); @@ -151,6 +157,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { n_prefetch_batches) << "Sparse DMatrix assumes forward iteration."; page_ = (*ring_)[count_].get(); + CHECK(!(*ring_)[count_].valid()); return true; } @@ -169,18 +176,9 @@ class SparsePageSourceImpl : public BatchIteratorImpl { } auto bytes = fmt->Write(*page_, fo.get()); - - // align for mmap - decltype(bytes) page_size = getpagesize(); - CHECK(page_size != 0 && page_size % 2 == 0) << "Failed to get page size on the current system."; - auto n = bytes / page_size; - auto padded = (n + 1) * page_size; - auto padding = padded - bytes; - std::vector padding_bytes(padding, 0); - fo->Write(padding_bytes.data(), padding_bytes.size()); + auto padded = PadPageForMMAP(bytes, fo.get()); timer.Stop(); - LOG(INFO) << static_cast(bytes) / 1024.0 / 1024.0 << " MB written in " << timer.ElapsedSeconds() << " seconds."; cache_info_->Push(padded); @@ -280,6 +278,7 @@ class SparsePageSource : public SparsePageSourceImpl { } if (at_end_) { + CHECK_EQ(cache_info_->offset.size(), n_batches_ + 1); cache_info_->Commit(); if (n_batches_ != 0) { CHECK_EQ(count_, n_batches_); From 9ee16431a3377b8d6fb466842e83e55c1c040c56 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 12 Jun 2023 21:56:55 +0800 Subject: [PATCH 12/51] Skip python test. --- python-package/xgboost/testing/__init__.py | 8 ++++++++ tests/python/test_demos.py | 1 + 2 files changed, 9 insertions(+) diff --git a/python-package/xgboost/testing/__init__.py b/python-package/xgboost/testing/__init__.py index 70e5361011b3..523f3f99c053 100644 --- a/python-package/xgboost/testing/__init__.py +++ b/python-package/xgboost/testing/__init__.py @@ -93,6 +93,14 @@ def no_ipv6() -> PytestSkip: return {"condition": not has_ipv6(), "reason": "IPv6 is required to be enabled."} +def no_unix() -> PytestSkip: + """PyTest skip mark for non-unix.""" + return { + "condition": system() == "Windows", + "reason": "unix system is required to be enabled.", + } + + def no_ubjson() -> PytestSkip: return no_mod("ubjson") diff --git a/tests/python/test_demos.py b/tests/python/test_demos.py index c54f35046f8a..90c72c8e6233 100644 --- a/tests/python/test_demos.py +++ b/tests/python/test_demos.py @@ -103,6 +103,7 @@ def test_cross_validation_demo(): subprocess.check_call(cmd) +@pytest.mark.skipif(**tm.no_unix()) def test_external_memory_demo(): script = os.path.join(PYTHON_DEMO_DIR, 'external_memory.py') cmd = ['python', script] From da00b6dbaea1ee6d030e665da529b39357d56ef2 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 12 Jun 2023 21:59:21 +0800 Subject: [PATCH 13/51] lint. --- src/common/io.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/io.cc b/src/common/io.cc index f9756c63c42d..32c0b66027a5 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -11,6 +11,7 @@ #include // for errno #include #include +#include // for numeric_limits #include #include #include From 05ce49bb518d26c7ff41d86c7b73ddfe738a5e3d Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 12 Jun 2023 23:12:30 +0800 Subject: [PATCH 14/51] Add test. --- src/common/io.cc | 12 +++++++++ src/common/io.h | 10 ++++++++ src/data/sparse_page_source.cc | 20 --------------- src/data/sparse_page_source.h | 14 ++--------- tests/cpp/common/test_io.cc | 46 ++++++++++++++++++++++++++++++++++ 5 files changed, 70 insertions(+), 32 deletions(-) delete mode 100644 src/data/sparse_page_source.cc diff --git a/src/common/io.cc b/src/common/io.cc index 32c0b66027a5..f3e1476627db 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -158,6 +158,18 @@ std::string FileExtension(std::string fname, bool lower) { } } +std::size_t PadPageForMMAP(std::size_t file_bytes, dmlc::Stream* fo) { + decltype(file_bytes) page_size = getpagesize(); + CHECK(page_size != 0 && page_size % 2 == 0) << "Failed to get page size on the current system."; + CHECK_NE(file_bytes, 0) << "Empty page encountered."; + auto n_pages = file_bytes / page_size + !!(file_bytes % page_size != 0); + auto padded = n_pages * page_size; + auto padding = padded - file_bytes; + std::vector padding_bytes(padding, 0); + fo->Write(padding_bytes.data(), padding_bytes.size()); + return padded; +} + void* PrivateMmapStream::Open(StringView path, bool read_only, std::size_t offset, std::size_t length) { fd_ = open(path.c_str(), O_RDONLY); diff --git a/src/common/io.h b/src/common/io.h index b3bf0cfe3973..839192cb7e60 100644 --- a/src/common/io.h +++ b/src/common/io.h @@ -129,6 +129,16 @@ inline std::string ReadAll(std::string const &path) { return content; } +/** + * @brief Pad the output file for a page to make it mmap compatible. + * + * @param file_bytes The size of the output file + * @param fo Stream used to write the file. + * + * @return The file size after being padded. + */ +std::size_t PadPageForMMAP(std::size_t file_bytes, dmlc::Stream* fo); + /** * \brief Private mmap file, copy-on-write */ diff --git a/src/data/sparse_page_source.cc b/src/data/sparse_page_source.cc deleted file mode 100644 index 0cc34900f067..000000000000 --- a/src/data/sparse_page_source.cc +++ /dev/null @@ -1,20 +0,0 @@ -/** - * Copyright 2023, XGBoost Contributors - */ -#include "sparse_page_source.h" - -#include // for getpagesize - -namespace xgboost::data { -std::size_t PadPageForMMAP(std::size_t file_bytes, dmlc::Stream* fo) { - decltype(file_bytes) page_size = getpagesize(); - CHECK(page_size != 0 && page_size % 2 == 0) << "Failed to get page size on the current system."; - CHECK_NE(file_bytes, 0) << "Empty page encountered."; - auto n = file_bytes / page_size; - auto padded = (n + !!(file_bytes % page_size != 0)) * page_size; - auto padding = padded - file_bytes; - std::vector padding_bytes(padding, 0); - fo->Write(padding_bytes.data(), padding_bytes.size()); - return padded; -} -} // namespace xgboost::data diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index e27c9e918bdc..e5503492ace2 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -15,7 +15,7 @@ #include #include "../common/common.h" -#include "../common/io.h" // for PrivateMmapStream +#include "../common/io.h" // for PrivateMmapStream, PadPageForMMAP #include "../common/timer.h" #include "adapter.h" #include "proxy_dmatrix.h" @@ -32,16 +32,6 @@ inline void TryDeleteCacheFile(const std::string& file) { } } -/** - * @brief Pad the output file for a page to make it mmap compatible. - * - * @param file_bytes The size of the output file - * @param fo Stream used to write the file. - * - * @return The file size after being padded. - */ -std::size_t PadPageForMMAP(std::size_t file_bytes, dmlc::Stream* fo); - struct Cache { // whether the write to the cache is complete bool written; @@ -176,7 +166,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { } auto bytes = fmt->Write(*page_, fo.get()); - auto padded = PadPageForMMAP(bytes, fo.get()); + auto padded = common::PadPageForMMAP(bytes, fo.get()); timer.Stop(); LOG(INFO) << static_cast(bytes) / 1024.0 / 1024.0 << " MB written in " diff --git a/tests/cpp/common/test_io.cc b/tests/cpp/common/test_io.cc index feac8bd89934..e9cb650f93e0 100644 --- a/tests/cpp/common/test_io.cc +++ b/tests/cpp/common/test_io.cc @@ -89,5 +89,51 @@ TEST(IO, LoadSequentialFile) { ASSERT_THROW(LoadSequentialFile("non-exist", true), dmlc::Error); } + +TEST(IO, PrivateMmapStream) { + dmlc::TemporaryDirectory tempdir; + auto path = tempdir.path + "/testfile"; + + std::size_t n_batches{8}; + std::vector> batches; + std::vector offset{0ul}; + + using T = std::int32_t; + + { + std::unique_ptr fo{dmlc::Stream::Create(path.c_str(), "w")}; + for (std::size_t i = 0; i < n_batches; ++i) { + std::size_t size = (i + 1) * 2; + std::vector data(size, 0); + std::iota(data.begin(), data.end(), i * i); + + fo->Write(static_cast(data.size())); + fo->Write(data.data(), data.size() * sizeof(T)); + + std::size_t bytes = sizeof(std::uint64_t) + data.size() * sizeof(T); + auto padded = common::PadPageForMMAP(bytes, fo.get()); + offset.push_back(padded); + + batches.emplace_back(std::move(data)); + } + } + + // Turn size info offset + std::partial_sum(offset.begin(), offset.end(), offset.begin()); + + for (std::size_t i = 0; i < n_batches; ++i) { + std::size_t off = offset[i]; + std::size_t n = offset.at(i + 1) - offset[i]; + std::unique_ptr fi{std::make_unique(path, true, off, n)}; + std::vector data; + + std::uint64_t size{0}; + fi->Read(&size); + data.resize(size); + + fi->Read(data.data(), size * sizeof(T)); + ASSERT_EQ(data, batches[i]); + } +} } // namespace common } // namespace xgboost From ed635d34db4a3ae1881a6270b23fd37e18a9efdd Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 12 Jun 2023 23:17:17 +0800 Subject: [PATCH 15/51] rename. --- src/common/io.cc | 2 +- src/common/io.h | 2 +- src/data/sparse_page_source.h | 2 +- tests/cpp/common/test_io.cc | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/common/io.cc b/src/common/io.cc index f3e1476627db..ef34ffacac06 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -158,7 +158,7 @@ std::string FileExtension(std::string fname, bool lower) { } } -std::size_t PadPageForMMAP(std::size_t file_bytes, dmlc::Stream* fo) { +std::size_t PadPageForMmap(std::size_t file_bytes, dmlc::Stream* fo) { decltype(file_bytes) page_size = getpagesize(); CHECK(page_size != 0 && page_size % 2 == 0) << "Failed to get page size on the current system."; CHECK_NE(file_bytes, 0) << "Empty page encountered."; diff --git a/src/common/io.h b/src/common/io.h index 839192cb7e60..b56af3801fab 100644 --- a/src/common/io.h +++ b/src/common/io.h @@ -137,7 +137,7 @@ inline std::string ReadAll(std::string const &path) { * * @return The file size after being padded. */ -std::size_t PadPageForMMAP(std::size_t file_bytes, dmlc::Stream* fo); +std::size_t PadPageForMmap(std::size_t file_bytes, dmlc::Stream* fo); /** * \brief Private mmap file, copy-on-write diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index e5503492ace2..03e95b3a3652 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -166,7 +166,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { } auto bytes = fmt->Write(*page_, fo.get()); - auto padded = common::PadPageForMMAP(bytes, fo.get()); + auto padded = common::PadPageForMmap(bytes, fo.get()); timer.Stop(); LOG(INFO) << static_cast(bytes) / 1024.0 / 1024.0 << " MB written in " diff --git a/tests/cpp/common/test_io.cc b/tests/cpp/common/test_io.cc index e9cb650f93e0..db401dba45df 100644 --- a/tests/cpp/common/test_io.cc +++ b/tests/cpp/common/test_io.cc @@ -111,7 +111,7 @@ TEST(IO, PrivateMmapStream) { fo->Write(data.data(), data.size() * sizeof(T)); std::size_t bytes = sizeof(std::uint64_t) + data.size() * sizeof(T); - auto padded = common::PadPageForMMAP(bytes, fo.get()); + auto padded = common::PadPageForMmap(bytes, fo.get()); offset.push_back(padded); batches.emplace_back(std::move(data)); From 9b5c6864382a519052fa8e70bc21ae579fd7578b Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 12 Jun 2023 23:19:54 +0800 Subject: [PATCH 16/51] cleanup. --- src/common/io.h | 2 +- tests/cpp/common/test_io.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/io.h b/src/common/io.h index b56af3801fab..3ab910ff7b50 100644 --- a/src/common/io.h +++ b/src/common/io.h @@ -140,7 +140,7 @@ inline std::string ReadAll(std::string const &path) { std::size_t PadPageForMmap(std::size_t file_bytes, dmlc::Stream* fo); /** - * \brief Private mmap file, copy-on-write + * @brief Private mmap file, copy-on-write. File must be properly aligned by `PadPageForMmap()`. */ class PrivateMmapStream : public MemoryFixSizeBuffer { std::int32_t fd_; diff --git a/tests/cpp/common/test_io.cc b/tests/cpp/common/test_io.cc index db401dba45df..0a5881991bdd 100644 --- a/tests/cpp/common/test_io.cc +++ b/tests/cpp/common/test_io.cc @@ -1,5 +1,5 @@ -/*! - * Copyright (c) by XGBoost Contributors 2019 +/** + * Copyright 2019-2023, XGBoost Contributors */ #include From 1b0dab27ce38ca3083782bdaca94975f797221a4 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 13 Jun 2023 02:09:17 +0800 Subject: [PATCH 17/51] Remove page in grad-based sampling. --- src/tree/gpu_hist/gradient_based_sampler.cu | 14 +++++++------- src/tree/gpu_hist/gradient_based_sampler.cuh | 5 ++--- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/tree/gpu_hist/gradient_based_sampler.cu b/src/tree/gpu_hist/gradient_based_sampler.cu index f22fa172fbfc..41cb242ecd84 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cu +++ b/src/tree/gpu_hist/gradient_based_sampler.cu @@ -236,12 +236,10 @@ GradientBasedSample ExternalMemoryUniformSampling::Sample(Context const* ctx, return {sample_rows, page_.get(), dh::ToSpan(gpair_)}; } -GradientBasedSampling::GradientBasedSampling(EllpackPageImpl const* page, - size_t n_rows, - const BatchParam&, +GradientBasedSampling::GradientBasedSampling(std::size_t n_rows, BatchParam batch_param, float subsample) - : page_(page), - subsample_(subsample), + : subsample_(subsample), + batch_param_{std::move(batch_param)}, threshold_(n_rows + 1, 0.0f), grad_sum_(n_rows, 0.0f) {} @@ -252,12 +250,14 @@ GradientBasedSample GradientBasedSampling::Sample(Context const* ctx, size_t threshold_index = GradientBasedSampler::CalculateThresholdIndex( gpair, dh::ToSpan(threshold_), dh::ToSpan(grad_sum_), n_rows * subsample_); + auto page = (*dmat->GetBatches(ctx, batch_param_).begin()).Impl(); + // Perform Poisson sampling in place. thrust::transform(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair), thrust::counting_iterator(0), dh::tbegin(gpair), PoissonSampling(dh::ToSpan(threshold_), threshold_index, RandomWeight(common::GlobalRandom()()))); - return {n_rows, page_, gpair}; + return {n_rows, page, gpair}; } ExternalMemoryGradientBasedSampling::ExternalMemoryGradientBasedSampling( @@ -339,7 +339,7 @@ GradientBasedSampler::GradientBasedSampler(Context const* ctx, EllpackPageImpl c strategy_.reset( new ExternalMemoryGradientBasedSampling(n_rows, batch_param, subsample)); } else { - strategy_.reset(new GradientBasedSampling(page, n_rows, batch_param, subsample)); + strategy_.reset(new GradientBasedSampling(n_rows, batch_param, subsample)); } break; default:LOG(FATAL) << "unknown sampling method"; diff --git a/src/tree/gpu_hist/gradient_based_sampler.cuh b/src/tree/gpu_hist/gradient_based_sampler.cuh index dafb98cfd8b3..8d22eed03b12 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cuh +++ b/src/tree/gpu_hist/gradient_based_sampler.cuh @@ -84,13 +84,12 @@ class ExternalMemoryUniformSampling : public SamplingStrategy { /*! \brief Gradient-based sampling in in-memory mode.. */ class GradientBasedSampling : public SamplingStrategy { public: - GradientBasedSampling(EllpackPageImpl const* page, size_t n_rows, const BatchParam& batch_param, - float subsample); + GradientBasedSampling(std::size_t n_rows, BatchParam batch_param, float subsample); GradientBasedSample Sample(Context const* ctx, common::Span gpair, DMatrix* dmat) override; private: - EllpackPageImpl const* page_; + BatchParam batch_param_; float subsample_; dh::caching_device_vector threshold_; dh::caching_device_vector grad_sum_; From f383f760f600294593e7eea15d503e38b2d044f0 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 13 Jun 2023 02:13:33 +0800 Subject: [PATCH 18/51] remove page in uniform sampling. --- src/tree/gpu_hist/gradient_based_sampler.cu | 9 +++++---- src/tree/gpu_hist/gradient_based_sampler.cuh | 4 ++-- src/tree/updater_gpu_hist.cu | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/tree/gpu_hist/gradient_based_sampler.cu b/src/tree/gpu_hist/gradient_based_sampler.cu index 41cb242ecd84..b86371d63eb9 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cu +++ b/src/tree/gpu_hist/gradient_based_sampler.cu @@ -175,8 +175,8 @@ GradientBasedSample ExternalMemoryNoSampling::Sample(Context const* ctx, return {dmat->Info().num_row_, page_.get(), gpair}; } -UniformSampling::UniformSampling(EllpackPageImpl const* page, float subsample) - : page_(page), subsample_(subsample) {} +UniformSampling::UniformSampling(BatchParam batch_param, float subsample) + : batch_param_{std::move(batch_param)}, subsample_(subsample) {} GradientBasedSample UniformSampling::Sample(Context const* ctx, common::Span gpair, DMatrix* dmat) { @@ -185,7 +185,8 @@ GradientBasedSample UniformSampling::Sample(Context const* ctx, common::SpanCTP(), dh::tbegin(gpair), dh::tend(gpair), thrust::counting_iterator(0), BernoulliTrial(common::GlobalRandom()(), subsample_), GradientPair()); - return {dmat->Info().num_row_, page_, gpair}; + auto page = (*dmat->GetBatches(ctx, batch_param_).begin()).Impl(); + return {dmat->Info().num_row_, page, gpair}; } ExternalMemoryUniformSampling::ExternalMemoryUniformSampling(size_t n_rows, @@ -331,7 +332,7 @@ GradientBasedSampler::GradientBasedSampler(Context const* ctx, EllpackPageImpl c if (is_external_memory) { strategy_.reset(new ExternalMemoryUniformSampling(n_rows, batch_param, subsample)); } else { - strategy_.reset(new UniformSampling(page, subsample)); + strategy_.reset(new UniformSampling(batch_param, subsample)); } break; case TrainParam::kGradientBased: diff --git a/src/tree/gpu_hist/gradient_based_sampler.cuh b/src/tree/gpu_hist/gradient_based_sampler.cuh index 8d22eed03b12..4ed2339061e4 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cuh +++ b/src/tree/gpu_hist/gradient_based_sampler.cuh @@ -57,12 +57,12 @@ class ExternalMemoryNoSampling : public SamplingStrategy { /*! \brief Uniform sampling in in-memory mode. */ class UniformSampling : public SamplingStrategy { public: - UniformSampling(EllpackPageImpl const* page, float subsample); + UniformSampling(BatchParam batch_param, float subsample); GradientBasedSample Sample(Context const* ctx, common::Span gpair, DMatrix* dmat) override; private: - EllpackPageImpl const* page_; + BatchParam batch_param_; float subsample_; }; diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index d1c1c829098d..c6b0612842d4 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -176,7 +176,7 @@ struct GPUHistMakerDevice { Context const* ctx_; public: - EllpackPageImpl const* page; + EllpackPageImpl const* page{nullptr}; common::Span feature_types; BatchParam batch_param; From 68b838d7cf876cd65f76c85f833d53cacb5b4b6e Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 13 Jun 2023 02:15:29 +0800 Subject: [PATCH 19/51] remove in no sampling. --- src/tree/gpu_hist/gradient_based_sampler.cu | 9 +++++---- src/tree/gpu_hist/gradient_based_sampler.cuh | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/tree/gpu_hist/gradient_based_sampler.cu b/src/tree/gpu_hist/gradient_based_sampler.cu index b86371d63eb9..47e9904cbcc9 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cu +++ b/src/tree/gpu_hist/gradient_based_sampler.cu @@ -146,11 +146,12 @@ class PoissonSampling : public thrust::binary_function gpair, +GradientBasedSample NoSampling::Sample(Context const* ctx, common::Span gpair, DMatrix* dmat) { - return {dmat->Info().num_row_, page_, gpair}; + auto page = (*dmat->GetBatches(ctx, batch_param_).begin()).Impl(); + return {dmat->Info().num_row_, page, gpair}; } ExternalMemoryNoSampling::ExternalMemoryNoSampling(Context const* ctx, EllpackPageImpl const* page, @@ -349,7 +350,7 @@ GradientBasedSampler::GradientBasedSampler(Context const* ctx, EllpackPageImpl c if (is_external_memory) { strategy_.reset(new ExternalMemoryNoSampling(ctx, page, n_rows, batch_param)); } else { - strategy_.reset(new NoSampling(page)); + strategy_.reset(new NoSampling(batch_param)); } } } diff --git a/src/tree/gpu_hist/gradient_based_sampler.cuh b/src/tree/gpu_hist/gradient_based_sampler.cuh index 4ed2339061e4..6accc06474b7 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cuh +++ b/src/tree/gpu_hist/gradient_based_sampler.cuh @@ -32,12 +32,12 @@ class SamplingStrategy { /*! \brief No sampling in in-memory mode. */ class NoSampling : public SamplingStrategy { public: - explicit NoSampling(EllpackPageImpl const* page); + explicit NoSampling(BatchParam batch_param); GradientBasedSample Sample(Context const* ctx, common::Span gpair, DMatrix* dmat) override; private: - EllpackPageImpl const* page_; + BatchParam batch_param_; }; /*! \brief No sampling in external memory mode. */ From 4b5d38f07da02740a14dd7db9615234626353b44 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 13 Jun 2023 02:33:18 +0800 Subject: [PATCH 20/51] GPU initialization. --- src/data/sparse_page_source.h | 38 ++++++++++++-------- src/tree/gpu_hist/gradient_based_sampler.cu | 23 ++++++------ src/tree/gpu_hist/gradient_based_sampler.cuh | 7 ++-- src/tree/updater_gpu_hist.cu | 34 +++++++++--------- 4 files changed, 55 insertions(+), 47 deletions(-) diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index 03e95b3a3652..a073e51b5008 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -6,7 +6,7 @@ #define XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_ #include // for min -#include +#include // async #include #include #include @@ -18,6 +18,7 @@ #include "../common/io.h" // for PrivateMmapStream, PadPageForMMAP #include "../common/timer.h" #include "adapter.h" +#include "dmlc/common.h" // OMPException #include "proxy_dmatrix.h" #include "sparse_page_writer.h" #include "xgboost/base.h" @@ -102,6 +103,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { // A ring storing futures to data. Since the DMatrix iterator is forward only, so we // can pre-fetch data in a ring. std::unique_ptr ring_{new Ring}; + dmlc::OMPException exec_; bool ReadCache() { CHECK(!at_end_); @@ -119,35 +121,41 @@ class SparsePageSourceImpl : public BatchIteratorImpl { CHECK_GT(n_prefetch_batches, 0) << "total batches:" << n_batches_; std::size_t fetch_it = count_; + exec_.Rethrow(); + for (std::size_t i = 0; i < n_prefetch_batches; ++i, ++fetch_it) { fetch_it %= n_batches_; // ring if (ring_->at(fetch_it).valid()) { continue; } - auto const *self = this; // make sure it's const + auto const* self = this; // make sure it's const CHECK_LT(fetch_it, cache_info_->offset.size()); - ring_->at(fetch_it) = std::async(std::launch::async, [fetch_it, self]() { + ring_->at(fetch_it) = std::async(std::launch::async, [fetch_it, self, this]() { auto page = std::make_shared(); - - common::Timer timer; - timer.Start(); - std::unique_ptr> fmt{CreatePageFormat("raw")}; - auto n = self->cache_info_->ShardName(); - - std::uint64_t offset = self->cache_info_->offset.at(fetch_it); - std::uint64_t length = self->cache_info_->offset.at(fetch_it + 1) - offset; - - auto fi = std::make_unique(n, true, offset, length); - CHECK(fmt->Read(page.get(), fi.get())); - LOG(INFO) << "Read a page in " << timer.ElapsedSeconds() << " seconds."; + this->exec_.Run([&] { + common::Timer timer; + timer.Start(); + std::unique_ptr> fmt{CreatePageFormat("raw")}; + auto n = self->cache_info_->ShardName(); + + std::uint64_t offset = self->cache_info_->offset.at(fetch_it); + std::uint64_t length = self->cache_info_->offset.at(fetch_it + 1) - offset; + + auto fi = std::make_unique(n, true, offset, length); + CHECK(fmt->Read(page.get(), fi.get())); + LOG(INFO) << "Read a page in " << timer.ElapsedSeconds() << " seconds."; + }); return page; }); } + CHECK_EQ(std::count_if(ring_->cbegin(), ring_->cend(), [](auto const& f) { return f.valid(); }), n_prefetch_batches) << "Sparse DMatrix assumes forward iteration."; page_ = (*ring_)[count_].get(); CHECK(!(*ring_)[count_].valid()); + exec_.Rethrow(); + return true; } diff --git a/src/tree/gpu_hist/gradient_based_sampler.cu b/src/tree/gpu_hist/gradient_based_sampler.cu index 47e9904cbcc9..b9ca2753bd25 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cu +++ b/src/tree/gpu_hist/gradient_based_sampler.cu @@ -154,20 +154,22 @@ GradientBasedSample NoSampling::Sample(Context const* ctx, common::SpanInfo().num_row_, page, gpair}; } -ExternalMemoryNoSampling::ExternalMemoryNoSampling(Context const* ctx, EllpackPageImpl const* page, - size_t n_rows, BatchParam batch_param) - : batch_param_{std::move(batch_param)}, - page_(new EllpackPageImpl(ctx->gpu_id, page->Cuts(), page->is_dense, page->row_stride, - n_rows)) {} +ExternalMemoryNoSampling::ExternalMemoryNoSampling(BatchParam batch_param) + : batch_param_{std::move(batch_param)} {} GradientBasedSample ExternalMemoryNoSampling::Sample(Context const* ctx, common::Span gpair, DMatrix* dmat) { if (!page_concatenated_) { // Concatenate all the external memory ELLPACK pages into a single in-memory page. + page_.reset(nullptr); size_t offset = 0; for (auto& batch : dmat->GetBatches(ctx, batch_param_)) { auto page = batch.Impl(); + if (!page_) { + page_ = std::make_unique(ctx->gpu_id, page->Cuts(), page->is_dense, + page->row_stride, dmat->Info().num_row_); + } size_t num_elements = page_->Copy(ctx->gpu_id, page, offset); offset += num_elements; } @@ -319,13 +321,12 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* c return {sample_rows, page_.get(), dh::ToSpan(gpair_)}; } -GradientBasedSampler::GradientBasedSampler(Context const* ctx, EllpackPageImpl const* page, +GradientBasedSampler::GradientBasedSampler(Context const* ctx, bool is_external_memory, size_t n_rows, const BatchParam& batch_param, float subsample, int sampling_method) { monitor_.Init("gradient_based_sampler"); bool is_sampling = subsample < 1.0; - bool is_external_memory = page->n_rows != n_rows; if (is_sampling) { switch (sampling_method) { @@ -338,17 +339,17 @@ GradientBasedSampler::GradientBasedSampler(Context const* ctx, EllpackPageImpl c break; case TrainParam::kGradientBased: if (is_external_memory) { - strategy_.reset( - new ExternalMemoryGradientBasedSampling(n_rows, batch_param, subsample)); + strategy_.reset(new ExternalMemoryGradientBasedSampling(n_rows, batch_param, subsample)); } else { strategy_.reset(new GradientBasedSampling(n_rows, batch_param, subsample)); } break; - default:LOG(FATAL) << "unknown sampling method"; + default: + LOG(FATAL) << "unknown sampling method"; } } else { if (is_external_memory) { - strategy_.reset(new ExternalMemoryNoSampling(ctx, page, n_rows, batch_param)); + strategy_.reset(new ExternalMemoryNoSampling(batch_param)); } else { strategy_.reset(new NoSampling(batch_param)); } diff --git a/src/tree/gpu_hist/gradient_based_sampler.cuh b/src/tree/gpu_hist/gradient_based_sampler.cuh index 6accc06474b7..8674b5743536 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cuh +++ b/src/tree/gpu_hist/gradient_based_sampler.cuh @@ -43,14 +43,13 @@ class NoSampling : public SamplingStrategy { /*! \brief No sampling in external memory mode. */ class ExternalMemoryNoSampling : public SamplingStrategy { public: - ExternalMemoryNoSampling(Context const* ctx, EllpackPageImpl const* page, size_t n_rows, - BatchParam batch_param); + explicit ExternalMemoryNoSampling(BatchParam batch_param); GradientBasedSample Sample(Context const* ctx, common::Span gpair, DMatrix* dmat) override; private: BatchParam batch_param_; - std::unique_ptr page_; + std::unique_ptr page_{nullptr}; bool page_concatenated_{false}; }; @@ -123,7 +122,7 @@ class ExternalMemoryGradientBasedSampling : public SamplingStrategy { */ class GradientBasedSampler { public: - GradientBasedSampler(Context const* ctx, EllpackPageImpl const* page, size_t n_rows, + GradientBasedSampler(Context const* ctx, bool is_external_memory, size_t n_rows, const BatchParam& batch_param, float subsample, int sampling_method); /*! \brief Sample from a DMatrix based on the given gradient pairs. */ diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index c6b0612842d4..f62e78fa914a 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -205,32 +205,25 @@ struct GPUHistMakerDevice { std::unique_ptr feature_groups; - - GPUHistMakerDevice(Context const* ctx, EllpackPageImpl const* _page, - common::Span _feature_types, bst_uint _n_rows, + GPUHistMakerDevice(Context const* ctx, bool is_external_memory, + common::Span _feature_types, bst_row_t _n_rows, TrainParam _param, uint32_t column_sampler_seed, uint32_t n_features, BatchParam _batch_param) : evaluator_{_param, n_features, ctx->gpu_id}, ctx_(ctx), - page(_page), feature_types{_feature_types}, param(std::move(_param)), column_sampler(column_sampler_seed), interaction_constraints(param, n_features), batch_param(std::move(_batch_param)) { - sampler.reset(new GradientBasedSampler(ctx, page, _n_rows, batch_param, param.subsample, - param.sampling_method)); + sampler.reset(new GradientBasedSampler(ctx, is_external_memory, _n_rows, batch_param, + param.subsample, param.sampling_method)); if (!param.monotone_constraints.empty()) { // Copy assigning an empty vector causes an exception in MSVC debug builds monotone_constraints = param.monotone_constraints; } - // Init histogram - hist.Init(ctx_->gpu_id, page->Cuts().TotalBins()); monitor.Init(std::string("GPUHistMakerDevice") + std::to_string(ctx_->gpu_id)); - feature_groups.reset(new FeatureGroups(page->Cuts(), page->is_dense, - dh::MaxSharedMemoryOptin(ctx_->gpu_id), - sizeof(GradientSumT))); } ~GPUHistMakerDevice() { // NOLINT @@ -247,9 +240,6 @@ struct GPUHistMakerDevice { param.colsample_bytree); dh::safe_cuda(cudaSetDevice(ctx_->gpu_id)); - this->evaluator_.Reset(page->Cuts(), feature_types, dmat->Info().num_col_, param, - ctx_->gpu_id); - this->interaction_constraints.Reset(); if (d_gpair.size() != dh_gpair->Size()) { @@ -262,11 +252,22 @@ struct GPUHistMakerDevice { page = sample.page; gpair = sample.gpair; + this->evaluator_.Reset(page->Cuts(), feature_types, dmat->Info().num_col_, param, ctx_->gpu_id); + quantiser.reset(new GradientQuantiser(this->gpair)); row_partitioner.reset(); // Release the device memory first before reallocating row_partitioner.reset(new RowPartitioner(ctx_->gpu_id, sample.sample_rows)); + + // Init histogram + hist.Init(ctx_->gpu_id, page->Cuts().TotalBins()); hist.Reset(); + + if (!feature_groups) { + feature_groups.reset(new FeatureGroups(page->Cuts(), page->is_dense, + dh::MaxSharedMemoryOptin(ctx_->gpu_id), + sizeof(GradientSumT))); + } } GPUExpandEntry EvaluateRootSplit(GradientPairInt64 root_sum) { @@ -809,12 +810,11 @@ class GPUHistMaker : public TreeUpdater { collective::Broadcast(&column_sampling_seed, sizeof(column_sampling_seed), 0); auto batch_param = BatchParam{param->max_bin, TrainParam::DftSparseThreshold()}; - auto page = (*dmat->GetBatches(ctx_, batch_param).begin()).Impl(); dh::safe_cuda(cudaSetDevice(ctx_->gpu_id)); info_->feature_types.SetDevice(ctx_->gpu_id); maker.reset(new GPUHistMakerDevice( - ctx_, page, info_->feature_types.ConstDeviceSpan(), info_->num_row_, *param, - column_sampling_seed, info_->num_col_, batch_param)); + ctx_, !dmat->SingleColBlock(), info_->feature_types.ConstDeviceSpan(), info_->num_row_, + *param, column_sampling_seed, info_->num_col_, batch_param)); p_last_fmat_ = dmat; initialised_ = true; From 4521f0468c1af4d5b6d010e202a2db6d6cc0845b Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 13 Jun 2023 03:35:22 +0800 Subject: [PATCH 21/51] use ctx. --- src/tree/gpu_hist/gradient_based_sampler.cu | 44 +++++++++----------- src/tree/gpu_hist/gradient_based_sampler.cuh | 6 +-- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/src/tree/gpu_hist/gradient_based_sampler.cu b/src/tree/gpu_hist/gradient_based_sampler.cu index b9ca2753bd25..7e051d80c9da 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cu +++ b/src/tree/gpu_hist/gradient_based_sampler.cu @@ -264,10 +264,9 @@ GradientBasedSample GradientBasedSampling::Sample(Context const* ctx, return {n_rows, page, gpair}; } -ExternalMemoryGradientBasedSampling::ExternalMemoryGradientBasedSampling( - size_t n_rows, - BatchParam batch_param, - float subsample) +ExternalMemoryGradientBasedSampling::ExternalMemoryGradientBasedSampling(size_t n_rows, + BatchParam batch_param, + float subsample) : batch_param_(std::move(batch_param)), subsample_(subsample), threshold_(n_rows + 1, 0.0f), @@ -277,16 +276,15 @@ ExternalMemoryGradientBasedSampling::ExternalMemoryGradientBasedSampling( GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* ctx, common::Span gpair, DMatrix* dmat) { - size_t n_rows = dmat->Info().num_row_; + auto cuctx = ctx->CUDACtx(); + bst_row_t n_rows = dmat->Info().num_row_; size_t threshold_index = GradientBasedSampler::CalculateThresholdIndex( gpair, dh::ToSpan(threshold_), dh::ToSpan(grad_sum_), n_rows * subsample_); // Perform Poisson sampling in place. - thrust::transform(dh::tbegin(gpair), dh::tend(gpair), - thrust::counting_iterator(0), - dh::tbegin(gpair), - PoissonSampling(dh::ToSpan(threshold_), - threshold_index, + thrust::transform(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair), + thrust::counting_iterator(0), dh::tbegin(gpair), + PoissonSampling(dh::ToSpan(threshold_), threshold_index, RandomWeight(common::GlobalRandom()()))); // Count the sampled rows. @@ -294,16 +292,15 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* c // Compact gradient pairs. gpair_.resize(sample_rows); - thrust::copy_if(dh::tbegin(gpair), dh::tend(gpair), gpair_.begin(), IsNonZero()); + thrust::copy_if(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair), gpair_.begin(), IsNonZero()); // Index the sample rows. - thrust::transform(dh::tbegin(gpair), dh::tend(gpair), sample_row_index_.begin(), IsNonZero()); - thrust::exclusive_scan(sample_row_index_.begin(), sample_row_index_.end(), - sample_row_index_.begin()); - thrust::transform(dh::tbegin(gpair), dh::tend(gpair), - sample_row_index_.begin(), - sample_row_index_.begin(), - ClearEmptyRows()); + thrust::transform(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair), sample_row_index_.begin(), + IsNonZero()); + thrust::exclusive_scan(cuctx->CTP(), sample_row_index_.begin(), sample_row_index_.end(), + sample_row_index_.begin()); + thrust::transform(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair), sample_row_index_.begin(), + sample_row_index_.begin(), ClearEmptyRows()); auto batch_iterator = dmat->GetBatches(ctx, batch_param_); auto first_page = (*batch_iterator.begin()).Impl(); @@ -365,11 +362,11 @@ GradientBasedSample GradientBasedSampler::Sample(Context const* ctx, return sample; } -size_t GradientBasedSampler::CalculateThresholdIndex( - common::Span gpair, common::Span threshold, - common::Span grad_sum, size_t sample_rows) { - thrust::fill(dh::tend(threshold) - 1, dh::tend(threshold), - std::numeric_limits::max()); +size_t GradientBasedSampler::CalculateThresholdIndex(common::Span gpair, + common::Span threshold, + common::Span grad_sum, + size_t sample_rows) { + thrust::fill(dh::tend(threshold) - 1, dh::tend(threshold), std::numeric_limits::max()); thrust::transform(dh::tbegin(gpair), dh::tend(gpair), dh::tbegin(threshold), CombineGradientPair()); thrust::sort(dh::tbegin(threshold), dh::tend(threshold) - 1); @@ -382,6 +379,5 @@ size_t GradientBasedSampler::CalculateThresholdIndex( thrust::min_element(dh::tbegin(grad_sum), dh::tend(grad_sum)); return thrust::distance(dh::tbegin(grad_sum), min) + 1; } - }; // namespace tree }; // namespace xgboost diff --git a/src/tree/gpu_hist/gradient_based_sampler.cuh b/src/tree/gpu_hist/gradient_based_sampler.cuh index 8674b5743536..8013f2ec4a0a 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cuh +++ b/src/tree/gpu_hist/gradient_based_sampler.cuh @@ -104,11 +104,11 @@ class ExternalMemoryGradientBasedSampling : public SamplingStrategy { private: BatchParam batch_param_; float subsample_; - dh::caching_device_vector threshold_; - dh::caching_device_vector grad_sum_; + dh::device_vector threshold_; + dh::device_vector grad_sum_; std::unique_ptr page_; dh::device_vector gpair_; - dh::caching_device_vector sample_row_index_; + dh::device_vector sample_row_index_; }; /*! \brief Draw a sample of rows from a DMatrix. From 39ed2181ea0331b6eca825318ded01f67caad18c Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 13 Jun 2023 03:40:09 +0800 Subject: [PATCH 22/51] comment. --- src/tree/gpu_hist/gradient_based_sampler.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tree/gpu_hist/gradient_based_sampler.cu b/src/tree/gpu_hist/gradient_based_sampler.cu index 7e051d80c9da..11ed03d0d5a1 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cu +++ b/src/tree/gpu_hist/gradient_based_sampler.cu @@ -318,9 +318,10 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* c return {sample_rows, page_.get(), dh::ToSpan(gpair_)}; } -GradientBasedSampler::GradientBasedSampler(Context const* ctx, bool is_external_memory, +GradientBasedSampler::GradientBasedSampler(Context const* /*ctx*/, bool is_external_memory, size_t n_rows, const BatchParam& batch_param, float subsample, int sampling_method) { + // The ctx is kept here for future development of stream-based operations. monitor_.Init("gradient_based_sampler"); bool is_sampling = subsample < 1.0; From a73612543bf1167ca928ea35e1b33ed7fa14c2d4 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 13 Jun 2023 04:03:12 +0800 Subject: [PATCH 23/51] lint. --- src/common/io.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/io.cc b/src/common/io.cc index ef34ffacac06..a3b386c9f59b 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -15,6 +15,7 @@ #include #include #include +#include // for vector #include "io.h" #include "xgboost/logging.h" From a61a079c22dd14d1802239a445f9126dfaaae6e9 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 13 Jun 2023 05:20:16 +0800 Subject: [PATCH 24/51] doc. --- doc/tutorials/external_memory.rst | 141 +++++++++++++++++++----------- 1 file changed, 89 insertions(+), 52 deletions(-) diff --git a/doc/tutorials/external_memory.rst b/doc/tutorials/external_memory.rst index 31d2cf8657ac..8888c45fa191 100644 --- a/doc/tutorials/external_memory.rst +++ b/doc/tutorials/external_memory.rst @@ -2,24 +2,27 @@ Using XGBoost External Memory Version ##################################### -XGBoost supports loading data from external memory using builtin data parser. And -starting from version 1.5, users can also define a custom iterator to load data in chunks. -The feature is still experimental and not yet ready for production use. In this tutorial -we will introduce both methods. Please note that training on data from external memory is -not supported by ``exact`` tree method. +When working with large datasets, training XGBoost models can be challenging as the entire +dataset needs to be loaded into memory. This can be costly and sometimes +infeasible. Staring from 1.5, users can define a custom iterator to load data in chunks +for running XGBoost algorithms. External memory can be used for both training and +prediction, but training is the primary use case and it will be our focus in this +tutorial. For prediction and evaluation, users can iterate through the data themseleves +while training requires the full dataset to be loaded to the memory. + +During training, there are two different approaches for external memory support available +in XGBoost, one for CPU-based algorithms like ``hist`` and ``approx``, another one for the +GPU-based training algorithm. We will introduce them in the following sections. -.. warning:: +.. note:: - The implementation of external memory uses ``mmap`` and is not tested against system - errors like disconnected network devices (`SIGBUS`). In addition, Windows is not yet - supported. + Training on data from external memory is not supported by the ``exact`` tree method. .. note:: - When externel memory is used, the CPU training performance is IO bounded. Meaning, the - training speed is almost exclusively determined by the disk IO speed. For GPU, please - read on and see the gradient-based sampling with external memory. During benchmark, we - used a NVME connected to a PCIE slot, the performance is "usable" with ``hist`` on CPU. + The implementation of external memory uses ``mmap`` and is not tested against system + errors like disconnected network devices (`SIGBUS`). In addition, Windows is not yet + supported. ************* Data Iterator @@ -28,8 +31,8 @@ Data Iterator Starting from XGBoost 1.5, users can define their own data loader using Python or C interface. There are some examples in the ``demo`` directory for quick start. This is a generalized version of text input external memory, where users no longer need to prepare a -text file that XGBoost recognizes. To enable the feature, user need to define a data -iterator with 2 class methods ``next`` and ``reset`` then pass it into ``DMatrix`` +text file that XGBoost recognizes. To enable the feature, users need to define a data +iterator with 2 class methods: ``next`` and ``reset``, then pass it into the ``DMatrix`` constructor. .. code-block:: python @@ -73,18 +76,84 @@ constructor. # Other tree methods including ``hist`` and ``gpu_hist`` also work, but has some caveats # as noted in following sections. - booster = xgboost.train({"tree_method": "approx"}, Xy) + booster = xgboost.train({"tree_method": "hist"}, Xy) + + +The above snippet is a simplified version of ``demo/guide-python/external_memory.py``. +For an example in C, please see ``demo/c-api/external-memory/``. The iterator is the +common interface for using external memory with XGBoost, you can pass the resulting +``DMatrix`` object for training, prediction, and evaluation. + +It is important to set the batch size based on the memory available. A good starting point +is to set the batch size to 10GB per batch if you have 64GB of memory. It is *not* +recommended to set small batch sizes like 32 samples per batch, as this can seriously hurt +performance in gradient boosting. + +*********** +CPU Version +*********** + +In the previous section, we demonstrated how to train a tree-based model using the +``hist`` tree method on a CPU. This method involves iterating through data batches stored +in a cache during tree construction. For optimal performance, we recommend using the +``grow_policy=depthwise`` setting, which allows XGBoost to build an entire layer of tree +nodes with only a few batch iterations. Conversely, using the ``lossguide`` policy +requires XGBoost to iterate over the data set for each tree node, resulting in slower +performance. + +If external memory is used, the performance of CPU training is limited by IO +(input/output) speed. This means that the disk IO speed primarily determines the training +speed. During benchmarking, we used an NVME connected to a PCIe-4 slot, other types of +storage can be too slow for practical usage. In addition, your system may perform caching +to reduce the overhead of file reading. + +********************************** +GPU Version (GPU Hist tree method) +********************************** + +External memory is supported by GPU algorithms (i.e. when ``tree_method`` is set to +``gpu_hist``). However, the algorithm used for GPU is different from the one used for +CPU. When training on a CPU, the tree method iterates through all batches from external +memory for each step of the tree construction algorithm. On the other hand, the GPU +algorithm concatenates all batches into one and stores it in GPU memory. To reduce overall +memory usage, users can utilize subsampling. The good news is that the GPU hist tree +method supports gradient-based sampling, enabling users to set a low sampling rate without +compromising accuracy. + +.. code-block:: python + + param = { + ... + 'subsample': 0.2, + 'sampling_method': 'gradient_based', + } + +For more information about the sampling algorithm and its use in external memory training, +see `this paper `_. + +.. warning:: + + When GPU is running out of memory during iteration on external memory, user might + recieve a segfault instead of an OOM exception. +******* +Remarks +******* -The above snippet is a simplified version of ``demo/guide-python/external_memory.py``. For -an example in C, please see ``demo/c-api/external-memory/``. +When using external memory with XBGoost, data is divided into smaller chunks so that only +a fraction of it needs to be stored in memory at any given time. It's important to note +that this method only applies to the predictor data (``X``), while other data, like labels +and internal runtime structures are concatenated. This means that memory reduction is most +effective when dealing with wide datasets where ``X`` is larger compared to other data +like ``y``, while it has little impact on slim datasets. **************** Text File Inputs **************** -There is no big difference between using external memory version and in-memory version. -The only difference is the filename format. +This is the original form of external memory support, users are encouraged to use custom +data iterator instead. There is no big difference between using external memory version of +text input and the in-memory version. The only difference is the filename format. The external memory version takes in the following `URI `_ format: @@ -117,35 +186,3 @@ XGBoost will first load ``agaricus.txt.train`` in, preprocess it, then write to more notes about text input formats, see :doc:`/tutorials/input_format`. For CLI version, simply add the cache suffix, e.g. ``"../data/agaricus.txt.train?format=libsvm#dtrain.cache"``. - - -********************************** -GPU Version (GPU Hist tree method) -********************************** -External memory is supported in GPU algorithms (i.e. when ``tree_method`` is set to ``gpu_hist``). - -If you are still getting out-of-memory errors after enabling external memory, try subsampling the -data to further reduce GPU memory usage: - -.. code-block:: python - - param = { - ... - 'subsample': 0.1, - 'sampling_method': 'gradient_based', - } - -For more information, see `this paper `_. Internally -the tree method still concatenate all the chunks into 1 final histogram index due to -performance reason, but in compressed format. So its scalability has an upper bound but -still has lower memory cost in general. - -*********** -CPU Version -*********** - -For CPU histogram based tree methods (``approx``, ``hist``) it's recommended to use -``grow_policy=depthwise`` for performance reason. Iterating over data batches is slow, -with ``depthwise`` policy XGBoost can build a entire layer of tree nodes with a few -iterations, while with ``lossguide`` XGBoost needs to iterate over the data set for each -tree node. From 4989269b8181c99f159270b81d8605d0b7ea5f92 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 13 Jun 2023 16:36:47 +0800 Subject: [PATCH 25/51] windows mmap --- src/common/io.cc | 65 +++++++++++++++++++++++++++++++++++++++++------- src/common/io.h | 14 +++++------ 2 files changed, 63 insertions(+), 16 deletions(-) diff --git a/src/common/io.cc b/src/common/io.cc index a560c94a1d89..9762bbf01d26 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -5,8 +5,11 @@ #include // for open, O_RDONLY #include // for mmap, mmap64, munmap #include -#include // for close +#include // for close, getpagesize +#elif defined(_MSC_VER) +#include #endif // defined(__unix__) + #include #include // for errno #include @@ -157,34 +160,78 @@ std::string FileExtension(std::string fname, bool lower) { } } -void* PrivateMmapStream::Open(StringView path, bool read_only, std::size_t offset, +std::size_t GetPageSize() { +#if defined(_MSC_VER) + SYSTEM_INFO sys_info; + GetSystemInfo(&sys_info); + return sys_info.dwPageSize; +#else + return getpagesize(); +#endif +} + +struct PrivateMmapStream::MMAPFile { +#if defined(_MSC_VER) + HANDLE fd; +#else + std::int32_t fd; +#endif + std::string path; +}; + +PrivateMmapStream::PrivateMmapStream(std::string path, bool read_only, std::size_t offset, + std::size_t length) + : MemoryFixSizeBuffer{Open(std::move(path), read_only, offset, length), length} {} + +void* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offset, std::size_t length) { - fd_ = open(path.c_str(), O_RDONLY); - CHECK_GE(fd_, 0) << "Failed to open:" << path << ". " << strerror(errno); +#if defined(_MSC_VER) + HANDLE fd = CreateFile(path.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED, nullptr); + CHECK_NE(fd, INVALID_HANDLE_VALUE) << "Failed to open:" << path; +#else + auto fd = open(path.c_str(), O_RDONLY); +#endif + CHECK_GE(fd, 0) << "Failed to open:" << path << ". " << strerror(errno); + handle_ = std::make_unique(fd, std::move(path)); - char* ptr{nullptr}; + void* ptr{nullptr}; +#if defined(__linux__) || defined(__GLIBC__) int prot{PROT_READ}; if (!read_only) { prot |= PROT_WRITE; } -#if defined(__linux__) || defined(__GLIBC__) ptr = reinterpret_cast(mmap64(nullptr, length, prot, MAP_PRIVATE, fd_, offset)); + CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << strerror(errno); #elif defined(_MSC_VER) - // fixme: not yet implemented - ptr = reinterpret_cast(mmap(nullptr, length, prot, MAP_PRIVATE, fd_, offset)); + auto file_size = GetFileSize(handle_->fd, nullptr); + DWORD access = read_only ? PAGE_READONLY : PAGE_READWRITE; + auto map_file = CreateFileMapping(handle_->fd, nullptr, access, 0, file_size, nullptr); + access = read_only ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS; + ptr = MapViewOfFile(map_file, access, 0, offset, length); + CHECK_NE(ptr, nullptr) << "Failed to map: " << path << ". " << GetLastError(); #else CHECK_LE(offset, std::numeric_limits::max()) << "File size has exceeded the limit on the current system."; + int prot{PROT_READ}; + if (!read_only) { + prot |= PROT_WRITE; + } ptr = reinterpret_cast(mmap(nullptr, length, prot, MAP_PRIVATE, fd_, offset)); -#endif // defined(__linux__) CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << strerror(errno); +#endif // defined(__linux__) return ptr; } PrivateMmapStream::~PrivateMmapStream() { +#if defined(_MSC_VER) + CHECK(UnmapViewOfFile(p_buffer_)) "Faled to munmap." << path_ << ". " << GetLastError(); + CloseHandle(); +#else CHECK_NE(munmap(p_buffer_, buffer_size_), -1) << "Faled to munmap." << path_ << ". " << strerror(errno); CHECK_NE(close(fd_), -1) << "Faled to close: " << path_ << ". " << strerror(errno); +#endif } } // namespace common } // namespace xgboost diff --git a/src/common/io.h b/src/common/io.h index b3bf0cfe3973..af651c08d626 100644 --- a/src/common/io.h +++ b/src/common/io.h @@ -10,11 +10,11 @@ #include #include -#include #include #include #include // for string +#include // for move #include "common.h" @@ -129,14 +129,16 @@ inline std::string ReadAll(std::string const &path) { return content; } +std::size_t GetPageSize(); /** * \brief Private mmap file, copy-on-write */ class PrivateMmapStream : public MemoryFixSizeBuffer { - std::int32_t fd_; - std::string path_; + struct MMAPFile; - void* Open(StringView path, bool read_only, std::size_t offset, std::size_t length); + std::unique_ptr handle_; + + void* Open(std::string path, bool read_only, std::size_t offset, std::size_t length); public: /** @@ -148,9 +150,7 @@ class PrivateMmapStream : public MemoryFixSizeBuffer { * @param length See the `length` parameter of `mmap` for details. */ explicit PrivateMmapStream(std::string path, bool read_only, std::size_t offset, - std::size_t length) - : MemoryFixSizeBuffer{Open(StringView{path}, read_only, offset, length), length}, - path_{path} {} + std::size_t length); ~PrivateMmapStream() override; }; From 341c8fbbc2bd732dac88a2e43cf86883fb075c51 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 13 Jun 2023 17:26:16 +0800 Subject: [PATCH 26/51] compile --- src/common/io.cc | 24 ++++++++++++++++++++---- src/data/sparse_page_source.h | 2 +- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/common/io.cc b/src/common/io.cc index 5f8a13f2c4f6..f6bf15fc4a3f 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -1,6 +1,10 @@ /** * Copyright 2019-2023, by XGBoost Contributors */ +#if !defined(NOMINMAX) && defined(_WIN32) +#define NOMINMAX +#endif // !defined(NOMINMAX) + #if defined(__unix__) || defined(__APPLE__) #include // for open, O_RDONLY #include // for mmap, mmap64, munmap @@ -162,8 +166,18 @@ std::string FileExtension(std::string fname, bool lower) { } } +std::size_t GetPageSize() { +#if defined(_MSC_VER) + SYSTEM_INFO sys_info; + GetSystemInfo(&sys_info); + return sys_info.dwPageSize; +#else + return getpagesize(); +#endif +} + std::size_t PadPageForMmap(std::size_t file_bytes, dmlc::Stream* fo) { - decltype(file_bytes) page_size = getpagesize(); + decltype(file_bytes) page_size = GetPageSize(); CHECK(page_size != 0 && page_size % 2 == 0) << "Failed to get page size on the current system."; CHECK_NE(file_bytes, 0) << "Empty page encountered."; auto n_pages = file_bytes / page_size + !!(file_bytes % page_size != 0); @@ -195,9 +209,9 @@ void* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs CHECK_NE(fd, INVALID_HANDLE_VALUE) << "Failed to open:" << path; #else auto fd = open(path.c_str(), O_RDONLY); -#endif CHECK_GE(fd, 0) << "Failed to open:" << path << ". " << strerror(errno); - handle_ = std::make_unique(fd, std::move(path)); +#endif + handle_.reset(new MMAPFile{fd, std::move(path)}); void* ptr{nullptr}; #if defined(__linux__) || defined(__GLIBC__) @@ -212,7 +226,9 @@ void* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs DWORD access = read_only ? PAGE_READONLY : PAGE_READWRITE; auto map_file = CreateFileMapping(handle_->fd, nullptr, access, 0, file_size, nullptr); access = read_only ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS; - ptr = MapViewOfFile(map_file, access, 0, offset, length); + std::uint32_t loff = static_cast(offset); + std::uint32_t hoff = offset >> 32; + ptr = MapViewOfFile(map_file, access, hoff, loff, length); CHECK_NE(ptr, nullptr) << "Failed to map: " << handle_->path << ". " << GetLastError(); #else CHECK_LE(offset, std::numeric_limits::max()) diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index a073e51b5008..3bf976d2af5c 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -261,7 +261,7 @@ class SparsePageSource : public SparsePageSourceImpl { iter_{iter}, proxy_{proxy} { if (!cache_info_->written) { iter_.Reset(); - CHECK_EQ(iter_.Next(), 1) << "Must have at least 1 batch."; + CHECK(iter_.Next()) << "Must have at least 1 batch."; } this->Fetch(); } From 2660c66c3105f504a8daf9ab37d752c9ccfb1529 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 13 Jun 2023 19:06:59 +0800 Subject: [PATCH 27/51] Pad the file for windows. --- rabit/include/rabit/internal/io.h | 14 +++++++++--- src/common/io.cc | 37 ++++++++++++++++++++++--------- src/common/io.h | 4 ++-- 3 files changed, 39 insertions(+), 16 deletions(-) diff --git a/rabit/include/rabit/internal/io.h b/rabit/include/rabit/internal/io.h index 64633b2155aa..6a7d1227f4ae 100644 --- a/rabit/include/rabit/internal/io.h +++ b/rabit/include/rabit/internal/io.h @@ -6,6 +6,11 @@ */ #ifndef RABIT_INTERNAL_IO_H_ #define RABIT_INTERNAL_IO_H_ + +#if !defined(NOMINMAX) && defined(_WIN32) +#define NOMINMAX +#endif // !defined(NOMINMAX) + #include #include #include @@ -26,6 +31,9 @@ struct MemoryFixSizeBuffer : public SeekStream { // similar to SEEK_END in libc static size_t constexpr kSeekEnd = std::numeric_limits::max(); +protected: + MemoryFixSizeBuffer() = default; + public: MemoryFixSizeBuffer(void *p_buffer, size_t buffer_size) : p_buffer_(reinterpret_cast(p_buffer)), @@ -62,11 +70,11 @@ struct MemoryFixSizeBuffer : public SeekStream { protected: /*! \brief in memory buffer */ - char *p_buffer_; + char* p_buffer_{nullptr}; /*! \brief current pointer */ - size_t buffer_size_; + std::size_t buffer_size_{ 0 }; /*! \brief current pointer */ - size_t curr_ptr_; + std::size_t curr_ptr_{ 0 }; }; // class MemoryFixSizeBuffer /*! \brief a in memory buffer that can be read and write as stream interface */ diff --git a/src/common/io.cc b/src/common/io.cc index f6bf15fc4a3f..bf7c47ea2481 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -11,6 +11,7 @@ #include #include // for close, getpagesize #elif defined(_MSC_VER) +#define WIN32_LEAN_AND_MEAN #include #endif // defined(__unix__) @@ -26,6 +27,7 @@ #include "io.h" #include "xgboost/logging.h" +#include "xgboost/collective/socket.h" namespace xgboost { namespace common { @@ -170,7 +172,8 @@ std::size_t GetPageSize() { #if defined(_MSC_VER) SYSTEM_INFO sys_info; GetSystemInfo(&sys_info); - return sys_info.dwPageSize; + // During testing, `sys_info.dwPageSize` is of size 4096 while `dwAllocationGranularity` is of size 65536. + return sys_info.dwAllocationGranularity; #else return getpagesize(); #endif @@ -190,18 +193,21 @@ std::size_t PadPageForMmap(std::size_t file_bytes, dmlc::Stream* fo) { struct PrivateMmapStream::MMAPFile { #if defined(_MSC_VER) - HANDLE fd; + HANDLE fd{ INVALID_HANDLE_VALUE }; #else - std::int32_t fd; + std::int32_t fd {0}; #endif std::string path; }; PrivateMmapStream::PrivateMmapStream(std::string path, bool read_only, std::size_t offset, std::size_t length) - : MemoryFixSizeBuffer{Open(std::move(path), read_only, offset, length), length} {} + : MemoryFixSizeBuffer{} { + this->p_buffer_ = Open(std::move(path), read_only, offset, length); + this->buffer_size_ = length; +} -void* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offset, +char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offset, std::size_t length) { #if defined(_MSC_VER) HANDLE fd = CreateFile(path.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, @@ -211,7 +217,8 @@ void* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs auto fd = open(path.c_str(), O_RDONLY); CHECK_GE(fd, 0) << "Failed to open:" << path << ". " << strerror(errno); #endif - handle_.reset(new MMAPFile{fd, std::move(path)}); + handle_ = nullptr; + handle_.reset(new MMAPFile{fd, path}); void* ptr{nullptr}; #if defined(__linux__) || defined(__GLIBC__) @@ -228,8 +235,12 @@ void* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs access = read_only ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS; std::uint32_t loff = static_cast(offset); std::uint32_t hoff = offset >> 32; + CHECK(map_file) << "Failed to map: " << handle_->path << ". " << GetLastError();; ptr = MapViewOfFile(map_file, access, hoff, loff, length); - CHECK_NE(ptr, nullptr) << "Failed to map: " << handle_->path << ". " << GetLastError(); + if (ptr == nullptr) { + system::ThrowAtError("MapViewOfFile"); + } + CHECK_NE(ptr, nullptr) << "Failed to map: " << handle_->path << ". " << GetLastError() ; #else CHECK_LE(offset, std::numeric_limits::max()) << "File size has exceeded the limit on the current system."; @@ -240,18 +251,22 @@ void* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs ptr = reinterpret_cast(mmap(nullptr, length, prot, MAP_PRIVATE, fd_, offset)); CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << handle_->path << ". " << strerror(errno); #endif // defined(__linux__) - return ptr; + return reinterpret_cast(ptr); } PrivateMmapStream::~PrivateMmapStream() { CHECK(handle_); #if defined(_MSC_VER) - CHECK(UnmapViewOfFile(p_buffer_)) "Faled to munmap." << handle_->path << ". " << GetLastError(); - CloseHandle(handle_->fd); + if (p_buffer_) { + CHECK(UnmapViewOfFile(p_buffer_)) "Faled to munmap." << GetLastError(); + } + if (handle_->fd != INVALID_HANDLE_VALUE) { + CHECK(CloseHandle(handle_->fd)); + } #else CHECK_NE(munmap(p_buffer_, buffer_size_), -1) << "Faled to munmap." << handle_->path << ". " << strerror(errno); - CHECK_NE(close(fd_), -1) << "Faled to close: " << handle_->path << ". " << strerror(errno); + CHECK_NE(close(handle_->fd), -1) << "Faled to close: " << handle_->path << ". " << strerror(errno); #endif } } // namespace common diff --git a/src/common/io.h b/src/common/io.h index cd82782b025b..bdebc5ac3298 100644 --- a/src/common/io.h +++ b/src/common/io.h @@ -146,9 +146,9 @@ std::size_t PadPageForMmap(std::size_t file_bytes, dmlc::Stream* fo); class PrivateMmapStream : public MemoryFixSizeBuffer { struct MMAPFile; - std::unique_ptr handle_; + std::unique_ptr handle_{nullptr}; - void* Open(std::string path, bool read_only, std::size_t offset, std::size_t length); + char* Open(std::string path, bool read_only, std::size_t offset, std::size_t length); public: /** From 58c0d99d341f2028ca121f5329a890fc59355eaa Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 13 Jun 2023 20:29:27 +0800 Subject: [PATCH 28/51] Avoid padding the data. --- src/common/io.cc | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/src/common/io.cc b/src/common/io.cc index bf7c47ea2481..ba8fd130cf6a 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -187,8 +187,8 @@ std::size_t PadPageForMmap(std::size_t file_bytes, dmlc::Stream* fo) { auto padded = n_pages * page_size; auto padding = padded - file_bytes; std::vector padding_bytes(padding, 0); - fo->Write(padding_bytes.data(), padding_bytes.size()); - return padded; + // fo->Write(padding_bytes.data(), padding_bytes.size()); + return file_bytes; } struct PrivateMmapStream::MMAPFile { @@ -197,6 +197,8 @@ struct PrivateMmapStream::MMAPFile { #else std::int32_t fd {0}; #endif + char* base_ptr{ nullptr }; + std::size_t base_size{0}; std::string path; }; @@ -217,30 +219,31 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs auto fd = open(path.c_str(), O_RDONLY); CHECK_GE(fd, 0) << "Failed to open:" << path << ". " << strerror(errno); #endif - handle_ = nullptr; - handle_.reset(new MMAPFile{fd, path}); - void* ptr{nullptr}; + char* ptr{nullptr}; + auto view_start = offset / GetPageSize() * GetPageSize(); + auto view_size = length + (offset - view_start); + std::cout << view_start << " size: " << view_size << std::endl; #if defined(__linux__) || defined(__GLIBC__) int prot{PROT_READ}; if (!read_only) { prot |= PROT_WRITE; } - ptr = reinterpret_cast(mmap64(nullptr, length, prot, MAP_PRIVATE, handle_->fd, offset)); - CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << handle_->path << ". " << strerror(errno); + ptr = reinterpret_cast(mmap64(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start)); + CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << strerror(errno); #elif defined(_MSC_VER) - auto file_size = GetFileSize(handle_->fd, nullptr); + auto file_size = GetFileSize(fd, nullptr); DWORD access = read_only ? PAGE_READONLY : PAGE_READWRITE; - auto map_file = CreateFileMapping(handle_->fd, nullptr, access, 0, file_size, nullptr); + auto map_file = CreateFileMapping(fd, nullptr, access, 0, file_size, nullptr); access = read_only ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS; - std::uint32_t loff = static_cast(offset); - std::uint32_t hoff = offset >> 32; - CHECK(map_file) << "Failed to map: " << handle_->path << ". " << GetLastError();; - ptr = MapViewOfFile(map_file, access, hoff, loff, length); + std::uint32_t loff = static_cast(view_start); + std::uint32_t hoff = view_start >> 32; + CHECK(map_file) << "Failed to map: " << path << ". " << GetLastError(); + ptr = reinterpret_cast(MapViewOfFile(map_file, access, hoff, loff, view_size)); if (ptr == nullptr) { system::ThrowAtError("MapViewOfFile"); } - CHECK_NE(ptr, nullptr) << "Failed to map: " << handle_->path << ". " << GetLastError() ; + CHECK_NE(ptr, nullptr) << "Failed to map: " << path << ". " << GetLastError(); #else CHECK_LE(offset, std::numeric_limits::max()) << "File size has exceeded the limit on the current system."; @@ -248,23 +251,26 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs if (!read_only) { prot |= PROT_WRITE; } - ptr = reinterpret_cast(mmap(nullptr, length, prot, MAP_PRIVATE, fd_, offset)); - CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << handle_->path << ". " << strerror(errno); + ptr = reinterpret_cast(mmap(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start)); + CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << strerror(errno); #endif // defined(__linux__) - return reinterpret_cast(ptr); + + handle_.reset(new MMAPFile{ fd, ptr, view_size, std::move(path) }); + ptr += (offset - view_start); + return ptr; } PrivateMmapStream::~PrivateMmapStream() { CHECK(handle_); #if defined(_MSC_VER) if (p_buffer_) { - CHECK(UnmapViewOfFile(p_buffer_)) "Faled to munmap." << GetLastError(); + CHECK(UnmapViewOfFile(handle_->base_ptr)) "Faled to munmap." << GetLastError(); } if (handle_->fd != INVALID_HANDLE_VALUE) { CHECK(CloseHandle(handle_->fd)); } #else - CHECK_NE(munmap(p_buffer_, buffer_size_), -1) + CHECK_NE(munmap(handle_->base_ptr, handle_->base_size), -1) << "Faled to munmap." << handle_->path << ". " << strerror(errno); CHECK_NE(close(handle_->fd), -1) << "Faled to close: " << handle_->path << ". " << strerror(errno); #endif From 5baf5ca8960256903fef946d5144281489a887f6 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 13 Jun 2023 20:34:13 +0800 Subject: [PATCH 29/51] Cleanup. --- src/common/io.cc | 26 ++++++++------------------ src/common/io.h | 9 --------- src/data/sparse_page_source.h | 3 +-- tests/cpp/common/test_io.cc | 3 +-- 4 files changed, 10 insertions(+), 31 deletions(-) diff --git a/src/common/io.cc b/src/common/io.cc index ba8fd130cf6a..e395f13f0fff 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -172,32 +172,21 @@ std::size_t GetPageSize() { #if defined(_MSC_VER) SYSTEM_INFO sys_info; GetSystemInfo(&sys_info); - // During testing, `sys_info.dwPageSize` is of size 4096 while `dwAllocationGranularity` is of size 65536. + // During testing, `sys_info.dwPageSize` is of size 4096 while `dwAllocationGranularity` is of + // size 65536. return sys_info.dwAllocationGranularity; #else return getpagesize(); #endif } -std::size_t PadPageForMmap(std::size_t file_bytes, dmlc::Stream* fo) { - decltype(file_bytes) page_size = GetPageSize(); - CHECK(page_size != 0 && page_size % 2 == 0) << "Failed to get page size on the current system."; - CHECK_NE(file_bytes, 0) << "Empty page encountered."; - auto n_pages = file_bytes / page_size + !!(file_bytes % page_size != 0); - auto padded = n_pages * page_size; - auto padding = padded - file_bytes; - std::vector padding_bytes(padding, 0); - // fo->Write(padding_bytes.data(), padding_bytes.size()); - return file_bytes; -} - struct PrivateMmapStream::MMAPFile { #if defined(_MSC_VER) - HANDLE fd{ INVALID_HANDLE_VALUE }; + HANDLE fd{INVALID_HANDLE_VALUE}; #else - std::int32_t fd {0}; + std::int32_t fd{0}; #endif - char* base_ptr{ nullptr }; + char* base_ptr{nullptr}; std::size_t base_size{0}; std::string path; }; @@ -255,7 +244,7 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << strerror(errno); #endif // defined(__linux__) - handle_.reset(new MMAPFile{ fd, ptr, view_size, std::move(path) }); + handle_.reset(new MMAPFile{fd, ptr, view_size, std::move(path)}); ptr += (offset - view_start); return ptr; } @@ -272,7 +261,8 @@ PrivateMmapStream::~PrivateMmapStream() { #else CHECK_NE(munmap(handle_->base_ptr, handle_->base_size), -1) << "Faled to munmap." << handle_->path << ". " << strerror(errno); - CHECK_NE(close(handle_->fd), -1) << "Faled to close: " << handle_->path << ". " << strerror(errno); + CHECK_NE(close(handle_->fd), -1) + << "Faled to close: " << handle_->path << ". " << strerror(errno); #endif } } // namespace common diff --git a/src/common/io.h b/src/common/io.h index bdebc5ac3298..a64d10b490e0 100644 --- a/src/common/io.h +++ b/src/common/io.h @@ -130,15 +130,6 @@ inline std::string ReadAll(std::string const &path) { } std::size_t GetPageSize(); -/** - * @brief Pad the output file for a page to make it mmap compatible. - * - * @param file_bytes The size of the output file - * @param fo Stream used to write the file. - * - * @return The file size after being padded. - */ -std::size_t PadPageForMmap(std::size_t file_bytes, dmlc::Stream* fo); /** * @brief Private mmap file, copy-on-write. File must be properly aligned by `PadPageForMmap()`. diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index 3bf976d2af5c..26b2123f0be0 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -174,12 +174,11 @@ class SparsePageSourceImpl : public BatchIteratorImpl { } auto bytes = fmt->Write(*page_, fo.get()); - auto padded = common::PadPageForMmap(bytes, fo.get()); timer.Stop(); LOG(INFO) << static_cast(bytes) / 1024.0 / 1024.0 << " MB written in " << timer.ElapsedSeconds() << " seconds."; - cache_info_->Push(padded); + cache_info_->Push(bytes); } virtual void Fetch() = 0; diff --git a/tests/cpp/common/test_io.cc b/tests/cpp/common/test_io.cc index 0a5881991bdd..de9cf0fcf28a 100644 --- a/tests/cpp/common/test_io.cc +++ b/tests/cpp/common/test_io.cc @@ -111,8 +111,7 @@ TEST(IO, PrivateMmapStream) { fo->Write(data.data(), data.size() * sizeof(T)); std::size_t bytes = sizeof(std::uint64_t) + data.size() * sizeof(T); - auto padded = common::PadPageForMmap(bytes, fo.get()); - offset.push_back(padded); + offset.push_back(bytes); batches.emplace_back(std::move(data)); } From 925245c4e9bc1f9d3789cd71c7211fd60b16c655 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 13 Jun 2023 22:11:48 +0800 Subject: [PATCH 30/51] debug. --- src/common/io.cc | 3 ++- src/data/sparse_page_raw_format.cc | 2 ++ src/data/sparse_page_source.h | 7 ++++--- tests/cpp/common/test_io.cc | 2 +- tests/cpp/test_main.cc | 3 ++- tests/cpp/tree/hist/test_histogram.cc | 9 +-------- 6 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/common/io.cc b/src/common/io.cc index e395f13f0fff..a3a36018c7fa 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -212,7 +212,8 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs char* ptr{nullptr}; auto view_start = offset / GetPageSize() * GetPageSize(); auto view_size = length + (offset - view_start); - std::cout << view_start << " size: " << view_size << std::endl; + std::cout << "offset:" << offset << ", length:" << length << ", start:" << view_start << ", size:" << view_size << std::endl; + // std::cout << view_start << " size: " << view_size << std::endl; #if defined(__linux__) || defined(__GLIBC__) int prot{PROT_READ}; if (!read_only) { diff --git a/src/data/sparse_page_raw_format.cc b/src/data/sparse_page_raw_format.cc index 1e5d1ec71f72..42dd571ac3c6 100644 --- a/src/data/sparse_page_raw_format.cc +++ b/src/data/sparse_page_raw_format.cc @@ -22,6 +22,7 @@ class SparsePageRawFormat : public SparsePageFormat { if (!fi->Read(&offset_vec)) { return false; } + std::cout << "read size:" << offset_vec.size() << " back:" << offset_vec.back() << std::endl; auto& data_vec = page->data.HostVector(); CHECK_NE(page->offset.Size(), 0U) << "Invalid SparsePage file"; data_vec.resize(offset_vec.back()); @@ -37,6 +38,7 @@ class SparsePageRawFormat : public SparsePageFormat { size_t Write(const T& page, dmlc::Stream* fo) override { const auto& offset_vec = page.offset.HostVector(); + std::cout << "write size:" << offset_vec.size() << " back:" << offset_vec.back() << std::endl; const auto& data_vec = page.data.HostVector(); CHECK(page.offset.Size() != 0 && offset_vec[0] == 0); CHECK_EQ(offset_vec.back(), page.data.Size()); diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index 26b2123f0be0..5696c9b40872 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -115,7 +115,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { } // An heuristic for number of pre-fetched batches. We can make it part of BatchParam // to let user adjust number of pre-fetched batches when needed. - uint32_t constexpr kPreFetch = 4; + uint32_t constexpr kPreFetch = 1; size_t n_prefetch_batches = std::min(kPreFetch, n_batches_); CHECK_GT(n_prefetch_batches, 0) << "total batches:" << n_batches_; @@ -132,7 +132,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { CHECK_LT(fetch_it, cache_info_->offset.size()); ring_->at(fetch_it) = std::async(std::launch::async, [fetch_it, self, this]() { auto page = std::make_shared(); - this->exec_.Run([&] { +// this->exec_.Run([&] { common::Timer timer; timer.Start(); std::unique_ptr> fmt{CreatePageFormat("raw")}; @@ -144,7 +144,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { auto fi = std::make_unique(n, true, offset, length); CHECK(fmt->Read(page.get(), fi.get())); LOG(INFO) << "Read a page in " << timer.ElapsedSeconds() << " seconds."; - }); + // }); return page; }); } @@ -174,6 +174,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { } auto bytes = fmt->Write(*page_, fo.get()); + std::cout << "wrote: " << bytes << std::endl; timer.Stop(); LOG(INFO) << static_cast(bytes) / 1024.0 / 1024.0 << " MB written in " diff --git a/tests/cpp/common/test_io.cc b/tests/cpp/common/test_io.cc index de9cf0fcf28a..81316363e6b6 100644 --- a/tests/cpp/common/test_io.cc +++ b/tests/cpp/common/test_io.cc @@ -103,7 +103,7 @@ TEST(IO, PrivateMmapStream) { { std::unique_ptr fo{dmlc::Stream::Create(path.c_str(), "w")}; for (std::size_t i = 0; i < n_batches; ++i) { - std::size_t size = (i + 1) * 2; + std::size_t size = (i + 1) * 8192; std::vector data(size, 0); std::iota(data.begin(), data.end(), i * i); diff --git a/tests/cpp/test_main.cc b/tests/cpp/test_main.cc index b93329c2e788..317e29f321e7 100644 --- a/tests/cpp/test_main.cc +++ b/tests/cpp/test_main.cc @@ -11,7 +11,8 @@ int main(int argc, char ** argv) { xgboost::Args args {{"verbosity", "2"}}; xgboost::ConsoleLogger::Configure(args); - + ::testing::GTEST_FLAG(filter) = "CPUHistogram.ExternalMemory*"; + // ::testing::GTEST_FLAG(filter) = "IO.PrivateMmapStream"; testing::InitGoogleTest(&argc, argv); testing::FLAGS_gtest_death_test_style = "threadsafe"; auto rmm_alloc = xgboost::SetUpRMMResourceForCppTests(argc, argv); diff --git a/tests/cpp/tree/hist/test_histogram.cc b/tests/cpp/tree/hist/test_histogram.cc index 8eb043ceca20..86062e81fb5c 100644 --- a/tests/cpp/tree/hist/test_histogram.cc +++ b/tests/cpp/tree/hist/test_histogram.cc @@ -477,15 +477,8 @@ TEST(CPUHistogram, ExternalMemory) { int32_t constexpr kBins = 256; Context ctx; + std::cout << "l:" << __LINE__ << std::endl; TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span{}, false}, true, false); - TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span{}, false}, true, true); - - float sparse_thresh{0.5}; - TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false); - TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true); - sparse_thresh = std::numeric_limits::quiet_NaN(); - TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false); - TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true); } } // namespace tree } // namespace xgboost From bcf4cdb491628b47e8ff03a46d6b15d716791a9a Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 14 Jun 2023 01:14:03 +0800 Subject: [PATCH 31/51] Fix. --- src/data/sparse_page_raw_format.cc | 2 -- src/data/sparse_page_source.h | 4 ++-- tests/cpp/test_main.cc | 2 -- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/data/sparse_page_raw_format.cc b/src/data/sparse_page_raw_format.cc index 42dd571ac3c6..1e5d1ec71f72 100644 --- a/src/data/sparse_page_raw_format.cc +++ b/src/data/sparse_page_raw_format.cc @@ -22,7 +22,6 @@ class SparsePageRawFormat : public SparsePageFormat { if (!fi->Read(&offset_vec)) { return false; } - std::cout << "read size:" << offset_vec.size() << " back:" << offset_vec.back() << std::endl; auto& data_vec = page->data.HostVector(); CHECK_NE(page->offset.Size(), 0U) << "Invalid SparsePage file"; data_vec.resize(offset_vec.back()); @@ -38,7 +37,6 @@ class SparsePageRawFormat : public SparsePageFormat { size_t Write(const T& page, dmlc::Stream* fo) override { const auto& offset_vec = page.offset.HostVector(); - std::cout << "write size:" << offset_vec.size() << " back:" << offset_vec.back() << std::endl; const auto& data_vec = page.data.HostVector(); CHECK(page.offset.Size() != 0 && offset_vec[0] == 0); CHECK_EQ(offset_vec.back(), page.data.Size()); diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index 5696c9b40872..4f54ecb3326e 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -168,9 +168,9 @@ class SparsePageSourceImpl : public BatchIteratorImpl { auto name = cache_info_->ShardName(); std::unique_ptr fo; if (this->Iter() == 0) { - fo.reset(dmlc::Stream::Create(name.c_str(), "w")); + fo.reset(dmlc::Stream::Create(name.c_str(), "wb")); } else { - fo.reset(dmlc::Stream::Create(name.c_str(), "a")); + fo.reset(dmlc::Stream::Create(name.c_str(), "ab")); } auto bytes = fmt->Write(*page_, fo.get()); diff --git a/tests/cpp/test_main.cc b/tests/cpp/test_main.cc index 317e29f321e7..66eae2cb7188 100644 --- a/tests/cpp/test_main.cc +++ b/tests/cpp/test_main.cc @@ -11,8 +11,6 @@ int main(int argc, char ** argv) { xgboost::Args args {{"verbosity", "2"}}; xgboost::ConsoleLogger::Configure(args); - ::testing::GTEST_FLAG(filter) = "CPUHistogram.ExternalMemory*"; - // ::testing::GTEST_FLAG(filter) = "IO.PrivateMmapStream"; testing::InitGoogleTest(&argc, argv); testing::FLAGS_gtest_death_test_style = "threadsafe"; auto rmm_alloc = xgboost::SetUpRMMResourceForCppTests(argc, argv); From c195db16191fe37a07e9bd7b03a15d02505a208b Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 14 Jun 2023 01:25:40 +0800 Subject: [PATCH 32/51] Cleanup. --- src/common/io.cc | 2 -- src/data/sparse_page_source.h | 7 ++-- tests/cpp/data/test_sparse_page_raw_format.cc | 35 +++++++++++++++++++ tests/cpp/test_main.cc | 1 + tests/cpp/tree/hist/test_histogram.cc | 9 ++++- 5 files changed, 47 insertions(+), 7 deletions(-) diff --git a/src/common/io.cc b/src/common/io.cc index a3a36018c7fa..b8d938e8c909 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -212,8 +212,6 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs char* ptr{nullptr}; auto view_start = offset / GetPageSize() * GetPageSize(); auto view_size = length + (offset - view_start); - std::cout << "offset:" << offset << ", length:" << length << ", start:" << view_start << ", size:" << view_size << std::endl; - // std::cout << view_start << " size: " << view_size << std::endl; #if defined(__linux__) || defined(__GLIBC__) int prot{PROT_READ}; if (!read_only) { diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index 4f54ecb3326e..cfa1a6b4cce7 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -115,7 +115,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { } // An heuristic for number of pre-fetched batches. We can make it part of BatchParam // to let user adjust number of pre-fetched batches when needed. - uint32_t constexpr kPreFetch = 1; + uint32_t constexpr kPreFetch = 4; size_t n_prefetch_batches = std::min(kPreFetch, n_batches_); CHECK_GT(n_prefetch_batches, 0) << "total batches:" << n_batches_; @@ -132,7 +132,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { CHECK_LT(fetch_it, cache_info_->offset.size()); ring_->at(fetch_it) = std::async(std::launch::async, [fetch_it, self, this]() { auto page = std::make_shared(); -// this->exec_.Run([&] { + this->exec_.Run([&] { common::Timer timer; timer.Start(); std::unique_ptr> fmt{CreatePageFormat("raw")}; @@ -144,7 +144,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { auto fi = std::make_unique(n, true, offset, length); CHECK(fmt->Read(page.get(), fi.get())); LOG(INFO) << "Read a page in " << timer.ElapsedSeconds() << " seconds."; - // }); + }); return page; }); } @@ -174,7 +174,6 @@ class SparsePageSourceImpl : public BatchIteratorImpl { } auto bytes = fmt->Write(*page_, fo.get()); - std::cout << "wrote: " << bytes << std::endl; timer.Stop(); LOG(INFO) << static_cast(bytes) / 1024.0 / 1024.0 << " MB written in " diff --git a/tests/cpp/data/test_sparse_page_raw_format.cc b/tests/cpp/data/test_sparse_page_raw_format.cc index 722655880899..8121556363c8 100644 --- a/tests/cpp/data/test_sparse_page_raw_format.cc +++ b/tests/cpp/data/test_sparse_page_raw_format.cc @@ -59,5 +59,40 @@ TEST(SparsePageRawFormat, CSCPage) { TEST(SparsePageRawFormat, SortedCSCPage) { TestSparsePageRawFormat(); } + +TEST(Debug, WritePage) { + std::string path {"testfile"}; + std::unique_ptr> fmt{CreatePageFormat("raw")}; + auto Xy = RandomDataGenerator{ 8192 * 8, 12, 0.0 }.GenerateDMatrix(); + { + std::unique_ptr fo{dmlc::Stream::Create(path.c_str(), "w")}; + for (auto const& page : Xy->GetBatches()) { + std::cout << "back:" << page.offset.HostVector().back() << std::endl; + fmt->Write(page, fo.get()); + } + } + { + std::unique_ptr fo{dmlc::Stream::Create(path.c_str(), "ab")}; + for (auto const& page : Xy->GetBatches()) { + std::cout << "back:" << page.offset.HostVector().back() << std::endl; + fmt->Write(page, fo.get()); + } + } + + { + std::unique_ptr> fmt{CreatePageFormat("raw")}; + std::unique_ptr fi{dmlc::SeekStream::CreateForRead(path.c_str())}; + { + SparsePage page; + fmt->Read(&page, fi.get()); + std::cout << "back:" << page.offset.HostVector().back() << std::endl; + } + { + SparsePage page; + fmt->Read(&page, fi.get()); + std::cout << "back:" << page.offset.HostVector().back() << std::endl; + } + } +} } // namespace data } // namespace xgboost diff --git a/tests/cpp/test_main.cc b/tests/cpp/test_main.cc index 66eae2cb7188..b93329c2e788 100644 --- a/tests/cpp/test_main.cc +++ b/tests/cpp/test_main.cc @@ -11,6 +11,7 @@ int main(int argc, char ** argv) { xgboost::Args args {{"verbosity", "2"}}; xgboost::ConsoleLogger::Configure(args); + testing::InitGoogleTest(&argc, argv); testing::FLAGS_gtest_death_test_style = "threadsafe"; auto rmm_alloc = xgboost::SetUpRMMResourceForCppTests(argc, argv); diff --git a/tests/cpp/tree/hist/test_histogram.cc b/tests/cpp/tree/hist/test_histogram.cc index 86062e81fb5c..8eb043ceca20 100644 --- a/tests/cpp/tree/hist/test_histogram.cc +++ b/tests/cpp/tree/hist/test_histogram.cc @@ -477,8 +477,15 @@ TEST(CPUHistogram, ExternalMemory) { int32_t constexpr kBins = 256; Context ctx; - std::cout << "l:" << __LINE__ << std::endl; TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span{}, false}, true, false); + TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span{}, false}, true, true); + + float sparse_thresh{0.5}; + TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false); + TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true); + sparse_thresh = std::numeric_limits::quiet_NaN(); + TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false); + TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true); } } // namespace tree } // namespace xgboost From 9bbecf5e73aba98e65a83223ba3fdb447d5771f2 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 14 Jun 2023 01:47:55 +0800 Subject: [PATCH 33/51] Cleanup. --- doc/tutorials/external_memory.rst | 3 +- python-package/xgboost/testing/__init__.py | 8 ----- tests/cpp/data/test_sparse_page_raw_format.cc | 35 ------------------- 3 files changed, 1 insertion(+), 45 deletions(-) diff --git a/doc/tutorials/external_memory.rst b/doc/tutorials/external_memory.rst index 8888c45fa191..71f3b176d8c6 100644 --- a/doc/tutorials/external_memory.rst +++ b/doc/tutorials/external_memory.rst @@ -21,8 +21,7 @@ GPU-based training algorithm. We will introduce them in the following sections. .. note:: The implementation of external memory uses ``mmap`` and is not tested against system - errors like disconnected network devices (`SIGBUS`). In addition, Windows is not yet - supported. + errors like disconnected network devices (`SIGBUS`). ************* Data Iterator diff --git a/python-package/xgboost/testing/__init__.py b/python-package/xgboost/testing/__init__.py index 523f3f99c053..70e5361011b3 100644 --- a/python-package/xgboost/testing/__init__.py +++ b/python-package/xgboost/testing/__init__.py @@ -93,14 +93,6 @@ def no_ipv6() -> PytestSkip: return {"condition": not has_ipv6(), "reason": "IPv6 is required to be enabled."} -def no_unix() -> PytestSkip: - """PyTest skip mark for non-unix.""" - return { - "condition": system() == "Windows", - "reason": "unix system is required to be enabled.", - } - - def no_ubjson() -> PytestSkip: return no_mod("ubjson") diff --git a/tests/cpp/data/test_sparse_page_raw_format.cc b/tests/cpp/data/test_sparse_page_raw_format.cc index 8121556363c8..722655880899 100644 --- a/tests/cpp/data/test_sparse_page_raw_format.cc +++ b/tests/cpp/data/test_sparse_page_raw_format.cc @@ -59,40 +59,5 @@ TEST(SparsePageRawFormat, CSCPage) { TEST(SparsePageRawFormat, SortedCSCPage) { TestSparsePageRawFormat(); } - -TEST(Debug, WritePage) { - std::string path {"testfile"}; - std::unique_ptr> fmt{CreatePageFormat("raw")}; - auto Xy = RandomDataGenerator{ 8192 * 8, 12, 0.0 }.GenerateDMatrix(); - { - std::unique_ptr fo{dmlc::Stream::Create(path.c_str(), "w")}; - for (auto const& page : Xy->GetBatches()) { - std::cout << "back:" << page.offset.HostVector().back() << std::endl; - fmt->Write(page, fo.get()); - } - } - { - std::unique_ptr fo{dmlc::Stream::Create(path.c_str(), "ab")}; - for (auto const& page : Xy->GetBatches()) { - std::cout << "back:" << page.offset.HostVector().back() << std::endl; - fmt->Write(page, fo.get()); - } - } - - { - std::unique_ptr> fmt{CreatePageFormat("raw")}; - std::unique_ptr fi{dmlc::SeekStream::CreateForRead(path.c_str())}; - { - SparsePage page; - fmt->Read(&page, fi.get()); - std::cout << "back:" << page.offset.HostVector().back() << std::endl; - } - { - SparsePage page; - fmt->Read(&page, fi.get()); - std::cout << "back:" << page.offset.HostVector().back() << std::endl; - } - } -} } // namespace data } // namespace xgboost From d3987e8cfd6269c6d36bd5f50c7c2256b86eea12 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 14 Jun 2023 02:57:42 +0800 Subject: [PATCH 34/51] cleanup. --- src/common/io.cc | 51 ++++++++++++++++++++++---------------- tests/python/test_demos.py | 1 - 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/src/common/io.cc b/src/common/io.cc index b8d938e8c909..910a7bbaccb8 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -5,29 +5,33 @@ #define NOMINMAX #endif // !defined(NOMINMAX) -#if defined(__unix__) || defined(__APPLE__) +#if defined(__unix__) || defined(__APPLE__) || defined(__MINGW32__) #include // for open, O_RDONLY #include // for mmap, mmap64, munmap -#include -#include // for close, getpagesize +#include // for close, getpagesize #elif defined(_MSC_VER) #define WIN32_LEAN_AND_MEAN #include -#endif // defined(__unix__) +#endif // defined(__unix__) -#include -#include // for errno -#include -#include -#include // for numeric_limits -#include -#include -#include -#include // for vector +#include // for copy, transform +#include // for tolower +#include // for errno +#include // for size_t +#include // for int32_t, uint32_t +#include // for memcpy +#include // for ifstream +#include // for distance +#include // for numeric_limits +#include // for unique_ptr +#include // for string +#include // for error_code, system_category +#include // for move +#include // for vector #include "io.h" +#include "xgboost/collective/socket.h" // for LastError #include "xgboost/logging.h" -#include "xgboost/collective/socket.h" namespace xgboost { namespace common { @@ -191,6 +195,14 @@ struct PrivateMmapStream::MMAPFile { std::string path; }; +namespace { +auto SystemErrorMsg() { + std::int32_t errsv = system::LastError(); + auto err = std::error_code{errsv, std::system_category()}; + return err; +} +} // anonymous namespace + PrivateMmapStream::PrivateMmapStream(std::string path, bool read_only, std::size_t offset, std::size_t length) : MemoryFixSizeBuffer{} { @@ -206,7 +218,7 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs CHECK_NE(fd, INVALID_HANDLE_VALUE) << "Failed to open:" << path; #else auto fd = open(path.c_str(), O_RDONLY); - CHECK_GE(fd, 0) << "Failed to open:" << path << ". " << strerror(errno); + CHECK_GE(fd, 0) << "Failed to open:" << path << ". " << SystemErrorMsg(); #endif char* ptr{nullptr}; @@ -218,7 +230,7 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs prot |= PROT_WRITE; } ptr = reinterpret_cast(mmap64(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start)); - CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << strerror(errno); + CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg(); #elif defined(_MSC_VER) auto file_size = GetFileSize(fd, nullptr); DWORD access = read_only ? PAGE_READONLY : PAGE_READWRITE; @@ -228,10 +240,7 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs std::uint32_t hoff = view_start >> 32; CHECK(map_file) << "Failed to map: " << path << ". " << GetLastError(); ptr = reinterpret_cast(MapViewOfFile(map_file, access, hoff, loff, view_size)); - if (ptr == nullptr) { - system::ThrowAtError("MapViewOfFile"); - } - CHECK_NE(ptr, nullptr) << "Failed to map: " << path << ". " << GetLastError(); + CHECK_NE(ptr, nullptr) << "Failed to map: " << path << ". " << SystemErrorMsg(); #else CHECK_LE(offset, std::numeric_limits::max()) << "File size has exceeded the limit on the current system."; @@ -240,7 +249,7 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs prot |= PROT_WRITE; } ptr = reinterpret_cast(mmap(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start)); - CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << strerror(errno); + CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg(); #endif // defined(__linux__) handle_.reset(new MMAPFile{fd, ptr, view_size, std::move(path)}); diff --git a/tests/python/test_demos.py b/tests/python/test_demos.py index 90c72c8e6233..c54f35046f8a 100644 --- a/tests/python/test_demos.py +++ b/tests/python/test_demos.py @@ -103,7 +103,6 @@ def test_cross_validation_demo(): subprocess.check_call(cmd) -@pytest.mark.skipif(**tm.no_unix()) def test_external_memory_demo(): script = os.path.join(PYTHON_DEMO_DIR, 'external_memory.py') cmd = ['python', script] From 788f2b6fc4c08f5b7b9439f2723f8e3e4f3254c0 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 14 Jun 2023 03:10:03 +0800 Subject: [PATCH 35/51] GPU compilation. --- src/common/io.cc | 14 +++++++------- src/common/io.h | 3 +-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/common/io.cc b/src/common/io.cc index 910a7bbaccb8..d7999fed9982 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -203,13 +203,6 @@ auto SystemErrorMsg() { } } // anonymous namespace -PrivateMmapStream::PrivateMmapStream(std::string path, bool read_only, std::size_t offset, - std::size_t length) - : MemoryFixSizeBuffer{} { - this->p_buffer_ = Open(std::move(path), read_only, offset, length); - this->buffer_size_ = length; -} - char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offset, std::size_t length) { #if defined(_MSC_VER) @@ -257,6 +250,13 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs return ptr; } +PrivateMmapStream::PrivateMmapStream(std::string path, bool read_only, std::size_t offset, + std::size_t length) + : MemoryFixSizeBuffer{}, handle_{nullptr} { + this->p_buffer_ = Open(std::move(path), read_only, offset, length); + this->buffer_size_ = length; +} + PrivateMmapStream::~PrivateMmapStream() { CHECK(handle_); #if defined(_MSC_VER) diff --git a/src/common/io.h b/src/common/io.h index a64d10b490e0..f99cb7a47215 100644 --- a/src/common/io.h +++ b/src/common/io.h @@ -136,8 +136,7 @@ std::size_t GetPageSize(); */ class PrivateMmapStream : public MemoryFixSizeBuffer { struct MMAPFile; - - std::unique_ptr handle_{nullptr}; + std::unique_ptr handle_; char* Open(std::string path, bool read_only, std::size_t offset, std::size_t length); From e88f5614f03354b7c15c43e93c20b2252681d571 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 14 Jun 2023 05:41:15 +0800 Subject: [PATCH 36/51] lint. --- src/common/io.cc | 14 +++++++------- src/common/io.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/common/io.cc b/src/common/io.cc index d7999fed9982..74529d12e1d6 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -5,11 +5,11 @@ #define NOMINMAX #endif // !defined(NOMINMAX) -#if defined(__unix__) || defined(__APPLE__) || defined(__MINGW32__) +#if defined(__unix__) || defined(__APPLE__) #include // for open, O_RDONLY #include // for mmap, mmap64, munmap #include // for close, getpagesize -#elif defined(_MSC_VER) +#elif defined(_MSC_VER) || defined(__MINGW32__) #define WIN32_LEAN_AND_MEAN #include #endif // defined(__unix__) @@ -231,7 +231,7 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs access = read_only ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS; std::uint32_t loff = static_cast(view_start); std::uint32_t hoff = view_start >> 32; - CHECK(map_file) << "Failed to map: " << path << ". " << GetLastError(); + CHECK(map_file) << "Failed to map: " << path << ". " << SystemErrorMsg(); ptr = reinterpret_cast(MapViewOfFile(map_file, access, hoff, loff, view_size)); CHECK_NE(ptr, nullptr) << "Failed to map: " << path << ". " << SystemErrorMsg(); #else @@ -261,16 +261,16 @@ PrivateMmapStream::~PrivateMmapStream() { CHECK(handle_); #if defined(_MSC_VER) if (p_buffer_) { - CHECK(UnmapViewOfFile(handle_->base_ptr)) "Faled to munmap." << GetLastError(); + CHECK(UnmapViewOfFile(handle_->base_ptr)) "Faled to munmap. " << SystemErrorMsg(); } if (handle_->fd != INVALID_HANDLE_VALUE) { - CHECK(CloseHandle(handle_->fd)); + CHECK(CloseHandle(handle_->fd)) << "Failed to close handle. " << SystemErrorMsg(); } #else CHECK_NE(munmap(handle_->base_ptr, handle_->base_size), -1) - << "Faled to munmap." << handle_->path << ". " << strerror(errno); + << "Faled to munmap." << handle_->path << ". " << SystemErrorMsg(); CHECK_NE(close(handle_->fd), -1) - << "Faled to close: " << handle_->path << ". " << strerror(errno); + << "Faled to close: " << handle_->path << ". " << SystemErrorMsg(); #endif } } // namespace common diff --git a/src/common/io.h b/src/common/io.h index f99cb7a47215..922cb520ea56 100644 --- a/src/common/io.h +++ b/src/common/io.h @@ -13,8 +13,8 @@ #include #include +#include // for unique_ptr #include // for string -#include // for move #include "common.h" From 6a026011df2a7f7578ea6e30c195dff3dad55f60 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 14 Jun 2023 05:42:22 +0800 Subject: [PATCH 37/51] log time. --- src/data/sparse_page_source.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index cfa1a6b4cce7..9417523f9744 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -15,8 +15,8 @@ #include #include "../common/common.h" -#include "../common/io.h" // for PrivateMmapStream, PadPageForMMAP -#include "../common/timer.h" +#include "../common/io.h" // for PrivateMmapStream, PadPageForMMAP +#include "../common/timer.h" // for Monitor, Timer #include "adapter.h" #include "dmlc/common.h" // OMPException #include "proxy_dmatrix.h" @@ -104,6 +104,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { // can pre-fetch data in a ring. std::unique_ptr ring_{new Ring}; dmlc::OMPException exec_; + common::Monitor monitor_; bool ReadCache() { CHECK(!at_end_); @@ -123,6 +124,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { exec_.Rethrow(); + monitor_.Start("launch"); for (std::size_t i = 0; i < n_prefetch_batches; ++i, ++fetch_it) { fetch_it %= n_batches_; // ring if (ring_->at(fetch_it).valid()) { @@ -148,11 +150,14 @@ class SparsePageSourceImpl : public BatchIteratorImpl { return page; }); } + monitor_.Stop("launch"); CHECK_EQ(std::count_if(ring_->cbegin(), ring_->cend(), [](auto const& f) { return f.valid(); }), n_prefetch_batches) << "Sparse DMatrix assumes forward iteration."; + monitor_.Start("Wait"); page_ = (*ring_)[count_].get(); + monitor_.Stop("Wait"); CHECK(!(*ring_)[count_].valid()); exec_.Rethrow(); @@ -184,10 +189,15 @@ class SparsePageSourceImpl : public BatchIteratorImpl { virtual void Fetch() = 0; public: - SparsePageSourceImpl(float missing, int nthreads, bst_feature_t n_features, - uint32_t n_batches, std::shared_ptr cache) - : missing_{missing}, nthreads_{nthreads}, n_features_{n_features}, - n_batches_{n_batches}, cache_info_{std::move(cache)} {} + SparsePageSourceImpl(float missing, int nthreads, bst_feature_t n_features, uint32_t n_batches, + std::shared_ptr cache) + : missing_{missing}, + nthreads_{nthreads}, + n_features_{n_features}, + n_batches_{n_batches}, + cache_info_{std::move(cache)} { + monitor_.Init(typeid(S).name()); // not pretty, but works for basic profiling + } SparsePageSourceImpl(SparsePageSourceImpl const &that) = delete; From 9dd5812c5a7f34f36f11f7438d60f058e11d3f22 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 14 Jun 2023 05:47:23 +0800 Subject: [PATCH 38/51] improve the tests. --- python-package/xgboost/testing/__init__.py | 8 +++++--- tests/python-gpu/test_from_cudf.py | 2 +- tests/python/test_data_iterator.py | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/python-package/xgboost/testing/__init__.py b/python-package/xgboost/testing/__init__.py index 70e5361011b3..f6abb867e3db 100644 --- a/python-package/xgboost/testing/__init__.py +++ b/python-package/xgboost/testing/__init__.py @@ -198,14 +198,14 @@ def __init__( X: Sequence, y: Sequence, w: Optional[Sequence], - cache: Optional[str] = "./", + cache: Optional[str], ) -> None: assert len(X) == len(y) self.X = X self.y = y self.w = w self.it = 0 - super().__init__(cache) + super().__init__(cache_prefix=cache) def next(self, input_data: Callable) -> int: if self.it == len(self.X): @@ -347,7 +347,9 @@ def get_external_dmat(self) -> xgb.DMatrix: if w is not None: weight.append(w) - it = IteratorForTest(predictor, response, weight if weight else None) + it = IteratorForTest( + predictor, response, weight if weight else None, cache="cache" + ) return xgb.DMatrix(it) def __repr__(self) -> str: diff --git a/tests/python-gpu/test_from_cudf.py b/tests/python-gpu/test_from_cudf.py index 523dbf9312a4..610c717a96d1 100644 --- a/tests/python-gpu/test_from_cudf.py +++ b/tests/python-gpu/test_from_cudf.py @@ -305,7 +305,7 @@ def __init__(self, categorical): self._labels = [rng.randn(self.rows)] * self.BATCHES self.it = 0 # set iterator to 0 - super().__init__() + super().__init__(cache_prefix=None) def as_array(self): import cudf diff --git a/tests/python/test_data_iterator.py b/tests/python/test_data_iterator.py index 0590a4954e60..24c117f15d8f 100644 --- a/tests/python/test_data_iterator.py +++ b/tests/python/test_data_iterator.py @@ -64,7 +64,8 @@ def run_data_iterator( subsample_rate = 0.8 if subsample else 1.0 it = IteratorForTest( - *make_batches(n_samples_per_batch, n_features, n_batches, use_cupy) + *make_batches(n_samples_per_batch, n_features, n_batches, use_cupy), + cache="cache" ) if n_batches == 0: with pytest.raises(ValueError, match="1 batch"): From 94b8a0d593d99ec5b6d42ecf47bb909108bfaaf2 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 14 Jun 2023 18:31:34 +0800 Subject: [PATCH 39/51] Timer. --- src/data/sparse_page_source.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index 9417523f9744..0060f2d4f176 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -145,7 +145,10 @@ class SparsePageSourceImpl : public BatchIteratorImpl { auto fi = std::make_unique(n, true, offset, length); CHECK(fmt->Read(page.get(), fi.get())); - LOG(INFO) << "Read a page in " << timer.ElapsedSeconds() << " seconds."; + timer.Stop(); + + LOG(INFO) << "Read a page `" << typeid(S).name() << "` in " << timer.ElapsedSeconds() + << " seconds."; }); return page; }); From a4e11d3c23ae69bd38b294e0ea3f70d598008703 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Jun 2023 00:19:00 +0800 Subject: [PATCH 40/51] fix win leak --- src/common/io.cc | 18 ++++++++++++------ src/data/sparse_page_source.h | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/common/io.cc b/src/common/io.cc index 74529d12e1d6..bce5428e2891 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -173,7 +173,7 @@ std::string FileExtension(std::string fname, bool lower) { } std::size_t GetPageSize() { -#if defined(_MSC_VER) +#if defined(_MSC_VER) || defined(__MINGW32__) SYSTEM_INFO sys_info; GetSystemInfo(&sys_info); // During testing, `sys_info.dwPageSize` is of size 4096 while `dwAllocationGranularity` is of @@ -187,6 +187,7 @@ std::size_t GetPageSize() { struct PrivateMmapStream::MMAPFile { #if defined(_MSC_VER) HANDLE fd{INVALID_HANDLE_VALUE}; + HANDLE file_map{ INVALID_HANDLE_VALUE }; #else std::int32_t fd{0}; #endif @@ -224,7 +225,8 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs } ptr = reinterpret_cast(mmap64(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start)); CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg(); -#elif defined(_MSC_VER) + handle_.reset(new MMAPFile{ fd, ptr, view_size, std::move(path) }); +#elif defined(_MSC_VER) || defined(__MINGW32__) auto file_size = GetFileSize(fd, nullptr); DWORD access = read_only ? PAGE_READONLY : PAGE_READWRITE; auto map_file = CreateFileMapping(fd, nullptr, access, 0, file_size, nullptr); @@ -234,6 +236,7 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs CHECK(map_file) << "Failed to map: " << path << ". " << SystemErrorMsg(); ptr = reinterpret_cast(MapViewOfFile(map_file, access, hoff, loff, view_size)); CHECK_NE(ptr, nullptr) << "Failed to map: " << path << ". " << SystemErrorMsg(); + handle_.reset(new MMAPFile{ fd, map_file, ptr, view_size, std::move(path) }); #else CHECK_LE(offset, std::numeric_limits::max()) << "File size has exceeded the limit on the current system."; @@ -243,9 +246,9 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs } ptr = reinterpret_cast(mmap(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start)); CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg(); + handle_.reset(new MMAPFile{ fd, ptr, view_size, std::move(path) }); #endif // defined(__linux__) - handle_.reset(new MMAPFile{fd, ptr, view_size, std::move(path)}); ptr += (offset - view_start); return ptr; } @@ -261,14 +264,17 @@ PrivateMmapStream::~PrivateMmapStream() { CHECK(handle_); #if defined(_MSC_VER) if (p_buffer_) { - CHECK(UnmapViewOfFile(handle_->base_ptr)) "Faled to munmap. " << SystemErrorMsg(); + CHECK(UnmapViewOfFile(handle_->base_ptr)) "Faled to call munmap: " << SystemErrorMsg(); } if (handle_->fd != INVALID_HANDLE_VALUE) { - CHECK(CloseHandle(handle_->fd)) << "Failed to close handle. " << SystemErrorMsg(); + CHECK(CloseHandle(handle_->fd)) << "Failed to close handle: " << SystemErrorMsg(); + } + if (handle_->file_map != INVALID_HANDLE_VALUE) { + CHECK(CloseHandle(handle_->file_map)) << "Failed to close mapping object: " << SystemErrorMsg(); } #else CHECK_NE(munmap(handle_->base_ptr, handle_->base_size), -1) - << "Faled to munmap." << handle_->path << ". " << SystemErrorMsg(); + << "Faled to call munmap: " << handle_->path << ". " << SystemErrorMsg(); CHECK_NE(close(handle_->fd), -1) << "Faled to close: " << handle_->path << ". " << SystemErrorMsg(); #endif diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index 9417523f9744..0d90035c052d 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -29,7 +29,7 @@ namespace data { inline void TryDeleteCacheFile(const std::string& file) { if (std::remove(file.c_str()) != 0) { LOG(WARNING) << "Couldn't remove external memory cache file " << file - << "; you may want to remove it manually"; + << "; you may want to remove it manually"; } } From 8cdbb8785ceea6a4566845be0e2dfb37e97078f0 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Jun 2023 00:38:52 +0800 Subject: [PATCH 41/51] mingw --- src/common/io.cc | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/common/io.cc b/src/common/io.cc index bce5428e2891..54e99cb50bf9 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -5,11 +5,19 @@ #define NOMINMAX #endif // !defined(NOMINMAX) +#if !defined(xgboost_IS_WIN) + +#if defined(_MSC_VER) || defined(__MINGW32__) +#define xgboost_IS_WIN 1 +#endif // defined(_MSC_VER) || defined(__MINGW32__) + +#endif // !defined(xgboost_IS_WIN) + #if defined(__unix__) || defined(__APPLE__) #include // for open, O_RDONLY #include // for mmap, mmap64, munmap #include // for close, getpagesize -#elif defined(_MSC_VER) || defined(__MINGW32__) +#elif defined(xgboost_IS_WIN) #define WIN32_LEAN_AND_MEAN #include #endif // defined(__unix__) @@ -173,7 +181,7 @@ std::string FileExtension(std::string fname, bool lower) { } std::size_t GetPageSize() { -#if defined(_MSC_VER) || defined(__MINGW32__) +#if defined(xgboost_IS_WIN) SYSTEM_INFO sys_info; GetSystemInfo(&sys_info); // During testing, `sys_info.dwPageSize` is of size 4096 while `dwAllocationGranularity` is of @@ -185,7 +193,7 @@ std::size_t GetPageSize() { } struct PrivateMmapStream::MMAPFile { -#if defined(_MSC_VER) +#if defined(xgboost_IS_WIN) HANDLE fd{INVALID_HANDLE_VALUE}; HANDLE file_map{ INVALID_HANDLE_VALUE }; #else @@ -206,7 +214,7 @@ auto SystemErrorMsg() { char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offset, std::size_t length) { -#if defined(_MSC_VER) +#if defined(xgboost_IS_WIN) HANDLE fd = CreateFile(path.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED, nullptr); CHECK_NE(fd, INVALID_HANDLE_VALUE) << "Failed to open:" << path; @@ -226,7 +234,7 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs ptr = reinterpret_cast(mmap64(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start)); CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg(); handle_.reset(new MMAPFile{ fd, ptr, view_size, std::move(path) }); -#elif defined(_MSC_VER) || defined(__MINGW32__) +#elif defined(xgboost_IS_WIN) auto file_size = GetFileSize(fd, nullptr); DWORD access = read_only ? PAGE_READONLY : PAGE_READWRITE; auto map_file = CreateFileMapping(fd, nullptr, access, 0, file_size, nullptr); @@ -262,7 +270,7 @@ PrivateMmapStream::PrivateMmapStream(std::string path, bool read_only, std::size PrivateMmapStream::~PrivateMmapStream() { CHECK(handle_); -#if defined(_MSC_VER) +#if defined(xgboost_IS_WIN) if (p_buffer_) { CHECK(UnmapViewOfFile(handle_->base_ptr)) "Faled to call munmap: " << SystemErrorMsg(); } @@ -281,3 +289,7 @@ PrivateMmapStream::~PrivateMmapStream() { } } // namespace common } // namespace xgboost + +#if defined(xgboost_IS_WIN) +#undef xgboost_IS_WIN +#endif // defined(xgboost_IS_WIN) From 22ae3f63a3ea07335c8225a7d342929e9a06cacb Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Jun 2023 00:54:05 +0800 Subject: [PATCH 42/51] reduce page number. --- src/data/sparse_page_source.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index 0060f2d4f176..ddadbeeecc58 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -116,7 +116,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { } // An heuristic for number of pre-fetched batches. We can make it part of BatchParam // to let user adjust number of pre-fetched batches when needed. - uint32_t constexpr kPreFetch = 4; + uint32_t constexpr kPreFetch = 3; size_t n_prefetch_batches = std::min(kPreFetch, n_batches_); CHECK_GT(n_prefetch_batches, 0) << "total batches:" << n_batches_; From c8726c34d15a2f21654e3cb0b47a537fdaf444ba Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Jun 2023 00:56:56 +0800 Subject: [PATCH 43/51] polishing. --- demo/guide-python/external_memory.py | 4 +- doc/tutorials/external_memory.rst | 32 ++++++--- rabit/include/rabit/internal/io.h | 74 ++++++++++---------- src/common/io.cc | 39 ++++++----- src/common/io.h | 7 +- src/tree/gpu_hist/gradient_based_sampler.cuh | 4 +- 6 files changed, 88 insertions(+), 72 deletions(-) diff --git a/demo/guide-python/external_memory.py b/demo/guide-python/external_memory.py index fa54d184814a..11a05c61c609 100644 --- a/demo/guide-python/external_memory.py +++ b/demo/guide-python/external_memory.py @@ -82,8 +82,8 @@ def main(tmpdir: str) -> xgboost.Booster: missing = np.NaN Xy = xgboost.DMatrix(it, missing=missing, enable_categorical=False) - # Other tree methods including ``approx``, ``hist``, and ``gpu_hist`` are supported, - # see tutorial in doc for details. + # Other tree methods including ``approx``, and ``gpu_hist`` are supported. GPU + # behaves differently than CPU tree methods. See tutorial in doc for details. booster = xgboost.train( {"tree_method": "hist", "max_depth": 4}, Xy, diff --git a/doc/tutorials/external_memory.rst b/doc/tutorials/external_memory.rst index 71f3b176d8c6..92a990d43fcf 100644 --- a/doc/tutorials/external_memory.rst +++ b/doc/tutorials/external_memory.rst @@ -8,10 +8,10 @@ infeasible. Staring from 1.5, users can define a custom iterator to load data in for running XGBoost algorithms. External memory can be used for both training and prediction, but training is the primary use case and it will be our focus in this tutorial. For prediction and evaluation, users can iterate through the data themseleves -while training requires the full dataset to be loaded to the memory. +while training requires the full dataset to be loaded into the memory. -During training, there are two different approaches for external memory support available -in XGBoost, one for CPU-based algorithms like ``hist`` and ``approx``, another one for the +During training, there are two different modes for external memory support available in +XGBoost, one for CPU-based algorithms like ``hist`` and ``approx``, another one for the GPU-based training algorithm. We will introduce them in the following sections. .. note:: @@ -20,8 +20,7 @@ GPU-based training algorithm. We will introduce them in the following sections. .. note:: - The implementation of external memory uses ``mmap`` and is not tested against system - errors like disconnected network devices (`SIGBUS`). + The feature is still experimental as of 2.0. The performance is not well optimized. ************* Data Iterator @@ -78,7 +77,7 @@ constructor. booster = xgboost.train({"tree_method": "hist"}, Xy) -The above snippet is a simplified version of ``demo/guide-python/external_memory.py``. +The above snippet is a simplified version of :ref:`sphx_glr_python_examples_external_memory.py`. For an example in C, please see ``demo/c-api/external-memory/``. The iterator is the common interface for using external memory with XGBoost, you can pass the resulting ``DMatrix`` object for training, prediction, and evaluation. @@ -102,7 +101,7 @@ performance. If external memory is used, the performance of CPU training is limited by IO (input/output) speed. This means that the disk IO speed primarily determines the training -speed. During benchmarking, we used an NVME connected to a PCIe-4 slot, other types of +speed. During benchmarking, we used an NVMe connected to a PCIe-4 slot, other types of storage can be too slow for practical usage. In addition, your system may perform caching to reduce the overhead of file reading. @@ -146,6 +145,17 @@ and internal runtime structures are concatenated. This means that memory reducti effective when dealing with wide datasets where ``X`` is larger compared to other data like ``y``, while it has little impact on slim datasets. + +Starting with XGBoost 2.0, the implementation of external memory uses ``mmap`` and is not +tested against system errors like disconnected network devices (`SIGBUS`). Due to the +intense IO operations, we recommend more robust solutions like NVMe. Also, it's worth +noting that most tests have been conducted on Linux distributions. + +Another important point to keep in mind is that creating the initial cache for XGBoost may +take some time. The interface to external memory is through custom iterators, which may or +may not be thread-safe. Therefore, initialization is performed sequentially. + + **************** Text File Inputs **************** @@ -154,7 +164,8 @@ This is the original form of external memory support, users are encouraged to us data iterator instead. There is no big difference between using external memory version of text input and the in-memory version. The only difference is the filename format. -The external memory version takes in the following `URI `_ format: +The external memory version takes in the following `URI +`_ format: .. code-block:: none @@ -172,9 +183,8 @@ To load from csv files, use the following syntax: where ``label_column`` should point to the csv column acting as the label. -To provide a simple example for illustration, extracting the code from -`demo/guide-python/external_memory.py `_. If -you have a dataset stored in a file similar to ``agaricus.txt.train`` with LIBSVM format, the external memory support can be enabled by: +If you have a dataset stored in a file similar to ``demo/data/agaricus.txt.train`` with LIBSVM +format, the external memory support can be enabled by: .. code-block:: python diff --git a/rabit/include/rabit/internal/io.h b/rabit/include/rabit/internal/io.h index 6a7d1227f4ae..a12e1decdbb7 100644 --- a/rabit/include/rabit/internal/io.h +++ b/rabit/include/rabit/internal/io.h @@ -1,5 +1,5 @@ -/*! - * Copyright (c) 2014-2019 by Contributors +/** + * Copyright 2014-2023, XGBoost Contributors * \file io.h * \brief utilities with different serializable implementations * \author Tianqi Chen @@ -7,17 +7,15 @@ #ifndef RABIT_INTERNAL_IO_H_ #define RABIT_INTERNAL_IO_H_ -#if !defined(NOMINMAX) && defined(_WIN32) -#define NOMINMAX -#endif // !defined(NOMINMAX) - -#include -#include -#include -#include #include -#include +#include // for size_t +#include +#include // for memcpy #include +#include +#include +#include + #include "rabit/internal/utils.h" #include "rabit/serializable.h" @@ -25,57 +23,61 @@ namespace rabit { namespace utils { /*! \brief re-use definition of dmlc::SeekStream */ using SeekStream = dmlc::SeekStream; -/*! \brief fixed size memory buffer */ +/** + * @brief Fixed size memory buffer as a stream. + */ struct MemoryFixSizeBuffer : public SeekStream { public: // similar to SEEK_END in libc - static size_t constexpr kSeekEnd = std::numeric_limits::max(); + static std::size_t constexpr kSeekEnd = std::numeric_limits::max(); -protected: + protected: MemoryFixSizeBuffer() = default; public: - MemoryFixSizeBuffer(void *p_buffer, size_t buffer_size) - : p_buffer_(reinterpret_cast(p_buffer)), - buffer_size_(buffer_size) { - curr_ptr_ = 0; - } + /** + * @brief Ctor + * + * @param p_buffer Pointer to the source buffer with size `buffer_size`. + * @param buffer_size Size of the source buffer + */ + MemoryFixSizeBuffer(void *p_buffer, std::size_t buffer_size) + : p_buffer_(reinterpret_cast(p_buffer)), buffer_size_(buffer_size) {} ~MemoryFixSizeBuffer() override = default; - size_t Read(void *ptr, size_t size) override { - size_t nread = std::min(buffer_size_ - curr_ptr_, size); + + std::size_t Read(void *ptr, std::size_t size) override { + std::size_t nread = std::min(buffer_size_ - curr_ptr_, size); if (nread != 0) std::memcpy(ptr, p_buffer_ + curr_ptr_, nread); curr_ptr_ += nread; return nread; } - void Write(const void *ptr, size_t size) override { + void Write(const void *ptr, std::size_t size) override { if (size == 0) return; - utils::Assert(curr_ptr_ + size <= buffer_size_, - "write position exceed fixed buffer size"); + CHECK_LE(curr_ptr_ + size, buffer_size_); std::memcpy(p_buffer_ + curr_ptr_, ptr, size); curr_ptr_ += size; } - void Seek(size_t pos) override { + void Seek(std::size_t pos) override { if (pos == kSeekEnd) { curr_ptr_ = buffer_size_; } else { - curr_ptr_ = static_cast(pos); + curr_ptr_ = static_cast(pos); } } - size_t Tell() override { - return curr_ptr_; - } - virtual bool AtEnd() const { - return curr_ptr_ == buffer_size_; - } + /** + * @brief Current position in the buffer (stream). + */ + std::size_t Tell() override { return curr_ptr_; } + virtual bool AtEnd() const { return curr_ptr_ == buffer_size_; } protected: /*! \brief in memory buffer */ - char* p_buffer_{nullptr}; + char *p_buffer_{nullptr}; /*! \brief current pointer */ - std::size_t buffer_size_{ 0 }; + std::size_t buffer_size_{0}; /*! \brief current pointer */ - std::size_t curr_ptr_{ 0 }; -}; // class MemoryFixSizeBuffer + std::size_t curr_ptr_{0}; +}; /*! \brief a in memory buffer that can be read and write as stream interface */ struct MemoryBufferStream : public SeekStream { diff --git a/src/common/io.cc b/src/common/io.cc index 54e99cb50bf9..e4c43eb99b4c 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -180,22 +180,10 @@ std::string FileExtension(std::string fname, bool lower) { } } -std::size_t GetPageSize() { -#if defined(xgboost_IS_WIN) - SYSTEM_INFO sys_info; - GetSystemInfo(&sys_info); - // During testing, `sys_info.dwPageSize` is of size 4096 while `dwAllocationGranularity` is of - // size 65536. - return sys_info.dwAllocationGranularity; -#else - return getpagesize(); -#endif -} - struct PrivateMmapStream::MMAPFile { #if defined(xgboost_IS_WIN) HANDLE fd{INVALID_HANDLE_VALUE}; - HANDLE file_map{ INVALID_HANDLE_VALUE }; + HANDLE file_map{INVALID_HANDLE_VALUE}; #else std::int32_t fd{0}; #endif @@ -205,6 +193,19 @@ struct PrivateMmapStream::MMAPFile { }; namespace { +// Get system alignment value for IO with mmap. +std::size_t GetMmapAlignment() { +#if defined(xgboost_IS_WIN) + SYSTEM_INFO sys_info; + GetSystemInfo(&sys_info); + // During testing, `sys_info.dwPageSize` is of size 4096 while `dwAllocationGranularity` is of + // size 65536. + return sys_info.dwAllocationGranularity; +#else + return getpagesize(); +#endif +} + auto SystemErrorMsg() { std::int32_t errsv = system::LastError(); auto err = std::error_code{errsv, std::system_category()}; @@ -217,15 +218,17 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs #if defined(xgboost_IS_WIN) HANDLE fd = CreateFile(path.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED, nullptr); - CHECK_NE(fd, INVALID_HANDLE_VALUE) << "Failed to open:" << path; + CHECK_NE(fd, INVALID_HANDLE_VALUE) << "Failed to open:" << path << ". " << SystemErrorMsg(); #else auto fd = open(path.c_str(), O_RDONLY); CHECK_GE(fd, 0) << "Failed to open:" << path << ". " << SystemErrorMsg(); #endif char* ptr{nullptr}; - auto view_start = offset / GetPageSize() * GetPageSize(); + // Round down for alignment. + auto view_start = offset / GetMmapAlignment() * GetMmapAlignment(); auto view_size = length + (offset - view_start); + #if defined(__linux__) || defined(__GLIBC__) int prot{PROT_READ}; if (!read_only) { @@ -233,7 +236,7 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs } ptr = reinterpret_cast(mmap64(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start)); CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg(); - handle_.reset(new MMAPFile{ fd, ptr, view_size, std::move(path) }); + handle_.reset(new MMAPFile{fd, ptr, view_size, std::move(path)}); #elif defined(xgboost_IS_WIN) auto file_size = GetFileSize(fd, nullptr); DWORD access = read_only ? PAGE_READONLY : PAGE_READWRITE; @@ -244,7 +247,7 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs CHECK(map_file) << "Failed to map: " << path << ". " << SystemErrorMsg(); ptr = reinterpret_cast(MapViewOfFile(map_file, access, hoff, loff, view_size)); CHECK_NE(ptr, nullptr) << "Failed to map: " << path << ". " << SystemErrorMsg(); - handle_.reset(new MMAPFile{ fd, map_file, ptr, view_size, std::move(path) }); + handle_.reset(new MMAPFile{fd, map_file, ptr, view_size, std::move(path)}); #else CHECK_LE(offset, std::numeric_limits::max()) << "File size has exceeded the limit on the current system."; @@ -254,7 +257,7 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs } ptr = reinterpret_cast(mmap(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start)); CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg(); - handle_.reset(new MMAPFile{ fd, ptr, view_size, std::move(path) }); + handle_.reset(new MMAPFile{fd, ptr, view_size, std::move(path)}); #endif // defined(__linux__) ptr += (offset - view_start); diff --git a/src/common/io.h b/src/common/io.h index 922cb520ea56..9e22a4e63020 100644 --- a/src/common/io.h +++ b/src/common/io.h @@ -129,10 +129,11 @@ inline std::string ReadAll(std::string const &path) { return content; } -std::size_t GetPageSize(); - /** - * @brief Private mmap file, copy-on-write. File must be properly aligned by `PadPageForMmap()`. + * @brief Private mmap file, copy-on-write when running on Linux-based distributions. + * + * It can calculate alignment automatically based on system page size (or allocation + * granularity on Windows). */ class PrivateMmapStream : public MemoryFixSizeBuffer { struct MMAPFile; diff --git a/src/tree/gpu_hist/gradient_based_sampler.cuh b/src/tree/gpu_hist/gradient_based_sampler.cuh index 8013f2ec4a0a..c36fe3527233 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cuh +++ b/src/tree/gpu_hist/gradient_based_sampler.cuh @@ -1,5 +1,5 @@ -/*! - * Copyright 2019 by XGBoost Contributors +/** + * Copyright 2019-2023, XGBoost Contributors */ #pragma once #include From b5b57a0e3101923eb88fcb01d5961915982d5b01 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Jun 2023 01:57:32 +0800 Subject: [PATCH 44/51] Improve test. --- tests/cpp/common/test_io.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/cpp/common/test_io.cc b/tests/cpp/common/test_io.cc index 81316363e6b6..f1c8da2359fb 100644 --- a/tests/cpp/common/test_io.cc +++ b/tests/cpp/common/test_io.cc @@ -94,7 +94,12 @@ TEST(IO, PrivateMmapStream) { dmlc::TemporaryDirectory tempdir; auto path = tempdir.path + "/testfile"; - std::size_t n_batches{8}; + // The page size on Linux is usually set to 4096, while the allocation granularity on + // the Windows machine where this test is writted is 65536. We span the test to cover + // all of them. + std::size_t n_batches{64}; + std::size_t multiplier{2048}; + std::vector> batches; std::vector offset{0ul}; @@ -103,7 +108,7 @@ TEST(IO, PrivateMmapStream) { { std::unique_ptr fo{dmlc::Stream::Create(path.c_str(), "w")}; for (std::size_t i = 0; i < n_batches; ++i) { - std::size_t size = (i + 1) * 8192; + std::size_t size = (i + 1) * multiplier; std::vector data(size, 0); std::iota(data.begin(), data.end(), i * i); From 076a788c2dbb4845e4eee169444058a25885cc60 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Jun 2023 02:55:33 +0800 Subject: [PATCH 45/51] fix. --- src/tree/updater_gpu_hist.cu | 25 ++++++++++++++----------- tests/cpp/histogram_helpers.h | 4 ++++ tests/cpp/tree/test_gpu_hist.cu | 14 ++++++++++---- 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index f62e78fa914a..195ef5411de7 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -230,9 +230,16 @@ struct GPUHistMakerDevice { dh::safe_cuda(cudaSetDevice(ctx_->gpu_id)); } + void InitFeatureGroupsOnce() { + if (!feature_groups) { + CHECK(page); + feature_groups.reset(new FeatureGroups(page->Cuts(), page->is_dense, + dh::MaxSharedMemoryOptin(ctx_->gpu_id), + sizeof(GradientSumT))); + } + } + // Reset values for each update iteration - // Note that the column sampler must be passed by value because it is not - // thread safe void Reset(HostDeviceVector* dh_gpair, DMatrix* dmat, int64_t num_columns) { auto const& info = dmat->Info(); this->column_sampler.Init(ctx_, num_columns, info.feature_weights.HostVector(), @@ -245,9 +252,9 @@ struct GPUHistMakerDevice { if (d_gpair.size() != dh_gpair->Size()) { d_gpair.resize(dh_gpair->Size()); } - dh::safe_cuda(cudaMemcpyAsync( - d_gpair.data().get(), dh_gpair->ConstDevicePointer(), - dh_gpair->Size() * sizeof(GradientPair), cudaMemcpyDeviceToDevice)); + dh::safe_cuda(cudaMemcpyAsync(d_gpair.data().get(), dh_gpair->ConstDevicePointer(), + dh_gpair->Size() * sizeof(GradientPair), + cudaMemcpyDeviceToDevice)); auto sample = sampler->Sample(ctx_, dh::ToSpan(d_gpair), dmat); page = sample.page; gpair = sample.gpair; @@ -257,17 +264,13 @@ struct GPUHistMakerDevice { quantiser.reset(new GradientQuantiser(this->gpair)); row_partitioner.reset(); // Release the device memory first before reallocating - row_partitioner.reset(new RowPartitioner(ctx_->gpu_id, sample.sample_rows)); + row_partitioner.reset(new RowPartitioner(ctx_->gpu_id, sample.sample_rows)); // Init histogram hist.Init(ctx_->gpu_id, page->Cuts().TotalBins()); hist.Reset(); - if (!feature_groups) { - feature_groups.reset(new FeatureGroups(page->Cuts(), page->is_dense, - dh::MaxSharedMemoryOptin(ctx_->gpu_id), - sizeof(GradientSumT))); - } + this->InitFeatureGroupsOnce(); } GPUExpandEntry EvaluateRootSplit(GradientPairInt64 root_sum) { diff --git a/tests/cpp/histogram_helpers.h b/tests/cpp/histogram_helpers.h index 127f6fe44da8..6774f531c922 100644 --- a/tests/cpp/histogram_helpers.h +++ b/tests/cpp/histogram_helpers.h @@ -2,6 +2,10 @@ #include "../../src/data/ellpack_page.cuh" #endif +#include // for SparsePage + +#include "./helpers.h" // for RandomDataGenerator + namespace xgboost { #if defined(__CUDACC__) namespace { diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu index 1bd4ece2047c..fd3034db50b4 100644 --- a/tests/cpp/tree/test_gpu_hist.cu +++ b/tests/cpp/tree/test_gpu_hist.cu @@ -92,8 +92,8 @@ void TestBuildHist(bool use_shared_memory_histograms) { auto page = BuildEllpackPage(kNRows, kNCols); BatchParam batch_param{}; Context ctx{MakeCUDACtx(0)}; - GPUHistMakerDevice maker(&ctx, page.get(), {}, kNRows, param, kNCols, kNCols, - batch_param); + GPUHistMakerDevice maker(&ctx, /*is_external_memory=*/false, {}, kNRows, param, + kNCols, kNCols, batch_param); xgboost::SimpleLCG gen; xgboost::SimpleRealUniformDistribution dist(0.0f, 1.0f); HostDeviceVector gpair(kNRows); @@ -106,9 +106,15 @@ void TestBuildHist(bool use_shared_memory_histograms) { thrust::host_vector h_gidx_buffer (page->gidx_buffer.HostVector()); maker.row_partitioner.reset(new RowPartitioner(0, kNRows)); + + maker.hist.Init(0, page->Cuts().TotalBins()); maker.hist.AllocateHistograms({0}); + maker.gpair = gpair.DeviceSpan(); maker.quantiser.reset(new GradientQuantiser(maker.gpair)); + maker.page = page.get(); + + maker.InitFeatureGroupsOnce(); BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0), maker.feature_groups->DeviceAccessor(0), gpair.DeviceSpan(), @@ -126,8 +132,8 @@ void TestBuildHist(bool use_shared_memory_histograms) { std::vector solution = GetHostHistGpair(); for (size_t i = 0; i < h_result.size(); ++i) { auto result = maker.quantiser->ToFloatingPoint(h_result[i]); - EXPECT_NEAR(result.GetGrad(), solution[i].GetGrad(), 0.01f); - EXPECT_NEAR(result.GetHess(), solution[i].GetHess(), 0.01f); + ASSERT_NEAR(result.GetGrad(), solution[i].GetGrad(), 0.01f); + ASSERT_NEAR(result.GetHess(), solution[i].GetHess(), 0.01f); } } From 8b993ff6f0d451a21cf047f1c8029a310853e26c Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Jun 2023 06:08:23 +0800 Subject: [PATCH 46/51] Forbid pointer to bool cast. --- src/tree/gpu_hist/gradient_based_sampler.cu | 6 +++--- src/tree/gpu_hist/gradient_based_sampler.cuh | 4 ++-- src/tree/updater_gpu_hist.cu | 4 ++-- tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu | 8 +++++--- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/tree/gpu_hist/gradient_based_sampler.cu b/src/tree/gpu_hist/gradient_based_sampler.cu index 11ed03d0d5a1..5f763fb933bf 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cu +++ b/src/tree/gpu_hist/gradient_based_sampler.cu @@ -318,9 +318,9 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* c return {sample_rows, page_.get(), dh::ToSpan(gpair_)}; } -GradientBasedSampler::GradientBasedSampler(Context const* /*ctx*/, bool is_external_memory, - size_t n_rows, const BatchParam& batch_param, - float subsample, int sampling_method) { +GradientBasedSampler::GradientBasedSampler(Context const* /*ctx*/, size_t n_rows, + const BatchParam& batch_param, float subsample, + int sampling_method, bool is_external_memory) { // The ctx is kept here for future development of stream-based operations. monitor_.Init("gradient_based_sampler"); diff --git a/src/tree/gpu_hist/gradient_based_sampler.cuh b/src/tree/gpu_hist/gradient_based_sampler.cuh index c36fe3527233..f89bf242e39a 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cuh +++ b/src/tree/gpu_hist/gradient_based_sampler.cuh @@ -122,8 +122,8 @@ class ExternalMemoryGradientBasedSampling : public SamplingStrategy { */ class GradientBasedSampler { public: - GradientBasedSampler(Context const* ctx, bool is_external_memory, size_t n_rows, - const BatchParam& batch_param, float subsample, int sampling_method); + GradientBasedSampler(Context const* ctx, size_t n_rows, const BatchParam& batch_param, + float subsample, int sampling_method, bool is_external_memory); /*! \brief Sample from a DMatrix based on the given gradient pairs. */ GradientBasedSample Sample(Context const* ctx, common::Span gpair, DMatrix* dmat); diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index 195ef5411de7..64d9817dd965 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -216,8 +216,8 @@ struct GPUHistMakerDevice { column_sampler(column_sampler_seed), interaction_constraints(param, n_features), batch_param(std::move(_batch_param)) { - sampler.reset(new GradientBasedSampler(ctx, is_external_memory, _n_rows, batch_param, - param.subsample, param.sampling_method)); + sampler.reset(new GradientBasedSampler(ctx, _n_rows, batch_param, param.subsample, + param.sampling_method, is_external_memory)); if (!param.monotone_constraints.empty()) { // Copy assigning an empty vector causes an exception in MSVC debug builds monotone_constraints = param.monotone_constraints; diff --git a/tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu b/tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu index 95ae02aee46b..26ddfd8cc67f 100644 --- a/tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu +++ b/tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu @@ -39,7 +39,8 @@ void VerifySampling(size_t page_size, EXPECT_NE(page->n_rows, kRows); } - GradientBasedSampler sampler(&ctx, page, kRows, param, subsample, sampling_method); + GradientBasedSampler sampler(&ctx, kRows, param, subsample, sampling_method, + !fixed_size_sampling); auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get()); if (fixed_size_sampling) { @@ -93,7 +94,7 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) { auto page = (*dmat->GetBatches(&ctx, param).begin()).Impl(); EXPECT_NE(page->n_rows, kRows); - GradientBasedSampler sampler(&ctx, page, kRows, param, kSubsample, TrainParam::kUniform); + GradientBasedSampler sampler(&ctx, kRows, param, kSubsample, TrainParam::kUniform, true); auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get()); auto sampled_page = sample.page; EXPECT_EQ(sample.sample_rows, kRows); @@ -141,7 +142,8 @@ TEST(GradientBasedSampler, GradientBasedSampling) { constexpr size_t kPageSize = 0; constexpr float kSubsample = 0.8; constexpr int kSamplingMethod = TrainParam::kGradientBased; - VerifySampling(kPageSize, kSubsample, kSamplingMethod); + constexpr bool kFixedSizeSampling = true; + VerifySampling(kPageSize, kSubsample, kSamplingMethod, kFixedSizeSampling); } TEST(GradientBasedSampler, GradientBasedSamplingExternalMemory) { From 6169fdcb6a1d1e6f980847525142fea22cb20b76 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Jun 2023 08:59:18 +0800 Subject: [PATCH 47/51] cleanup. --- src/common/io.cc | 66 ++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/src/common/io.cc b/src/common/io.cc index e4c43eb99b4c..8ced0ead6a71 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -41,8 +41,7 @@ #include "xgboost/collective/socket.h" // for LastError #include "xgboost/logging.h" -namespace xgboost { -namespace common { +namespace xgboost::common { size_t PeekableInStream::Read(void* dptr, size_t size) { size_t nbuffer = buffer_.length() - buffer_ptr_; if (nbuffer == 0) return strm_->Read(dptr, size); @@ -118,11 +117,32 @@ void FixedSizeStream::Take(std::string* out) { *out = std::move(buffer_); } +namespace { +// Get system alignment value for IO with mmap. +std::size_t GetMmapAlignment() { +#if defined(xgboost_IS_WIN) + SYSTEM_INFO sys_info; + GetSystemInfo(&sys_info); + // During testing, `sys_info.dwPageSize` is of size 4096 while `dwAllocationGranularity` is of + // size 65536. + return sys_info.dwAllocationGranularity; +#else + return getpagesize(); +#endif +} + +auto SystemErrorMsg() { + std::int32_t errsv = system::LastError(); + auto err = std::error_code{errsv, std::system_category()}; + return err.message(); +} +} // anonymous namespace + std::string LoadSequentialFile(std::string uri, bool stream) { auto OpenErr = [&uri]() { std::string msg; msg = "Opening " + uri + " failed: "; - msg += strerror(errno); + msg += SystemErrorMsg(); LOG(FATAL) << msg; }; @@ -192,29 +212,12 @@ struct PrivateMmapStream::MMAPFile { std::string path; }; -namespace { -// Get system alignment value for IO with mmap. -std::size_t GetMmapAlignment() { -#if defined(xgboost_IS_WIN) - SYSTEM_INFO sys_info; - GetSystemInfo(&sys_info); - // During testing, `sys_info.dwPageSize` is of size 4096 while `dwAllocationGranularity` is of - // size 65536. - return sys_info.dwAllocationGranularity; -#else - return getpagesize(); -#endif -} - -auto SystemErrorMsg() { - std::int32_t errsv = system::LastError(); - auto err = std::error_code{errsv, std::system_category()}; - return err; -} -} // anonymous namespace - char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offset, std::size_t length) { + if (length == 0) { + return nullptr; + } + #if defined(xgboost_IS_WIN) HANDLE fd = CreateFile(path.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED, nullptr); @@ -284,14 +287,17 @@ PrivateMmapStream::~PrivateMmapStream() { CHECK(CloseHandle(handle_->file_map)) << "Failed to close mapping object: " << SystemErrorMsg(); } #else - CHECK_NE(munmap(handle_->base_ptr, handle_->base_size), -1) - << "Faled to call munmap: " << handle_->path << ". " << SystemErrorMsg(); - CHECK_NE(close(handle_->fd), -1) - << "Faled to close: " << handle_->path << ". " << SystemErrorMsg(); + if (handle_->base_ptr) { + CHECK_NE(munmap(handle_->base_ptr, handle_->base_size), -1) + << "Faled to call munmap: " << handle_->path << ". " << SystemErrorMsg(); + } + if (handle_->fd != 0) { + CHECK_NE(close(handle_->fd), -1) + << "Faled to close: " << handle_->path << ". " << SystemErrorMsg(); + } #endif } -} // namespace common -} // namespace xgboost +} // namespace xgboost::common #if defined(xgboost_IS_WIN) #undef xgboost_IS_WIN From 914a186047066167c466cf91a47a46fc6b99bfdc Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Jun 2023 09:06:59 +0800 Subject: [PATCH 48/51] read-only. --- src/common/io.cc | 25 +++++++++---------------- src/common/io.h | 16 +++++++++------- src/data/sparse_page_source.h | 8 +++----- tests/cpp/common/test_io.cc | 8 +++----- 4 files changed, 24 insertions(+), 33 deletions(-) diff --git a/src/common/io.cc b/src/common/io.cc index 8ced0ead6a71..d6a9fc02c7c8 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -200,7 +200,7 @@ std::string FileExtension(std::string fname, bool lower) { } } -struct PrivateMmapStream::MMAPFile { +struct PrivateMmapConstStream::MMAPFile { #if defined(xgboost_IS_WIN) HANDLE fd{INVALID_HANDLE_VALUE}; HANDLE file_map{INVALID_HANDLE_VALUE}; @@ -212,8 +212,7 @@ struct PrivateMmapStream::MMAPFile { std::string path; }; -char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offset, - std::size_t length) { +char* PrivateMmapConstStream::Open(std::string path, std::size_t offset, std::size_t length) { if (length == 0) { return nullptr; } @@ -223,7 +222,7 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED, nullptr); CHECK_NE(fd, INVALID_HANDLE_VALUE) << "Failed to open:" << path << ". " << SystemErrorMsg(); #else - auto fd = open(path.c_str(), O_RDONLY); + auto fd = open(path.c_str(), O_RDWR); CHECK_GE(fd, 0) << "Failed to open:" << path << ". " << SystemErrorMsg(); #endif @@ -234,17 +233,14 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs #if defined(__linux__) || defined(__GLIBC__) int prot{PROT_READ}; - if (!read_only) { - prot |= PROT_WRITE; - } ptr = reinterpret_cast(mmap64(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start)); CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg(); handle_.reset(new MMAPFile{fd, ptr, view_size, std::move(path)}); #elif defined(xgboost_IS_WIN) auto file_size = GetFileSize(fd, nullptr); - DWORD access = read_only ? PAGE_READONLY : PAGE_READWRITE; + DWORD access = PAGE_READONLY; auto map_file = CreateFileMapping(fd, nullptr, access, 0, file_size, nullptr); - access = read_only ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS; + access = FILE_MAP_READ; std::uint32_t loff = static_cast(view_start); std::uint32_t hoff = view_start >> 32; CHECK(map_file) << "Failed to map: " << path << ". " << SystemErrorMsg(); @@ -255,9 +251,6 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs CHECK_LE(offset, std::numeric_limits::max()) << "File size has exceeded the limit on the current system."; int prot{PROT_READ}; - if (!read_only) { - prot |= PROT_WRITE; - } ptr = reinterpret_cast(mmap(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start)); CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg(); handle_.reset(new MMAPFile{fd, ptr, view_size, std::move(path)}); @@ -267,14 +260,14 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs return ptr; } -PrivateMmapStream::PrivateMmapStream(std::string path, bool read_only, std::size_t offset, - std::size_t length) +PrivateMmapConstStream::PrivateMmapConstStream(std::string path, std::size_t offset, + std::size_t length) : MemoryFixSizeBuffer{}, handle_{nullptr} { - this->p_buffer_ = Open(std::move(path), read_only, offset, length); + this->p_buffer_ = Open(std::move(path), offset, length); this->buffer_size_ = length; } -PrivateMmapStream::~PrivateMmapStream() { +PrivateMmapConstStream::~PrivateMmapConstStream() { CHECK(handle_); #if defined(xgboost_IS_WIN) if (p_buffer_) { diff --git a/src/common/io.h b/src/common/io.h index 9e22a4e63020..61324820b359 100644 --- a/src/common/io.h +++ b/src/common/io.h @@ -130,30 +130,32 @@ inline std::string ReadAll(std::string const &path) { } /** - * @brief Private mmap file, copy-on-write when running on Linux-based distributions. + * @brief Private mmap file as a read-only stream. * * It can calculate alignment automatically based on system page size (or allocation * granularity on Windows). */ -class PrivateMmapStream : public MemoryFixSizeBuffer { +class PrivateMmapConstStream : public MemoryFixSizeBuffer { struct MMAPFile; std::unique_ptr handle_; - char* Open(std::string path, bool read_only, std::size_t offset, std::size_t length); + char* Open(std::string path, std::size_t offset, std::size_t length); public: /** * @brief Construct a private mmap stream. * * @param path File path. - * @param read_only See the `prot` parameter of `mmap` for details. * @param offset See the `offset` parameter of `mmap` for details. * @param length See the `length` parameter of `mmap` for details. */ - explicit PrivateMmapStream(std::string path, bool read_only, std::size_t offset, - std::size_t length); + explicit PrivateMmapConstStream(std::string path, std::size_t offset, std::size_t length); + std::size_t Read(void*, std::size_t) override { + LOG(FATAL) << "Read-only stream."; + return 0; + } - ~PrivateMmapStream() override; + ~PrivateMmapConstStream() override; }; } // namespace common } // namespace xgboost diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index e6f7bc1ec426..b4e42f2db421 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -24,8 +24,7 @@ #include "xgboost/base.h" #include "xgboost/data.h" -namespace xgboost { -namespace data { +namespace xgboost::data { inline void TryDeleteCacheFile(const std::string& file) { if (std::remove(file.c_str()) != 0) { LOG(WARNING) << "Couldn't remove external memory cache file " << file @@ -143,7 +142,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl { std::uint64_t offset = self->cache_info_->offset.at(fetch_it); std::uint64_t length = self->cache_info_->offset.at(fetch_it + 1) - offset; - auto fi = std::make_unique(n, true, offset, length); + auto fi = std::make_unique(n, offset, length); CHECK(fmt->Read(page.get(), fi.get())); timer.Stop(); @@ -401,6 +400,5 @@ class SortedCSCPageSource : public PageSourceIncMixIn { this->Fetch(); } }; -} // namespace data -} // namespace xgboost +} // namespace xgboost::data #endif // XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_ diff --git a/tests/cpp/common/test_io.cc b/tests/cpp/common/test_io.cc index f1c8da2359fb..a64b60b800a9 100644 --- a/tests/cpp/common/test_io.cc +++ b/tests/cpp/common/test_io.cc @@ -9,8 +9,7 @@ #include "../helpers.h" #include "../filesystem.h" // dmlc::TemporaryDirectory -namespace xgboost { -namespace common { +namespace xgboost::common { TEST(MemoryFixSizeBuffer, Seek) { size_t constexpr kSize { 64 }; std::vector memory( kSize ); @@ -128,7 +127,7 @@ TEST(IO, PrivateMmapStream) { for (std::size_t i = 0; i < n_batches; ++i) { std::size_t off = offset[i]; std::size_t n = offset.at(i + 1) - offset[i]; - std::unique_ptr fi{std::make_unique(path, true, off, n)}; + std::unique_ptr fi{std::make_unique(path, off, n)}; std::vector data; std::uint64_t size{0}; @@ -139,5 +138,4 @@ TEST(IO, PrivateMmapStream) { ASSERT_EQ(data, batches[i]); } } -} // namespace common -} // namespace xgboost +} // namespace xgboost::common From f3e39aca887febc71b7b22c9227b6368d4522ef7 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Jun 2023 09:11:10 +0800 Subject: [PATCH 49/51] read-only. --- src/common/io.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/io.cc b/src/common/io.cc index d6a9fc02c7c8..ba97db574342 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -222,7 +222,7 @@ char* PrivateMmapConstStream::Open(std::string path, std::size_t offset, std::si FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED, nullptr); CHECK_NE(fd, INVALID_HANDLE_VALUE) << "Failed to open:" << path << ". " << SystemErrorMsg(); #else - auto fd = open(path.c_str(), O_RDWR); + auto fd = open(path.c_str(), O_RDONLY); CHECK_GE(fd, 0) << "Failed to open:" << path << ". " << SystemErrorMsg(); #endif From de4f71c537f910e5821ce85f1c252d36023deb08 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Jun 2023 09:23:59 +0800 Subject: [PATCH 50/51] fix. --- src/common/io.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/common/io.h b/src/common/io.h index 61324820b359..ab408dec143e 100644 --- a/src/common/io.h +++ b/src/common/io.h @@ -150,10 +150,7 @@ class PrivateMmapConstStream : public MemoryFixSizeBuffer { * @param length See the `length` parameter of `mmap` for details. */ explicit PrivateMmapConstStream(std::string path, std::size_t offset, std::size_t length); - std::size_t Read(void*, std::size_t) override { - LOG(FATAL) << "Read-only stream."; - return 0; - } + void Write(void const*, std::size_t) override { LOG(FATAL) << "Read-only stream."; } ~PrivateMmapConstStream() override; }; From 6552242ec9be618195adb78069472ad96197ed08 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 16 Jun 2023 05:17:52 +0800 Subject: [PATCH 51/51] Don't blame. --- doc/tutorials/external_memory.rst | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/tutorials/external_memory.rst b/doc/tutorials/external_memory.rst index 92a990d43fcf..f5b6132c7c48 100644 --- a/doc/tutorials/external_memory.rst +++ b/doc/tutorials/external_memory.rst @@ -145,11 +145,9 @@ and internal runtime structures are concatenated. This means that memory reducti effective when dealing with wide datasets where ``X`` is larger compared to other data like ``y``, while it has little impact on slim datasets. - -Starting with XGBoost 2.0, the implementation of external memory uses ``mmap`` and is not -tested against system errors like disconnected network devices (`SIGBUS`). Due to the -intense IO operations, we recommend more robust solutions like NVMe. Also, it's worth -noting that most tests have been conducted on Linux distributions. +Starting with XGBoost 2.0, the implementation of external memory uses ``mmap``. It is not +yet tested against system errors like disconnected network devices (`SIGBUS`). Also, it's +worth noting that most tests have been conducted on Linux distributions. Another important point to keep in mind is that creating the initial cache for XGBoost may take some time. The interface to external memory is through custom iterators, which may or