diff --git a/include/xgboost/context.h b/include/xgboost/context.h
index aaa1e3eb88b3..f1cd391df5b6 100644
--- a/include/xgboost/context.h
+++ b/include/xgboost/context.h
@@ -50,7 +50,19 @@ struct Context : public XGBoostParameter<Context> {
   bool IsCPU() const { return gpu_id == kCpuId; }
   bool IsCUDA() const { return !IsCPU(); }
 
+  CUDAContext const* CUDACtx() const;
+  // Make a CUDA context based on the current context.
+  Context MakeCUDA(std::int32_t device = 0) const {
+    Context ctx = *this;
+    ctx.gpu_id = device;
+    return ctx;
+  }
+  Context MakeCPU() const {
+    Context ctx = *this;
+    ctx.gpu_id = kCpuId;
+    return ctx;
+  }
+
   // declare parameters
   DMLC_DECLARE_PARAMETER(Context) {
diff --git a/include/xgboost/data.h b/include/xgboost/data.h
index 3f7b6ad85b61..6305abff840e 100644
--- a/include/xgboost/data.h
+++ b/include/xgboost/data.h
@@ -1,5 +1,5 @@
-/*!
- * Copyright (c) 2015-2022 by XGBoost Contributors
+/**
+ * Copyright 2015-2023 by XGBoost Contributors
  * \file data.h
  * \brief The input data structure of xgboost.
  * \author Tianqi Chen
@@ -238,44 +238,72 @@ struct Entry {
   }
 };
 
-/*!
- * \brief Parameters for constructing batches.
+/**
+ * \brief Parameters for constructing histogram index batches.
  */
 struct BatchParam {
-  /*! \brief The GPU device to use. */
-  int gpu_id {-1};
-  /*! \brief Maximum number of bins per feature for histograms. */
+  /**
+   * \brief Maximum number of bins per feature for histograms.
+   */
   bst_bin_t max_bin{0};
-  /*! \brief Hessian, used for sketching with future approx implementation. */
+  /**
+   * \brief Hessian, used for sketching with future approx implementation.
+   */
   common::Span<float> hess;
-  /*! \brief Whether should DMatrix regenerate the batch. Only used for GHistIndex. */
-  bool regen {false};
-  /*! \brief Parameter used to generate column matrix for hist. */
+  /**
+   * \brief Whether we should force the DMatrix to regenerate the batch. Only used for
+   *        GHistIndex.
+   */
+  bool regen{false};
+  /**
+   * \brief Forbid regenerating the gradient index. Used for internal validation.
+   */
+  bool forbid_regen{false};
+  /**
+   * \brief Parameter used to generate column matrix for hist.
+   */
  double sparse_thresh{std::numeric_limits<double>::quiet_NaN()};
 
+  /**
+   * \brief Used by the exact tree method and others that don't need the histogram.
+   */
   BatchParam() = default;
-  // GPU Hist
-  BatchParam(int32_t device, bst_bin_t max_bin)
-      : gpu_id{device}, max_bin{max_bin} {}
-  // Hist
+  /**
+   * \brief Used by the hist tree method.
+   */
   BatchParam(bst_bin_t max_bin, double sparse_thresh)
       : max_bin{max_bin}, sparse_thresh{sparse_thresh} {}
-  // Approx
   /**
-   * \brief Get batch with sketch weighted by hessian. The batch will be regenerated if
-   * the span is changed, so caller should keep the span for each iteration.
+   * \brief Used by the approx tree method.
+   *
+   *   Get batch with sketch weighted by hessian. The batch will be regenerated if the
+   *   span is changed, so caller should keep the span for each iteration.
    */
   BatchParam(bst_bin_t max_bin, common::Span<float> hessian, bool regenerate)
       : max_bin{max_bin}, hess{hessian}, regen{regenerate} {}
 
-  bool operator!=(BatchParam const& other) const {
-    if (hess.empty() && other.hess.empty()) {
-      return gpu_id != other.gpu_id || max_bin != other.max_bin;
-    }
-    return gpu_id != other.gpu_id || max_bin != other.max_bin || hess.data() != other.hess.data();
+  bool ParamNotEqual(BatchParam const& other) const {
+    // Check non-floating parameters.
+    bool cond = max_bin != other.max_bin;
+    // Check sparse thresh.
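+    // NaN != NaN under IEEE 754 comparison, so check the NaN flags first; two
+    // unset (NaN) thresholds compare as equal and do not mark the parameter as
+    // changed.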
+    bool l_nan = std::isnan(sparse_thresh);
+    bool r_nan = std::isnan(other.sparse_thresh);
+    bool st_chg = (l_nan != r_nan) || (!l_nan && !r_nan && (sparse_thresh != other.sparse_thresh));
+    cond |= st_chg;
+
+    return cond;
   }
-  bool operator==(BatchParam const& other) const {
-    return !(*this != other);
+  bool Initialized() const { return max_bin != 0; }
+  /**
+   * \brief Make a copy of self for DMatrix to describe how its existing index was generated.
+   */
+  BatchParam MakeCache() const {
+    auto p = *this;
+    // These parameters have nothing to do with how the gradient index was generated in the
+    // first place.
+    p.regen = false;
+    p.forbid_regen = false;
+    return p;
   }
 };
@@ -435,7 +463,7 @@ class EllpackPage {
    * This is used in the in-memory case. The ELLPACK page is constructed from an existing DMatrix
    * in CSR format.
    */
-  explicit EllpackPage(DMatrix* dmat, const BatchParam& param);
+  explicit EllpackPage(Context const* ctx, DMatrix* dmat, const BatchParam& param);
 
   /*! \brief Destructor. */
   ~EllpackPage();
@@ -551,7 +579,9 @@ class DMatrix {
   template <typename T>
   BatchSet<T> GetBatches();
   template <typename T>
-  BatchSet<T> GetBatches(const BatchParam& param);
+  BatchSet<T> GetBatches(Context const* ctx);
+  template <typename T>
+  BatchSet<T> GetBatches(Context const* ctx, const BatchParam& param);
   template <typename T>
   bool PageExists() const;
@@ -658,18 +688,19 @@ class DMatrix {
  protected:
   virtual BatchSet<SparsePage> GetRowBatches() = 0;
-  virtual BatchSet<CSCPage> GetColumnBatches() = 0;
-  virtual BatchSet<SortedCSCPage> GetSortedColumnBatches() = 0;
-  virtual BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) = 0;
-  virtual BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam& param) = 0;
-  virtual BatchSet<ExtSparsePage> GetExtBatches(BatchParam const& param) = 0;
+  virtual BatchSet<CSCPage> GetColumnBatches(Context const* ctx) = 0;
+  virtual BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const* ctx) = 0;
+  virtual BatchSet<EllpackPage> GetEllpackBatches(Context const* ctx, BatchParam const& param) = 0;
+  virtual BatchSet<GHistIndexMatrix> GetGradientIndex(Context const* ctx,
+                                                      BatchParam const& param) = 0;
+  virtual BatchSet<ExtSparsePage> GetExtBatches(Context const* ctx, BatchParam const& param) = 0;
 
   virtual bool EllpackExists() const = 0;
   virtual bool GHistIndexExists() const = 0;
   virtual bool SparsePageExists() const = 0;
 };
 
-template<>
+template <>
 inline BatchSet<SparsePage> DMatrix::GetBatches() {
   return GetRowBatches();
 }
@@ -684,34 +715,39 @@ inline bool DMatrix::PageExists<GHistIndexMatrix>() const {
   return this->GHistIndexExists();
 }
 
-template<>
+template <>
 inline bool DMatrix::PageExists<SparsePage>() const {
   return this->SparsePageExists();
 }
 
-template<>
-inline BatchSet<CSCPage> DMatrix::GetBatches() {
-  return GetColumnBatches();
+template <>
+inline BatchSet<SparsePage> DMatrix::GetBatches(Context const*) {
+  return GetRowBatches();
 }
 
-template<>
-inline BatchSet<SortedCSCPage> DMatrix::GetBatches() {
-  return GetSortedColumnBatches();
+template <>
+inline BatchSet<CSCPage> DMatrix::GetBatches(Context const* ctx) {
+  return GetColumnBatches(ctx);
 }
 
-template<>
-inline BatchSet<EllpackPage> DMatrix::GetBatches(const BatchParam& param) {
-  return GetEllpackBatches(param);
+template <>
+inline BatchSet<SortedCSCPage> DMatrix::GetBatches(Context const* ctx) {
+  return GetSortedColumnBatches(ctx);
+}
+
+template <>
+inline BatchSet<EllpackPage> DMatrix::GetBatches(Context const* ctx, BatchParam const& param) {
+  return GetEllpackBatches(ctx, param);
 }
 
 template <>
-inline BatchSet<GHistIndexMatrix> DMatrix::GetBatches(const BatchParam& param) {
-  return GetGradientIndex(param);
+inline BatchSet<GHistIndexMatrix> DMatrix::GetBatches(Context const* ctx, BatchParam const& param) {
+  return GetGradientIndex(ctx, param);
 }
 
 template <>
-inline BatchSet<ExtSparsePage> DMatrix::GetBatches() {
-  return GetExtBatches(BatchParam{});
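+// All typed getters now thread the caller's Context through to the virtual
+// implementations, so device and thread settings follow the Booster instead of
+// being stored inside BatchParam.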
+inline BatchSet<ExtSparsePage> DMatrix::GetBatches(Context const* ctx, BatchParam const& param) {
+  return GetExtBatches(ctx, param);
 }
 }  // namespace xgboost
diff --git a/python-package/xgboost/testing/__init__.py b/python-package/xgboost/testing/__init__.py
index 026381fe1436..7bf3cf45b099 100644
--- a/python-package/xgboost/testing/__init__.py
+++ b/python-package/xgboost/testing/__init__.py
@@ -317,13 +317,15 @@ def get_dmat(self) -> xgb.DMatrix:
             enable_categorical=True,
         )
 
-    def get_device_dmat(self) -> xgb.QuantileDMatrix:
+    def get_device_dmat(self, max_bin: Optional[int]) -> xgb.QuantileDMatrix:
         import cupy as cp
 
         w = None if self.w is None else cp.array(self.w)
         X = cp.array(self.X, dtype=np.float32)
         y = cp.array(self.y, dtype=np.float32)
-        return xgb.QuantileDMatrix(X, y, weight=w, base_margin=self.margin)
+        return xgb.QuantileDMatrix(
+            X, y, weight=w, base_margin=self.margin, max_bin=max_bin
+        )
 
     def get_external_dmat(self) -> xgb.DMatrix:
         n_samples = self.X.shape[0]
diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc
index 59cb429da6bc..b35879fd7c66 100644
--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@@ -3,30 +3,50 @@
  */
 #include "xgboost/c_api.h"
 
-#include <rabit/rabit.h>
-
-#include <cstring>
-#include <fstream>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "../collective/communicator-inl.h"
-#include "../common/api_entry.h"  // XGBAPIThreadLocalEntry
-#include "../common/charconv.h"
-#include "../common/io.h"
-#include "../data/adapter.h"
-#include "../data/simple_dmatrix.h"
-#include "c_api_utils.h"
-#include "xgboost/base.h"
-#include "xgboost/data.h"
-#include "xgboost/global_config.h"
-#include "xgboost/host_device_vector.h"
-#include "xgboost/json.h"
-#include "xgboost/learner.h"
-#include "xgboost/logging.h"
-#include "xgboost/string_view.h"  // StringView
-#include "xgboost/version_config.h"
+#include <algorithm>     // for copy
+#include <cinttypes>     // for strtoimax
+#include <cmath>         // for nan
+#include <cstring>       // for strcmp
+#include <sstream>       // for operator<<, basic_ostream, ios, stringstream
+#include <functional>    // for less
+#include <limits>        // for numeric_limits
+#include <map>           // for operator!=, _Rb_tree_const_iterator, _Rb_tre...
+#include <memory>        // for shared_ptr, allocator, __shared_ptr_access
+#include <string>        // for char_traits, basic_string, operator==, string
+#include <system_error>  // for errc
+#include <utility>       // for pair
+#include <vector>        // for vector
+
+#include "../collective/communicator-inl.h"  // for Allreduce, Broadcast, Finalize, GetProcessor...
+#include "../common/api_entry.h"             // for XGBAPIThreadLocalEntry
+#include "../common/charconv.h"              // for from_chars, to_chars, NumericLimits, from_ch...
+#include "../common/io.h"                    // for FileExtension, LoadSequentialFile, MemoryBuf...
+#include "../common/threading_utils.h"       // for OmpGetNumThreads, ParallelFor
+#include "../data/adapter.h"                 // for ArrayAdapter, DenseAdapter, RecordBatchesIte...
+#include "../data/proxy_dmatrix.h"           // for DMatrixProxy
+#include "../data/simple_dmatrix.h"          // for SimpleDMatrix
+#include "c_api_error.h"                     // for xgboost_CHECK_C_ARG_PTR, API_END, API_BEGIN
+#include "c_api_utils.h"                     // for RequiredArg, OptionalArg, GetMissing, CastDM...
+#include "dmlc/base.h"                       // for BeginPtr, DMLC_ATTRIBUTE_UNUSED
+#include "dmlc/io.h"                         // for Stream
+#include "dmlc/parameter.h"                  // for FieldAccessEntry, FieldEntry, ParamManager
+#include "dmlc/thread_local.h"               // for ThreadLocalStore
+#include "rabit/c_api.h"                     // for RabitLinkTag
+#include "rabit/rabit.h"                     // for CheckPoint, LoadCheckPoint
+#include "xgboost/base.h"                    // for bst_ulong, bst_float, GradientPair, bst_feat...
+#include "xgboost/context.h"                 // for Context
+#include "xgboost/data.h"                    // for DMatrix, MetaInfo, DataType, ExtSparsePage
+#include "xgboost/feature_map.h"             // for FeatureMap
+#include "xgboost/global_config.h"           // for GlobalConfiguration, GlobalConfigThreadLocal...
+#include "xgboost/host_device_vector.h"      // for HostDeviceVector
+#include "xgboost/intrusive_ptr.h"           // for xgboost
+#include "xgboost/json.h"                    // for Json, get, Integer, IsA, Boolean, String
+#include "xgboost/learner.h"                 // for Learner, PredictionType
+#include "xgboost/logging.h"                 // for LOG_FATAL, LogMessageFatal, CHECK, LogCheck_EQ
+#include "xgboost/predictor.h"               // for PredictionCacheEntry
+#include "xgboost/span.h"                    // for Span
+#include "xgboost/string_view.h"             // for StringView, operator<<
+#include "xgboost/version_config.h"          // for XGBOOST_VER_MAJOR, XGBOOST_VER_MINOR, XGBOOS...
 
 #if defined(XGBOOST_USE_FEDERATED)
 #include "../../plugin/federated/federated_server.h"
@@ -341,10 +361,10 @@ XGB_DLL int XGQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHand
   API_END();
 }
 
-XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle* out) {
+XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle *out) {
   API_BEGIN();
   xgboost_CHECK_C_ARG_PTR(out);
-  *out = new std::shared_ptr<xgboost::DMatrix>(new xgboost::data::DMatrixProxy);;
+  *out = new std::shared_ptr<xgboost::DMatrix>(new xgboost::data::DMatrixProxy);
   API_END();
 }
 
@@ -746,7 +766,7 @@ XGB_DLL int XGDMatrixGetDataAsCSR(DMatrixHandle const handle, char const *config
 
   CHECK_LE(p_m->Info().num_col_, std::numeric_limits<bst_feature_t>::max());
 
-  for (auto const &page : p_m->GetBatches<ExtSparsePage>()) {
+  for (auto const &page : p_m->GetBatches<ExtSparsePage>(p_m->Ctx(), BatchParam{})) {
     CHECK(page.page);
     auto const &h_offset = page.page->offset.ConstHostVector();
     std::copy(h_offset.cbegin(), h_offset.cend(), out_indptr);
diff --git a/src/common/error_msg.h b/src/common/error_msg.h
index 4415bf2ee81b..3f57a63a3413 100644
--- a/src/common/error_msg.h
+++ b/src/common/error_msg.h
@@ -28,5 +28,10 @@ constexpr StringView InfInData() {
 constexpr StringView NoF128() {
   return "128-bit floating point is not supported on current platform.";
 }
+
+constexpr StringView InconsistentMaxBin() {
+  return "Inconsistent `max_bin`. `max_bin` should be the same across different QuantileDMatrix, "
+         "and consistent with the Booster being trained.";
+}
 }  // namespace xgboost::error
 #endif  // XGBOOST_COMMON_ERROR_MSG_H_
diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc
index f97003d1d3c4..c9b50792d073 100644
--- a/src/common/hist_util.cc
+++ b/src/common/hist_util.cc
@@ -2,15 +2,18 @@
  * Copyright 2017-2023 by XGBoost Contributors
  * \file hist_util.cc
  */
+#include "hist_util.h"
+
 #include <vector>
 #include <limits>
 
-#include "xgboost/base.h"
 #include "../common/common.h"
-#include "hist_util.h"
 #include "column_matrix.h"
 #include "quantile.h"
+#include "xgboost/base.h"
+#include "xgboost/context.h"  // Context
+#include "xgboost/data.h"     // SparsePage, SortedCSCPage
 
 #if defined(XGBOOST_MM_PREFETCH_PRESENT)
 #include <xmmintrin.h>
@@ -28,10 +31,11 @@ HistogramCuts::HistogramCuts() {
   cut_ptrs_.HostVector().emplace_back(0);
 }
 
-HistogramCuts SketchOnDMatrix(DMatrix *m, int32_t max_bins, int32_t n_threads, bool use_sorted,
+HistogramCuts SketchOnDMatrix(Context const *ctx, DMatrix *m, bst_bin_t max_bins, bool use_sorted,
                               Span<float> const hessian) {
   HistogramCuts out;
-  auto const& info = m->Info();
+  auto const &info = m->Info();
+  auto n_threads = ctx->Threads();
   std::vector<size_t> reduced(info.num_col_, 0);
   for (auto const &page : m->GetBatches<SparsePage>()) {
     auto const &entries_per_column =
@@ -44,16 +48,19 @@ HistogramCuts SketchOnDMatrix(DMatrix *m, int32_t max_bins, int32_t n_threads, b
   }
 
   if (!use_sorted) {
-    HostSketchContainer container(max_bins, m->Info().feature_types.ConstHostSpan(), reduced,
-                                  HostSketchContainer::UseGroup(info), n_threads);
-    for (auto const& page : m->GetBatches<SparsePage>()) {
+    HostSketchContainer container(ctx, max_bins, m->Info().feature_types.ConstHostSpan(), reduced,
+                                  HostSketchContainer::UseGroup(info));
+    for (auto const &page : m->GetBatches<SparsePage>()) {
       container.PushRowPage(page, info, hessian);
     }
     container.MakeCuts(m->Info(), &out);
   } else {
-    SortedSketchContainer container{max_bins, m->Info().feature_types.ConstHostSpan(), reduced,
-                                    HostSketchContainer::UseGroup(info), n_threads};
-    for (auto const& page : m->GetBatches<SortedCSCPage>()) {
+    SortedSketchContainer container{ctx,
+                                    max_bins,
+                                    m->Info().feature_types.ConstHostSpan(),
+                                    reduced,
+                                    HostSketchContainer::UseGroup(info)};
+    for (auto const &page : m->GetBatches<SortedCSCPage>(ctx)) {
      container.PushColPage(page, info, hessian);
     }
     container.MakeCuts(m->Info(), &out);
diff --git a/src/common/hist_util.h b/src/common/hist_util.h
index d95d405eb1df..6380952d7f61 100644
--- a/src/common/hist_util.h
+++ b/src/common/hist_util.h
@@ -170,7 +170,7 @@ class HistogramCuts {
  * \param use_sorted Whether should we use SortedCSC for sketching, it's more efficient
  *        but consumes more memory.
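+ * \param ctx Context from which the number of working threads is taken.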
 */
-HistogramCuts SketchOnDMatrix(DMatrix* m, int32_t max_bins, int32_t n_threads,
+HistogramCuts SketchOnDMatrix(Context const* ctx, DMatrix* m, bst_bin_t max_bins,
                               bool use_sorted = false, Span<float> const hessian = {});
 
 enum BinTypeSize : uint8_t {
diff --git a/src/common/quantile.cc b/src/common/quantile.cc
index 60626052c61c..a93184b95ca2 100644
--- a/src/common/quantile.cc
+++ b/src/common/quantile.cc
@@ -16,16 +16,16 @@ namespace xgboost {
 namespace common {
 
 template <typename WQSketch>
-SketchContainerImpl<WQSketch>::SketchContainerImpl(std::vector<bst_row_t> columns_size,
+SketchContainerImpl<WQSketch>::SketchContainerImpl(Context const *ctx,
+                                                   std::vector<bst_row_t> columns_size,
                                                    int32_t max_bins,
                                                    Span<FeatureType const> feature_types,
-                                                   bool use_group,
-                                                   int32_t n_threads)
+                                                   bool use_group)
     : feature_types_(feature_types.cbegin(), feature_types.cend()),
       columns_size_{std::move(columns_size)},
       max_bins_{max_bins},
       use_group_ind_{use_group},
-      n_threads_{n_threads} {
+      n_threads_{ctx->Threads()} {
   monitor_.Init(__func__);
   CHECK_NE(columns_size_.size(), 0);
   sketches_.resize(columns_size_.size());
@@ -380,13 +380,13 @@ auto AddCategories(std::set<float> const &categories, HistogramCuts *cuts) {
 }
 
 template <typename WQSketch>
-void SketchContainerImpl<WQSketch>::MakeCuts(MetaInfo const& info, HistogramCuts* cuts) {
+void SketchContainerImpl<WQSketch>::MakeCuts(MetaInfo const &info, HistogramCuts *p_cuts) {
   monitor_.Start(__func__);
   std::vector<typename WQSketch::SummaryContainer> reduced;
   std::vector<int32_t> num_cuts;
   this->AllReduce(info, &reduced, &num_cuts);
 
-  cuts->min_vals_.HostVector().resize(sketches_.size(), 0.0f);
+  p_cuts->min_vals_.HostVector().resize(sketches_.size(), 0.0f);
   std::vector<typename WQSketch::SummaryContainer> final_summaries(reduced.size());
 
   ParallelFor(reduced.size(), n_threads_, Sched::Guided(), [&](size_t fidx) {
@@ -401,48 +401,48 @@ void SketchContainerImpl<WQSketch>::MakeCuts(MetaInfo const& info, HistogramCut
       a.SetPrune(reduced[fidx], max_num_bins + 1);
       CHECK(a.data && reduced[fidx].data);
       const bst_float mval = a.data[0].value;
-      cuts->min_vals_.HostVector()[fidx] = mval - fabs(mval) - 1e-5f;
+      p_cuts->min_vals_.HostVector()[fidx] = mval - fabs(mval) - 1e-5f;
     } else {
       // Empty column.
       const float mval = 1e-5f;
-      cuts->min_vals_.HostVector()[fidx] = mval;
+      p_cuts->min_vals_.HostVector()[fidx] = mval;
     }
   });
 
   float max_cat{-1.f};
   for (size_t fid = 0; fid < reduced.size(); ++fid) {
     size_t max_num_bins = std::min(num_cuts[fid], max_bins_);
-    typename WQSketch::SummaryContainer const& a = final_summaries[fid];
+    typename WQSketch::SummaryContainer const &a = final_summaries[fid];
     if (IsCat(feature_types_, fid)) {
-      max_cat = std::max(max_cat, AddCategories(categories_.at(fid), cuts));
+      max_cat = std::max(max_cat, AddCategories(categories_.at(fid), p_cuts));
     } else {
-      AddCutPoint(a, max_num_bins, cuts);
+      AddCutPoint(a, max_num_bins, p_cuts);
       // push a value that is greater than anything
       const bst_float cpt =
-          (a.size > 0) ? a.data[a.size - 1].value : cuts->min_vals_.HostVector()[fid];
+          (a.size > 0) ? a.data[a.size - 1].value : p_cuts->min_vals_.HostVector()[fid];
       // this must be bigger than last value in a scale
       const bst_float last = cpt + (fabs(cpt) + 1e-5f);
-      cuts->cut_values_.HostVector().push_back(last);
+      p_cuts->cut_values_.HostVector().push_back(last);
     }
 
     // Ensure that every feature gets at least one quantile point
-    CHECK_LE(cuts->cut_values_.HostVector().size(), std::numeric_limits<uint32_t>::max());
-    auto cut_size = static_cast<uint32_t>(cuts->cut_values_.HostVector().size());
-    CHECK_GT(cut_size, cuts->cut_ptrs_.HostVector().back());
-    cuts->cut_ptrs_.HostVector().push_back(cut_size);
+    CHECK_LE(p_cuts->cut_values_.HostVector().size(), std::numeric_limits<uint32_t>::max());
+    auto cut_size = static_cast<uint32_t>(p_cuts->cut_values_.HostVector().size());
+    CHECK_GT(cut_size, p_cuts->cut_ptrs_.HostVector().back());
+    p_cuts->cut_ptrs_.HostVector().push_back(cut_size);
   }
 
-  cuts->SetCategorical(this->has_categorical_, max_cat);
+  p_cuts->SetCategorical(this->has_categorical_, max_cat);
   monitor_.Stop(__func__);
 }
 
 template class SketchContainerImpl<WQuantileSketch<float, float>>;
 template class SketchContainerImpl<WXQuantileSketch<float, float>>;
 
-HostSketchContainer::HostSketchContainer(int32_t max_bins, common::Span<FeatureType const> ft,
-                                         std::vector<bst_row_t> columns_size, bool use_group,
-                                         int32_t n_threads)
-    : SketchContainerImpl{columns_size, max_bins, ft, use_group, n_threads} {
+HostSketchContainer::HostSketchContainer(Context const *ctx, bst_bin_t max_bins,
+                                         common::Span<FeatureType const> ft,
+                                         std::vector<bst_row_t> columns_size, bool use_group)
+    : SketchContainerImpl{ctx, columns_size, max_bins, ft, use_group} {
   monitor_.Init(__func__);
   ParallelFor(sketches_.size(), n_threads_, Sched::Auto(), [&](auto i) {
     auto n_bins = std::min(static_cast<bst_row_t>(max_bins_), columns_size_[i]);
diff --git a/src/common/quantile.h b/src/common/quantile.h
index f8d347112772..0a82f7c9074d 100644
--- a/src/common/quantile.h
+++ b/src/common/quantile.h
@@ -800,9 +800,8 @@ class SketchContainerImpl {
    * \param max_bins maximum number of bins for each feature.
   * \param use_group whether is assigned to group to data instance.
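+   * \param ctx context used to determine the number of working threads.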
   */
-  SketchContainerImpl(std::vector<bst_row_t> columns_size, int32_t max_bins,
-                      common::Span<FeatureType const> feature_types, bool use_group,
-                      int32_t n_threads);
+  SketchContainerImpl(Context const *ctx, std::vector<bst_row_t> columns_size, int32_t max_bins,
+                      common::Span<FeatureType const> feature_types, bool use_group);
 
   static bool UseGroup(MetaInfo const &info) {
     size_t const num_groups =
@@ -894,8 +893,8 @@ class HostSketchContainer : public SketchContainerImpl<WQuantileSketch<float, f
   using Super = SketchContainerImpl<WQuantileSketch<float, float>>;
 
  public:
-  HostSketchContainer(int32_t max_bins, common::Span<FeatureType const> ft,
-                      std::vector<bst_row_t> columns_size, bool use_group, int32_t n_threads);
+  HostSketchContainer(Context const *ctx, bst_bin_t max_bins, common::Span<FeatureType const> ft,
+                      std::vector<bst_row_t> columns_size, bool use_group);
 
   template <typename Batch>
   void PushAdapterBatch(Batch const &batch, size_t base_rowid, MetaInfo const &info,
                         float missing);
@@ -990,10 +989,10 @@ class SortedSketchContainer : public SketchContainerImpl<WXQuantileSketch<float
   using Super = SketchContainerImpl<WXQuantileSketch<float, float>>;
 
  public:
-  explicit SortedSketchContainer(int32_t max_bins, common::Span<FeatureType const> ft,
-                                 std::vector<bst_row_t> columns_size, bool use_group,
-                                 int32_t n_threads)
-      : SketchContainerImpl{columns_size, max_bins, ft, use_group, n_threads} {
+  explicit SortedSketchContainer(Context const *ctx, int32_t max_bins,
+                                 common::Span<FeatureType const> ft,
+                                 std::vector<bst_row_t> columns_size, bool use_group)
+      : SketchContainerImpl{ctx, columns_size, max_bins, ft, use_group} {
     monitor_.Init(__func__);
     sketches_.resize(columns_size.size());
     size_t i = 0;
diff --git a/src/data/batch_utils.h b/src/data/batch_utils.h
new file mode 100644
index 000000000000..f75d24ffd02a
--- /dev/null
+++ b/src/data/batch_utils.h
@@ -0,0 +1,33 @@
+/**
+ * Copyright 2023, XGBoost Contributors
+ */
+#ifndef XGBOOST_DATA_BATCH_UTILS_H_
+#define XGBOOST_DATA_BATCH_UTILS_H_
+
+#include "xgboost/data.h"  // for BatchParam
+
+namespace xgboost::data::detail {
+// At least one batch parameter is initialized.
+inline void CheckEmpty(BatchParam const& l, BatchParam const& r) {
+  if (!l.Initialized()) {
+    CHECK(r.Initialized()) << "Batch parameter is not initialized.";
+  }
+}
+
+/**
+ * \brief Should we regenerate the gradient index?
+ *
+ * \param old Parameter stored in DMatrix.
+ * \param p   New parameter passed in by caller.
+ */
+inline bool RegenGHist(BatchParam old, BatchParam p) {
+  // Parameter is renewed or caller requests a regen
+  if (!p.Initialized()) {
+    // Empty parameter is passed in, don't regenerate so that we can use gindex in
+    // predictor, which doesn't have any training parameter.
+    return false;
+  }
+  return p.regen || old.ParamNotEqual(p);
+}
+}  // namespace xgboost::data::detail
+#endif  // XGBOOST_DATA_BATCH_UTILS_H_
diff --git a/src/data/ellpack_page.cc b/src/data/ellpack_page.cc
index b1f24506e1dc..1fd8f12b2a35 100644
--- a/src/data/ellpack_page.cc
+++ b/src/data/ellpack_page.cc
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2019 XGBoost contributors
+/**
+ * Copyright 2019-2023, XGBoost contributors
  */
 #ifndef XGBOOST_USE_CUDA
 
@@ -12,7 +12,7 @@ class EllpackPageImpl {};
 
 EllpackPage::EllpackPage() = default;
 
-EllpackPage::EllpackPage(DMatrix*, const BatchParam&) {
+EllpackPage::EllpackPage(Context const*, DMatrix*, const BatchParam&) {
   LOG(FATAL) << "Internal Error: XGBoost is not compiled with CUDA but "
                 "EllpackPage is required";
 }
diff --git a/src/data/ellpack_page.cu b/src/data/ellpack_page.cu
index d631407a1eb0..4409a7ebbea7 100644
--- a/src/data/ellpack_page.cu
+++ b/src/data/ellpack_page.cu
@@ -17,8 +17,8 @@ namespace xgboost {
 
 EllpackPage::EllpackPage() : impl_{new EllpackPageImpl()} {}
 
-EllpackPage::EllpackPage(DMatrix* dmat, const BatchParam& param)
-    : impl_{new EllpackPageImpl(dmat, param)} {}
+EllpackPage::EllpackPage(Context const* ctx, DMatrix* dmat, const BatchParam& param)
+    : impl_{new EllpackPageImpl{ctx, dmat, param}} {}
 
 EllpackPage::~EllpackPage() = default;
 
@@ -105,29 +105,29 @@ EllpackPageImpl::EllpackPageImpl(int device, common::HistogramCuts cuts,
 }
 
 // Construct an ELLPACK matrix in memory.
-EllpackPageImpl::EllpackPageImpl(DMatrix* dmat, const BatchParam& param)
+EllpackPageImpl::EllpackPageImpl(Context const* ctx, DMatrix* dmat, const BatchParam& param)
     : is_dense(dmat->IsDense()) {
   monitor_.Init("ellpack_page");
-  dh::safe_cuda(cudaSetDevice(param.gpu_id));
+  dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
 
   n_rows = dmat->Info().num_row_;
 
   monitor_.Start("Quantiles");
   // Create the quantile sketches for the dmatrix and initialize HistogramCuts.
   row_stride = GetRowStride(dmat);
-  cuts_ = common::DeviceSketch(param.gpu_id, dmat, param.max_bin);
+  cuts_ = common::DeviceSketch(ctx->gpu_id, dmat, param.max_bin);
   monitor_.Stop("Quantiles");
 
   monitor_.Start("InitCompressedData");
-  this->InitCompressedData(param.gpu_id);
+  this->InitCompressedData(ctx->gpu_id);
   monitor_.Stop("InitCompressedData");
 
-  dmat->Info().feature_types.SetDevice(param.gpu_id);
+  dmat->Info().feature_types.SetDevice(ctx->gpu_id);
   auto ft = dmat->Info().feature_types.ConstDeviceSpan();
   monitor_.Start("BinningCompression");
   CHECK(dmat->SingleColBlock());
   for (const auto& batch : dmat->GetBatches<SparsePage>()) {
-    CreateHistIndices(param.gpu_id, batch, ft);
+    CreateHistIndices(ctx->gpu_id, batch, ft);
   }
   monitor_.Stop("BinningCompression");
 }
diff --git a/src/data/ellpack_page.cuh b/src/data/ellpack_page.cuh
index faf44b3b60d3..ee6a2c221ba3 100644
--- a/src/data/ellpack_page.cuh
+++ b/src/data/ellpack_page.cuh
@@ -155,7 +155,7 @@ class EllpackPageImpl {
    * This is used in the in-memory case. The ELLPACK page is constructed from an existing DMatrix
    * in CSR format.
    */
-  explicit EllpackPageImpl(DMatrix* dmat, const BatchParam& parm);
+  explicit EllpackPageImpl(Context const* ctx, DMatrix* dmat, const BatchParam& parm);
 
   template <typename AdapterBatch>
   explicit EllpackPageImpl(AdapterBatch batch, float missing, int device, bool is_dense,
diff --git a/src/data/ellpack_page_source.cu b/src/data/ellpack_page_source.cu
index 872cb0cc657f..fb414f4aef79 100644
--- a/src/data/ellpack_page_source.cu
+++ b/src/data/ellpack_page_source.cu
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2019-2022 XGBoost contributors
+/**
+ * Copyright 2019-2023, XGBoost contributors
  */
 #include <memory>
 #include <utility>
 
@@ -10,7 +10,7 @@ namespace xgboost {
 namespace data {
 void EllpackPageSource::Fetch() {
-  dh::safe_cuda(cudaSetDevice(param_.gpu_id));
+  dh::safe_cuda(cudaSetDevice(device_));
   if (!this->ReadCache()) {
     if (count_ != 0 && !sync_) {
       // source is initialized to be the 0th page during construction, so when count_ is 0
@@ -22,8 +22,7 @@ void EllpackPageSource::Fetch() {
     auto const &csr = source_->Page();
     this->page_.reset(new EllpackPage{});
     auto *impl = this->page_->Impl();
-    *impl = EllpackPageImpl(param_.gpu_id, *cuts_, *csr, is_dense_, row_stride_,
-                            feature_types_);
+    *impl = EllpackPageImpl(device_, *cuts_, *csr, is_dense_, row_stride_, feature_types_);
     page_->SetBaseRowId(csr->base_rowid);
     this->WriteCache();
   }
diff --git a/src/data/ellpack_page_source.h b/src/data/ellpack_page_source.h
index dc080247287c..121ffcf9e613 100644
--- a/src/data/ellpack_page_source.h
+++ b/src/data/ellpack_page_source.h
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2019-2022 by XGBoost Contributors
+/**
+ * Copyright 2019-2023, XGBoost Contributors
  */
 
 #ifndef XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_
@@ -23,19 +23,21 @@ class EllpackPageSource : public PageSourceIncMixIn<EllpackPage> {
   BatchParam param_;
   common::Span<FeatureType const> feature_types_;
   std::unique_ptr<common::HistogramCuts> cuts_;
+  std::int32_t device_;
 
  public:
   EllpackPageSource(float missing, int nthreads, bst_feature_t n_features, size_t n_batches,
                     std::shared_ptr<Cache> cache, BatchParam param,
                     std::unique_ptr<common::HistogramCuts> cuts, bool is_dense,
                     size_t row_stride, common::Span<FeatureType const> feature_types,
-                    std::shared_ptr<SparsePageSource> source)
+                    std::shared_ptr<SparsePageSource> source, std::int32_t device)
       : PageSourceIncMixIn(missing, nthreads, n_features, n_batches, cache, false),
         is_dense_{is_dense},
         row_stride_{row_stride},
         param_{std::move(param)},
         feature_types_{feature_types},
-        cuts_{std::move(cuts)} {
+        cuts_{std::move(cuts)},
+        device_{device} {
     this->source_ = source;
     this->Fetch();
   }
diff --git a/src/data/gradient_index.cc b/src/data/gradient_index.cc
index 3b3323bb5694..11e9a4becf33 100644
--- a/src/data/gradient_index.cc
+++ b/src/data/gradient_index.cc
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2017-2022 by XGBoost Contributors
+/**
+ * Copyright 2017-2023, XGBoost Contributors
  * \brief Data type for fast histogram aggregation.
  */
 #include "gradient_index.h"
@@ -19,18 +19,18 @@ namespace xgboost {
 
 GHistIndexMatrix::GHistIndexMatrix() : columns_{std::make_unique<common::ColumnMatrix>()} {}
 
-GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
-                                   double sparse_thresh, bool sorted_sketch, int32_t n_threads,
+GHistIndexMatrix::GHistIndexMatrix(Context const *ctx, DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
+                                   double sparse_thresh, bool sorted_sketch,
                                    common::Span<float> hess)
     : max_numeric_bins_per_feat{max_bins_per_feat} {
   CHECK(p_fmat->SingleColBlock());
   // We use sorted sketching for approx tree method since it's more efficient in
   // computation time (but higher memory usage).
-  cut = common::SketchOnDMatrix(p_fmat, max_bins_per_feat, n_threads, sorted_sketch, hess);
+  cut = common::SketchOnDMatrix(ctx, p_fmat, max_bins_per_feat, sorted_sketch, hess);
 
   const uint32_t nbins = cut.Ptrs().back();
   hit_count.resize(nbins, 0);
-  hit_count_tloc_.resize(n_threads * nbins, 0);
+  hit_count_tloc_.resize(ctx->Threads() * nbins, 0);
 
   size_t new_size = 1;
   for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
@@ -45,7 +45,7 @@ GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
 
   auto ft = p_fmat->Info().feature_types.ConstHostSpan();
   for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
-    this->PushBatch(batch, ft, n_threads);
+    this->PushBatch(batch, ft, ctx->Threads());
   }
   this->columns_ = std::make_unique<common::ColumnMatrix>();
 
@@ -54,7 +54,7 @@ GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
     // hist
     CHECK(!sorted_sketch);
     for (auto const &page : p_fmat->GetBatches<SparsePage>()) {
-      this->columns_->InitFromSparse(page, *this, sparse_thresh, n_threads);
+      this->columns_->InitFromSparse(page, *this, sparse_thresh, ctx->Threads());
    }
   }
 }
diff --git a/src/data/gradient_index.h b/src/data/gradient_index.h
index 4c35870db595..d36373d6b7f9 100644
--- a/src/data/gradient_index.h
+++ b/src/data/gradient_index.h
@@ -19,7 +19,6 @@
 #include "../common/threading_utils.h"
 #include "../common/transform_iterator.h"  // for MakeIndexTransformIter
 #include "adapter.h"
-#include "proxy_dmatrix.h"
 #include "xgboost/base.h"
 #include "xgboost/data.h"
 
@@ -155,8 +154,8 @@ class GHistIndexMatrix {
   /**
    * \brief Constrcutor for SimpleDMatrix.
   */
-  GHistIndexMatrix(DMatrix* x, bst_bin_t max_bins_per_feat, double sparse_thresh,
-                   bool sorted_sketch, int32_t n_threads, common::Span<float> hess = {});
+  GHistIndexMatrix(Context const* ctx, DMatrix* x, bst_bin_t max_bins_per_feat,
+                   double sparse_thresh, bool sorted_sketch, common::Span<float> hess = {});
   /**
    * \brief Constructor for Iterative DMatrix. Initialize basic information and prepare
    *        for push batch.
@@ -295,28 +294,5 @@ void AssignColumnBinIndex(GHistIndexMatrix const& page, Fn&& assign) {
     }
   });
 }
-
-/**
- * \brief Should we regenerate the gradient index?
- *
- * \param old Parameter stored in DMatrix.
- * \param p New parameter passed in by caller.
- */
-inline bool RegenGHist(BatchParam old, BatchParam p) {
-  // parameter is renewed or caller requests a regen
-  if (p == BatchParam{}) {
-    // empty parameter is passed in, don't regenerate so that we can use gindex in
-    // predictor, which doesn't have any training parameter.
-    return false;
-  }
-
-  // Avoid comparing nan values.
-  bool l_nan = std::isnan(old.sparse_thresh);
-  bool r_nan = std::isnan(p.sparse_thresh);
-  // regenerate if parameter is changed.
-  bool st_chg = (l_nan != r_nan) || (!l_nan && !r_nan && (old.sparse_thresh != p.sparse_thresh));
-  bool param_chg = old.gpu_id != p.gpu_id || old.max_bin != p.max_bin;
-  return p.regen || param_chg || st_chg;
-}
 }  // namespace xgboost
 #endif  // XGBOOST_DATA_GRADIENT_INDEX_H_
diff --git a/src/data/iterative_dmatrix.cc b/src/data/iterative_dmatrix.cc
index 3a473122a0ef..8eb1c203432f 100644
--- a/src/data/iterative_dmatrix.cc
+++ b/src/data/iterative_dmatrix.cc
@@ -1,25 +1,26 @@
-/*!
- * Copyright 2022 XGBoost contributors
+/**
+ * Copyright 2022-2023, XGBoost contributors
  */
 #include "iterative_dmatrix.h"
 
-#include <algorithm>    // std::copy
-#include <cstddef>      // std::size_t
-#include <type_traits>  // std::underlying_type_t
-#include <vector>       // std::vector
+#include <algorithm>    // for copy
+#include <cstddef>      // for size_t
+#include <memory>       // for shared_ptr
+#include <type_traits>  // for underlying_type_t
+#include <vector>       // for vector
 
 #include "../collective/communicator-inl.h"
 #include "../common/categorical.h"  // common::IsCat
 #include "../common/column_matrix.h"
-#include "../tree/param.h"  // FIXME(jiamingy): Find a better way to share this parameter.
+#include "../tree/param.h"          // FIXME(jiamingy): Find a better way to share this parameter.
+#include "batch_utils.h"            // for RegenGHist
 #include "gradient_index.h"
 #include "proxy_dmatrix.h"
 #include "simple_batch_iterator.h"
-#include "xgboost/data.h"  // FeatureType
+#include "xgboost/data.h"  // for FeatureType, DMatrix
 #include "xgboost/logging.h"
 
-namespace xgboost {
-namespace data {
+namespace xgboost::data {
 IterativeDMatrix::IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy,
                                    std::shared_ptr<DMatrix> ref, DataIterResetCallback* reset,
                                    XGDMatrixCallbackNext* next, float missing, int nthread,
@@ -34,60 +35,61 @@ IterativeDMatrix::IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle pro
 
   auto d = MakeProxy(proxy_)->DeviceIdx();
 
-  StringView msg{"All batch should be on the same device."};
-  if (batch_param_.gpu_id != Context::kCpuId) {
-    CHECK_EQ(d, batch_param_.gpu_id) << msg;
-  }
-
-  batch_param_ = BatchParam{d, max_bin};
+  Context ctx;
+  ctx.UpdateAllowUnknown(Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(d)}});
   // hardcoded parameter.
-  batch_param_.sparse_thresh = tree::TrainParam::DftSparseThreshold();
+  BatchParam p{max_bin, tree::TrainParam::DftSparseThreshold()};
 
-  ctx_.UpdateAllowUnknown(
-      Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(d)}});
-  if (ctx_.IsCPU()) {
-    this->InitFromCPU(iter_handle, missing, ref);
+  if (ctx.IsCPU()) {
+    this->InitFromCPU(&ctx, p, iter_handle, missing, ref);
   } else {
-    this->InitFromCUDA(iter_handle, missing, ref);
+    this->InitFromCUDA(&ctx, p, iter_handle, missing, ref);
   }
+
+  this->fmat_ctx_ = ctx;
+  this->batch_ = p;
 }
 
-void GetCutsFromRef(std::shared_ptr<DMatrix> ref_, bst_feature_t n_features, BatchParam p,
-                    common::HistogramCuts* p_cuts) {
-  CHECK(ref_);
+void GetCutsFromRef(Context const* ctx, std::shared_ptr<DMatrix> ref, bst_feature_t n_features,
+                    BatchParam p, common::HistogramCuts* p_cuts) {
+  CHECK(ref);
   CHECK(p_cuts);
-  auto csr = [&]() {
-    for (auto const& page : ref_->GetBatches<GHistIndexMatrix>(p)) {
+  p.forbid_regen = true;
+  // Fetch cuts from GIDX
+  auto csr = [&] {
+    for (auto const& page : ref->GetBatches<GHistIndexMatrix>(ctx, p)) {
       *p_cuts = page.cut;
       break;
     }
  };
-  auto ellpack = [&]() {
-    // workaround ellpack being initialized from CPU.
-    if (p.gpu_id == Context::kCpuId) {
-      p.gpu_id = ref_->Ctx()->gpu_id;
-    }
-    if (p.gpu_id == Context::kCpuId) {
-      p.gpu_id = 0;
-    }
-    for (auto const& page : ref_->GetBatches<EllpackPage>(p)) {
+  // Fetch cuts from Ellpack.
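+  // The reference DMatrix may hold a gradient index, an ELLPACK page, or both;
+  // the dispatch below prefers the page matching the requesting context's device.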
+  auto ellpack = [&] {
+    for (auto const& page : ref->GetBatches<EllpackPage>(ctx, p)) {
       GetCutsFromEllpack(page, p_cuts);
       break;
     }
   };
 
-  if (ref_->PageExists<GHistIndexMatrix>()) {
+  if (ref->PageExists<GHistIndexMatrix>() && ref->PageExists<EllpackPage>()) {
+    // Both exist
+    if (ctx->IsCPU()) {
+      csr();
+    } else {
+      ellpack();
+    }
+  } else if (ref->PageExists<GHistIndexMatrix>()) {
     csr();
-  } else if (ref_->PageExists<EllpackPage>()) {
+  } else if (ref->PageExists<EllpackPage>()) {
     ellpack();
   } else {
-    if (p.gpu_id == Context::kCpuId) {
+    // None exist
+    if (ctx->IsCPU()) {
      csr();
    } else {
      ellpack();
    }
  }
-  CHECK_EQ(ref_->Info().num_col_, n_features)
+  CHECK_EQ(ref->Info().num_col_, n_features)
      << "Invalid ref DMatrix, different number of features.";
 }
@@ -112,7 +114,8 @@ void SyncFeatureType(std::vector<FeatureType>* p_h_ft) {
   }
 }
 }  // anonymous namespace
 
-void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
+void IterativeDMatrix::InitFromCPU(Context const* ctx, BatchParam const& p,
+                                   DataIterHandle iter_handle, float missing,
                                    std::shared_ptr<DMatrix> ref) {
   DMatrixProxy* proxy = MakeProxy(proxy_);
   CHECK(proxy);
@@ -133,7 +136,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
   auto const is_valid = data::IsValidFunctor{missing};
   auto nnz_cnt = [&]() {
     return HostAdapterDispatch(proxy, [&](auto const& value) {
-      size_t n_threads = ctx_.Threads();
+      size_t n_threads = ctx->Threads();
       size_t n_features = column_sizes.size();
       linalg::Tensor<std::size_t, 2> column_sizes_tloc({n_threads, n_features}, Context::kCpuId);
       column_sizes_tloc.Data()->Fill(0ul);
@@ -158,10 +161,10 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
     });
   };
 
-  size_t n_features = 0;
-  size_t n_batches = 0;
-  size_t accumulated_rows{0};
-  size_t nnz{0};
+  std::uint64_t n_features = 0;
+  std::size_t n_batches = 0;
+  std::uint64_t accumulated_rows{0};
+  std::uint64_t nnz{0};
 
   /**
    * CPU impl needs an additional loop for accumulating the column size.
@@ -203,7 +206,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
   accumulated_rows = 0;
   std::vector<FeatureType> h_ft;
   if (ref) {
-    GetCutsFromRef(ref, Info().num_col_, batch_param_, &cuts);
+    GetCutsFromRef(ctx, ref, Info().num_col_, p, &cuts);
     h_ft = ref->Info().feature_types.HostVector();
   } else {
     size_t i = 0;
@@ -211,9 +214,8 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
       if (!p_sketch) {
         h_ft = proxy->Info().feature_types.ConstHostVector();
         SyncFeatureType(&h_ft);
-        p_sketch.reset(new common::HostSketchContainer{
-            batch_param_.max_bin, h_ft, column_sizes, !proxy->Info().group_ptr_.empty(),
-            ctx_.Threads()});
+        p_sketch.reset(new common::HostSketchContainer{ctx, p.max_bin, h_ft, column_sizes,
+                                                       !proxy->Info().group_ptr_.empty()});
       }
       HostAdapterDispatch(proxy, [&](auto const& batch) {
         proxy->Info().num_nonzero_ = batch_nnz[i];
@@ -237,15 +239,15 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
   /**
   * Generate gradient index.
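+   *
+   * A first pass over the external iterator pushes every batch into the final
+   * GHistIndexMatrix; a second pass fills the per-column index.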
   */
-  this->ghist_ = std::make_unique<GHistIndexMatrix>(Info(), std::move(cuts), batch_param_.max_bin);
+  this->ghist_ = std::make_unique<GHistIndexMatrix>(Info(), std::move(cuts), p.max_bin);
   size_t rbegin = 0;
   size_t prev_sum = 0;
   size_t i = 0;
   while (iter.Next()) {
     HostAdapterDispatch(proxy, [&](auto const& batch) {
       proxy->Info().num_nonzero_ = batch_nnz[i];
-      this->ghist_->PushAdapterBatch(&ctx_, rbegin, prev_sum, batch, missing, h_ft,
-                                     batch_param_.sparse_thresh, Info().num_row_);
+      this->ghist_->PushAdapterBatch(ctx, rbegin, prev_sum, batch, missing, h_ft, p.sparse_thresh,
+                                     Info().num_row_);
     });
     if (n_batches != 1) {
       this->info_.Extend(std::move(proxy->Info()), false, true);
@@ -265,7 +267,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
   accumulated_rows = 0;
   while (iter.Next()) {
     HostAdapterDispatch(proxy, [&](auto const& batch) {
-      this->ghist_->PushAdapterBatchColumns(&ctx_, batch, missing, accumulated_rows);
+      this->ghist_->PushAdapterBatchColumns(ctx, batch, missing, accumulated_rows);
     });
     accumulated_rows += num_rows();
   }
@@ -282,11 +284,27 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
     Info().feature_types.HostVector() = h_ft;
 }
 
-BatchSet<GHistIndexMatrix> IterativeDMatrix::GetGradientIndex(BatchParam const& param) {
-  CheckParam(param);
+BatchSet<GHistIndexMatrix> IterativeDMatrix::GetGradientIndex(Context const* ctx,
+                                                              BatchParam const& param) {
+  if (param.Initialized()) {
+    CheckParam(param);
+    CHECK(!detail::RegenGHist(param, batch_)) << error::InconsistentMaxBin();
+  }
+  if (!ellpack_ && !ghist_) {
+    LOG(FATAL) << "`QuantileDMatrix` not initialized.";
+  }
+
   if (!ghist_) {
-    CHECK(ellpack_);
-    ghist_ = std::make_shared<GHistIndexMatrix>(&ctx_, Info(), *ellpack_, param);
+    if (ctx->IsCPU()) {
+      ghist_ = std::make_shared<GHistIndexMatrix>(ctx, Info(), *ellpack_, param);
+    } else if (fmat_ctx_.IsCPU()) {
+      ghist_ = std::make_shared<GHistIndexMatrix>(&fmat_ctx_, Info(), *ellpack_, param);
+    } else {
+      // Can happen when QDM is initialized on GPU, but a CPU version is queried by a different QDM
+      // for cut reference.
+      auto cpu_ctx = ctx->MakeCPU();
+      ghist_ = std::make_shared<GHistIndexMatrix>(&cpu_ctx, Info(), *ellpack_, param);
+    }
   }
 
   if (!std::isnan(param.sparse_thresh) &&
@@ -300,8 +318,9 @@ BatchSet<GHistIndexMatrix> IterativeDMatrix::GetGradientIndex(BatchParam const&
   return BatchSet<GHistIndexMatrix>(begin_iter);
 }
 
-BatchSet<ExtSparsePage> IterativeDMatrix::GetExtBatches(BatchParam const& param) {
-  for (auto const& page : this->GetGradientIndex(param)) {
+BatchSet<ExtSparsePage> IterativeDMatrix::GetExtBatches(Context const* ctx,
+                                                        BatchParam const& param) {
+  for (auto const& page : this->GetGradientIndex(ctx, param)) {
     auto p_out = std::make_shared<SparsePage>();
     p_out->data.Resize(this->Info().num_nonzero_);
     p_out->offset.Resize(this->Info().num_row_ + 1);
@@ -336,5 +355,26 @@ BatchSet<ExtSparsePage> IterativeDMatrix::GetExtBatches(BatchParam const& param)
   BatchIterator<ExtSparsePage> begin_iter =
       BatchIterator<ExtSparsePage>(new SimpleBatchIteratorImpl<ExtSparsePage>(nullptr));
   return BatchSet<ExtSparsePage>(begin_iter);
 }
-}  // namespace data
-}  // namespace xgboost
+
+#if !defined(XGBOOST_USE_CUDA)
+inline void IterativeDMatrix::InitFromCUDA(Context const*, BatchParam const&, DataIterHandle, float,
+                                           std::shared_ptr<DMatrix>) {
+  // silence the warning about unused variables.
+  (void)(proxy_);
+  (void)(reset_);
+  (void)(next_);
+  common::AssertGPUSupport();
+}
+
+inline BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(Context const* ctx,
+                                                                 BatchParam const& param) {
+  common::AssertGPUSupport();
+  auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));
+  return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));
+}
+
+inline void GetCutsFromEllpack(EllpackPage const&, common::HistogramCuts*) {
+  common::AssertGPUSupport();
+}
+#endif  // !defined(XGBOOST_USE_CUDA)
+}  // namespace xgboost::data
diff --git a/src/data/iterative_dmatrix.cu b/src/data/iterative_dmatrix.cu
index 5e7fc8d4f565..f23bbd5a14f0 100644
--- a/src/data/iterative_dmatrix.cu
+++ b/src/data/iterative_dmatrix.cu
@@ -1,22 +1,24 @@
-/*!
- * Copyright 2020-2022 XGBoost contributors
+/**
+ * Copyright 2020-2023, XGBoost contributors
  */
 #include <algorithm>
 #include <memory>
 #include <type_traits>
 
 #include "../common/hist_util.cuh"
+#include "batch_utils.h"  // for RegenGHist
 #include "device_adapter.cuh"
 #include "ellpack_page.cuh"
+#include "gradient_index.h"
 #include "iterative_dmatrix.h"
 #include "proxy_dmatrix.cuh"
 #include "proxy_dmatrix.h"
 #include "simple_batch_iterator.h"
 #include "sparse_page_source.h"
 
-namespace xgboost {
-namespace data {
-void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
+namespace xgboost::data {
+void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
+                                    DataIterHandle iter_handle, float missing,
                                     std::shared_ptr<DMatrix> ref) {
   // A handle passed to external iterator.
   DMatrixProxy* proxy = MakeProxy(proxy_);
@@ -46,7 +48,7 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
   int32_t current_device;
   dh::safe_cuda(cudaGetDevice(&current_device));
   auto get_device = [&]() -> int32_t {
-    int32_t d = (ctx_.gpu_id == Context::kCpuId) ? current_device : ctx_.gpu_id;
+    std::int32_t d = (ctx->gpu_id == Context::kCpuId) ? current_device : ctx->gpu_id;
     CHECK_NE(d, Context::kCpuId);
     return d;
   };
@@ -57,8 +59,8 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
   common::HistogramCuts cuts;
   do {
     // We use do while here as the first batch is fetched in ctor
-    ctx_.gpu_id = proxy->DeviceIdx();
-    CHECK_LT(ctx_.gpu_id, common::AllVisibleGPUs());
+    // ctx_.gpu_id = proxy->DeviceIdx();
+    CHECK_LT(ctx->gpu_id, common::AllVisibleGPUs());
     dh::safe_cuda(cudaSetDevice(get_device()));
     if (cols == 0) {
       cols = num_cols();
@@ -68,12 +70,12 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
       CHECK_EQ(cols, num_cols()) << "Inconsistent number of columns.";
     }
     if (!ref) {
-      sketch_containers.emplace_back(proxy->Info().feature_types, batch_param_.max_bin, cols,
-                                     num_rows(), get_device());
+      sketch_containers.emplace_back(proxy->Info().feature_types, p.max_bin, cols, num_rows(),
+                                     get_device());
       auto* p_sketch = &sketch_containers.back();
       proxy->Info().weights_.SetDevice(get_device());
       Dispatch(proxy, [&](auto const& value) {
-        common::AdapterDeviceSketch(value, batch_param_.max_bin, proxy->Info(), missing, p_sketch);
+        common::AdapterDeviceSketch(value, p.max_bin, proxy->Info(), missing, p_sketch);
       });
     }
     auto batch_rows = num_rows();
@@ -95,8 +97,8 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
   if (!ref) {
     HostDeviceVector<FeatureType> ft;
     common::SketchContainer final_sketch(
-        sketch_containers.empty() ? ft : sketch_containers.front().FeatureTypes(),
-        batch_param_.max_bin, cols, accumulated_rows, get_device());
+        sketch_containers.empty() ? ft : sketch_containers.front().FeatureTypes(), p.max_bin, cols,
+        accumulated_rows, get_device());
     for (auto const& sketch : sketch_containers) {
       final_sketch.Merge(sketch.ColumnsPtr(), sketch.Data());
       final_sketch.FixError();
     }
@@ -106,7 +108,7 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
     final_sketch.MakeCuts(&cuts);
   } else {
-    GetCutsFromRef(ref, Info().num_col_, batch_param_, &cuts);
+    GetCutsFromRef(ctx, ref, Info().num_col_, p, &cuts);
   }
 
   this->info_.num_row_ = accumulated_rows;
@@ -169,24 +171,34 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
   info_.SynchronizeNumberOfColumns();
 }
 
-BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(BatchParam const& param) {
-  CheckParam(param);
+BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(Context const* ctx,
+                                                          BatchParam const& param) {
+  if (param.Initialized()) {
+    CheckParam(param);
+    CHECK(!detail::RegenGHist(param, batch_)) << error::InconsistentMaxBin();
+  }
   if (!ellpack_ && !ghist_) {
     LOG(FATAL) << "`QuantileDMatrix` not initialized.";
   }
-  if (!ellpack_ && ghist_) {
+
+  if (!ellpack_) {
     ellpack_.reset(new EllpackPage());
-    // Evaluation QuantileDMatrix initialized from CPU data might not have the correct GPU
-    // ID.
-    if (this->ctx_.IsCPU()) {
-      this->ctx_.gpu_id = param.gpu_id;
-    }
-    if (this->ctx_.IsCPU()) {
-      this->ctx_.gpu_id = dh::CurrentDevice();
+    if (ctx->IsCUDA()) {
+      this->Info().feature_types.SetDevice(ctx->gpu_id);
+      *ellpack_->Impl() =
+          EllpackPageImpl(ctx, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());
+    } else if (fmat_ctx_.IsCUDA()) {
+      this->Info().feature_types.SetDevice(fmat_ctx_.gpu_id);
+      *ellpack_->Impl() =
+          EllpackPageImpl(&fmat_ctx_, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());
+    } else {
+      // Can happen when QDM is initialized on CPU, but a GPU version is queried by a different QDM
+      // for cut reference.
+      auto cuda_ctx = ctx->MakeCUDA();
+      this->Info().feature_types.SetDevice(cuda_ctx.gpu_id);
+      *ellpack_->Impl() =
+          EllpackPageImpl(&cuda_ctx, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());
     }
-    this->Info().feature_types.SetDevice(this->ctx_.gpu_id);
-    *ellpack_->Impl() =
-        EllpackPageImpl(&ctx_, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());
   }
   CHECK(ellpack_);
   auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));
@@ -196,5 +208,4 @@ BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(BatchParam const& para
 void GetCutsFromEllpack(EllpackPage const& page, common::HistogramCuts* cuts) {
   *cuts = page.Impl()->Cuts();
 }
-}  // namespace data
-}  // namespace xgboost
+}  // namespace xgboost::data
diff --git a/src/data/iterative_dmatrix.h b/src/data/iterative_dmatrix.h
index 28c4087c419a..bcaa5b63cc7c 100644
--- a/src/data/iterative_dmatrix.h
+++ b/src/data/iterative_dmatrix.h
@@ -1,6 +1,8 @@
-/*!
- * Copyright 2020-2022 by Contributors
+/**
+ * Copyright 2020-2023 by XGBoost Contributors
  * \file iterative_dmatrix.h
+ *
+ * \brief Implementation of the higher-level `QuantileDMatrix`.
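+ *
+ * The matrix is built from an external data iterator and stores only the
+ * quantized gradient index (CPU) and/or the ELLPACK page (GPU), not the raw
+ * CSR data.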
  */
 #ifndef XGBOOST_DATA_ITERATIVE_DMATRIX_H_
 #define XGBOOST_DATA_ITERATIVE_DMATRIX_H_
 
 #include <algorithm>
 #include <cstring>
@@ -10,10 +12,12 @@
 #include <memory>
 #include <string>
 
+#include "../common/error_msg.h"
 #include "proxy_dmatrix.h"
 #include "simple_batch_iterator.h"
 #include "xgboost/base.h"
 #include "xgboost/c_api.h"
+#include "xgboost/context.h"  // for Context
 #include "xgboost/data.h"
 
 namespace xgboost {
@@ -43,21 +47,17 @@ namespace data {
  */
 class IterativeDMatrix : public DMatrix {
   MetaInfo info_;
-  Context ctx_;
-  BatchParam batch_param_;
   std::shared_ptr<EllpackPage> ellpack_;
   std::shared_ptr<GHistIndexMatrix> ghist_;
+  BatchParam batch_;
 
   DMatrixHandle proxy_;
   DataIterResetCallback *reset_;
   XGDMatrixCallbackNext *next_;
+  Context fmat_ctx_;
 
   void CheckParam(BatchParam const &param) {
-    // FIXME(Jiamingy): https://github.com/dmlc/xgboost/issues/7976
-    if (param.max_bin != batch_param_.max_bin && param.max_bin != 0) {
-      LOG(WARNING) << "Inconsistent max_bin between Quantile DMatrix and Booster:" << param.max_bin
-                   << " vs. " << batch_param_.max_bin;
-    }
+    CHECK_EQ(param.max_bin, batch_.max_bin) << error::InconsistentMaxBin();
     CHECK(!param.regen && param.hess.empty())
         << "Only `hist` and `gpu_hist` tree method can use `QuantileDMatrix`.";
   }
@@ -68,8 +68,10 @@ class IterativeDMatrix : public DMatrix {
     return BatchSet<Page>(BatchIterator<Page>(nullptr));
   }
 
-  void InitFromCUDA(DataIterHandle iter, float missing, std::shared_ptr<DMatrix> ref);
-  void InitFromCPU(DataIterHandle iter_handle, float missing, std::shared_ptr<DMatrix> ref);
+  void InitFromCUDA(Context const *ctx, BatchParam const &p, DataIterHandle iter_handle,
+                    float missing, std::shared_ptr<DMatrix> ref);
+  void InitFromCPU(Context const *ctx, BatchParam const &p, DataIterHandle iter_handle,
+                   float missing, std::shared_ptr<DMatrix> ref);
 
  public:
   explicit IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy,
@@ -94,51 +96,40 @@ class IterativeDMatrix : public DMatrix {
     LOG(FATAL) << "Not implemented.";
     return BatchSet<SortedCSCPage>(BatchIterator<SortedCSCPage>(nullptr));
   }
-  BatchSet<CSCPage> GetColumnBatches() override { return InvalidTreeMethod<CSCPage>(); }
-  BatchSet<SortedCSCPage> GetSortedColumnBatches() override {
+  BatchSet<CSCPage> GetColumnBatches(Context const *) override {
+    return InvalidTreeMethod<CSCPage>();
+  }
+  BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const *) override {
     return InvalidTreeMethod<SortedCSCPage>();
   }
-  BatchSet<GHistIndexMatrix> GetGradientIndex(BatchParam const &param) override;
+  BatchSet<GHistIndexMatrix> GetGradientIndex(Context const *ctx, BatchParam const &param) override;
 
-  BatchSet<EllpackPage> GetEllpackBatches(const BatchParam &param) override;
-  BatchSet<ExtSparsePage> GetExtBatches(BatchParam const& param) override;
+  BatchSet<EllpackPage> GetEllpackBatches(Context const *ctx, const BatchParam &param) override;
+  BatchSet<ExtSparsePage> GetExtBatches(Context const *ctx, BatchParam const &param) override;
 
   bool SingleColBlock() const override { return true; }
 
   MetaInfo &Info() override { return info_; }
   MetaInfo const &Info() const override { return info_; }
 
-  Context const *Ctx() const override { return &ctx_; }
+  Context const *Ctx() const override { return &fmat_ctx_; }
 };
 
 /**
- * \brief Get quantile cuts from reference Quantile DMatrix.
+ * \brief Get quantile cuts from reference (Quantile)DMatrix.
+ *
+ * \param ctx The context of the new DMatrix.
+ * \param ref The reference DMatrix.
+ * \param n_features Number of features, used for validation only.
+ * \param p Batch parameter for the new DMatrix.
+ * \param p_cuts Output quantile cuts.
  */
-void GetCutsFromRef(std::shared_ptr<DMatrix> ref_, bst_feature_t n_features, BatchParam p,
-                    common::HistogramCuts *p_cuts);
+void GetCutsFromRef(Context const *ctx, std::shared_ptr<DMatrix> ref, bst_feature_t n_features,
+                    BatchParam p, common::HistogramCuts *p_cuts);
 /**
  * \brief Get quantile cuts from ellpack page.
  */
 void GetCutsFromEllpack(EllpackPage const &page, common::HistogramCuts *cuts);
-
-#if !defined(XGBOOST_USE_CUDA)
-inline void IterativeDMatrix::InitFromCUDA(DataIterHandle, float, std::shared_ptr<DMatrix>) {
-  // silent the warning about unused variables.
-  (void)(proxy_);
-  (void)(reset_);
-  (void)(next_);
-  common::AssertGPUSupport();
-}
-
-inline BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(const BatchParam &) {
-  common::AssertGPUSupport();
-  auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));
-  return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));
-}
-
-inline void GetCutsFromEllpack(EllpackPage const &, common::HistogramCuts *) {
-  common::AssertGPUSupport();
-}
-#endif  // !defined(XGBOOST_USE_CUDA)
 }  // namespace data
 }  // namespace xgboost
diff --git a/src/data/proxy_dmatrix.h b/src/data/proxy_dmatrix.h
index 7a15d6498745..2c18ffc79026 100644
--- a/src/data/proxy_dmatrix.h
+++ b/src/data/proxy_dmatrix.h
@@ -25,16 +25,11 @@ class DataIterProxy {
   NextFn* next_;
 
  public:
-  DataIterProxy(DataIterHandle iter, ResetFn* reset, NextFn* next) :
-      iter_{iter},
-      reset_{reset}, next_{next} {}
+  DataIterProxy(DataIterHandle iter, ResetFn* reset, NextFn* next)
+      : iter_{iter}, reset_{reset}, next_{next} {}
 
-  bool Next() {
-    return next_(iter_);
-  }
-  void Reset() {
-    reset_(iter_);
-  }
+  bool Next() { return next_(iter_); }
+  void Reset() { reset_(iter_); }
 };
 
 /*
@@ -68,9 +63,8 @@ class DMatrixProxy : public DMatrix {
   }
 
   void SetArrayData(char const* c_interface);
-  void SetCSRData(char const *c_indptr, char const *c_indices,
-                  char const *c_values, bst_feature_t n_features,
-                  bool on_host);
+  void SetCSRData(char const* c_indptr, char const* c_indices, char const* c_values,
+                  bst_feature_t n_features, bool on_host);
 
   MetaInfo& Info() override { return info_; }
   MetaInfo const& Info() const override { return info_; }
@@ -81,6 +75,12 @@ class DMatrixProxy : public DMatrix {
   bool GHistIndexExists() const override { return false; }
   bool SparsePageExists() const override { return false; }
 
+  template <typename Page>
+  BatchSet<Page> NoBatch() {
+    LOG(FATAL) << "Proxy DMatrix cannot return data batch.";
+    return BatchSet<Page>(BatchIterator<Page>(nullptr));
+  }
+
   DMatrix* Slice(common::Span<int32_t const> /*ridxs*/) override {
     LOG(FATAL) << "Slicing DMatrix is not supported for Proxy DMatrix.";
     return nullptr;
@@ -89,29 +89,19 @@ class DMatrixProxy : public DMatrix {
     LOG(FATAL) << "Slicing DMatrix columns is not supported for Proxy DMatrix.";
     return nullptr;
   }
-  BatchSet<SparsePage> GetRowBatches() override {
-    LOG(FATAL) << "Not implemented.";
-    return BatchSet<SparsePage>(BatchIterator<SparsePage>(nullptr));
-  }
-  BatchSet<CSCPage> GetColumnBatches() override {
-    LOG(FATAL) << "Not implemented.";
-    return BatchSet<CSCPage>(BatchIterator<CSCPage>(nullptr));
-  }
-  BatchSet<SortedCSCPage> GetSortedColumnBatches() override {
-    LOG(FATAL) << "Not implemented.";
-    return BatchSet<SortedCSCPage>(BatchIterator<SortedCSCPage>(nullptr));
+  BatchSet<SparsePage> GetRowBatches() override { return NoBatch<SparsePage>(); }
+  BatchSet<CSCPage> GetColumnBatches(Context const*) override { return NoBatch<CSCPage>(); }
+  BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const*) override {
+    return NoBatch<SortedCSCPage>();
   }
-  BatchSet<EllpackPage> GetEllpackBatches(const BatchParam&) override {
-    LOG(FATAL) << "Not implemented.";
-    return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(nullptr));
+  BatchSet<EllpackPage> GetEllpackBatches(Context const*, BatchParam const&) override {
return NoBatch(); } - BatchSet GetGradientIndex(const BatchParam&) override { - LOG(FATAL) << "Not implemented."; - return BatchSet(BatchIterator(nullptr)); + BatchSet GetGradientIndex(Context const*, BatchParam const&) override { + return NoBatch(); } - BatchSet GetExtBatches(BatchParam const&) override { - LOG(FATAL) << "Not implemented."; - return BatchSet(BatchIterator(nullptr)); + BatchSet GetExtBatches(Context const*, BatchParam const&) override { + return NoBatch(); } std::any Adapter() const { return batch_; } }; @@ -144,8 +134,7 @@ decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_ } else { LOG(FATAL) << "Unknown type: " << proxy->Adapter().type().name(); } - return std::result_of_t>()->Value()))>(); + return std::result_of_t>()->Value()))>(); } } } // namespace xgboost::data diff --git a/src/data/simple_dmatrix.cc b/src/data/simple_dmatrix.cc index e916311a5057..ab75cf03ecee 100644 --- a/src/data/simple_dmatrix.cc +++ b/src/data/simple_dmatrix.cc @@ -11,10 +11,12 @@ #include #include +#include "../common/error_msg.h" // for InconsistentMaxBin #include "../common/random.h" #include "../common/threading_utils.h" #include "./simple_batch_iterator.h" #include "adapter.h" +#include "batch_utils.h" // for CheckEmpty, RegenGHist #include "gradient_index.h" #include "xgboost/c_api.h" #include "xgboost/data.h" @@ -28,7 +30,7 @@ const MetaInfo& SimpleDMatrix::Info() const { return info_; } DMatrix* SimpleDMatrix::Slice(common::Span ridxs) { auto out = new SimpleDMatrix; SparsePage& out_page = *out->sparse_page_; - for (auto const &page : this->GetBatches()) { + for (auto const& page : this->GetBatches()) { auto batch = page.GetView(); auto& h_data = out_page.data.HostVector(); auto& h_offset = out_page.offset.HostVector(); @@ -42,7 +44,7 @@ DMatrix* SimpleDMatrix::Slice(common::Span ridxs) { out->Info() = this->Info().Slice(ridxs); out->Info().num_nonzero_ = h_offset.back(); } - out->ctx_ = this->ctx_; + out->fmat_ctx_ = this->fmat_ctx_; return out; } @@ -52,7 +54,7 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) { auto const slice_size = info_.num_col_ / num_slices; auto const slice_start = slice_size * slice_id; auto const slice_end = (slice_id == num_slices - 1) ? 
info_.num_col_ : slice_start + slice_size; - for (auto const &page : this->GetBatches()) { + for (auto const& page : this->GetBatches()) { auto batch = page.GetView(); auto& h_data = out_page.data.HostVector(); auto& h_offset = out_page.offset.HostVector(); @@ -60,9 +62,8 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) { for (bst_row_t i = 0; i < this->Info().num_row_; i++) { auto inst = batch[i]; auto prev_size = h_data.size(); - std::copy_if(inst.begin(), inst.end(), std::back_inserter(h_data), [&](Entry e) { - return e.index >= slice_start && e.index < slice_end; - }); + std::copy_if(inst.begin(), inst.end(), std::back_inserter(h_data), + [&](Entry e) { return e.index >= slice_start && e.index < slice_end; }); rptr += h_data.size() - prev_size; h_offset.emplace_back(rptr); } @@ -73,7 +74,7 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) { return out; } -void SimpleDMatrix::ReindexFeatures() { +void SimpleDMatrix::ReindexFeatures(Context const* ctx) { if (info_.IsVerticalFederated()) { std::vector buffer(collective::GetWorldSize()); buffer[collective::GetRank()] = info_.num_col_; @@ -82,72 +83,115 @@ void SimpleDMatrix::ReindexFeatures() { if (offset == 0) { return; } - sparse_page_->Reindex(offset, ctx_.Threads()); + sparse_page_->Reindex(offset, ctx->Threads()); } } BatchSet SimpleDMatrix::GetRowBatches() { // since csr is the default data structure so `source_` is always available. - auto begin_iter = BatchIterator( - new SimpleBatchIteratorImpl(sparse_page_)); + auto begin_iter = + BatchIterator(new SimpleBatchIteratorImpl(sparse_page_)); return BatchSet(begin_iter); } -BatchSet SimpleDMatrix::GetColumnBatches() { +BatchSet SimpleDMatrix::GetColumnBatches(Context const* ctx) { // column page doesn't exist, generate it if (!column_page_) { - column_page_.reset(new CSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx_.Threads()))); + column_page_.reset(new CSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx->Threads()))); } - auto begin_iter = - BatchIterator(new SimpleBatchIteratorImpl(column_page_)); + auto begin_iter = BatchIterator(new SimpleBatchIteratorImpl(column_page_)); return BatchSet(begin_iter); } -BatchSet SimpleDMatrix::GetSortedColumnBatches() { +BatchSet SimpleDMatrix::GetSortedColumnBatches(Context const* ctx) { // Sorted column page doesn't exist, generate it if (!sorted_column_page_) { sorted_column_page_.reset( - new SortedCSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx_.Threads()))); - sorted_column_page_->SortRows(ctx_.Threads()); + new SortedCSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx->Threads()))); + sorted_column_page_->SortRows(ctx->Threads()); } - auto begin_iter = BatchIterator( - new SimpleBatchIteratorImpl(sorted_column_page_)); + auto begin_iter = + BatchIterator(new SimpleBatchIteratorImpl(sorted_column_page_)); return BatchSet(begin_iter); } -namespace { -void CheckEmpty(BatchParam const& l, BatchParam const& r) { - if (l == BatchParam{}) { - CHECK(r != BatchParam{}) << "Batch parameter is not initialized."; +BatchSet SimpleDMatrix::GetEllpackBatches(Context const* ctx, + const BatchParam& param) { + detail::CheckEmpty(batch_param_, param); + if (ellpack_page_ && param.Initialized() && param.forbid_regen) { + if (detail::RegenGHist(batch_param_, param)) { + CHECK_EQ(batch_param_.max_bin, param.max_bin) << error::InconsistentMaxBin(); + } + CHECK(!detail::RegenGHist(batch_param_, param)); } -} -} // anonymous namespace - -BatchSet SimpleDMatrix::GetEllpackBatches(const BatchParam& param) { - // 
ELLPACK page doesn't exist, generate it - CheckEmpty(batch_param_, param); - if (!ellpack_page_ || RegenGHist(batch_param_, param)) { - CHECK_GE(param.gpu_id, 0); + if (!ellpack_page_ || detail::RegenGHist(batch_param_, param)) { + // ELLPACK page doesn't exist, generate it + LOG(INFO) << "Generating new Ellpack page."; + // These places can ask for an ellpack page: + // - GPU hist: the ctx must be on CUDA. + // - IterativeDMatrix::InitFromCUDA: The ctx must be on CUDA. + // - IterativeDMatrix::InitFromCPU: It asks for ellpack only if it exists. It should + // not regen, otherwise it indicates a mismatched parameter like max_bin. CHECK_GE(param.max_bin, 2); - ellpack_page_.reset(new EllpackPage(this, param)); - batch_param_ = param; + if (ctx->IsCUDA()) { + // The context passed in is on GPU; we pick it first since we prioritize the context + // in the Booster. + ellpack_page_.reset(new EllpackPage(ctx, this, param)); + } else if (fmat_ctx_.IsCUDA()) { + // DMatrix was initialized on GPU, so we use the context from initialization. + ellpack_page_.reset(new EllpackPage(&fmat_ctx_, this, param)); + } else { + // Mismatched parameter: the user set a new max_bin during training. + auto cuda_ctx = ctx->MakeCUDA(); + ellpack_page_.reset(new EllpackPage(&cuda_ctx, this, param)); + } + + batch_param_ = param.MakeCache(); } auto begin_iter = BatchIterator(new SimpleBatchIteratorImpl(ellpack_page_)); return BatchSet(begin_iter); } -BatchSet SimpleDMatrix::GetGradientIndex(const BatchParam& param) { - CheckEmpty(batch_param_, param); - if (!gradient_index_ || RegenGHist(batch_param_, param)) { +BatchSet SimpleDMatrix::GetGradientIndex(Context const* ctx, + const BatchParam& param) { + detail::CheckEmpty(batch_param_, param); + // Check whether we can regenerate the gradient index. This is to keep the evaluation + // data consistent with the training data. + if (gradient_index_ && param.Initialized() && param.forbid_regen) { + if (detail::RegenGHist(batch_param_, param)) { + CHECK_EQ(batch_param_.max_bin, param.max_bin) << error::InconsistentMaxBin(); + } + CHECK(!detail::RegenGHist(batch_param_, param)) << "Inconsistent sparse threshold."; + } + if (!gradient_index_ || detail::RegenGHist(batch_param_, param)) { + // GIDX page doesn't exist, generate it LOG(INFO) << "Generating new Gradient Index."; + // These places can ask for a CSR gidx: + // - CPU Hist: the ctx must be on CPU. + // - IterativeDMatrix::InitFromCPU: The ctx must be on CPU. + // - IterativeDMatrix::InitFromCUDA: It asks for gidx only if it exists. It should not + // regen, otherwise it indicates a mismatched parameter like max_bin. CHECK_GE(param.max_bin, 2); - CHECK_EQ(param.gpu_id, -1); // Used only by approx. auto sorted_sketch = param.regen; - gradient_index_.reset(new GHistIndexMatrix(this, param.max_bin, param.sparse_thresh, - sorted_sketch, this->ctx_.Threads(), param.hess)); - batch_param_ = param; + if (ctx->IsCPU()) { + // The context passed in is on CPU; we pick it first since we prioritize the context + // in the Booster. + gradient_index_.reset(new GHistIndexMatrix{ctx, this, param.max_bin, param.sparse_thresh, + sorted_sketch, param.hess}); + } else if (fmat_ctx_.IsCPU()) { + // DMatrix was initialized on CPU, so we use the context from initialization. + gradient_index_.reset(new GHistIndexMatrix{&fmat_ctx_, this, param.max_bin, + param.sparse_thresh, sorted_sketch, param.hess}); + } else { + // Mismatched parameter: the user set a new max_bin during training.
+ auto cpu_ctx = ctx->MakeCPU(); + gradient_index_.reset(new GHistIndexMatrix{&cpu_ctx, this, param.max_bin, param.sparse_thresh, + sorted_sketch, param.hess}); + } + + batch_param_ = param.MakeCache(); CHECK_EQ(batch_param_.hess.data(), param.hess.data()); } auto begin_iter = BatchIterator( @@ -155,7 +199,7 @@ BatchSet SimpleDMatrix::GetGradientIndex(const BatchParam& par return BatchSet(begin_iter); } -BatchSet SimpleDMatrix::GetExtBatches(BatchParam const&) { +BatchSet SimpleDMatrix::GetExtBatches(Context const*, BatchParam const&) { auto casted = std::make_shared(sparse_page_); CHECK(casted); auto begin_iter = @@ -166,7 +210,8 @@ BatchSet SimpleDMatrix::GetExtBatches(BatchParam const&) { template SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread, DataSplitMode data_split_mode) { - this->ctx_.nthread = nthread; + Context ctx; + ctx.Init(Args{{"nthread", std::to_string(nthread)}}); std::vector qids; uint64_t default_max = std::numeric_limits::max(); @@ -176,13 +221,13 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread, auto& data_vec = sparse_page_->data.HostVector(); uint64_t inferred_num_columns = 0; uint64_t total_batch_size = 0; - // batch_size is either number of rows or cols, depending on data layout + // batch_size is either number of rows or cols, depending on data layout adapter->BeforeFirst(); // Iterate over batches of input data while (adapter->Next()) { auto& batch = adapter->Value(); - auto batch_max_columns = sparse_page_->Push(batch, missing, ctx_.Threads()); + auto batch_max_columns = sparse_page_->Push(batch, missing, ctx.Threads()); inferred_num_columns = std::max(batch_max_columns, inferred_num_columns); total_batch_size += batch.Size(); // Append meta information if available @@ -229,19 +274,18 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread, info_.num_col_ = adapter->NumColumns(); } - // Synchronise worker columns info_.data_split_mode = data_split_mode; - ReindexFeatures(); + ReindexFeatures(&ctx); info_.SynchronizeNumberOfColumns(); if (adapter->NumRows() == kAdapterUnknownSize) { - using IteratorAdapterT - = IteratorAdapter; + using IteratorAdapterT = + IteratorAdapter; // If AdapterT is either IteratorAdapter or FileAdapter type, use the total batch size to // determine the correct number of rows, as offset_vec may be too short - if (std::is_same::value - || std::is_same::value) { + if (std::is_same::value || + std::is_same::value) { info_.num_row_ = total_batch_size; // Ensure offset_vec.size() - 1 == [number of rows] while (offset_vec.size() - 1 < total_batch_size) { @@ -265,9 +309,11 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread, info_.num_nonzero_ = data_vec.size(); // Sort the index for row partitioners used by various tree methods.
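The two getters above resolve the device context the same way in mirror image: prefer the caller's context when it already sits on the matching device, fall back to the context the DMatrix was constructed with (fmat_ctx_), and only derive a fresh context when both mismatch. A condensed sketch of the CPU-side rule, using only the Context helpers this patch relies on; ChooseCPUCtx is a hypothetical name, not part of the patch:

#include "xgboost/context.h"

namespace {
// Mirrors the branch order in GetGradientIndex: the Booster's context wins when
// it is already on CPU, then the DMatrix's construction-time context, and
// finally a CPU context derived from the caller's (the mismatched case).
xgboost::Context ChooseCPUCtx(xgboost::Context const* ctx, xgboost::Context const& fmat_ctx) {
  if (ctx->IsCPU()) {
    return *ctx;
  }
  if (fmat_ctx.IsCPU()) {
    return fmat_ctx;
  }
  return ctx->MakeCPU();
}
}  // namespace

The CUDA branch in GetEllpackBatches is symmetric, with IsCUDA and MakeCUDA in place of the CPU helpers.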
- if (!sparse_page_->IsIndicesSorted(this->ctx_.Threads())) { - sparse_page_->SortIndices(this->ctx_.Threads()); + if (!sparse_page_->IsIndicesSorted(ctx.Threads())) { + sparse_page_->SortIndices(ctx.Threads()); } + + this->fmat_ctx_ = ctx; } SimpleDMatrix::SimpleDMatrix(dmlc::Stream* in_stream) { @@ -280,12 +326,12 @@ SimpleDMatrix::SimpleDMatrix(dmlc::Stream* in_stream) { } void SimpleDMatrix::SaveToLocalFile(const std::string& fname) { - std::unique_ptr fo(dmlc::Stream::Create(fname.c_str(), "w")); - int tmagic = kMagic; - fo->Write(tmagic); - info_.SaveBinary(fo.get()); - fo->Write(sparse_page_->offset.HostVector()); - fo->Write(sparse_page_->data.HostVector()); + std::unique_ptr fo(dmlc::Stream::Create(fname.c_str(), "w")); + int tmagic = kMagic; + fo->Write(tmagic); + info_.SaveBinary(fo.get()); + fo->Write(sparse_page_->offset.HostVector()); + fo->Write(sparse_page_->data.HostVector()); } template SimpleDMatrix::SimpleDMatrix(DenseAdapter* adapter, float missing, int nthread, @@ -305,14 +351,14 @@ template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing, template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing, int nthread, DataSplitMode data_split_mode); template SimpleDMatrix::SimpleDMatrix( - IteratorAdapter - *adapter, + IteratorAdapter* adapter, float missing, int nthread, DataSplitMode data_split_mode); template <> SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, int nthread, DataSplitMode data_split_mode) { - ctx_.nthread = nthread; + Context ctx; + ctx.nthread = nthread; auto& offset_vec = sparse_page_->offset.HostVector(); auto& data_vec = sparse_page_->data.HostVector(); @@ -326,7 +372,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i size_t num_elements = 0; size_t num_rows = 0; // Import Arrow RecordBatches -#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(ctx_.Threads()) +#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(ctx.Threads()) for (int i = 0; i < static_cast(batches.size()); ++i) { // NOLINT num_elements += batches[i]->Import(missing); num_rows += batches[i]->Size(); @@ -348,7 +394,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i data_vec.resize(total_elements); offset_vec.resize(total_batch_size + 1); // Copy data into DMatrix -#pragma omp parallel num_threads(ctx_.Threads()) +#pragma omp parallel num_threads(ctx.Threads()) { #pragma omp for nowait for (int i = 0; i < static_cast(batches.size()); ++i) { // NOLINT @@ -372,12 +418,14 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i // Synchronise worker columns info_.num_col_ = adapter->NumColumns(); info_.data_split_mode = data_split_mode; - ReindexFeatures(); + ReindexFeatures(&ctx); info_.SynchronizeNumberOfColumns(); info_.num_row_ = total_batch_size; info_.num_nonzero_ = data_vec.size(); CHECK_EQ(offset_vec.back(), info_.num_nonzero_); + + fmat_ctx_ = ctx; } } // namespace data } // namespace xgboost diff --git a/src/data/simple_dmatrix.cu b/src/data/simple_dmatrix.cu index fc09f52c457d..b2be701d5e13 100644 --- a/src/data/simple_dmatrix.cu +++ b/src/data/simple_dmatrix.cu @@ -1,12 +1,14 @@ -/*! 
- * Copyright 2019-2021 by XGBoost Contributors +/** + * Copyright 2019-2023, XGBoost Contributors * \file simple_dmatrix.cu */ #include -#include + +#include "device_adapter.cuh" // for CurrentDevice #include "simple_dmatrix.cuh" #include "simple_dmatrix.h" -#include "device_adapter.cuh" +#include "xgboost/context.h" // for Context +#include "xgboost/data.h" namespace xgboost { namespace data { @@ -15,7 +17,7 @@ namespace data { // Current implementation assumes a single batch. More batches can // be supported in future. Does not currently support inferring row/column size template -SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread*/, +SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, std::int32_t nthread, DataSplitMode data_split_mode) { CHECK(data_split_mode != DataSplitMode::kCol) << "Column-wise data split is currently not supported on the GPU."; @@ -24,6 +26,9 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread CHECK_GE(device, 0); dh::safe_cuda(cudaSetDevice(device)); + Context ctx; + ctx.Init(Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(device)}}); + CHECK(adapter->NumRows() != kAdapterUnknownSize); CHECK(adapter->NumColumns() != kAdapterUnknownSize); @@ -33,13 +38,14 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread // Enforce single batch CHECK(!adapter->Next()); - info_.num_nonzero_ = - CopyToSparsePage(adapter->Value(), device, missing, sparse_page_.get()); + info_.num_nonzero_ = CopyToSparsePage(adapter->Value(), device, missing, sparse_page_.get()); info_.num_col_ = adapter->NumColumns(); info_.num_row_ = adapter->NumRows(); // Synchronise worker columns info_.data_split_mode = data_split_mode; info_.SynchronizeNumberOfColumns(); + + this->fmat_ctx_ = ctx; } template SimpleDMatrix::SimpleDMatrix(CudfAdapter* adapter, float missing, diff --git a/src/data/simple_dmatrix.h b/src/data/simple_dmatrix.h index 853e765af72e..56685c1e6c77 100644 --- a/src/data/simple_dmatrix.h +++ b/src/data/simple_dmatrix.h @@ -32,7 +32,7 @@ class SimpleDMatrix : public DMatrix { MetaInfo& Info() override; const MetaInfo& Info() const override; - Context const* Ctx() const override { return &ctx_; } + Context const* Ctx() const override { return &fmat_ctx_; } bool SingleColBlock() const override { return true; } DMatrix* Slice(common::Span ridxs) override; @@ -43,11 +43,11 @@ class SimpleDMatrix : public DMatrix { protected: BatchSet GetRowBatches() override; - BatchSet GetColumnBatches() override; - BatchSet GetSortedColumnBatches() override; - BatchSet GetEllpackBatches(const BatchParam& param) override; - BatchSet GetGradientIndex(const BatchParam& param) override; - BatchSet GetExtBatches(BatchParam const& param) override; + BatchSet GetColumnBatches(Context const* ctx) override; + BatchSet GetSortedColumnBatches(Context const* ctx) override; + BatchSet GetEllpackBatches(Context const* ctx, const BatchParam& param) override; + BatchSet GetGradientIndex(Context const* ctx, const BatchParam& param) override; + BatchSet GetExtBatches(Context const* ctx, BatchParam const& param) override; MetaInfo info_; // Primary storage type @@ -69,10 +69,11 @@ class SimpleDMatrix : public DMatrix { * starting from 0. However, all the algorithms assume the features are globally indexed, so we * reindex the features based on the offset needed to obtain the global view. 
*/ - void ReindexFeatures(); + void ReindexFeatures(Context const* ctx); private: - Context ctx_; + // Context used only for DMatrix initialization. + Context fmat_ctx_; }; } // namespace data } // namespace xgboost diff --git a/src/data/sparse_page_dmatrix.cc b/src/data/sparse_page_dmatrix.cc index 5e5b622af264..f84fa8c017bf 100644 --- a/src/data/sparse_page_dmatrix.cc +++ b/src/data/sparse_page_dmatrix.cc @@ -1,6 +1,7 @@ -/*! - * Copyright 2014-2022 by Contributors +/** + * Copyright 2014-2023 by XGBoost Contributors * \file sparse_page_dmatrix.cc + * * \brief The external memory version of Page Iterator. * \author Tianqi Chen */ @@ -8,11 +9,10 @@ #include "../collective/communicator-inl.h" #include "./simple_batch_iterator.h" +#include "batch_utils.h" // for RegenGHist #include "gradient_index.h" -namespace xgboost { -namespace data { - +namespace xgboost::data { MetaInfo &SparsePageDMatrix::Info() { return info_; } const MetaInfo &SparsePageDMatrix::Info() const { return info_; } @@ -46,7 +46,9 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p int32_t nthreads, std::string cache_prefix) : proxy_{proxy_handle}, iter_{iter_handle}, reset_{reset}, next_{next}, missing_{missing}, cache_prefix_{std::move(cache_prefix)} { - ctx_.nthread = nthreads; + Context ctx; + ctx.nthread = nthreads; + cache_prefix_ = cache_prefix_.empty() ? "DMatrix" : cache_prefix_; if (collective::IsDistributed()) { cache_prefix_ += ("-r" + std::to_string(collective::GetRank())); @@ -81,7 +83,7 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p // the proxy is iterated together with the sparse page source so we can obtain all // information in 1 pass. - for (auto const &page : this->GetRowBatchesImpl()) { + for (auto const &page : this->GetRowBatchesImpl(&ctx)) { this->info_.Extend(std::move(proxy->Info()), false, false); n_features = std::max(n_features, num_cols()); n_samples += num_rows(); @@ -98,9 +100,11 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p info_.SynchronizeNumberOfColumns(); CHECK_NE(info_.num_col_, 0); + + fmat_ctx_ = ctx; } -void SparsePageDMatrix::InitializeSparsePage() { +void SparsePageDMatrix::InitializeSparsePage(Context const *ctx) { auto id = MakeCache(this, ".row.page", cache_prefix_, &cache_info_); // Don't use proxy DMatrix once this is already initialized, this allows users to // release the iterator and data. @@ -110,33 +114,33 @@ void SparsePageDMatrix::InitializeSparsePage() { return; } - auto iter = DataIterProxy{ - iter_, reset_, next_}; + auto iter = DataIterProxy{iter_, reset_, next_}; DMatrixProxy *proxy = MakeProxy(proxy_); sparse_page_source_.reset(); // clear before creating new one to prevent conflicts. - sparse_page_source_ = std::make_shared( - iter, proxy, this->missing_, this->ctx_.Threads(), this->info_.num_col_, - this->n_batches_, cache_info_.at(id)); + sparse_page_source_ = std::make_shared(iter, proxy, this->missing_, + ctx->Threads(), this->info_.num_col_, + this->n_batches_, cache_info_.at(id)); } -BatchSet SparsePageDMatrix::GetRowBatchesImpl() { - this->InitializeSparsePage(); +BatchSet SparsePageDMatrix::GetRowBatchesImpl(Context const* ctx) { + this->InitializeSparsePage(ctx); auto begin_iter = BatchIterator(sparse_page_source_); return BatchSet(BatchIterator(begin_iter)); } BatchSet SparsePageDMatrix::GetRowBatches() { - return this->GetRowBatchesImpl(); + // Use context from initialization for the default row page. 
+ return this->GetRowBatchesImpl(&fmat_ctx_); } -BatchSet SparsePageDMatrix::GetColumnBatches() { +BatchSet SparsePageDMatrix::GetColumnBatches(Context const *ctx) { auto id = MakeCache(this, ".col.page", cache_prefix_, &cache_info_); CHECK_NE(this->Info().num_col_, 0); - this->InitializeSparsePage(); + this->InitializeSparsePage(ctx); if (!column_source_) { - column_source_ = std::make_shared( - this->missing_, this->ctx_.Threads(), this->Info().num_col_, - this->n_batches_, cache_info_.at(id), sparse_page_source_); + column_source_ = + std::make_shared(this->missing_, ctx->Threads(), this->Info().num_col_, + this->n_batches_, cache_info_.at(id), sparse_page_source_); } else { column_source_->Reset(); } @@ -144,14 +148,14 @@ BatchSet SparsePageDMatrix::GetColumnBatches() { return BatchSet(BatchIterator(begin_iter)); } -BatchSet SparsePageDMatrix::GetSortedColumnBatches() { +BatchSet SparsePageDMatrix::GetSortedColumnBatches(Context const *ctx) { auto id = MakeCache(this, ".sorted.col.page", cache_prefix_, &cache_info_); CHECK_NE(this->Info().num_col_, 0); - this->InitializeSparsePage(); + this->InitializeSparsePage(ctx); if (!sorted_column_source_) { sorted_column_source_ = std::make_shared( - this->missing_, this->ctx_.Threads(), this->Info().num_col_, - this->n_batches_, cache_info_.at(id), sparse_page_source_); + this->missing_, ctx->Threads(), this->Info().num_col_, this->n_batches_, cache_info_.at(id), + sparse_page_source_); } else { sorted_column_source_->Reset(); } @@ -159,27 +163,27 @@ BatchSet SparsePageDMatrix::GetSortedColumnBatches() { return BatchSet(BatchIterator(begin_iter)); } -BatchSet SparsePageDMatrix::GetGradientIndex(const BatchParam ¶m) { +BatchSet SparsePageDMatrix::GetGradientIndex(Context const *ctx, + const BatchParam ¶m) { CHECK_GE(param.max_bin, 2); auto id = MakeCache(this, ".gradient_index.page", cache_prefix_, &cache_info_); - this->InitializeSparsePage(); - if (!cache_info_.at(id)->written || RegenGHist(batch_param_, param)) { + this->InitializeSparsePage(ctx); + if (!cache_info_.at(id)->written || detail::RegenGHist(batch_param_, param)) { cache_info_.erase(id); MakeCache(this, ".gradient_index.page", cache_prefix_, &cache_info_); LOG(INFO) << "Generating new Gradient Index."; // Use sorted sketch for approx. auto sorted_sketch = param.regen; - auto cuts = - common::SketchOnDMatrix(this, param.max_bin, ctx_.Threads(), sorted_sketch, param.hess); - this->InitializeSparsePage(); // reset after use. + auto cuts = common::SketchOnDMatrix(ctx, this, param.max_bin, sorted_sketch, param.hess); + this->InitializeSparsePage(ctx); // reset after use. 
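GetColumnBatches and GetSortedColumnBatches above share one shape: build the page source on the first request, then merely Reset() it on later requests so the cached pages are replayed instead of rebuilt. The same shape extracted into a generic sketch; MakeOrReset is a hypothetical name:

#include <memory>
#include <utility>

// Lazily construct a page source, or rewind it when it already exists.
template <typename Source, typename... Args>
void MakeOrReset(std::shared_ptr<Source>* source, Args&&... args) {
  if (!*source) {
    *source = std::make_shared<Source>(std::forward<Args>(args)...);
  } else {
    (*source)->Reset();  // Replay cached pages rather than regenerating them.
  }
}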
batch_param_ = param; ghist_index_source_.reset(); CHECK_NE(cuts.Values().size(), 0); auto ft = this->info_.feature_types.ConstHostSpan(); ghist_index_source_.reset(new GradientIndexPageSource( - this->missing_, this->ctx_.Threads(), this->Info().num_col_, this->n_batches_, - cache_info_.at(id), param, std::move(cuts), this->IsDense(), ft, sparse_page_source_)); + this->missing_, ctx->Threads(), this->Info().num_col_, this->n_batches_, cache_info_.at(id), + param, std::move(cuts), this->IsDense(), ft, sparse_page_source_)); } else { CHECK(ghist_index_source_); ghist_index_source_->Reset(); @@ -189,11 +193,10 @@ BatchSet SparsePageDMatrix::GetGradientIndex(const BatchParam } #if !defined(XGBOOST_USE_CUDA) -BatchSet SparsePageDMatrix::GetEllpackBatches(const BatchParam &) { +BatchSet SparsePageDMatrix::GetEllpackBatches(Context const *, const BatchParam &) { common::AssertGPUSupport(); auto begin_iter = BatchIterator(ellpack_page_source_); return BatchSet(BatchIterator(begin_iter)); } #endif // !defined(XGBOOST_USE_CUDA) -} // namespace data -} // namespace xgboost +} // namespace xgboost::data diff --git a/src/data/sparse_page_dmatrix.cu b/src/data/sparse_page_dmatrix.cu index b36a0e2a37b9..0a4cde43d1fd 100644 --- a/src/data/sparse_page_dmatrix.cu +++ b/src/data/sparse_page_dmatrix.cu @@ -1,42 +1,40 @@ -/*! - * Copyright 2021 XGBoost contributors +/** + * Copyright 2021-2023 by XGBoost contributors */ -#include "sparse_page_source.h" #include "../common/hist_util.cuh" +#include "batch_utils.h" // for CheckEmpty, RegenGHist #include "ellpack_page.cuh" #include "sparse_page_dmatrix.h" +#include "sparse_page_source.h" -namespace xgboost { -namespace data { -BatchSet SparsePageDMatrix::GetEllpackBatches(const BatchParam& param) { - CHECK_GE(param.gpu_id, 0); +namespace xgboost::data { +BatchSet SparsePageDMatrix::GetEllpackBatches(Context const* ctx, + const BatchParam& param) { + CHECK(ctx->IsCUDA()); CHECK_GE(param.max_bin, 2); - if (!(batch_param_ != BatchParam{})) { - CHECK(param != BatchParam{}) << "Batch parameter is not initialized."; - } + detail::CheckEmpty(batch_param_, param); auto id = MakeCache(this, ".ellpack.page", cache_prefix_, &cache_info_); size_t row_stride = 0; - this->InitializeSparsePage(); - if (!cache_info_.at(id)->written || RegenGHist(batch_param_, param)) { + this->InitializeSparsePage(ctx); + if (!cache_info_.at(id)->written || detail::RegenGHist(batch_param_, param)) { // reinitialize the cache cache_info_.erase(id); MakeCache(this, ".ellpack.page", cache_prefix_, &cache_info_); std::unique_ptr cuts; - cuts.reset(new common::HistogramCuts{ - common::DeviceSketch(param.gpu_id, this, param.max_bin, 0)}); - this->InitializeSparsePage(); // reset after use. + cuts.reset( + new common::HistogramCuts{common::DeviceSketch(ctx->gpu_id, this, param.max_bin, 0)}); + this->InitializeSparsePage(ctx); // reset after use. row_stride = GetRowStride(this); - this->InitializeSparsePage(); // reset after use. + this->InitializeSparsePage(ctx); // reset after use. CHECK_NE(row_stride, 0); batch_param_ = param; auto ft = this->info_.feature_types.ConstDeviceSpan(); ellpack_page_source_.reset(); // release resources. 
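Cache reuse in these getters is gated on detail::RegenGHist from batch_utils.h, which this patch only includes. Its body is not shown in the diff; the following is a plausible sketch consistent with the regen flag and BatchParam::ParamNotEqual semantics used here, so the real implementation may differ:

#include "xgboost/data.h"  // for BatchParam

// Illustrative reconstruction only: regenerate when explicitly requested (the
// approx method passes a fresh hessian), or when max_bin/sparse_thresh no
// longer match the cached parameters.
inline bool RegenGHistSketch(xgboost::BatchParam const& cached,
                             xgboost::BatchParam const& requested) {
  return requested.regen || cached.ParamNotEqual(requested);
}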
ellpack_page_source_.reset(new EllpackPageSource( - this->missing_, this->ctx_.Threads(), this->Info().num_col_, - this->n_batches_, cache_info_.at(id), param, std::move(cuts), - this->IsDense(), row_stride, ft, sparse_page_source_)); + this->missing_, ctx->Threads(), this->Info().num_col_, this->n_batches_, cache_info_.at(id), + param, std::move(cuts), this->IsDense(), row_stride, ft, sparse_page_source_, ctx->gpu_id)); } else { CHECK(sparse_page_source_); ellpack_page_source_->Reset(); @@ -45,5 +43,4 @@ BatchSet SparsePageDMatrix::GetEllpackBatches(const BatchParam& par auto begin_iter = BatchIterator(ellpack_page_source_); return BatchSet(BatchIterator(begin_iter)); } -} // namespace data -} // namespace xgboost +} // namespace xgboost::data diff --git a/src/data/sparse_page_dmatrix.h b/src/data/sparse_page_dmatrix.h index aa0be69845aa..02aa9a5c0e9e 100644 --- a/src/data/sparse_page_dmatrix.h +++ b/src/data/sparse_page_dmatrix.h @@ -1,5 +1,5 @@ -/*! - * Copyright 2015-2021 by Contributors +/** + * Copyright 2015-2023, XGBoost Contributors * \file sparse_page_dmatrix.h * \brief External-memory version of DMatrix. * \author Tianqi Chen @@ -9,12 +9,13 @@ #include #include + #include +#include #include #include #include #include -#include #include "ellpack_page_source.h" #include "gradient_index_page_source.h" @@ -69,19 +70,18 @@ class SparsePageDMatrix : public DMatrix { XGDMatrixCallbackNext *next_; float missing_; - Context ctx_; + Context fmat_ctx_; std::string cache_prefix_; - uint32_t n_batches_ {0}; + uint32_t n_batches_{0}; // sparse page is the source to other page types, we make a special member function. - void InitializeSparsePage(); + void InitializeSparsePage(Context const *ctx); // Non-virtual version that can be used in constructor - BatchSet GetRowBatchesImpl(); + BatchSet GetRowBatchesImpl(Context const *ctx); public: - explicit SparsePageDMatrix(DataIterHandle iter, DMatrixHandle proxy, - DataIterResetCallback *reset, - XGDMatrixCallbackNext *next, float missing, - int32_t nthreads, std::string cache_prefix); + explicit SparsePageDMatrix(DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset, + XGDMatrixCallbackNext *next, float missing, int32_t nthreads, + std::string cache_prefix); ~SparsePageDMatrix() override { // Clear out all resources before deleting the cache file. 
@@ -98,9 +98,9 @@ class SparsePageDMatrix : public DMatrix { } } - MetaInfo& Info() override; - const MetaInfo& Info() const override; - Context const* Ctx() const override { return &ctx_; } + MetaInfo &Info() override; + const MetaInfo &Info() const override; + Context const *Ctx() const override { return &fmat_ctx_; } bool SingleColBlock() const override { return false; } DMatrix *Slice(common::Span) override { @@ -114,11 +114,11 @@ class SparsePageDMatrix : public DMatrix { private: BatchSet GetRowBatches() override; - BatchSet GetColumnBatches() override; - BatchSet GetSortedColumnBatches() override; - BatchSet GetEllpackBatches(const BatchParam& param) override; - BatchSet GetGradientIndex(const BatchParam&) override; - BatchSet GetExtBatches(BatchParam const &) override { + BatchSet GetColumnBatches(Context const *ctx) override; + BatchSet GetSortedColumnBatches(Context const *ctx) override; + BatchSet GetEllpackBatches(Context const *ctx, const BatchParam ¶m) override; + BatchSet GetGradientIndex(Context const *ctx, const BatchParam &) override; + BatchSet GetExtBatches(Context const *, BatchParam const &) override { LOG(FATAL) << "Can not obtain a single CSR page for external memory DMatrix"; return BatchSet(BatchIterator(nullptr)); } @@ -141,9 +141,8 @@ inline std::string MakeId(std::string prefix, SparsePageDMatrix *ptr) { return prefix + "-" + ss.str(); } -inline std::string -MakeCache(SparsePageDMatrix *ptr, std::string format, std::string prefix, - std::map> *out) { +inline std::string MakeCache(SparsePageDMatrix *ptr, std::string format, std::string prefix, + std::map> *out) { auto &cache_info = *out; auto name = MakeId(prefix, ptr); auto id = name + format; diff --git a/src/linear/coordinate_common.h b/src/linear/coordinate_common.h index f61c423f0062..f08856bd1bfc 100644 --- a/src/linear/coordinate_common.h +++ b/src/linear/coordinate_common.h @@ -1,5 +1,5 @@ -/*! - * Copyright 2018 by Contributors +/** + * Copyright 2018-2023 by XGBoost Contributors * \author Rory Mitchell */ #pragma once @@ -78,11 +78,12 @@ inline double CoordinateDeltaBias(double sum_grad, double sum_hess) { * * \return The gradient and diagonal Hessian entry for a given feature. */ -inline std::pair GetGradient(int group_idx, int num_group, int fidx, - const std::vector &gpair, +inline std::pair GetGradient(Context const *ctx, int group_idx, int num_group, + bst_feature_t fidx, + std::vector const &gpair, DMatrix *p_fmat) { double sum_grad = 0.0, sum_hess = 0.0; - for (const auto &batch : p_fmat->GetBatches()) { + for (const auto &batch : p_fmat->GetBatches(ctx)) { auto page = batch.GetView(); auto col = page[fidx]; const auto ndata = static_cast(col.size()); @@ -115,7 +116,7 @@ inline std::pair GetGradientParallel(Context const *ctx, int gro std::vector sum_grad_tloc(ctx->Threads(), 0.0); std::vector sum_hess_tloc(ctx->Threads(), 0.0); - for (const auto &batch : p_fmat->GetBatches()) { + for (const auto &batch : p_fmat->GetBatches(ctx)) { auto page = batch.GetView(); auto col = page[fidx]; const auto ndata = static_cast(col.size()); @@ -177,16 +178,16 @@ inline std::pair GetBiasGradientParallel(int group_idx, int num_ * \param in_gpair The gradient vector to be updated. * \param p_fmat The input feature matrix. 
*/ -inline void UpdateResidualParallel(int fidx, int group_idx, int num_group, - float dw, std::vector *in_gpair, - DMatrix *p_fmat, int32_t n_threads) { +inline void UpdateResidualParallel(Context const *ctx, bst_feature_t fidx, int group_idx, + int num_group, float dw, std::vector *in_gpair, + DMatrix *p_fmat) { if (dw == 0.0f) return; - for (const auto &batch : p_fmat->GetBatches()) { + for (const auto &batch : p_fmat->GetBatches(ctx)) { auto page = batch.GetView(); auto col = page[fidx]; // update grad value const auto num_row = static_cast(col.size()); - common::ParallelFor(num_row, n_threads, [&](auto j) { + common::ParallelFor(num_row, ctx->Threads(), [&](auto j) { GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx]; if (p.GetHess() < 0.0f) return; p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0); @@ -203,12 +204,12 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group, * \param in_gpair The gradient vector to be updated. * \param p_fmat The input feature matrix. */ -inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias, - std::vector *in_gpair, DMatrix *p_fmat, - int32_t n_threads) { +inline void UpdateBiasResidualParallel(Context const *ctx, int group_idx, int num_group, + float dbias, std::vector *in_gpair, + DMatrix *p_fmat) { if (dbias == 0.0f) return; const auto ndata = static_cast(p_fmat->Info().num_row_); - common::ParallelFor(ndata, n_threads, [&](auto i) { + common::ParallelFor(ndata, ctx->Threads(), [&](auto i) { GradientPair &g = (*in_gpair)[i * num_group + group_idx]; if (g.GetHess() < 0.0f) return; g += GradientPair(g.GetHess() * dbias, 0); @@ -220,18 +221,16 @@ inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias * in coordinate descent algorithms. */ class FeatureSelector { - protected: - int32_t n_threads_{-1}; - public: - explicit FeatureSelector(int32_t n_threads) : n_threads_{n_threads} {} + FeatureSelector() = default; /*! \brief factory method */ - static FeatureSelector *Create(int choice, int32_t n_threads); + static FeatureSelector *Create(int choice); /*! \brief virtual destructor */ virtual ~FeatureSelector() = default; /** * \brief Setting up the selector state prior to looping through features. * + * \param ctx The booster context. * \param model The model. * \param gpair The gpair. * \param p_fmat The feature matrix. @@ -239,13 +238,12 @@ class FeatureSelector { * \param lambda Regularisation lambda. * \param param A parameter with algorithm-dependent use. */ - virtual void Setup(const gbm::GBLinearModel &, - const std::vector &, - DMatrix *, - float , float , int ) {} + virtual void Setup(Context const *, const gbm::GBLinearModel &, + const std::vector &, DMatrix *, float, float, int) {} /** * \brief Select next coordinate to update. * + * \param ctx Booster context * \param iteration The iteration in a loop through features * \param model The model. * \param group_idx Zero-based index of the group. @@ -256,11 +254,9 @@ class FeatureSelector { * * \return The index of the selected feature. -1 indicates none selected. 
*/ - virtual int NextFeature(int iteration, - const gbm::GBLinearModel &model, - int group_idx, - const std::vector &gpair, - DMatrix *p_fmat, float alpha, float lambda) = 0; + virtual int NextFeature(Context const *ctx, int iteration, const gbm::GBLinearModel &model, + int group_idx, const std::vector &gpair, DMatrix *p_fmat, + float alpha, float lambda) = 0; }; /** @@ -269,9 +265,8 @@ class FeatureSelector { class CyclicFeatureSelector : public FeatureSelector { public: using FeatureSelector::FeatureSelector; - int NextFeature(int iteration, const gbm::GBLinearModel &model, - int , const std::vector &, - DMatrix *, float, float) override { + int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int, + const std::vector &, DMatrix *, float, float) override { return iteration % model.learner_model_param->num_feature; } }; @@ -283,8 +278,7 @@ class CyclicFeatureSelector : public FeatureSelector { class ShuffleFeatureSelector : public FeatureSelector { public: using FeatureSelector::FeatureSelector; - void Setup(const gbm::GBLinearModel &model, - const std::vector&, + void Setup(Context const *, const gbm::GBLinearModel &model, const std::vector &, DMatrix *, float, float, int) override { if (feat_index_.size() == 0) { feat_index_.resize(model.learner_model_param->num_feature); @@ -293,9 +287,8 @@ class ShuffleFeatureSelector : public FeatureSelector { std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom()); } - int NextFeature(int iteration, const gbm::GBLinearModel &model, - int, const std::vector &, - DMatrix *, float, float) override { + int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int, + const std::vector &, DMatrix *, float, float) override { return feat_index_[iteration % model.learner_model_param->num_feature]; } @@ -310,9 +303,8 @@ class ShuffleFeatureSelector : public FeatureSelector { class RandomFeatureSelector : public FeatureSelector { public: using FeatureSelector::FeatureSelector; - int NextFeature(int, const gbm::GBLinearModel &model, - int, const std::vector &, - DMatrix *, float, float) override { + int NextFeature(Context const *, int, const gbm::GBLinearModel &model, int, + const std::vector &, DMatrix *, float, float) override { return common::GlobalRandom()() % model.learner_model_param->num_feature; } }; @@ -329,8 +321,7 @@ class RandomFeatureSelector : public FeatureSelector { class GreedyFeatureSelector : public FeatureSelector { public: using FeatureSelector::FeatureSelector; - void Setup(const gbm::GBLinearModel &model, - const std::vector &, + void Setup(Context const *, const gbm::GBLinearModel &model, const std::vector &, DMatrix *, float, float, int param) override { top_k_ = static_cast(param); const bst_uint ngroup = model.learner_model_param->num_output_group; @@ -344,7 +335,7 @@ class GreedyFeatureSelector : public FeatureSelector { } } - int NextFeature(int, const gbm::GBLinearModel &model, + int NextFeature(Context const* ctx, int, const gbm::GBLinearModel &model, int group_idx, const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda) override { // k-th selected feature for a group @@ -356,9 +347,9 @@ class GreedyFeatureSelector : public FeatureSelector { const bst_omp_uint nfeat = model.learner_model_param->num_feature; // Calculate univariate gradient sums std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.)); - for (const auto &batch : p_fmat->GetBatches()) { + for (const auto &batch : p_fmat->GetBatches(ctx)) { auto page = 
batch.GetView(); - common::ParallelFor(nfeat, this->n_threads_, [&](bst_omp_uint i) { + common::ParallelFor(nfeat, ctx->Threads(), [&](bst_omp_uint i) { const auto col = page[i]; const bst_uint ndata = col.size(); auto &sums = gpair_sums_[group_idx * nfeat + i]; @@ -406,9 +397,10 @@ class GreedyFeatureSelector : public FeatureSelector { class ThriftyFeatureSelector : public FeatureSelector { public: using FeatureSelector::FeatureSelector; - void Setup(const gbm::GBLinearModel &model, - const std::vector &gpair, - DMatrix *p_fmat, float alpha, float lambda, int param) override { + + void Setup(Context const *ctx, const gbm::GBLinearModel &model, + const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda, + int param) override { top_k_ = static_cast(param); if (param <= 0) top_k_ = std::numeric_limits::max(); const bst_uint ngroup = model.learner_model_param->num_output_group; @@ -422,10 +414,10 @@ class ThriftyFeatureSelector : public FeatureSelector { } // Calculate univariate gradient sums std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.)); - for (const auto &batch : p_fmat->GetBatches()) { + for (const auto &batch : p_fmat->GetBatches(ctx)) { auto page = batch.GetView(); // column-parallel is usually faster than row-parallel - common::ParallelFor(nfeat, this->n_threads_, [&](auto i) { + common::ParallelFor(nfeat, ctx->Threads(), [&](auto i) { const auto col = page[i]; const bst_uint ndata = col.size(); for (bst_uint gid = 0u; gid < ngroup; ++gid) { @@ -462,9 +454,8 @@ class ThriftyFeatureSelector : public FeatureSelector { } } - int NextFeature(int, const gbm::GBLinearModel &model, - int group_idx, const std::vector &, - DMatrix *, float, float) override { + int NextFeature(Context const *, int, const gbm::GBLinearModel &model, int group_idx, + const std::vector &, DMatrix *, float, float) override { // k-th selected feature for a group auto k = counter_[group_idx]++; // stop after either reaching top-N or going through all the features in a group @@ -482,18 +473,18 @@ class ThriftyFeatureSelector : public FeatureSelector { std::vector> gpair_sums_; }; -inline FeatureSelector *FeatureSelector::Create(int choice, int32_t n_threads) { +inline FeatureSelector *FeatureSelector::Create(int choice) { switch (choice) { case kCyclic: - return new CyclicFeatureSelector(n_threads); + return new CyclicFeatureSelector; case kShuffle: - return new ShuffleFeatureSelector(n_threads); + return new ShuffleFeatureSelector; case kThrifty: - return new ThriftyFeatureSelector(n_threads); + return new ThriftyFeatureSelector; case kGreedy: - return new GreedyFeatureSelector(n_threads); + return new GreedyFeatureSelector; case kRandom: - return new RandomFeatureSelector(n_threads); + return new RandomFeatureSelector; default: LOG(FATAL) << "unknown coordinate selector: " << choice; } diff --git a/src/linear/updater_coordinate.cc b/src/linear/updater_coordinate.cc index 29ba5451b94e..84f15d706cf9 100644 --- a/src/linear/updater_coordinate.cc +++ b/src/linear/updater_coordinate.cc @@ -1,5 +1,5 @@ -/*!
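For context on the quantities the selectors above rank features by: GetGradient accumulates per-feature gradient and hessian sums, and CoordinateDelta turns them into a weight step. A simplified L2-only form of that step; the real CoordinateDelta additionally applies L1 soft thresholding through alpha:

// Ridge-only coordinate-descent step for weight w, given the accumulated sums
// G = sum g_i x_i and H = sum h_i x_i^2 over the feature column.
inline double CoordinateDeltaL2(double sum_grad, double sum_hess, double w, double lambda) {
  if (sum_hess < 1e-5) {
    return 0.0;  // Guard against a vanishing Hessian.
  }
  return -(sum_grad + lambda * w) / (sum_hess + lambda);
}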
- * Copyright 2018 by Contributors +/** + * Copyright 2018-2023 by XGBoost Contributors * \author Rory Mitchell */ @@ -30,7 +30,7 @@ class CoordinateUpdater : public LinearUpdater { tparam_.UpdateAllowUnknown(args) }; cparam_.UpdateAllowUnknown(rest); - selector_.reset(FeatureSelector::Create(tparam_.feature_selector, ctx_->Threads())); + selector_.reset(FeatureSelector::Create(tparam_.feature_selector)); monitor_.Init("CoordinateUpdater"); } @@ -56,19 +56,17 @@ class CoordinateUpdater : public LinearUpdater { auto dbias = static_cast(tparam_.learning_rate * CoordinateDeltaBias(grad.first, grad.second)); model->Bias()[group_idx] += dbias; - UpdateBiasResidualParallel(group_idx, ngroup, dbias, &in_gpair->HostVector(), p_fmat, - ctx_->Threads()); + UpdateBiasResidualParallel(ctx_, group_idx, ngroup, dbias, &in_gpair->HostVector(), p_fmat); } // prepare for updating the weights - selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat, - tparam_.reg_alpha_denorm, - tparam_.reg_lambda_denorm, cparam_.top_k); + selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm, + tparam_.reg_lambda_denorm, cparam_.top_k); // update weights for (int group_idx = 0; group_idx < ngroup; ++group_idx) { for (unsigned i = 0U; i < model->learner_model_param->num_feature; i++) { - int fidx = selector_->NextFeature - (i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat, - tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm); + int fidx = + selector_->NextFeature(ctx_, i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat, + tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm); if (fidx < 0) break; this->UpdateFeature(fidx, group_idx, &in_gpair->HostVector(), p_fmat, model); } @@ -76,8 +74,8 @@ class CoordinateUpdater : public LinearUpdater { monitor_.Stop("UpdateFeature"); } - inline void UpdateFeature(int fidx, int group_idx, std::vector *in_gpair, - DMatrix *p_fmat, gbm::GBLinearModel *model) { + void UpdateFeature(int fidx, int group_idx, std::vector *in_gpair, DMatrix *p_fmat, + gbm::GBLinearModel *model) { const int ngroup = model->learner_model_param->num_output_group; bst_float &w = (*model)[fidx][group_idx]; auto gradient = GetGradientParallel(ctx_, group_idx, ngroup, fidx, @@ -87,8 +85,7 @@ class CoordinateUpdater : public LinearUpdater { CoordinateDelta(gradient.first, gradient.second, w, tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm)); w += dw; - UpdateResidualParallel(fidx, group_idx, ngroup, dw, in_gpair, p_fmat, - ctx_->Threads()); + UpdateResidualParallel(ctx_, fidx, group_idx, ngroup, dw, in_gpair, p_fmat); } private: diff --git a/src/linear/updater_gpu_coordinate.cu b/src/linear/updater_gpu_coordinate.cu index b63c1317ee03..7d658cf7893f 100644 --- a/src/linear/updater_gpu_coordinate.cu +++ b/src/linear/updater_gpu_coordinate.cu @@ -32,7 +32,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT void Configure(Args const& args) override { tparam_.UpdateAllowUnknown(args); coord_param_.UpdateAllowUnknown(args); - selector_.reset(FeatureSelector::Create(tparam_.feature_selector, ctx_->Threads())); + selector_.reset(FeatureSelector::Create(tparam_.feature_selector)); monitor_.Init("GPUCoordinateUpdater"); } @@ -53,7 +53,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT num_row_ = static_cast(p_fmat->Info().num_row_); CHECK(p_fmat->SingleColBlock()); - SparsePage const& batch = *(p_fmat->GetBatches().begin()); + SparsePage const &batch = *(p_fmat->GetBatches(ctx_).begin()); auto page = batch.GetView(); if (IsEmpty()) 
{ @@ -112,16 +112,15 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT this->UpdateBias(model); monitor_.Stop("UpdateBias"); // prepare for updating the weights - selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat, - tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm, - coord_param_.top_k); + selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm, + tparam_.reg_lambda_denorm, coord_param_.top_k); monitor_.Start("UpdateFeature"); for (uint32_t group_idx = 0; group_idx < model->learner_model_param->num_output_group; ++group_idx) { for (auto i = 0U; i < model->learner_model_param->num_feature; i++) { - auto fidx = selector_->NextFeature( - i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat, - tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm); + auto fidx = + selector_->NextFeature(ctx_, i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat, + tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm); if (fidx < 0) break; this->UpdateFeature(fidx, group_idx, model); } diff --git a/src/linear/updater_shotgun.cc b/src/linear/updater_shotgun.cc index d8592f1cf994..18b747f64985 100644 --- a/src/linear/updater_shotgun.cc +++ b/src/linear/updater_shotgun.cc @@ -1,5 +1,5 @@ -/*! - * Copyright 2018 by Contributors +/** + * Copyright 2018-2023 by XGBoost Contributors * \author Tianqi Chen, Rory Mitchell */ @@ -21,7 +21,7 @@ class ShotgunUpdater : public LinearUpdater { LOG(FATAL) << "Unsupported feature selector for shotgun updater.\n" << "Supported options are: {cyclic, shuffle}"; } - selector_.reset(FeatureSelector::Create(param_.feature_selector, ctx_->Threads())); + selector_.reset(FeatureSelector::Create(param_.feature_selector)); } void LoadConfig(Json const& in) override { auto const& config = get(in); @@ -45,18 +45,17 @@ class ShotgunUpdater : public LinearUpdater { auto dbias = static_cast(param_.learning_rate * CoordinateDeltaBias(grad.first, grad.second)); model->Bias()[gid] += dbias; - UpdateBiasResidualParallel(gid, ngroup, dbias, &in_gpair->HostVector(), p_fmat, - ctx_->Threads()); + UpdateBiasResidualParallel(ctx_, gid, ngroup, dbias, &in_gpair->HostVector(), p_fmat); } // lock-free parallel updates of weights - selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat, - param_.reg_alpha_denorm, param_.reg_lambda_denorm, 0); - for (const auto &batch : p_fmat->GetBatches()) { + selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, param_.reg_alpha_denorm, + param_.reg_lambda_denorm, 0); + for (const auto &batch : p_fmat->GetBatches(ctx_)) { auto page = batch.GetView(); const auto nfeat = static_cast(batch.Size()); common::ParallelFor(nfeat, ctx_->Threads(), [&](auto i) { - int ii = selector_->NextFeature(i, *model, 0, in_gpair->ConstHostVector(), p_fmat, + int ii = selector_->NextFeature(ctx_, i, *model, 0, in_gpair->ConstHostVector(), p_fmat, param_.reg_alpha_denorm, param_.reg_lambda_denorm); if (ii < 0) return; const bst_uint fid = ii; diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index b3b4c5e80a3c..aa89729897fa 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -634,7 +634,7 @@ class CPUPredictor : public Predictor { if (!p_fmat->PageExists()) { std::vector workspace(p_fmat->Info().num_col_ * kUnroll * n_threads); auto ft = p_fmat->Info().feature_types.ConstHostVector(); - for (auto const &batch : p_fmat->GetBatches({})) { + for (auto const &batch : p_fmat->GetBatches(ctx_, {})) { if (blocked) { PredictBatchByBlockOfRowsKernel( 
GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads}, model, diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu index 2439a277f13d..11662f9b8608 100644 --- a/src/predictor/gpu_predictor.cu +++ b/src/predictor/gpu_predictor.cu @@ -706,7 +706,7 @@ class GPUPredictor : public xgboost::Predictor { } } else { size_t batch_offset = 0; - for (auto const& page : dmat->GetBatches(BatchParam{})) { + for (auto const& page : dmat->GetBatches(ctx_, BatchParam{})) { dmat->Info().feature_types.SetDevice(ctx_->gpu_id); auto feature_types = dmat->Info().feature_types.ConstDeviceSpan(); this->PredictInternal( @@ -983,7 +983,7 @@ class GPUPredictor : public xgboost::Predictor { batch_offset += batch.Size(); } } else { - for (auto const& batch : p_fmat->GetBatches(BatchParam{})) { + for (auto const& batch : p_fmat->GetBatches(ctx_, BatchParam{})) { bst_row_t batch_offset = 0; EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(ctx_->gpu_id)}; size_t num_rows = batch.Size(); diff --git a/src/tree/gpu_hist/gradient_based_sampler.cu b/src/tree/gpu_hist/gradient_based_sampler.cu index 6764973360d0..f22fa172fbfc 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cu +++ b/src/tree/gpu_hist/gradient_based_sampler.cu @@ -1,5 +1,5 @@ -/*! - * Copyright 2019-2021 by XGBoost Contributors +/** + * Copyright 2019-2023 by XGBoost Contributors */ #include #include @@ -12,6 +12,7 @@ #include #include "../../common/compressed_iterator.h" +#include "../../common/cuda_context.cuh" // for CUDAContext #include "../../common/random.h" #include "../param.h" #include "gradient_based_sampler.cuh" @@ -147,25 +148,26 @@ class PoissonSampling : public thrust::binary_function gpair, DMatrix* dmat) { +GradientBasedSample NoSampling::Sample(Context const*, common::Span gpair, + DMatrix* dmat) { return {dmat->Info().num_row_, page_, gpair}; } -ExternalMemoryNoSampling::ExternalMemoryNoSampling(EllpackPageImpl const* page, - size_t n_rows, - const BatchParam& batch_param) - : batch_param_(batch_param), - page_(new EllpackPageImpl(batch_param.gpu_id, page->Cuts(), page->is_dense, - page->row_stride, n_rows)) {} +ExternalMemoryNoSampling::ExternalMemoryNoSampling(Context const* ctx, EllpackPageImpl const* page, + size_t n_rows, BatchParam batch_param) + : batch_param_{std::move(batch_param)}, + page_(new EllpackPageImpl(ctx->gpu_id, page->Cuts(), page->is_dense, page->row_stride, + n_rows)) {} -GradientBasedSample ExternalMemoryNoSampling::Sample(common::Span gpair, +GradientBasedSample ExternalMemoryNoSampling::Sample(Context const* ctx, + common::Span gpair, DMatrix* dmat) { if (!page_concatenated_) { // Concatenate all the external memory ELLPACK pages into a single in-memory page. 
size_t offset = 0; - for (auto& batch : dmat->GetBatches(batch_param_)) { + for (auto& batch : dmat->GetBatches(ctx, batch_param_)) { auto page = batch.Impl(); - size_t num_elements = page_->Copy(batch_param_.gpu_id, page, offset); + size_t num_elements = page_->Copy(ctx->gpu_id, page, offset); offset += num_elements; } page_concatenated_ = true; @@ -176,12 +178,13 @@ GradientBasedSample ExternalMemoryNoSampling::Sample(common::Span UniformSampling::UniformSampling(EllpackPageImpl const* page, float subsample) : page_(page), subsample_(subsample) {} -GradientBasedSample UniformSampling::Sample(common::Span gpair, DMatrix* dmat) { +GradientBasedSample UniformSampling::Sample(Context const* ctx, common::Span gpair, + DMatrix* dmat) { // Set gradient pair to 0 with p = 1 - subsample - thrust::replace_if(dh::tbegin(gpair), dh::tend(gpair), - thrust::counting_iterator(0), - BernoulliTrial(common::GlobalRandom()(), subsample_), - GradientPair()); + auto cuctx = ctx->CUDACtx(); + thrust::replace_if(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair), + thrust::counting_iterator(0), + BernoulliTrial(common::GlobalRandom()(), subsample_), GradientPair()); return {dmat->Info().num_row_, page_, gpair}; } @@ -192,7 +195,8 @@ ExternalMemoryUniformSampling::ExternalMemoryUniformSampling(size_t n_rows, subsample_(subsample), sample_row_index_(n_rows) {} -GradientBasedSample ExternalMemoryUniformSampling::Sample(common::Span gpair, +GradientBasedSample ExternalMemoryUniformSampling::Sample(Context const* ctx, + common::Span gpair, DMatrix* dmat) { // Set gradient pair to 0 with p = 1 - subsample thrust::replace_if(dh::tbegin(gpair), dh::tend(gpair), @@ -216,18 +220,17 @@ GradientBasedSample ExternalMemoryUniformSampling::Sample(common::SpanGetBatches(batch_param_); + auto batch_iterator = dmat->GetBatches(ctx, batch_param_); auto first_page = (*batch_iterator.begin()).Impl(); // Create a new ELLPACK page with empty rows. page_.reset(); // Release the device memory first before reallocating - page_.reset(new EllpackPageImpl( - batch_param_.gpu_id, first_page->Cuts(), first_page->is_dense, - first_page->row_stride, sample_rows)); + page_.reset(new EllpackPageImpl(ctx->gpu_id, first_page->Cuts(), first_page->is_dense, + first_page->row_stride, sample_rows)); // Compact the ELLPACK pages into the single sample page. thrust::fill(dh::tbegin(page_->gidx_buffer), dh::tend(page_->gidx_buffer), 0); for (auto& batch : batch_iterator) { - page_->Compact(batch_param_.gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_)); + page_->Compact(ctx->gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_)); } return {sample_rows, page_.get(), dh::ToSpan(gpair_)}; @@ -242,18 +245,17 @@ GradientBasedSampling::GradientBasedSampling(EllpackPageImpl const* page, threshold_(n_rows + 1, 0.0f), grad_sum_(n_rows, 0.0f) {} -GradientBasedSample GradientBasedSampling::Sample(common::Span gpair, - DMatrix* dmat) { +GradientBasedSample GradientBasedSampling::Sample(Context const* ctx, + common::Span gpair, DMatrix* dmat) { + auto cuctx = ctx->CUDACtx(); size_t n_rows = dmat->Info().num_row_; size_t threshold_index = GradientBasedSampler::CalculateThresholdIndex( gpair, dh::ToSpan(threshold_), dh::ToSpan(grad_sum_), n_rows * subsample_); // Perform Poisson sampling in place. 
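UniformSampling::Sample above zeroes each gradient pair with probability 1 - subsample through thrust::replace_if, now launched on the stream policy from ctx->CUDACtx()->CTP(). A host-side sketch of the same call shape, with thrust::host standing in for the stream policy and a simplified deterministic functor instead of XGBoost's hash-based BernoulliTrial:

#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/replace.h>

#include <cstddef>
#include <vector>

struct GradPair {
  float grad{0.f}, hess{0.f};
};

// Simplified trial: drop row i unless its hashed index falls under `subsample`.
struct Trial {
  std::size_t seed;
  float subsample;
  bool operator()(std::size_t i) const {
    return ((i * 2654435761u + seed) % 1000u) >= static_cast<std::size_t>(subsample * 1000);
  }
};

void SubsampleHost(std::vector<GradPair>* gpair, float subsample, std::size_t seed) {
  thrust::replace_if(thrust::host, gpair->begin(), gpair->end(),
                     thrust::counting_iterator<std::size_t>(0),
                     Trial{seed, subsample}, GradPair{});  // zero out dropped rows
}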
- thrust::transform(dh::tbegin(gpair), dh::tend(gpair), - thrust::counting_iterator(0), - dh::tbegin(gpair), - PoissonSampling(dh::ToSpan(threshold_), - threshold_index, + thrust::transform(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair), + thrust::counting_iterator(0), dh::tbegin(gpair), + PoissonSampling(dh::ToSpan(threshold_), threshold_index, RandomWeight(common::GlobalRandom()()))); return {n_rows, page_, gpair}; } @@ -268,7 +270,8 @@ ExternalMemoryGradientBasedSampling::ExternalMemoryGradientBasedSampling( grad_sum_(n_rows, 0.0f), sample_row_index_(n_rows) {} -GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(common::Span gpair, +GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* ctx, + common::Span gpair, DMatrix* dmat) { size_t n_rows = dmat->Info().num_row_; size_t threshold_index = GradientBasedSampler::CalculateThresholdIndex( @@ -298,28 +301,25 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(common::SpanGetBatches(batch_param_); + auto batch_iterator = dmat->GetBatches(ctx, batch_param_); auto first_page = (*batch_iterator.begin()).Impl(); // Create a new ELLPACK page with empty rows. page_.reset(); // Release the device memory first before reallocating - page_.reset(new EllpackPageImpl(batch_param_.gpu_id, first_page->Cuts(), - first_page->is_dense, + page_.reset(new EllpackPageImpl(ctx->gpu_id, first_page->Cuts(), first_page->is_dense, first_page->row_stride, sample_rows)); // Compact the ELLPACK pages into the single sample page. thrust::fill(dh::tbegin(page_->gidx_buffer), dh::tend(page_->gidx_buffer), 0); for (auto& batch : batch_iterator) { - page_->Compact(batch_param_.gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_)); + page_->Compact(ctx->gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_)); } return {sample_rows, page_.get(), dh::ToSpan(gpair_)}; } -GradientBasedSampler::GradientBasedSampler(EllpackPageImpl const* page, - size_t n_rows, - const BatchParam& batch_param, - float subsample, - int sampling_method) { +GradientBasedSampler::GradientBasedSampler(Context const* ctx, EllpackPageImpl const* page, + size_t n_rows, const BatchParam& batch_param, + float subsample, int sampling_method) { monitor_.Init("gradient_based_sampler"); bool is_sampling = subsample < 1.0; @@ -346,7 +346,7 @@ GradientBasedSampler::GradientBasedSampler(EllpackPageImpl const* page, } } else { if (is_external_memory) { - strategy_.reset(new ExternalMemoryNoSampling(page, n_rows, batch_param)); + strategy_.reset(new ExternalMemoryNoSampling(ctx, page, n_rows, batch_param)); } else { strategy_.reset(new NoSampling(page)); } @@ -354,10 +354,10 @@ GradientBasedSampler::GradientBasedSampler(EllpackPageImpl const* page, } // Sample a DMatrix based on the given gradient pairs. -GradientBasedSample GradientBasedSampler::Sample(common::Span gpair, - DMatrix* dmat) { +GradientBasedSample GradientBasedSampler::Sample(Context const* ctx, + common::Span gpair, DMatrix* dmat) { monitor_.Start("Sample"); - GradientBasedSample sample = strategy_->Sample(gpair, dmat); + GradientBasedSample sample = strategy_->Sample(ctx, gpair, dmat); monitor_.Stop("Sample"); return sample; } diff --git a/src/tree/gpu_hist/gradient_based_sampler.cuh b/src/tree/gpu_hist/gradient_based_sampler.cuh index 5be6c71dedaa..dafb98cfd8b3 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cuh +++ b/src/tree/gpu_hist/gradient_based_sampler.cuh @@ -24,7 +24,8 @@ struct GradientBasedSample { class SamplingStrategy { public: /*! 
\brief Sample from a DMatrix based on the given gradient pairs. */ - virtual GradientBasedSample Sample(common::Span gpair, DMatrix* dmat) = 0; + virtual GradientBasedSample Sample(Context const* ctx, common::Span gpair, + DMatrix* dmat) = 0; virtual ~SamplingStrategy() = default; }; @@ -32,7 +33,8 @@ class SamplingStrategy { class NoSampling : public SamplingStrategy { public: explicit NoSampling(EllpackPageImpl const* page); - GradientBasedSample Sample(common::Span gpair, DMatrix* dmat) override; + GradientBasedSample Sample(Context const* ctx, common::Span gpair, + DMatrix* dmat) override; private: EllpackPageImpl const* page_; @@ -41,10 +43,10 @@ class NoSampling : public SamplingStrategy { /*! \brief No sampling in external memory mode. */ class ExternalMemoryNoSampling : public SamplingStrategy { public: - ExternalMemoryNoSampling(EllpackPageImpl const* page, - size_t n_rows, - const BatchParam& batch_param); - GradientBasedSample Sample(common::Span gpair, DMatrix* dmat) override; + ExternalMemoryNoSampling(Context const* ctx, EllpackPageImpl const* page, size_t n_rows, + BatchParam batch_param); + GradientBasedSample Sample(Context const* ctx, common::Span gpair, + DMatrix* dmat) override; private: BatchParam batch_param_; @@ -56,7 +58,8 @@ class ExternalMemoryNoSampling : public SamplingStrategy { class UniformSampling : public SamplingStrategy { public: UniformSampling(EllpackPageImpl const* page, float subsample); - GradientBasedSample Sample(common::Span gpair, DMatrix* dmat) override; + GradientBasedSample Sample(Context const* ctx, common::Span gpair, + DMatrix* dmat) override; private: EllpackPageImpl const* page_; @@ -66,10 +69,9 @@ /*! \brief Uniform sampling in external memory mode. */ class ExternalMemoryUniformSampling : public SamplingStrategy { public: - ExternalMemoryUniformSampling(size_t n_rows, - BatchParam batch_param, - float subsample); - GradientBasedSample Sample(common::Span gpair, DMatrix* dmat) override; + ExternalMemoryUniformSampling(size_t n_rows, BatchParam batch_param, float subsample); + GradientBasedSample Sample(Context const* ctx, common::Span gpair, + DMatrix* dmat) override; private: BatchParam batch_param_; @@ -82,11 +84,10 @@ class ExternalMemoryUniformSampling : public SamplingStrategy { /*! \brief Gradient-based sampling in in-memory mode. */ class GradientBasedSampling : public SamplingStrategy { public: - GradientBasedSampling(EllpackPageImpl const* page, - size_t n_rows, - const BatchParam& batch_param, + GradientBasedSampling(EllpackPageImpl const* page, size_t n_rows, const BatchParam& batch_param, float subsample); - GradientBasedSample Sample(common::Span gpair, DMatrix* dmat) override; + GradientBasedSample Sample(Context const* ctx, common::Span gpair, + DMatrix* dmat) override; private: EllpackPageImpl const* page_; @@ -98,10 +99,9 @@ class GradientBasedSampling : public SamplingStrategy { /*! \brief Gradient-based sampling in external memory mode.
*/ class ExternalMemoryGradientBasedSampling : public SamplingStrategy { public: - ExternalMemoryGradientBasedSampling(size_t n_rows, - BatchParam batch_param, - float subsample); - GradientBasedSample Sample(common::Span gpair, DMatrix* dmat) override; + ExternalMemoryGradientBasedSampling(size_t n_rows, BatchParam batch_param, float subsample); + GradientBasedSample Sample(Context const* ctx, common::Span gpair, + DMatrix* dmat) override; private: BatchParam batch_param_; @@ -124,14 +124,11 @@ class ExternalMemoryGradientBasedSampling : public SamplingStrategy { */ class GradientBasedSampler { public: - GradientBasedSampler(EllpackPageImpl const* page, - size_t n_rows, - const BatchParam& batch_param, - float subsample, - int sampling_method); + GradientBasedSampler(Context const* ctx, EllpackPageImpl const* page, size_t n_rows, + const BatchParam& batch_param, float subsample, int sampling_method); /*! \brief Sample from a DMatrix based on the given gradient pairs. */ - GradientBasedSample Sample(common::Span gpair, DMatrix* dmat); + GradientBasedSample Sample(Context const* ctx, common::Span gpair, DMatrix* dmat); /*! \brief Calculate the threshold used to normalize sampling probabilities. */ static size_t CalculateThresholdIndex(common::Span gpair, diff --git a/src/tree/updater_approx.cc b/src/tree/updater_approx.cc index 148614a7ee38..f637427ad201 100644 --- a/src/tree/updater_approx.cc +++ b/src/tree/updater_approx.cc @@ -66,7 +66,7 @@ class GloablApproxBuilder { partitioner_.clear(); // Generating the GHistIndexMatrix is quite slow, is there a way to speed it up? for (auto const &page : - p_fmat->GetBatches(BatchSpec(*param_, hess, *task_))) { + p_fmat->GetBatches(ctx_, BatchSpec(*param_, hess, *task_))) { if (n_total_bins == 0) { n_total_bins = page.cut.TotalBins(); feature_values_ = page.cut; @@ -97,7 +97,7 @@ class GloablApproxBuilder { std::vector nodes{best}; size_t i = 0; auto space = ConstructHistSpace(partitioner_, nodes); - for (auto const &page : p_fmat->GetBatches(BatchSpec(*param_, hess))) { + for (auto const &page : p_fmat->GetBatches(ctx_, BatchSpec(*param_, hess))) { histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), nodes, {}, gpair); i++; @@ -148,7 +148,7 @@ class GloablApproxBuilder { size_t i = 0; auto space = ConstructHistSpace(partitioner_, nodes_to_build); - for (auto const &page : p_fmat->GetBatches(BatchSpec(*param_, hess))) { + for (auto const &page : p_fmat->GetBatches(ctx_, BatchSpec(*param_, hess))) { histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), nodes_to_build, nodes_to_sub, gpair); i++; @@ -214,7 +214,8 @@ class GloablApproxBuilder { monitor_->Start("UpdatePosition"); size_t page_id = 0; - for (auto const &page : p_fmat->GetBatches(BatchSpec(*param_, hess))) { + for (auto const &page : + p_fmat->GetBatches(ctx_, BatchSpec(*param_, hess))) { partitioner_.at(page_id).UpdatePosition(ctx_, page, applied, p_tree); page_id++; } diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc index 02edfa74a4ef..bda9b4dfa3bc 100644 --- a/src/tree/updater_colmaker.cc +++ b/src/tree/updater_colmaker.cc @@ -76,7 +76,7 @@ class ColMaker: public TreeUpdater { // Finds densities if we don't already have them if (column_densities_.empty()) { std::vector column_size(dmat->Info().num_col_); - for (const auto &batch : dmat->GetBatches()) { + for (const auto &batch : dmat->GetBatches(ctx_)) { auto page = batch.GetView(); for (auto i = 0u; i < batch.Size(); i++) { column_size[i] += 
page[i].size(); @@ -467,7 +467,7 @@ class ColMaker: public TreeUpdater { auto evaluator = tree_evaluator_.GetEvaluator(); auto feat_set = column_sampler_.GetFeatureSet(depth); - for (const auto &batch : p_fmat->GetBatches()) { + for (const auto &batch : p_fmat->GetBatches(ctx_)) { this->UpdateSolution(batch, feat_set->HostVector(), gpair, p_fmat); } // after this each thread's stemp will get the best candidates, aggregate results @@ -546,7 +546,7 @@ class ColMaker: public TreeUpdater { } std::sort(fsplits.begin(), fsplits.end()); fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin()); - for (const auto &batch : p_fmat->GetBatches()) { + for (const auto &batch : p_fmat->GetBatches(ctx_)) { auto page = batch.GetView(); for (auto fid : fsplits) { auto col = page[fid]; diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index 2d1b7a24df7c..c14e19db1a45 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -218,7 +218,7 @@ struct GPUHistMakerDevice { column_sampler(column_sampler_seed), interaction_constraints(param, n_features), batch_param(std::move(_batch_param)) { - sampler.reset(new GradientBasedSampler(page, _n_rows, batch_param, param.subsample, + sampler.reset(new GradientBasedSampler(ctx, page, _n_rows, batch_param, param.subsample, param.sampling_method)); if (!param.monotone_constraints.empty()) { // Copy assigning an empty vector causes an exception in MSVC debug builds @@ -258,7 +258,7 @@ struct GPUHistMakerDevice { dh::safe_cuda(cudaMemcpyAsync( d_gpair.data().get(), dh_gpair->ConstDevicePointer(), dh_gpair->Size() * sizeof(GradientPair), cudaMemcpyDeviceToDevice)); - auto sample = sampler->Sample(dh::ToSpan(d_gpair), dmat); + auto sample = sampler->Sample(ctx_, dh::ToSpan(d_gpair), dmat); page = sample.page; gpair = sample.gpair; @@ -808,11 +808,8 @@ class GPUHistMaker : public TreeUpdater { uint32_t column_sampling_seed = common::GlobalRandom()(); collective::Broadcast(&column_sampling_seed, sizeof(column_sampling_seed), 0); - BatchParam batch_param{ - ctx_->gpu_id, - param->max_bin, - }; - auto page = (*dmat->GetBatches(batch_param).begin()).Impl(); + auto batch_param = BatchParam{param->max_bin, TrainParam::DftSparseThreshold()}; + auto page = (*dmat->GetBatches(ctx_, batch_param).begin()).Impl(); dh::safe_cuda(cudaSetDevice(ctx_->gpu_id)); info_->feature_types.SetDevice(ctx_->gpu_id); maker.reset(new GPUHistMakerDevice( diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 749e799a8aea..f0dd3dd12dee 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -134,7 +134,7 @@ class MultiTargetHistBuilder { std::vector const &applied) { monitor_->Start(__func__); std::size_t page_id{0}; - for (auto const &page : p_fmat->GetBatches(HistBatch(this->param_))) { + for (auto const &page : p_fmat->GetBatches(ctx_, HistBatch(this->param_))) { this->partitioner_.at(page_id).UpdatePosition(this->ctx_, page, applied, p_tree); page_id++; } @@ -152,7 +152,7 @@ class MultiTargetHistBuilder { std::size_t page_id = 0; bst_bin_t n_total_bins = 0; partitioner_.clear(); - for (auto const &page : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &page : p_fmat->GetBatches(ctx_, HistBatch(param_))) { if (n_total_bins == 0) { n_total_bins = page.cut.TotalBins(); } else { @@ -206,7 +206,7 @@ class MultiTargetHistBuilder { std::vector nodes{best}; std::size_t i = 0; auto space = ConstructHistSpace(partitioner_, nodes); - for (auto const &page : 
p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &page : p_fmat->GetBatches(ctx_, HistBatch(param_))) { for (bst_target_t t{0}; t < n_targets; ++t) { auto t_gpair = gpair.Slice(linalg::All(), t); histogram_builder_[t].BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), @@ -225,7 +225,7 @@ class MultiTargetHistBuilder { for (bst_target_t t{0}; t < p_tree->NumTargets(); ++t) { hists.push_back(&histogram_builder_[t].Histogram()); } - for (auto const &gmat : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &gmat : p_fmat->GetBatches(ctx_, HistBatch(param_))) { evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, &nodes); break; } @@ -263,7 +263,7 @@ class MultiTargetHistBuilder { std::size_t i = 0; auto space = ConstructHistSpace(partitioner_, nodes_to_build); - for (auto const &page : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &page : p_fmat->GetBatches(ctx_, HistBatch(param_))) { for (std::size_t t = 0; t < p_tree->NumTargets(); ++t) { auto t_gpair = gpair.Slice(linalg::All(), t); // Make sure the gradient matrix is f-order. @@ -283,7 +283,7 @@ class MultiTargetHistBuilder { for (bst_target_t t{0}; t < p_tree->NumTargets(); ++t) { hists.push_back(&histogram_builder_[t].Histogram()); } - for (auto const &gmat : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &gmat : p_fmat->GetBatches(ctx_, HistBatch(param_))) { evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, best_splits); break; } @@ -383,7 +383,7 @@ class HistBuilder { std::size_t page_id{0}; bst_bin_t n_total_bins{0}; partitioner_.clear(); - for (auto const &page : fmat->GetBatches(HistBatch(param_))) { + for (auto const &page : fmat->GetBatches(ctx_, HistBatch(param_))) { if (n_total_bins == 0) { n_total_bins = page.cut.TotalBins(); } else { @@ -406,7 +406,7 @@ class HistBuilder { monitor_->Start(__func__); auto const &histograms = histogram_builder_->Histogram(); auto ft = p_fmat->Info().feature_types.ConstHostSpan(); - for (auto const &gmat : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &gmat : p_fmat->GetBatches(ctx_, HistBatch(param_))) { evaluator_->EvaluateSplits(histograms, gmat.cut, ft, *p_tree, best_splits); break; } @@ -423,7 +423,7 @@ class HistBuilder { std::size_t page_id = 0; auto space = ConstructHistSpace(partitioner_, {node}); - for (auto const &gidx : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &gidx : p_fmat->GetBatches(ctx_, HistBatch(param_))) { std::vector nodes_to_build{node}; std::vector nodes_to_sub; this->histogram_builder_->BuildHist(page_id, space, gidx, p_tree, @@ -439,7 +439,7 @@ class HistBuilder { * Specialized code for dense data: For dense data (with no missing value), the sum * of gradient histogram is equal to snode[nid] */ - auto const &gmat = *(p_fmat->GetBatches(HistBatch(param_)).begin()); + auto const &gmat = *(p_fmat->GetBatches(ctx_, HistBatch(param_)).begin()); std::vector const &row_ptr = gmat.cut.Ptrs(); CHECK_GE(row_ptr.size(), 2); std::uint32_t const ibegin = row_ptr[0]; @@ -467,7 +467,7 @@ class HistBuilder { std::vector entries{node}; monitor_->Start("EvaluateSplits"); auto ft = p_fmat->Info().feature_types.ConstHostSpan(); - for (auto const &gmat : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &gmat : p_fmat->GetBatches(ctx_, HistBatch(param_))) { evaluator_->EvaluateSplits(histogram_builder_->Histogram(), gmat.cut, ft, *p_tree, &entries); break; @@ -503,7 +503,7 @@ class HistBuilder { std::size_t page_id{0}; auto space = ConstructHistSpace(partitioner_, nodes_to_build); - for 
(auto const &gidx : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &gidx : p_fmat->GetBatches(ctx_, HistBatch(param_))) { histogram_builder_->BuildHist(page_id, space, gidx, p_tree, partitioner_.at(page_id).Partitions(), nodes_to_build, nodes_to_sub, gpair.Values()); @@ -515,7 +515,7 @@ class HistBuilder { std::vector const &applied) { monitor_->Start(__func__); std::size_t page_id{0}; - for (auto const &page : p_fmat->GetBatches(HistBatch(this->param_))) { + for (auto const &page : p_fmat->GetBatches(ctx_, HistBatch(param_))) { this->partitioner_.at(page_id).UpdatePosition(this->ctx_, page, applied, p_tree); page_id++; } diff --git a/tests/cpp/common/test_column_matrix.cc b/tests/cpp/common/test_column_matrix.cc index de7b9a2581b2..b49350702855 100644 --- a/tests/cpp/common/test_column_matrix.cc +++ b/tests/cpp/common/test_column_matrix.cc @@ -14,11 +14,12 @@ TEST(DenseColumn, Test) { int32_t max_num_bins[] = {static_cast(std::numeric_limits::max()) + 1, static_cast(std::numeric_limits::max()) + 1, static_cast(std::numeric_limits::max()) + 2}; + auto ctx = CreateEmptyGenericParam(Context::kCpuId); BinTypeSize last{kUint8BinsTypeSize}; for (int32_t max_num_bin : max_num_bins) { auto dmat = RandomDataGenerator(100, 10, 0.0).GenerateDMatrix(); auto sparse_thresh = 0.2; - GHistIndexMatrix gmat{dmat.get(), max_num_bin, sparse_thresh, false, AllThreadsForTest()}; + GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, sparse_thresh, false}; ColumnMatrix column_matrix; for (auto const& page : dmat->GetBatches()) { column_matrix.InitFromSparse(page, gmat, sparse_thresh, AllThreadsForTest()); @@ -62,9 +63,10 @@ TEST(SparseColumn, Test) { int32_t max_num_bins[] = {static_cast(std::numeric_limits::max()) + 1, static_cast(std::numeric_limits::max()) + 1, static_cast(std::numeric_limits::max()) + 2}; + auto ctx = CreateEmptyGenericParam(Context::kCpuId); for (int32_t max_num_bin : max_num_bins) { auto dmat = RandomDataGenerator(100, 1, 0.85).GenerateDMatrix(); - GHistIndexMatrix gmat{dmat.get(), max_num_bin, 0.5f, false, AllThreadsForTest()}; + GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, 0.5f, false}; ColumnMatrix column_matrix; for (auto const& page : dmat->GetBatches()) { column_matrix.InitFromSparse(page, gmat, 1.0, AllThreadsForTest()); @@ -90,9 +92,10 @@ TEST(DenseColumnWithMissing, Test) { int32_t max_num_bins[] = {static_cast(std::numeric_limits::max()) + 1, static_cast(std::numeric_limits::max()) + 1, static_cast(std::numeric_limits::max()) + 2}; + auto ctx = CreateEmptyGenericParam(Context::kCpuId); for (int32_t max_num_bin : max_num_bins) { auto dmat = RandomDataGenerator(100, 1, 0.5).GenerateDMatrix(); - GHistIndexMatrix gmat(dmat.get(), max_num_bin, 0.2, false, AllThreadsForTest()); + GHistIndexMatrix gmat(&ctx, dmat.get(), max_num_bin, 0.2, false); ColumnMatrix column_matrix; for (auto const& page : dmat->GetBatches()) { column_matrix.InitFromSparse(page, gmat, 0.2, AllThreadsForTest()); diff --git a/tests/cpp/common/test_hist_util.cc b/tests/cpp/common/test_hist_util.cc index 41c728f353f8..69ec2cc82ff1 100644 --- a/tests/cpp/common/test_hist_util.cc +++ b/tests/cpp/common/test_hist_util.cc @@ -156,6 +156,7 @@ TEST(CutsBuilder, SearchGroupInd) { } TEST(HistUtil, DenseCutsCategorical) { + auto ctx = CreateEmptyGenericParam(Context::kCpuId); int categorical_sizes[] = {2, 6, 8, 12}; int num_bins = 256; int sizes[] = {25, 100, 1000}; @@ -165,7 +166,7 @@ TEST(HistUtil, DenseCutsCategorical) { std::vector x_sorted(x); std::sort(x_sorted.begin(), x_sorted.end()); auto dmat = 
GetDMatrixFromData(x, n, 1); - HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest()); + HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins); auto cuts_from_sketch = cuts.Values(); EXPECT_LT(cuts.MinValues()[0], x_sorted.front()); EXPECT_GT(cuts_from_sketch.front(), x_sorted.front()); @@ -176,6 +177,7 @@ TEST(HistUtil, DenseCutsCategorical) { } TEST(HistUtil, DenseCutsAccuracyTest) { + auto ctx = CreateEmptyGenericParam(Context::kCpuId); int bin_sizes[] = {2, 16, 256, 512}; int sizes[] = {100}; int num_columns = 5; @@ -183,7 +185,7 @@ TEST(HistUtil, DenseCutsAccuracyTest) { auto x = GenerateRandom(num_rows, num_columns); auto dmat = GetDMatrixFromData(x, num_rows, num_columns); for (auto num_bins : bin_sizes) { - HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest()); + HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins); ValidateCuts(cuts, dmat.get(), num_bins); } } @@ -193,6 +195,7 @@ TEST(HistUtil, DenseCutsAccuracyTestWeights) { int bin_sizes[] = {2, 16, 256, 512}; int sizes[] = {100, 1000, 1500}; int num_columns = 5; + auto ctx = CreateEmptyGenericParam(Context::kCpuId); for (auto num_rows : sizes) { auto x = GenerateRandom(num_rows, num_columns); auto dmat = GetDMatrixFromData(x, num_rows, num_columns); @@ -200,11 +203,11 @@ TEST(HistUtil, DenseCutsAccuracyTestWeights) { dmat->Info().weights_.HostVector() = w; for (auto num_bins : bin_sizes) { { - HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), true); + HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins, true); ValidateCuts(cuts, dmat.get(), num_bins); } { - HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), false); + HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins, false); ValidateCuts(cuts, dmat.get(), num_bins); } } @@ -215,6 +218,7 @@ void TestQuantileWithHessian(bool use_sorted) { int bin_sizes[] = {2, 16, 256, 512}; int sizes[] = {1000, 1500}; int num_columns = 5; + auto ctx = CreateEmptyGenericParam(Context::kCpuId); for (auto num_rows : sizes) { auto x = GenerateRandom(num_rows, num_columns); auto dmat = GetDMatrixFromData(x, num_rows, num_columns); @@ -225,15 +229,13 @@ void TestQuantileWithHessian(bool use_sorted) { dmat->Info().weights_.HostVector() = w; for (auto num_bins : bin_sizes) { - HistogramCuts cuts_hess = - SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), use_sorted, hessian); + HistogramCuts cuts_hess = SketchOnDMatrix(&ctx, dmat.get(), num_bins, use_sorted, hessian); for (size_t i = 0; i < w.size(); ++i) { dmat->Info().weights_.HostVector()[i] = w[i] * hessian[i]; } ValidateCuts(cuts_hess, dmat.get(), num_bins); - HistogramCuts cuts_wh = - SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), use_sorted); + HistogramCuts cuts_wh = SketchOnDMatrix(&ctx, dmat.get(), num_bins, use_sorted); ValidateCuts(cuts_wh, dmat.get(), num_bins); ASSERT_EQ(cuts_hess.Values().size(), cuts_wh.Values().size()); @@ -255,12 +257,13 @@ TEST(HistUtil, DenseCutsExternalMemory) { int bin_sizes[] = {2, 16, 256, 512}; int sizes[] = {100, 1000, 1500}; int num_columns = 5; + auto ctx = CreateEmptyGenericParam(Context::kCpuId); for (auto num_rows : sizes) { auto x = GenerateRandom(num_rows, num_columns); dmlc::TemporaryDirectory tmpdir; auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, tmpdir); for (auto num_bins : bin_sizes) { - HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest()); + HistogramCuts cuts = 
SketchOnDMatrix(&ctx, dmat.get(), num_bins); ValidateCuts(cuts, dmat.get(), num_bins); } } @@ -275,12 +278,12 @@ TEST(HistUtil, IndexBinBound) { kUint32BinsTypeSize}; size_t constexpr kRows = 100; size_t constexpr kCols = 10; - + auto ctx = CreateEmptyGenericParam(Context::kCpuId); size_t bin_id = 0; for (auto max_bin : bin_sizes) { auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); - GHistIndexMatrix hmat(p_fmat.get(), max_bin, 0.5, false, AllThreadsForTest()); + GHistIndexMatrix hmat(&ctx, p_fmat.get(), max_bin, 0.5, false); EXPECT_EQ(hmat.index.Size(), kRows*kCols); EXPECT_EQ(expected_bin_type_sizes[bin_id++], hmat.index.GetBinTypeSize()); } @@ -300,10 +303,11 @@ TEST(HistUtil, IndexBinData) { static_cast(std::numeric_limits::max()) + 2 }; size_t constexpr kRows = 100; size_t constexpr kCols = 10; + auto ctx = CreateEmptyGenericParam(Context::kCpuId); for (auto max_bin : kBinSizes) { auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); - GHistIndexMatrix hmat(p_fmat.get(), max_bin, 0.5, false, AllThreadsForTest()); + GHistIndexMatrix hmat(&ctx, p_fmat.get(), max_bin, 0.5, false); uint32_t const* offsets = hmat.index.Offset(); EXPECT_EQ(hmat.index.Size(), kRows*kCols); switch (max_bin) { @@ -327,10 +331,10 @@ void TestSketchFromWeights(bool with_group) { size_t constexpr kRows = 300, kCols = 20, kBins = 256; size_t constexpr kGroups = 10; auto m = RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateDMatrix(); - common::HistogramCuts cuts = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest()); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + common::HistogramCuts cuts = SketchOnDMatrix(&ctx, m.get(), kBins); MetaInfo info; - Context ctx; auto& h_weights = info.weights_.HostVector(); if (with_group) { h_weights.resize(kGroups); @@ -363,7 +367,7 @@ void TestSketchFromWeights(bool with_group) { if (with_group) { m->Info().weights_ = decltype(m->Info().weights_)(); // remove weight - HistogramCuts non_weighted = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest()); + HistogramCuts non_weighted = SketchOnDMatrix(&ctx, m.get(), kBins); for (size_t i = 0; i < cuts.Values().size(); ++i) { EXPECT_EQ(cuts.Values()[i], non_weighted.Values()[i]); } @@ -382,7 +386,7 @@ void TestSketchFromWeights(bool with_group) { for (size_t i = 0; i < h_weights.size(); ++i) { h_weights[i] = static_cast(i + 1) / static_cast(kGroups); } - HistogramCuts weighted = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest()); + HistogramCuts weighted = SketchOnDMatrix(&ctx, m.get(), kBins); ValidateCuts(weighted, m.get(), kBins); } } @@ -393,11 +397,12 @@ TEST(HistUtil, SketchFromWeights) { } TEST(HistUtil, SketchCategoricalFeatures) { - TestCategoricalSketch(1000, 256, 32, false, [](DMatrix* p_fmat, int32_t num_bins) { - return SketchOnDMatrix(p_fmat, num_bins, AllThreadsForTest()); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + TestCategoricalSketch(1000, 256, 32, false, [&ctx](DMatrix* p_fmat, int32_t num_bins) { + return SketchOnDMatrix(&ctx, p_fmat, num_bins); }); - TestCategoricalSketch(1000, 256, 32, true, [](DMatrix* p_fmat, int32_t num_bins) { - return SketchOnDMatrix(p_fmat, num_bins, AllThreadsForTest()); + TestCategoricalSketch(1000, 256, 32, true, [&ctx](DMatrix* p_fmat, int32_t num_bins) { + return SketchOnDMatrix(&ctx, p_fmat, num_bins); }); } } // namespace common diff --git a/tests/cpp/common/test_hist_util.cu b/tests/cpp/common/test_hist_util.cu index 45948b711d06..e907a9f72ee6 100644 --- a/tests/cpp/common/test_hist_util.cu +++ 
b/tests/cpp/common/test_hist_util.cu @@ -25,9 +25,9 @@ namespace xgboost { namespace common { template -HistogramCuts GetHostCuts(AdapterT *adapter, int num_bins, float missing) { +HistogramCuts GetHostCuts(Context const* ctx, AdapterT* adapter, int num_bins, float missing) { data::SimpleDMatrix dmat(adapter, missing, 1); - HistogramCuts cuts = SketchOnDMatrix(&dmat, num_bins, AllThreadsForTest()); + HistogramCuts cuts = SketchOnDMatrix(ctx, &dmat, num_bins); return cuts; } @@ -39,7 +39,9 @@ TEST(HistUtil, DeviceSketch) { auto dmat = GetDMatrixFromData(x, num_rows, num_columns); auto device_cuts = DeviceSketch(0, dmat.get(), num_bins); - HistogramCuts host_cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest()); + + Context ctx; + HistogramCuts host_cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins); EXPECT_EQ(device_cuts.Values(), host_cuts.Values()); EXPECT_EQ(device_cuts.Ptrs(), host_cuts.Ptrs()); @@ -308,7 +310,8 @@ TEST(HistUtil, AdapterDeviceSketch) { data::CupyAdapter adapter(str); auto device_cuts = MakeUnweightedCutsForTest(adapter, num_bins, missing); - auto host_cuts = GetHostCuts(&adapter, num_bins, missing); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + auto host_cuts = GetHostCuts(&ctx, &adapter, num_bins, missing); EXPECT_EQ(device_cuts.Values(), host_cuts.Values()); EXPECT_EQ(device_cuts.Ptrs(), host_cuts.Ptrs()); diff --git a/tests/cpp/common/test_quantile.cc b/tests/cpp/common/test_quantile.cc index 4771cc9bfc93..a65969a6c1f8 100644 --- a/tests/cpp/common/test_quantile.cc +++ b/tests/cpp/common/test_quantile.cc @@ -16,7 +16,8 @@ TEST(Quantile, LoadBalance) { size_t constexpr kRows = 1000, kCols = 100; auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(); std::vector cols_ptr; - for (auto const& page : m->GetBatches()) { + Context ctx; + for (auto const& page : m->GetBatches(&ctx)) { data::SparsePageAdapterBatch adapter{page.GetView()}; cols_ptr = LoadBalance(adapter, page.data.Size(), kCols, 13, [](auto) { return true; }); } @@ -43,6 +44,7 @@ void PushPage(HostSketchContainer* container, SparsePage const& page, MetaInfo c template void DoTestDistributedQuantile(size_t rows, size_t cols) { + Context ctx; auto const world = collective::GetWorldSize(); std::vector infos(2); auto& h_weights = infos.front().weights_.HostVector(); @@ -51,7 +53,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) { SimpleRealUniformDistribution dist(3, 1000); std::generate(h_weights.begin(), h_weights.end(), [&]() { return dist(&lcg); }); std::vector column_size(cols, rows); - size_t n_bins = 64; + bst_bin_t n_bins = 64; // Generate cuts for distributed environment. 
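// Aside: the context-first sketching entry point these tests exercise, shown in
// isolation (a minimal usage sketch; the thread count now comes from the Context
// rather than the old explicit AllThreadsForTest() argument):
//
//   Context ctx;  // CPU context
//   HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), /*max_bins=*/64,
//                                        /*use_sorted=*/false, /*hessian=*/{});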
auto sparsity = 0.5f; @@ -72,15 +74,15 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) { std::vector hessian(rows, 1.0); auto hess = Span{hessian}; - ContainerType sketch_distributed(n_bins, m->Info().feature_types.ConstHostSpan(), - column_size, false, AllThreadsForTest()); + ContainerType sketch_distributed( + &ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false); if (use_column) { - for (auto const& page : m->GetBatches()) { + for (auto const& page : m->GetBatches(&ctx)) { PushPage(&sketch_distributed, page, m->Info(), hess); } } else { - for (auto const& page : m->GetBatches()) { + for (auto const& page : m->GetBatches(&ctx)) { PushPage(&sketch_distributed, page, m->Info(), hess); } } @@ -93,8 +95,8 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) { CHECK_EQ(collective::GetWorldSize(), 1); std::for_each(column_size.begin(), column_size.end(), [=](auto& size) { size *= world; }); m->Info().num_row_ = world * rows; - ContainerType sketch_on_single_node(n_bins, m->Info().feature_types.ConstHostSpan(), - column_size, false, AllThreadsForTest()); + ContainerType sketch_on_single_node( + &ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false); m->Info().num_row_ = rows; for (auto rank = 0; rank < world; ++rank) { @@ -106,7 +108,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) { .Upper(1.0f) .GenerateDMatrix(); if (use_column) { - for (auto const& page : m->GetBatches()) { + for (auto const& page : m->GetBatches(&ctx)) { PushPage(&sketch_on_single_node, page, m->Info(), hess); } } else { @@ -172,6 +174,7 @@ TEST(Quantile, SortedDistributed) { namespace { template void DoTestColSplitQuantile(size_t rows, size_t cols) { + Context ctx; auto const world = collective::GetWorldSize(); auto const rank = collective::GetRank(); @@ -204,17 +207,17 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) { // Generate cuts for distributed environment. 
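// Aside: both branches below feed pages into a sketch container built with the new
// context-first constructor; sketched standalone (ContainerType stands for the host
// sketch container under test, and the page type is assumed to be SortedCSCPage in
// the column branch and SparsePage otherwise):
//
//   ContainerType sketch(&ctx, n_bins, m->Info().feature_types.ConstHostSpan(),
//                        column_size, false);
//   for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) {  // column branch
//     PushPage(&sketch, page, m->Info(), hess);
//   }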
HistogramCuts distributed_cuts; { - ContainerType sketch_distributed(n_bins, m->Info().feature_types.ConstHostSpan(), - column_size, false, AllThreadsForTest()); + ContainerType sketch_distributed( + &ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false); std::vector hessian(rows, 1.0); auto hess = Span{hessian}; if (use_column) { - for (auto const& page : m->GetBatches()) { + for (auto const& page : m->GetBatches(&ctx)) { PushPage(&sketch_distributed, page, m->Info(), hess); } } else { - for (auto const& page : m->GetBatches()) { + for (auto const& page : m->GetBatches(&ctx)) { PushPage(&sketch_distributed, page, m->Info(), hess); } } @@ -227,17 +230,17 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) { CHECK_EQ(collective::GetWorldSize(), 1); HistogramCuts single_node_cuts; { - ContainerType sketch_on_single_node(n_bins, m->Info().feature_types.ConstHostSpan(), - column_size, false, AllThreadsForTest()); + ContainerType sketch_on_single_node( + &ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false); std::vector hessian(rows, 1.0); auto hess = Span{hessian}; if (use_column) { - for (auto const& page : m->GetBatches()) { + for (auto const& page : m->GetBatches(&ctx)) { PushPage(&sketch_on_single_node, page, m->Info(), hess); } } else { - for (auto const& page : m->GetBatches()) { + for (auto const& page : m->GetBatches(&ctx)) { PushPage(&sketch_on_single_node, page, m->Info(), hess); } } @@ -299,8 +302,10 @@ namespace { void TestSameOnAllWorkers() { auto const world = collective::GetWorldSize(); constexpr size_t kRows = 1000, kCols = 100; + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + RunWithSeedsAndBins( - kRows, [=](int32_t seed, size_t n_bins, MetaInfo const&) { + kRows, [=, &ctx](int32_t seed, size_t n_bins, MetaInfo const&) { auto rank = collective::GetRank(); HostDeviceVector storage; std::vector ft(kCols); @@ -314,7 +319,7 @@ void TestSameOnAllWorkers() { .MaxCategory(17) .Seed(rank + seed) .GenerateDMatrix(); - auto cuts = SketchOnDMatrix(m.get(), n_bins, AllThreadsForTest()); + auto cuts = SketchOnDMatrix(&ctx, m.get(), n_bins); std::vector cut_values(cuts.Values().size() * world, 0); std::vector< typename std::remove_reference_t::value_type> diff --git a/tests/cpp/data/test_ellpack_page.cu b/tests/cpp/data/test_ellpack_page.cu index dccf85092d7f..d56f1c7b5d2a 100644 --- a/tests/cpp/data/test_ellpack_page.cu +++ b/tests/cpp/data/test_ellpack_page.cu @@ -1,17 +1,17 @@ -/*! 
- * Copyright 2019-2020 XGBoost contributors +/** + * Copyright 2019-2023, XGBoost contributors */ #include #include -#include "../helpers.h" -#include "../histogram_helpers.h" -#include "gtest/gtest.h" - #include "../../../src/common/categorical.h" #include "../../../src/common/hist_util.h" #include "../../../src/data/ellpack_page.cuh" +#include "../../../src/tree/param.h" // TrainParam +#include "../helpers.h" +#include "../histogram_helpers.h" +#include "gtest/gtest.h" namespace xgboost { @@ -19,7 +19,10 @@ TEST(EllpackPage, EmptyDMatrix) { constexpr int kNRows = 0, kNCols = 0, kMaxBin = 256; constexpr float kSparsity = 0; auto dmat = RandomDataGenerator(kNRows, kNCols, kSparsity).GenerateDMatrix(); - auto& page = *dmat->GetBatches({0, kMaxBin}).begin(); + Context ctx{MakeCUDACtx(0)}; + auto& page = *dmat->GetBatches( + &ctx, BatchParam{kMaxBin, tree::TrainParam::DftSparseThreshold()}) + .begin(); auto impl = page.Impl(); ASSERT_EQ(impl->row_stride, 0); ASSERT_EQ(impl->Cuts().TotalBins(), 0); @@ -87,8 +90,9 @@ TEST(EllpackPage, FromCategoricalBasic) { auto& h_ft = m->Info().feature_types.HostVector(); h_ft.resize(kCols, FeatureType::kCategorical); - BatchParam p{0, max_bins}; - auto ellpack = EllpackPage(m.get(), p); + Context ctx{MakeCUDACtx(0)}; + auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()}; + auto ellpack = EllpackPage(&ctx, m.get(), p); auto accessor = ellpack.Impl()->GetDeviceAccessor(0); ASSERT_EQ(kCats, accessor.NumBins()); @@ -142,8 +146,9 @@ TEST(EllpackPage, Copy) { dmlc::TemporaryDirectory tmpdir; std::unique_ptr dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir)); - BatchParam param{0, 256}; - auto page = (*dmat->GetBatches(param).begin()).Impl(); + Context ctx{MakeCUDACtx(0)}; + auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()}; + auto page = (*dmat->GetBatches(&ctx, param).begin()).Impl(); // Create an empty result page. EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride, @@ -151,7 +156,7 @@ TEST(EllpackPage, Copy) { // Copy batch pages into the result page. size_t offset = 0; - for (auto& batch : dmat->GetBatches(param)) { + for (auto& batch : dmat->GetBatches(&ctx, param)) { size_t num_elements = result.Copy(0, batch.Impl(), offset); offset += num_elements; } @@ -161,7 +166,7 @@ TEST(EllpackPage, Copy) { thrust::device_vector row_result_d(kCols); std::vector row(kCols); std::vector row_result(kCols); - for (auto& page : dmat->GetBatches(param)) { + for (auto& page : dmat->GetBatches(&ctx, param)) { auto impl = page.Impl(); EXPECT_EQ(impl->base_rowid, current_row); @@ -186,10 +191,11 @@ TEST(EllpackPage, Compact) { // Create a DMatrix with multiple batches. dmlc::TemporaryDirectory tmpdir; - std::unique_ptr - dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir)); - BatchParam param{0, 256}; - auto page = (*dmat->GetBatches(param).begin()).Impl(); + std::unique_ptr dmat( + CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir)); + Context ctx{MakeCUDACtx(0)}; + auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()}; + auto page = (*dmat->GetBatches(&ctx, param).begin()).Impl(); // Create an empty result page. 
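// Aside: the copy test below relies on a simple concatenation idiom: allocate one
// ELLPACK page sized for every row, then append each batch at a running offset. A
// sketch using the names from this diff:
//
//   EllpackPageImpl result(/*device=*/0, page->Cuts(), page->is_dense,
//                          page->row_stride, kRows);
//   std::size_t offset = 0;
//   for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
//     offset += result.Copy(/*device=*/0, batch.Impl(), offset);
//   }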
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride, @@ -201,7 +207,7 @@ TEST(EllpackPage, Compact) { SIZE_MAX}; thrust::device_vector row_indexes_d = row_indexes_h; common::Span row_indexes_span(row_indexes_d.data().get(), kRows); - for (auto& batch : dmat->GetBatches(param)) { + for (auto& batch : dmat->GetBatches(&ctx, param)) { result.Compact(0, batch.Impl(), row_indexes_span); } @@ -210,7 +216,7 @@ TEST(EllpackPage, Compact) { thrust::device_vector row_result_d(kCols); std::vector row(kCols); std::vector row_result(kCols); - for (auto& page : dmat->GetBatches(param)) { + for (auto& page : dmat->GetBatches(&ctx, param)) { auto impl = page.Impl(); ASSERT_EQ(impl->base_rowid, current_row); @@ -245,15 +251,17 @@ class EllpackPageTest : public testing::TestWithParam { // device. size_t n_samples{128}, n_features{13}; Context ctx; - ctx.gpu_id = 0; + Context gpu_ctx{MakeCUDACtx(0)}; auto Xy = RandomDataGenerator{n_samples, n_features, sparsity}.GenerateDMatrix(true); std::unique_ptr from_ghist; ASSERT_TRUE(Xy->SingleColBlock()); - for (auto const& page : Xy->GetBatches(BatchParam{17, 0.6})) { - from_ghist.reset(new EllpackPageImpl{&ctx, page, {}}); + + for (auto const& page : Xy->GetBatches(&ctx, BatchParam{17, 0.6})) { + from_ghist.reset(new EllpackPageImpl{&gpu_ctx, page, {}}); } - for (auto const& page : Xy->GetBatches(BatchParam{0, 17})) { + for (auto const& page : Xy->GetBatches( + &gpu_ctx, BatchParam{17, tree::TrainParam::DftSparseThreshold()})) { auto from_sparse_page = page.Impl(); ASSERT_EQ(from_sparse_page->is_dense, from_ghist->is_dense); ASSERT_EQ(from_sparse_page->base_rowid, 0); diff --git a/tests/cpp/data/test_ellpack_page_raw_format.cu b/tests/cpp/data/test_ellpack_page_raw_format.cu index 92b4acf4bd96..66d4024eca5c 100644 --- a/tests/cpp/data/test_ellpack_page_raw_format.cu +++ b/tests/cpp/data/test_ellpack_page_raw_format.cu @@ -1,17 +1,21 @@ -/*! 
- * Copyright 2021 XGBoost contributors +/** + * Copyright 2021-2023, XGBoost contributors */ #include <gtest/gtest.h> #include <xgboost/data.h> #include "../../../src/data/ellpack_page.cuh" #include "../../../src/data/sparse_page_source.h" -#include "../filesystem.h" // dmlc::TemporaryDirectory +#include "../../../src/tree/param.h" // TrainParam +#include "../filesystem.h" // dmlc::TemporaryDirectory #include "../helpers.h" namespace xgboost { namespace data { TEST(EllpackPageRawFormat, IO) { + Context ctx{MakeCUDACtx(0)}; + auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()}; + std::unique_ptr<SparsePageFormat<EllpackPage>> format{CreatePageFormat<EllpackPage>("raw")}; auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix(); @@ -20,7 +24,7 @@ { std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")}; - for (auto const &ellpack : m->GetBatches<EllpackPage>({0, 256})) { + for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) { format->Write(ellpack, fo.get()); } } @@ -29,7 +33,7 @@ std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())}; format->Read(&page, fi.get()); - for (auto const &ellpack : m->GetBatches<EllpackPage>({0, 256})) { + for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) { auto loaded = page.Impl(); auto orig = ellpack.Impl(); ASSERT_EQ(loaded->Cuts().Ptrs(), orig->Cuts().Ptrs()); diff --git a/tests/cpp/data/test_gradient_index.cc b/tests/cpp/data/test_gradient_index.cc index 93194972f3c9..c1e1f80a958c 100644 --- a/tests/cpp/data/test_gradient_index.cc +++ b/tests/cpp/data/test_gradient_index.cc @@ -2,20 +2,38 @@ * Copyright 2021-2023 by XGBoost contributors */ #include <gtest/gtest.h> -#include <xgboost/data.h> - -#include "../../../src/common/column_matrix.h" -#include "../../../src/common/io.h" // MemoryBufferStream -#include "../../../src/data/gradient_index.h" -#include "../helpers.h" +#include <xgboost/data.h> // for BatchIterator, BatchSet, DMatrix, BatchParam + +#include <algorithm> // for sort, unique +#include <cmath> // for isnan +#include <cstddef> // for size_t +#include <limits> // for numeric_limits +#include <memory> // for shared_ptr, __shared_ptr_access, unique_ptr +#include <string> // for string +#include <tuple> // for make_tuple, tie, tuple +#include <utility> // for move +#include <vector> // for vector + +#include "../../../src/common/categorical.h" // for AsCat +#include "../../../src/common/column_matrix.h" // for ColumnMatrix +#include "../../../src/common/hist_util.h" // for Index, HistogramCuts, SketchOnDMatrix +#include "../../../src/common/io.h" // for MemoryBufferStream +#include "../../../src/data/adapter.h" // for SparsePageAdapterBatch +#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix +#include "../../../src/tree/param.h" // for TrainParam +#include "../helpers.h" // for CreateEmptyGenericParam, GenerateRandomCa...
+#include "xgboost/base.h" // for bst_bin_t +#include "xgboost/context.h" // for Context +#include "xgboost/host_device_vector.h" // for HostDeviceVector namespace xgboost { namespace data { TEST(GradientIndex, ExternalMemory) { + auto ctx = CreateEmptyGenericParam(Context::kCpuId); std::unique_ptr dmat = CreateSparsePageDMatrix(10000); std::vector base_rowids; std::vector hessian(dmat->Info().num_row_, 1); - for (auto const &page : dmat->GetBatches({64, hessian, true})) { + for (auto const &page : dmat->GetBatches(&ctx, {64, hessian, true})) { base_rowids.push_back(page.base_rowid); } size_t i = 0; @@ -24,9 +42,8 @@ TEST(GradientIndex, ExternalMemory) { ++i; } - base_rowids.clear(); - for (auto const &page : dmat->GetBatches({64, hessian, false})) { + for (auto const &page : dmat->GetBatches(&ctx, {64, hessian, false})) { base_rowids.push_back(page.base_rowid); } i = 0; @@ -41,12 +58,13 @@ TEST(GradientIndex, FromCategoricalBasic) { size_t max_bins = 8; auto x = GenerateRandomCategoricalSingleColumn(kRows, kCats); auto m = GetDMatrixFromData(x, kRows, 1); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); auto &h_ft = m->Info().feature_types.HostVector(); h_ft.resize(kCols, FeatureType::kCategorical); BatchParam p(max_bins, 0.8); - GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {}); + GHistIndexMatrix gidx(&ctx, m.get(), max_bins, p.sparse_thresh, false, {}); auto x_copy = x; std::sort(x_copy.begin(), x_copy.end()); @@ -80,11 +98,11 @@ TEST(GradientIndex, FromCategoricalLarge) { BatchParam p{max_bins, 0.8}; { - GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {}); + GHistIndexMatrix gidx{&ctx, m.get(), max_bins, p.sparse_thresh, false, {}}; ASSERT_TRUE(gidx.index.GetBinTypeSize() == common::kUint16BinsTypeSize); } { - for (auto const &page : m->GetBatches(p)) { + for (auto const &page : m->GetBatches(&ctx, p)) { common::HistogramCuts cut = page.cut; GHistIndexMatrix gidx{m->Info(), std::move(cut), max_bins}; ASSERT_EQ(gidx.MaxNumBinPerFeat(), kCats); @@ -96,10 +114,11 @@ TEST(GradientIndex, PushBatch) { size_t constexpr kRows = 64, kCols = 4; bst_bin_t max_bins = 64; float st = 0.5; + Context ctx; auto test = [&](float sparisty) { auto m = RandomDataGenerator{kRows, kCols, sparisty}.GenerateDMatrix(true); - auto cuts = common::SketchOnDMatrix(m.get(), max_bins, AllThreadsForTest(), false, {}); + auto cuts = common::SketchOnDMatrix(&ctx, m.get(), max_bins, false, {}); common::HistogramCuts copy_cuts = cuts; ASSERT_EQ(m->Info().num_row_, kRows); @@ -112,7 +131,7 @@ TEST(GradientIndex, PushBatch) { m->Info().num_row_); gmat.PushAdapterBatchColumns(m->Ctx(), batch, std::numeric_limits::quiet_NaN(), 0); } - for (auto const &page : m->GetBatches(BatchParam{max_bins, st})) { + for (auto const &page : m->GetBatches(&ctx, BatchParam{max_bins, st})) { for (size_t i = 0; i < kRows; ++i) { for (size_t j = 0; j < kCols; ++j) { auto v0 = gmat.GetFvalue(i, j, false); @@ -143,17 +162,19 @@ class GHistIndexMatrixTest : public testing::TestWithParam from_ellpack; ASSERT_TRUE(Xy->SingleColBlock()); bst_bin_t constexpr kBins{17}; auto p = BatchParam{kBins, threshold}; - for (auto const &page : Xy->GetBatches(BatchParam{0, kBins})) { + Context gpu_ctx; + gpu_ctx.gpu_id = 0; + for (auto const &page : Xy->GetBatches( + &gpu_ctx, BatchParam{kBins, tree::TrainParam::DftSparseThreshold()})) { from_ellpack.reset(new GHistIndexMatrix{&ctx, Xy->Info(), page, p}); } - for (auto const &from_sparse_page : Xy->GetBatches(p)) { + for (auto 
const &from_sparse_page : Xy->GetBatches(&ctx, p)) { ASSERT_EQ(from_sparse_page.IsDense(), from_ellpack->IsDense()); ASSERT_EQ(from_sparse_page.base_rowid, 0); ASSERT_EQ(from_sparse_page.base_rowid, from_ellpack->base_rowid); diff --git a/tests/cpp/data/test_gradient_index_page_raw_format.cc b/tests/cpp/data/test_gradient_index_page_raw_format.cc index fa1a10faa829..570d1dbcaf45 100644 --- a/tests/cpp/data/test_gradient_index_page_raw_format.cc +++ b/tests/cpp/data/test_gradient_index_page_raw_format.cc @@ -1,5 +1,5 @@ -/*! - * Copyright 2021 XGBoost contributors +/** + * Copyright 2021-2023, XGBoost contributors */ #include @@ -11,6 +11,8 @@ namespace xgboost { namespace data { TEST(GHistIndexPageRawFormat, IO) { + Context ctx; + std::unique_ptr> format{ CreatePageFormat("raw")}; auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix(); @@ -20,7 +22,7 @@ TEST(GHistIndexPageRawFormat, IO) { { std::unique_ptr fo{dmlc::Stream::Create(path.c_str(), "w")}; - for (auto const &index : m->GetBatches(batch)) { + for (auto const &index : m->GetBatches(&ctx, batch)) { format->Write(index, fo.get()); } } @@ -29,7 +31,7 @@ TEST(GHistIndexPageRawFormat, IO) { std::unique_ptr fi{dmlc::SeekStream::CreateForRead(path.c_str())}; format->Read(&page, fi.get()); - for (auto const &gidx : m->GetBatches(batch)) { + for (auto const &gidx : m->GetBatches(&ctx, batch)) { auto const &loaded = gidx; ASSERT_EQ(loaded.cut.Ptrs(), page.cut.Ptrs()); ASSERT_EQ(loaded.cut.MinValues(), page.cut.MinValues()); @@ -43,5 +45,5 @@ TEST(GHistIndexPageRawFormat, IO) { ASSERT_EQ(loaded.Transpose().GetTypeSize(), loaded.Transpose().GetTypeSize()); } } -} // namespace data -} // namespace xgboost +} // namespace data +} // namespace xgboost diff --git a/tests/cpp/data/test_iterative_dmatrix.cc b/tests/cpp/data/test_iterative_dmatrix.cc index f95f7c03c992..74a69e109be0 100644 --- a/tests/cpp/data/test_iterative_dmatrix.cc +++ b/tests/cpp/data/test_iterative_dmatrix.cc @@ -15,8 +15,9 @@ namespace xgboost { namespace data { TEST(IterativeDMatrix, Ref) { + Context ctx; TestRefDMatrix( - [&](GHistIndexMatrix const& page) { return page.cut; }); + &ctx, [&](GHistIndexMatrix const& page) { return page.cut; }); } TEST(IterativeDMatrix, IsDense) { diff --git a/tests/cpp/data/test_iterative_dmatrix.cu b/tests/cpp/data/test_iterative_dmatrix.cu index be97a3f6a015..2f2f1f84ffd8 100644 --- a/tests/cpp/data/test_iterative_dmatrix.cu +++ b/tests/cpp/data/test_iterative_dmatrix.cu @@ -1,11 +1,12 @@ -/*! 
- * Copyright 2020-2022 XGBoost contributors +/** + * Copyright 2020-2023, XGBoost contributors */ #include #include "../../../src/data/device_adapter.cuh" #include "../../../src/data/ellpack_page.cuh" #include "../../../src/data/iterative_dmatrix.h" +#include "../../../src/tree/param.h" // TrainParam #include "../helpers.h" #include "test_iterative_dmatrix.h" @@ -13,15 +14,17 @@ namespace xgboost { namespace data { void TestEquivalent(float sparsity) { + Context ctx{MakeCUDACtx(0)}; + CudaArrayIterForTest iter{sparsity}; IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next, std::numeric_limits::quiet_NaN(), 0, 256); - size_t offset = 0; - auto first = (*m.GetEllpackBatches({}).begin()).Impl(); + std::size_t offset = 0; + auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl(); std::unique_ptr page_concatenated { new EllpackPageImpl(0, first->Cuts(), first->is_dense, first->row_stride, 1000 * 100)}; - for (auto& batch : m.GetBatches({})) { + for (auto& batch : m.GetBatches(&ctx, {})) { auto page = batch.Impl(); size_t num_elements = page_concatenated->Copy(0, page, offset); offset += num_elements; @@ -34,8 +37,8 @@ void TestEquivalent(float sparsity) { auto adapter = CupyAdapter(interface_str); std::unique_ptr dm{ DMatrix::Create(&adapter, std::numeric_limits::quiet_NaN(), 0)}; - BatchParam bp {0, 256}; - for (auto& ellpack : dm->GetBatches(bp)) { + auto bp = BatchParam{256, tree::TrainParam::DftSparseThreshold()}; + for (auto& ellpack : dm->GetBatches(&ctx, bp)) { auto from_data = ellpack.Impl()->GetDeviceAccessor(0); std::vector cuts_from_iter(from_iter.gidx_fvalue_map.size()); @@ -92,7 +95,8 @@ TEST(IterativeDeviceDMatrix, RowMajor) { std::numeric_limits::quiet_NaN(), 0, 256); size_t n_batches = 0; std::string interface_str = iter.AsArray(); - for (auto& ellpack : m.GetBatches({})) { + Context ctx{MakeCUDACtx(0)}; + for (auto& ellpack : m.GetBatches(&ctx, {})) { n_batches ++; auto impl = ellpack.Impl(); common::CompressedIterator iterator( @@ -140,7 +144,10 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) { IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next, std::numeric_limits::quiet_NaN(), 0, 256); - auto &ellpack = *m.GetBatches({0, 256}).begin(); + auto ctx = MakeCUDACtx(0); + auto& ellpack = + *m.GetBatches(&ctx, BatchParam{256, tree::TrainParam::DftSparseThreshold()}) + .begin(); auto impl = ellpack.Impl(); common::CompressedIterator iterator( impl->gidx_buffer.HostVector().data(), impl->NumSymbols()); @@ -171,8 +178,9 @@ TEST(IterativeDeviceDMatrix, IsDense) { } TEST(IterativeDeviceDMatrix, Ref) { + Context ctx{MakeCUDACtx(0)}; TestRefDMatrix( - [](EllpackPage const& page) { return page.Impl()->Cuts(); }); + &ctx, [](EllpackPage const& page) { return page.Impl()->Cuts(); }); } } // namespace data } // namespace xgboost diff --git a/tests/cpp/data/test_iterative_dmatrix.h b/tests/cpp/data/test_iterative_dmatrix.h index 588d2b3be721..ed8e2da773f2 100644 --- a/tests/cpp/data/test_iterative_dmatrix.h +++ b/tests/cpp/data/test_iterative_dmatrix.h @@ -1,8 +1,11 @@ -/*! 
- * Copyright 2022 XGBoost contributors +/** + * Copyright 2022-2023, XGBoost contributors */ #pragma once -#include // std::make_shared +#include // for Context + +#include // for numeric_limits +#include // for make_shared #include "../../../src/data/iterative_dmatrix.h" #include "../helpers.h" @@ -10,7 +13,7 @@ namespace xgboost { namespace data { template -void TestRefDMatrix(Cuts&& get_cuts) { +void TestRefDMatrix(Context const* ctx, Cuts&& get_cuts) { int n_bins = 256; Iter iter(0.3, 2048); auto m = std::make_shared(&iter, iter.Proxy(), nullptr, Reset, Next, @@ -20,8 +23,8 @@ void TestRefDMatrix(Cuts&& get_cuts) { auto m_1 = std::make_shared(&iter_1, iter_1.Proxy(), m, Reset, Next, std::numeric_limits::quiet_NaN(), 0, n_bins); - for (auto const& page_0 : m->template GetBatches({})) { - for (auto const& page_1 : m_1->template GetBatches({})) { + for (auto const& page_0 : m->template GetBatches(ctx, {})) { + for (auto const& page_1 : m_1->template GetBatches(ctx, {})) { auto const& cuts_0 = get_cuts(page_0); auto const& cuts_1 = get_cuts(page_1); ASSERT_EQ(cuts_0.Values(), cuts_1.Values()); @@ -32,8 +35,8 @@ void TestRefDMatrix(Cuts&& get_cuts) { m_1 = std::make_shared(&iter_1, iter_1.Proxy(), nullptr, Reset, Next, std::numeric_limits::quiet_NaN(), 0, n_bins); - for (auto const& page_0 : m->template GetBatches({})) { - for (auto const& page_1 : m_1->template GetBatches({})) { + for (auto const& page_0 : m->template GetBatches(ctx, {})) { + for (auto const& page_1 : m_1->template GetBatches(ctx, {})) { auto const& cuts_0 = get_cuts(page_0); auto const& cuts_1 = get_cuts(page_1); ASSERT_NE(cuts_0.Values(), cuts_1.Values()); @@ -45,8 +48,8 @@ void TestRefDMatrix(Cuts&& get_cuts) { auto dm = RandomDataGenerator(2048, Iter::Cols(), 0.5).GenerateDMatrix(true); auto dqm = std::make_shared(&iter_1, iter_1.Proxy(), dm, Reset, Next, std::numeric_limits::quiet_NaN(), 0, n_bins); - for (auto const& page_0 : dm->template GetBatches({})) { - for (auto const& page_1 : dqm->template GetBatches({})) { + for (auto const& page_0 : dm->template GetBatches(ctx, {})) { + for (auto const& page_1 : dqm->template GetBatches(ctx, {})) { auto const& cuts_0 = get_cuts(page_0); auto const& cuts_1 = get_cuts(page_1); ASSERT_EQ(cuts_0.Values(), cuts_1.Values()); diff --git a/tests/cpp/data/test_simple_dmatrix.cc b/tests/cpp/data/test_simple_dmatrix.cc index 3bdbf54037b4..43d0877d3278 100644 --- a/tests/cpp/data/test_simple_dmatrix.cc +++ b/tests/cpp/data/test_simple_dmatrix.cc @@ -61,6 +61,7 @@ TEST(SimpleDMatrix, RowAccess) { } TEST(SimpleDMatrix, ColAccessWithoutBatches) { + Context ctx; dmlc::TemporaryDirectory tempdir; const std::string tmp_file = tempdir.path + "/simple.libsvm"; CreateSimpleTestData(tmp_file); @@ -70,7 +71,7 @@ TEST(SimpleDMatrix, ColAccessWithoutBatches) { // Loop over the batches and assert the data is as expected int64_t num_col_batch = 0; - for (const auto &batch : dmat->GetBatches()) { + for (const auto &batch : dmat->GetBatches(&ctx)) { num_col_batch += 1; EXPECT_EQ(batch.Size(), dmat->Info().num_col_) << "Expected batch size = number of cells as #batches is 1."; diff --git a/tests/cpp/data/test_sparse_page_dmatrix.cc b/tests/cpp/data/test_sparse_page_dmatrix.cc index 608c329476a0..4cbbe6dc95f9 100644 --- a/tests/cpp/data/test_sparse_page_dmatrix.cc +++ b/tests/cpp/data/test_sparse_page_dmatrix.cc @@ -23,7 +23,7 @@ std::string UriSVM(std::string name, std::string cache) { } // namespace template -void TestSparseDMatrixLoadFile() { +void TestSparseDMatrixLoadFile(Context const* ctx) { 
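  // The caller now owns the context and threads it through every GetBatches() call in
  // this helper; a sketch of the caller side (page types assumed to be SparsePage,
  // CSCPage, and SortedCSCPage, matching the three instantiations below):
  //
  //   auto ctx = CreateEmptyGenericParam(Context::kCpuId);
  //   TestSparseDMatrixLoadFile<SparsePage>(&ctx);
  //   TestSparseDMatrixLoadFile<CSCPage>(&ctx);
  //   TestSparseDMatrixLoadFile<SortedCSCPage>(&ctx);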
dmlc::TemporaryDirectory tmpdir; auto opath = tmpdir.path + "/1-based.svm"; CreateBigTestData(opath, 3 * 64, false); @@ -48,7 +48,7 @@ void TestSparseDMatrixLoadFile() { data::SimpleDMatrix simple{&adapter, std::numeric_limits::quiet_NaN(), 1}; Page out; - for (auto const& page : m.GetBatches()) { + for (auto const &page : m.GetBatches(ctx)) { if (std::is_same::value) { out.Push(page); } else { @@ -58,7 +58,7 @@ void TestSparseDMatrixLoadFile() { ASSERT_EQ(m.Info().num_col_, simple.Info().num_col_); ASSERT_EQ(m.Info().num_row_, simple.Info().num_row_); - for (auto const& page : simple.GetBatches()) { + for (auto const& page : simple.GetBatches(ctx)) { ASSERT_EQ(page.offset.HostVector(), out.offset.HostVector()); for (size_t i = 0; i < page.data.Size(); ++i) { ASSERT_EQ(page.data.HostVector()[i].fvalue, out.data.HostVector()[i].fvalue); @@ -67,16 +67,18 @@ void TestSparseDMatrixLoadFile() { } TEST(SparsePageDMatrix, LoadFile) { - TestSparseDMatrixLoadFile(); - TestSparseDMatrixLoadFile(); - TestSparseDMatrixLoadFile(); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + TestSparseDMatrixLoadFile(&ctx); + TestSparseDMatrixLoadFile(&ctx); + TestSparseDMatrixLoadFile(&ctx); } // allow caller to retain pages so they can process multiple pages at the same time. template void TestRetainPage() { auto m = CreateSparsePageDMatrix(10000); - auto batches = m->GetBatches(); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + auto batches = m->GetBatches(&ctx); auto begin = batches.begin(); auto end = batches.end(); @@ -100,7 +102,7 @@ void TestRetainPage() { } // make sure it's const and the caller can not modify the content of page. - for (auto& page : m->GetBatches()) { + for (auto &page : m->GetBatches({&ctx})) { static_assert(std::is_const>::value); } } @@ -143,10 +145,11 @@ TEST(SparsePageDMatrix, ColAccess) { const std::string tmp_file = tempdir.path + "/simple.libsvm"; CreateSimpleTestData(tmp_file); xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file, tmp_file)); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); // Loop over the batches and assert the data is as expected size_t iter = 0; - for (auto const &col_batch : dmat->GetBatches()) { + for (auto const &col_batch : dmat->GetBatches(&ctx)) { auto col_page = col_batch.GetView(); ASSERT_EQ(col_page.Size(), dmat->Info().num_col_); if (iter == 1) { @@ -164,7 +167,7 @@ TEST(SparsePageDMatrix, ColAccess) { // Loop over the batches and assert the data is as expected iter = 0; - for (auto const &col_batch : dmat->GetBatches()) { + for (auto const &col_batch : dmat->GetBatches(&ctx)) { auto col_page = col_batch.GetView(); EXPECT_EQ(col_page.Size(), dmat->Info().num_col_); if (iter == 0) { @@ -182,9 +185,9 @@ TEST(SparsePageDMatrix, ColAccess) { TEST(SparsePageDMatrix, ThreadSafetyException) { size_t constexpr kEntriesPerCol = 3; size_t constexpr kEntries = 64 * kEntriesPerCol * 2; + Context ctx; - std::unique_ptr dmat = - xgboost::CreateSparsePageDMatrix(kEntries); + std::unique_ptr dmat = xgboost::CreateSparsePageDMatrix(kEntries); int threads = 1000; @@ -221,7 +224,8 @@ TEST(SparsePageDMatrix, ColAccessBatches) { // Create multiple sparse pages std::unique_ptr dmat{xgboost::CreateSparsePageDMatrix(kEntries)}; ASSERT_EQ(dmat->Ctx()->Threads(), AllThreadsForTest()); - for (auto const &page : dmat->GetBatches()) { + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + for (auto const &page : dmat->GetBatches(&ctx)) { ASSERT_EQ(dmat->Info().num_col_, page.Size()); } } diff --git a/tests/cpp/data/test_sparse_page_dmatrix.cu 
b/tests/cpp/data/test_sparse_page_dmatrix.cu index 55a44e458b99..846fe7f634ee 100644 --- a/tests/cpp/data/test_sparse_page_dmatrix.cu +++ b/tests/cpp/data/test_sparse_page_dmatrix.cu @@ -1,15 +1,20 @@ /** * Copyright 2019-2023 by XGBoost Contributors */ +#include // for DMatrix + #include "../../../src/common/compressed_iterator.h" #include "../../../src/data/ellpack_page.cuh" #include "../../../src/data/sparse_page_dmatrix.h" -#include "../filesystem.h" // dmlc::TemporaryDirectory +#include "../../../src/tree/param.h" // TrainParam +#include "../filesystem.h" // dmlc::TemporaryDirectory #include "../helpers.h" namespace xgboost { TEST(SparsePageDMatrix, EllpackPage) { + Context ctx{MakeCUDACtx(0)}; + auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()}; dmlc::TemporaryDirectory tempdir; const std::string tmp_file = tempdir.path + "/simple.libsvm"; CreateSimpleTestData(tmp_file); @@ -17,7 +22,7 @@ TEST(SparsePageDMatrix, EllpackPage) { // Loop over the batches and assert the data is as expected size_t n = 0; - for (const auto& batch : dmat->GetBatches({0, 256})) { + for (const auto& batch : dmat->GetBatches(&ctx, param)) { n += batch.Size(); } EXPECT_EQ(n, dmat->Info().num_row_); @@ -37,6 +42,8 @@ TEST(SparsePageDMatrix, EllpackPage) { } TEST(SparsePageDMatrix, MultipleEllpackPages) { + Context ctx{MakeCUDACtx(0)}; + auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()}; dmlc::TemporaryDirectory tmpdir; std::string filename = tmpdir.path + "/big.libsvm"; size_t constexpr kPageSize = 64, kEntriesPerCol = 3; @@ -46,7 +53,7 @@ TEST(SparsePageDMatrix, MultipleEllpackPages) { // Loop over the batches and count the records int64_t batch_count = 0; int64_t row_count = 0; - for (const auto& batch : dmat->GetBatches({0, 256})) { + for (const auto& batch : dmat->GetBatches(&ctx, param)) { EXPECT_LT(batch.Size(), dmat->Info().num_row_); batch_count++; row_count += batch.Size(); @@ -61,8 +68,11 @@ TEST(SparsePageDMatrix, MultipleEllpackPages) { } TEST(SparsePageDMatrix, RetainEllpackPage) { + Context ctx{MakeCUDACtx(0)}; + auto param = BatchParam{32, tree::TrainParam::DftSparseThreshold()}; auto m = CreateSparsePageDMatrix(10000); - auto batches = m->GetBatches({0, 32}); + + auto batches = m->GetBatches(&ctx, param); auto begin = batches.begin(); auto end = batches.end(); @@ -87,7 +97,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) { } // make sure it's const and the caller can not modify the content of page. 
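// Spelled out, the compile-time guarantee checked below is (a sketch of the assertion;
// decltype(page) inside the range-for is a reference to a const page):
//
//   static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value,
//                 "pages handed out by BatchSet must be read-only");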
-  for (auto& page : m->GetBatches<EllpackPage>({0, 32})) {
+  for (auto& page : m->GetBatches<EllpackPage>(&ctx, param)) {
     static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
   }
@@ -98,6 +108,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
 }
 
 TEST(SparsePageDMatrix, EllpackPageContent) {
+  auto ctx = CreateEmptyGenericParam(0);
   constexpr size_t kRows = 6;
   constexpr size_t kCols = 2;
   constexpr size_t kPageSize = 1;
@@ -110,8 +121,8 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
   std::unique_ptr<DMatrix> dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
 
-  BatchParam param{0, 2};
-  auto impl = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
+  auto param = BatchParam{2, tree::TrainParam::DftSparseThreshold()};
+  auto impl = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
   EXPECT_EQ(impl->base_rowid, 0);
   EXPECT_EQ(impl->n_rows, kRows);
   EXPECT_FALSE(impl->is_dense);
@@ -120,7 +131,7 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
   std::unique_ptr<EllpackPageImpl> impl_ext;
   size_t offset = 0;
-  for (auto& batch : dmat_ext->GetBatches<EllpackPage>(param)) {
+  for (auto& batch : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
     if (!impl_ext) {
       impl_ext.reset(new EllpackPageImpl(
           batch.Impl()->gidx_buffer.DeviceIdx(), batch.Impl()->Cuts(),
@@ -170,8 +181,9 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
   std::unique_ptr<DMatrix> dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
 
-  BatchParam param{0, kMaxBins};
-  auto impl = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
+  Context ctx{MakeCUDACtx(0)};
+  auto param = BatchParam{kMaxBins, tree::TrainParam::DftSparseThreshold()};
+  auto impl = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
   EXPECT_EQ(impl->base_rowid, 0);
   EXPECT_EQ(impl->n_rows, kRows);
 
@@ -180,7 +192,7 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
   thrust::device_vector<bst_float> row_ext_d(kCols);
   std::vector<bst_float> row(kCols);
   std::vector<bst_float> row_ext(kCols);
-  for (auto& page : dmat_ext->GetBatches<EllpackPage>(param)) {
+  for (auto& page : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
     auto impl_ext = page.Impl();
     EXPECT_EQ(impl_ext->base_rowid, current_row);
 
@@ -211,10 +223,11 @@ TEST(SparsePageDMatrix, EllpackPageMultipleLoops) {
   std::unique_ptr<DMatrix> dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
 
-  BatchParam param{0, kMaxBins};
+  Context ctx{MakeCUDACtx(0)};
+  auto param = BatchParam{kMaxBins, tree::TrainParam::DftSparseThreshold()};
   size_t current_row = 0;
-  for (auto& page : dmat_ext->GetBatches<EllpackPage>(param)) {
+  for (auto& page : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
     auto impl_ext = page.Impl();
     EXPECT_EQ(impl_ext->base_rowid, current_row);
     current_row += impl_ext->n_rows;
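All of the ELLPACK queries above share one shape: the device now comes from the Context, while BatchParam carries only the histogram settings. A hedged sketch of that shape, assuming the MakeCUDACtx test helper and tree::TrainParam::DftSparseThreshold() from src/tree/param.h; CountEllpackRows is an illustrative name and the include paths follow the test layout:

#include <xgboost/data.h>  // DMatrix, BatchParam, EllpackPage

#include <cstddef>  // std::size_t

#include "../../../src/tree/param.h"  // tree::TrainParam
#include "../helpers.h"               // MakeCUDACtx (test-only helper)

namespace xgboost {
inline std::size_t CountEllpackRows(DMatrix* dmat) {
  Context ctx{MakeCUDACtx(0)};  // CUDA device 0 lives on the context now
  auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
  std::size_t n = 0;
  for (auto const& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
    n += batch.Size();  // rows held by this page
  }
  return n;
}
}  // namespace xgboost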
diff --git a/tests/cpp/data/test_sparse_page_raw_format.cc b/tests/cpp/data/test_sparse_page_raw_format.cc
index 5743c4223a1b..722655880899 100644
--- a/tests/cpp/data/test_sparse_page_raw_format.cc
+++ b/tests/cpp/data/test_sparse_page_raw_format.cc
@@ -1,17 +1,24 @@
-/*!
- * Copyright 2021 XGBoost contributors
+/**
+ * Copyright 2021-2023, XGBoost contributors
  */
 #include <gtest/gtest.h>
-#include <xgboost/data.h>
+#include <xgboost/data.h>  // for CSCPage, SortedCSCPage, SparsePage
 
-#include "../../../src/data/sparse_page_source.h"
-#include "../filesystem.h"  // dmlc::TemporaryDirectory
-#include "../helpers.h"
+#include <memory>  // for allocator, unique_ptr, __shared_ptr_ac...
+#include <string>  // for char_traits, operator+, basic_string
+
+#include "../../../src/data/sparse_page_writer.h"  // for CreatePageFormat
+#include "../helpers.h"                             // for RandomDataGenerator
+#include "dmlc/filesystem.h"                        // for TemporaryDirectory
+#include "dmlc/io.h"                                // for SeekStream, Stream
+#include "gtest/gtest_pred_impl.h"                  // for Test, AssertionResult, ASSERT_EQ, TEST
+#include "xgboost/context.h"                        // for Context
 
 namespace xgboost {
 namespace data {
 template <typename S>
 void TestSparsePageRawFormat() {
   std::unique_ptr<SparsePageFormat<S>> format{CreatePageFormat<S>("raw")};
+  Context ctx;
   auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
   ASSERT_TRUE(m->SingleColBlock());
@@ -21,7 +28,7 @@ template <typename S> void TestSparsePageRawFormat() {
   {
     // block code to flush the stream
     std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
-    for (auto const &page : m->GetBatches<S>()) {
+    for (auto const &page : m->GetBatches<S>(&ctx)) {
       orig.Push(page);
       format->Write(page, fo.get());
     }
diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h
index 5e65a163644f..3a56bd27f33f 100644
--- a/tests/cpp/helpers.h
+++ b/tests/cpp/helpers.h
@@ -388,6 +388,11 @@ inline Context CreateEmptyGenericParam(int gpu_id) {
   return tparam;
 }
 
+/**
+ * \brief Make a context that uses CUDA.
+ */
+inline Context MakeCUDACtx(std::int32_t device) { return Context{}.MakeCUDA(device); }
+
 inline HostDeviceVector<GradientPair> GenerateRandomGradients(const size_t n_rows,
                                                               float lower = 0.0f,
                                                               float upper = 1.0f) {
   xgboost::SimpleLCG gen;
diff --git a/tests/cpp/test_serialization.cc b/tests/cpp/test_serialization.cc
index 731f85563092..a01a21ef611c 100644
--- a/tests/cpp/test_serialization.cc
+++ b/tests/cpp/test_serialization.cc
@@ -203,7 +203,11 @@ void TestLearnerSerialization(Args args, FeatureMap const& fmap, std::shared_ptr
     learner->Save(&mem_out);
     ASSERT_EQ(model_at_kiter, serialised_model_tmp);
 
-    learner->SetParam("gpu_id", "0");
+    for (auto const& [key, value] : args) {
+      if (key == "tree_method" && value == "gpu_hist") {
+        learner->SetParam("gpu_id", "0");
+      }
+    }
     // Pull data to device
     for (auto &batch : p_dmat->GetBatches<SparsePage>()) {
       batch.data.SetDevice(0);
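Usage sketch for the MakeCUDACtx helper added to helpers.h above. It copies a default Context with the device id set, so a CPU twin of the same configuration can be recovered later; the assertions are illustrative, and the helper is assumed to live in the xgboost namespace like its neighbours:

#include <xgboost/context.h>  // Context
#include <xgboost/logging.h>  // CHECK

#include "../helpers.h"  // MakeCUDACtx (test-only helper)

namespace xgboost {
inline void ContextRoundTrip() {
  Context ctx = MakeCUDACtx(0);  // copy of a default Context with gpu_id = 0
  CHECK(ctx.IsCUDA());
  Context cpu = ctx.MakeCPU();  // same parameters, device reset to kCpuId
  CHECK(cpu.IsCPU());
}
}  // namespace xgboost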
diff --git a/tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu b/tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu
index e211fe70a483..95ae02aee46b 100644
--- a/tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu
+++ b/tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu
@@ -1,12 +1,13 @@
-/*!
- * Copyright 2020-2021 by XGBoost Contributors
+/**
+ * Copyright 2020-2023, XGBoost Contributors
  */
 #include <gtest/gtest.h>
 
 #include "../../../../src/data/ellpack_page.cuh"
 #include "../../../../src/tree/gpu_hist/gradient_based_sampler.cuh"
-#include "../../../../src/tree/param.h"
-#include "../../filesystem.h"  // dmlc::TemporaryDirectory
+#include "../../../../src/tree/param.h"  // TrainParam
+#include "../../filesystem.h"            // dmlc::TemporaryDirectory
 #include "../../helpers.h"
 
 namespace xgboost {
@@ -31,14 +32,15 @@ void VerifySampling(size_t page_size,
   }
   gpair.SetDevice(0);
 
-  BatchParam param{0, 256};
-  auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
+  Context ctx{MakeCUDACtx(0)};
+  auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
+  auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
   if (page_size != 0) {
     EXPECT_NE(page->n_rows, kRows);
   }
 
-  GradientBasedSampler sampler(page, kRows, param, subsample, sampling_method);
-  auto sample = sampler.Sample(gpair.DeviceSpan(), dmat.get());
+  GradientBasedSampler sampler(&ctx, page, kRows, param, subsample, sampling_method);
+  auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
 
   if (fixed_size_sampling) {
     EXPECT_EQ(sample.sample_rows, kRows);
@@ -86,12 +88,13 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
   auto gpair = GenerateRandomGradients(kRows);
   gpair.SetDevice(0);
 
-  BatchParam param{0, 256};
-  auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
+  Context ctx{MakeCUDACtx(0)};
+  auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
+  auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
   EXPECT_NE(page->n_rows, kRows);
 
-  GradientBasedSampler sampler(page, kRows, param, kSubsample, TrainParam::kUniform);
-  auto sample = sampler.Sample(gpair.DeviceSpan(), dmat.get());
+  GradientBasedSampler sampler(&ctx, page, kRows, param, kSubsample, TrainParam::kUniform);
+  auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
   auto sampled_page = sample.page;
   EXPECT_EQ(sample.sample_rows, kRows);
   EXPECT_EQ(sample.gpair.size(), gpair.Size());
@@ -103,7 +106,7 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
       ci(buffer.data(), sampled_page->NumSymbols());
 
   size_t offset = 0;
-  for (auto& batch : dmat->GetBatches<EllpackPage>(param)) {
+  for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
     auto page = batch.Impl();
     std::vector<common::CompressedByteT> page_buffer(page->gidx_buffer.HostVector());
     common::CompressedIterator<common::CompressedByteT>
diff --git a/tests/cpp/tree/gpu_hist/test_histogram.cu b/tests/cpp/tree/gpu_hist/test_histogram.cu
index 95fe66138333..024a1e8d3a92 100644
--- a/tests/cpp/tree/gpu_hist/test_histogram.cu
+++ b/tests/cpp/tree/gpu_hist/test_histogram.cu
@@ -1,9 +1,14 @@
+/**
+ * Copyright 2020-2023, XGBoost Contributors
+ */
 #include <gtest/gtest.h>
+
 #include <vector>
 
 #include "../../../../src/common/categorical.h"
 #include "../../../../src/tree/gpu_hist/histogram.cuh"
 #include "../../../../src/tree/gpu_hist/row_partitioner.cuh"
+#include "../../../../src/tree/param.h"  // TrainParam
 #include "../../categorical_helpers.h"
 #include "../../helpers.h"
 
@@ -11,15 +16,15 @@ namespace xgboost {
 namespace tree {
 
 void TestDeterministicHistogram(bool is_dense, int shm_size) {
-  Context ctx = CreateEmptyGenericParam(0);
+  Context ctx = MakeCUDACtx(0);
   size_t constexpr kBins = 256, kCols = 120, kRows = 16384, kRounds = 16;
   float constexpr kLower = -1e-2, kUpper = 1e2;
 
   float sparsity = is_dense ? 0.0f : 0.5f;
   auto matrix = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix();
-  BatchParam batch_param{0, static_cast<int32_t>(kBins)};
+  auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()};
 
-  for (auto const& batch : matrix->GetBatches<EllpackPage>(batch_param)) {
+  for (auto const& batch : matrix->GetBatches<EllpackPage>(&ctx, batch_param)) {
     auto* page = batch.Impl();
 
     tree::RowPartitioner row_partitioner(0, kRows);
@@ -114,13 +119,13 @@ void ValidateCategoricalHistogram(size_t n_categories, common::Span
   cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
-  BatchParam batch_param{0, static_cast<int32_t>(kBins)};
+  auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()};
   tree::RowPartitioner row_partitioner(0, kRows);
   auto ridx = row_partitioner.GetRows(0);
   dh::device_vector<GradientPairInt64> cat_hist(num_categories);
@@ -130,7 +135,7 @@ void TestGPUHistogramCategorical(size_t num_categories) {
   /**
    * Generate hist with cat data.
    */
-  for (auto const &batch : cat_m->GetBatches<EllpackPage>(batch_param)) {
+  for (auto const &batch : cat_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
     auto* page = batch.Impl();
     FeatureGroups single_group(page->Cuts());
     BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
@@ -144,7 +149,7 @@ void TestGPUHistogramCategorical(size_t num_categories) {
   auto x_encoded = OneHotEncodeFeature(x, num_categories);
   auto encode_m = GetDMatrixFromData(x_encoded, kRows, num_categories);
   dh::device_vector<GradientPairInt64> encode_hist(2 * num_categories);
-  for (auto const &batch : encode_m->GetBatches<EllpackPage>(batch_param)) {
+  for (auto const &batch : encode_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
     auto* page = batch.Impl();
     FeatureGroups single_group(page->Cuts());
     BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
diff --git a/tests/cpp/tree/hist/test_evaluate_splits.cc b/tests/cpp/tree/hist/test_evaluate_splits.cc
index dcd04f68a969..c53d9d90b477 100644
--- a/tests/cpp/tree/hist/test_evaluate_splits.cc
+++ b/tests/cpp/tree/hist/test_evaluate_splits.cc
@@ -41,7 +41,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
   size_t constexpr kMaxBins = 4;
 
   // dense, no missing values
-  GHistIndexMatrix gmat(dmat.get(), kMaxBins, 0.5, false, AllThreadsForTest());
+  GHistIndexMatrix gmat(&ctx, dmat.get(), kMaxBins, 0.5, false);
   common::RowSetCollection row_set_collection;
   std::vector<size_t> &row_indices = *row_set_collection.Data();
   row_indices.resize(kRows);
@@ -228,7 +228,7 @@ auto CompareOneHotAndPartition(bool onehot) {
   auto evaluator = HistEvaluator{&ctx, &param, dmat->Info(), sampler};
   std::vector<CPUExpandEntry> entries(1);
 
-  for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({32, param.sparse_threshold})) {
+  for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>(&ctx, {32, param.sparse_threshold})) {
     common::HistCollection hist;
     entries.front().nid = 0;
diff --git a/tests/cpp/tree/hist/test_histogram.cc b/tests/cpp/tree/hist/test_histogram.cc
index 3b354bebbb3f..2e620fd1044d 100644
--- a/tests/cpp/tree/hist/test_histogram.cc
+++ b/tests/cpp/tree/hist/test_histogram.cc
@@ -25,6 +25,7 @@ void InitRowPartitionForTest(common::RowSetCollection *row_set, size_t n_samples
 }  // anonymous namespace
 
 void TestAddHistRows(bool is_distributed) {
+  auto ctx = CreateEmptyGenericParam(Context::kCpuId);
   std::vector<CPUExpandEntry> nodes_for_explicit_hist_build_;
   std::vector<CPUExpandEntry> nodes_for_subtraction_trick_;
   int starting_index = std::numeric_limits<int>::max();
@@ -32,9 +33,9 @@ void TestAddHistRows(bool is_distributed) {
   size_t constexpr kNRows = 8, kNCols = 16;
   int32_t constexpr kMaxBins = 4;
-  auto p_fmat =
-      RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
-  auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
+  auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
+  auto const &gmat =
+      *(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());
 
   RegTree tree;
 
@@ -73,6 +74,7 @@ TEST(CPUHistogram, AddRows) {
 
 void TestSyncHist(bool is_distributed) {
   size_t constexpr kNRows = 8, kNCols = 16;
   int32_t constexpr kMaxBins = 4;
+  auto ctx = CreateEmptyGenericParam(Context::kCpuId);
 
   std::vector<CPUExpandEntry> nodes_for_explicit_hist_build_;
   std::vector<CPUExpandEntry> nodes_for_subtraction_trick_;
@@ -80,9 +82,9 @@ void TestSyncHist(bool is_distributed) {
   int sync_count = 0;
   RegTree tree;
 
-  auto p_fmat =
-      RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
-  auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
+  auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
+  auto const &gmat =
+      *(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());
 
   HistogramBuilder<CPUExpandEntry> histogram;
   uint32_t total_bins = gmat.cut.Ptrs().back();
@@ -227,12 +229,15 @@ TEST(CPUHistogram, SyncHist) {
 
 void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_col_split) {
   size_t constexpr kNRows = 8, kNCols = 16;
   int32_t constexpr kMaxBins = 4;
-  auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
+  auto ctx = CreateEmptyGenericParam(Context::kCpuId);
+  auto p_fmat =
+      RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
   if (is_col_split) {
     p_fmat = std::shared_ptr<DMatrix>{
         p_fmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
   }
-  auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
+  auto const &gmat =
+      *(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());
   uint32_t total_bins = gmat.cut.Ptrs().back();
 
   static double constexpr kEps = 1e-6;
@@ -257,9 +262,9 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
   CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
   std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
   nodes_for_explicit_hist_build.push_back(node);
-  for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>({kMaxBins, 0.5})) {
-    histogram.BuildHist(0, gidx, &tree, row_set_collection,
-                        nodes_for_explicit_hist_build, {}, gpair, force_read_by_column);
+  for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, {kMaxBins, 0.5})) {
+    histogram.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
+                        gpair, force_read_by_column);
   }
 
   // Check if number of histogram bins is correct
@@ -325,6 +330,8 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
   auto x = GenerateRandomCategoricalSingleColumn(kRows, n_categories);
   auto cat_m = GetDMatrixFromData(x, kRows, 1);
   cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
+  auto ctx = CreateEmptyGenericParam(Context::kCpuId);
+
   BatchParam batch_param{0, static_cast<int32_t>(kBins)};
 
   RegTree tree;
@@ -345,12 +352,11 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
   /**
    * Generate hist with cat data.
   */
  HistogramBuilder<CPUExpandEntry> cat_hist;
-  for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
+  for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>(&ctx, {kBins, 0.5})) {
     auto total_bins = gidx.cut.TotalBins();
     cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
-    cat_hist.BuildHist(0, gidx, &tree, row_set_collection,
-                       nodes_for_explicit_hist_build, {}, gpair.HostVector(),
-                       force_read_by_column);
+    cat_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
+                       gpair.HostVector(), force_read_by_column);
   }
 
   /**
@@ -359,12 +365,11 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
   auto x_encoded = OneHotEncodeFeature(x, n_categories);
   auto encode_m = GetDMatrixFromData(x_encoded, kRows, n_categories);
   HistogramBuilder<CPUExpandEntry> onehot_hist;
-  for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
+  for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>(&ctx, {kBins, 0.5})) {
     auto total_bins = gidx.cut.TotalBins();
     onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
     onehot_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
-                          gpair.HostVector(),
-                          force_read_by_column);
+                          gpair.HostVector(), force_read_by_column);
   }
 
   auto cat = cat_hist.Histogram()[0];
@@ -382,8 +387,8 @@ TEST(CPUHistogram, Categorical) {
   }
 }
 namespace {
-void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool force_read_by_column) {
-  Context ctx;
+void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, bool is_approx,
+                                 bool force_read_by_column) {
   size_t constexpr kEntries = 1 << 16;
   auto m = CreateSparsePageDMatrix(kEntries, "cache");
 
@@ -410,7 +415,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
    * Multi page
    */
  std::vector<common::RowSetCollection> rows_set;
-  for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
+  for (auto const &page : m->GetBatches<GHistIndexMatrix>(ctx, batch_param)) {
     CHECK_LT(page.base_rowid, m->Info().num_row_);
     auto n_rows_in_node = page.Size();
     partition_size[0] = std::max(partition_size[0], n_rows_in_node);
@@ -426,12 +431,12 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
       1, [&](size_t nidx_in_set) { return partition_size.at(nidx_in_set); },
       256};
 
-  multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false, false);
+  multi_build.Reset(total_bins, batch_param, ctx->Threads(), rows_set.size(), false, false);
   size_t page_idx{0};
-  for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
-    multi_build.BuildHist(page_idx, space, page, &tree, rows_set.at(page_idx), nodes, {},
-                          h_gpair, force_read_by_column);
+  for (auto const &page : m->GetBatches<GHistIndexMatrix>(ctx, batch_param)) {
+    multi_build.BuildHist(page_idx, space, page, &tree, rows_set.at(page_idx), nodes, {}, h_gpair,
+                          force_read_by_column);
     ++page_idx;
   }
   ASSERT_EQ(page_idx, 2);
@@ -447,16 +452,16 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
     common::RowSetCollection row_set_collection;
     InitRowPartitionForTest(&row_set_collection, n_samples);
 
-    single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false, false);
+    single_build.Reset(total_bins, batch_param, ctx->Threads(), 1, false, false);
     SparsePage concat;
     std::vector<float> hess(m->Info().num_row_, 1.0f);
     for (auto const& page : m->GetBatches<SparsePage>()) {
       concat.Push(page);
     }
-    auto cut = common::SketchOnDMatrix(m.get(), batch_param.max_bin, ctx.Threads(), false, hess);
+    auto cut = common::SketchOnDMatrix(ctx, m.get(), batch_param.max_bin, false, hess);
     GHistIndexMatrix gmat(concat, {}, cut, batch_param.max_bin, false,
-                          std::numeric_limits<float>::quiet_NaN(), ctx.Threads());
+                          std::numeric_limits<float>::quiet_NaN(), ctx->Threads());
     single_build.BuildHist(0, gmat, &tree, row_set_collection, nodes, {}, h_gpair, force_read_by_column);
     single_page = single_build.Histogram()[0];
   }
@@ -470,16 +475,17 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
 
 TEST(CPUHistogram, ExternalMemory) {
   int32_t constexpr kBins = 256;
-  TestHistogramExternalMemory(BatchParam{kBins, common::Span<float>{}, false}, true, false);
-  TestHistogramExternalMemory(BatchParam{kBins, common::Span<float>{}, false}, true, true);
+  auto ctx = CreateEmptyGenericParam(Context::kCpuId);
+
+  TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span<float>{}, false}, true, false);
+  TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span<float>{}, false}, true, true);
 
   float sparse_thresh{0.5};
-  TestHistogramExternalMemory({kBins, sparse_thresh}, false, false);
-  TestHistogramExternalMemory({kBins, sparse_thresh}, false, true);
+  TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);
+  TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);
   sparse_thresh = std::numeric_limits<float>::quiet_NaN();
-  TestHistogramExternalMemory({kBins, sparse_thresh}, false, false);
-  TestHistogramExternalMemory({kBins, sparse_thresh}, false, true);
-
+  TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);
+  TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);
 }
 }  // namespace tree
 }  // namespace xgboost
diff --git a/tests/cpp/tree/test_approx.cc b/tests/cpp/tree/test_approx.cc
index 6f2b8351163b..38da629b1438 100644
--- a/tests/cpp/tree/test_approx.cc
+++ b/tests/cpp/tree/test_approx.cc
@@ -34,7 +34,7 @@ TEST(Approx, Partitioner) {
   std::vector<CPUExpandEntry> candidates{{0, 0}};
   candidates.front().split.loss_chg = 0.4;
 
-  for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
+  for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, {64, hess, true})) {
     bst_feature_t const split_ind = 0;
     {
       auto min_value = page.cut.MinValues()[split_ind];
@@ -84,7 +84,7 @@ void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared
   Context ctx;
   ctx.InitAllowUnknown(Args{});
-  for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({64, *hess, true})) {
+  for (auto const& page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, *hess, true})) {
     {
       RegTree tree;
       CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
@@ -133,7 +133,7 @@ TEST(Approx, PartitionerColSplit) {
   Context ctx;
   ctx.InitAllowUnknown(Args{});
   CommonRowPartitioner mid_partitioner{&ctx, n_samples, base_rowid, false};
-  for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
+  for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, {64, hess, true})) {
     bst_feature_t const split_ind = 0;
     min_value = page.cut.MinValues()[split_ind];
diff --git a/tests/cpp/tree/test_common_partitioner.cc b/tests/cpp/tree/test_common_partitioner.cc
index 7e47ec28993e..116802c6a930 100644
--- a/tests/cpp/tree/test_common_partitioner.cc
+++ b/tests/cpp/tree/test_common_partitioner.cc
@@ -43,7 +43,7 @@ void TestLeafPartition(size_t n_samples) {
   std::vector h_nptr;
   float split_value{0};
-  for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
+  for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{64, 0.2})) {
     bst_feature_t const split_ind = 0;
     auto ptr = page.cut.Ptrs()[split_ind + 1];
     split_value = page.cut.Values().at(ptr / 2);
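The approx-method loops in test_approx.cc above rely on the hessian-carrying BatchParam: the span weights the quantile sketch, and regen=true forces a rebuild when the hessian changes, so the caller must keep the vector alive while iterating. A sketch under those assumptions (IterateWeighted is an illustrative name; GHistIndexMatrix comes from the internal header src/data/gradient_index.h, with the path abbreviated here):

#include <xgboost/data.h>  // DMatrix, BatchParam
#include <xgboost/span.h>  // common::Span

#include <vector>

#include "../../src/data/gradient_index.h"  // GHistIndexMatrix (internal)

namespace xgboost {
inline void IterateWeighted(Context const* ctx, DMatrix* dmat) {
  std::vector<float> hess(dmat->Info().num_row_, 1.0f);
  // max_bin = 64; regen = true, so a changed hessian regenerates the index.
  auto param = BatchParam{64, common::Span<float>{hess}, true};
  for (auto const& page : dmat->GetBatches<GHistIndexMatrix>(ctx, param)) {
    (void)page.base_rowid;  // first row carried by this page
  }
}
}  // namespace xgboost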
diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu
index 003347c8e29a..42ab10ed4d59 100644
--- a/tests/cpp/tree/test_gpu_hist.cu
+++ b/tests/cpp/tree/test_gpu_hist.cu
@@ -208,17 +208,16 @@ TEST(GpuHist, TestHistogramIndex) {
   TestHistogramIndexImpl();
 }
 
-void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
-                size_t gpu_page_size, RegTree* tree,
-                HostDeviceVector<bst_float>* preds, float subsample = 1.0f,
-                const std::string& sampling_method = "uniform",
+void UpdateTree(Context const* ctx, HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
+                size_t gpu_page_size, RegTree* tree, HostDeviceVector<bst_float>* preds,
+                float subsample = 1.0f, const std::string& sampling_method = "uniform",
                 int max_bin = 2) {
-
   if (gpu_page_size > 0) {
     // Loop over the batches and count the records
     int64_t batch_count = 0;
     int64_t row_count = 0;
-    for (const auto& batch : dmat->GetBatches<EllpackPage>({0, max_bin})) {
+    for (const auto& batch : dmat->GetBatches<EllpackPage>(
+             ctx, BatchParam{max_bin, TrainParam::DftSparseThreshold()})) {
       EXPECT_LT(batch.Size(), dmat->Info().num_row_);
       batch_count++;
       row_count += batch.Size();
@@ -239,14 +238,13 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
   TrainParam param;
   param.UpdateAllowUnknown(args);
 
-  Context ctx(CreateEmptyGenericParam(0));
   ObjInfo task{ObjInfo::kRegression};
-  tree::GPUHistMaker hist_maker{&ctx, &task};
+  tree::GPUHistMaker hist_maker{ctx, &task};
 
   std::vector<HostDeviceVector<bst_node_t>> position(1);
   hist_maker.Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
                     {tree});
-  auto cache = linalg::MakeTensorView(&ctx, preds->DeviceSpan(), preds->Size(), 1);
+  auto cache = linalg::MakeTensorView(ctx, preds->DeviceSpan(), preds->Size(), 1);
   hist_maker.UpdatePredictionCache(dmat, cache);
 }
 
@@ -264,12 +262,13 @@ TEST(GpuHist, UniformSampling) {
   // Build a tree using the in-memory DMatrix.
   RegTree tree;
   HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
-  UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
+  Context ctx(CreateEmptyGenericParam(0));
+  UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
   // Build another tree using sampling.
   RegTree tree_sampling;
   HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
-  UpdateTree(&gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
-             "uniform", kRows);
+  UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample, "uniform",
+             kRows);
 
   // Make sure the predictions are the same.
   auto preds_h = preds.ConstHostVector();
@@ -293,12 +292,13 @@ TEST(GpuHist, GradientBasedSampling) {
   // Build a tree using the in-memory DMatrix.
   RegTree tree;
   HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
-  UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
+  Context ctx(CreateEmptyGenericParam(0));
+  UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
 
   // Build another tree using sampling.
   RegTree tree_sampling;
   HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
-  UpdateTree(&gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
+  UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
              "gradient_based", kRows);
 
   // Make sure the predictions are the same.
@@ -327,12 +327,13 @@ TEST(GpuHist, ExternalMemory) {
 
   // Build a tree using the in-memory DMatrix.
   RegTree tree;
+  Context ctx(CreateEmptyGenericParam(0));
   HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
-  UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
+  UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
   // Build another tree using multiple ELLPACK pages.
   RegTree tree_ext;
   HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
-  UpdateTree(&gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, 1.0, "uniform", kRows);
+  UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, 1.0, "uniform", kRows);
 
   // Make sure the predictions are the same.
   auto preds_h = preds.ConstHostVector();
@@ -364,17 +365,17 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
 
   // Build a tree using the in-memory DMatrix.
   auto rng = common::GlobalRandom();
+  Context ctx(CreateEmptyGenericParam(0));
 
   RegTree tree;
   HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
-  UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod,
-             kRows);
+  UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod, kRows);
 
   // Build another tree using multiple ELLPACK pages.
   common::GlobalRandom() = rng;
   RegTree tree_ext;
   HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
-  UpdateTree(&gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext,
-             kSubsample, kSamplingMethod, kRows);
+  UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, kSubsample,
+             kSamplingMethod, kRows);
 
   // Make sure the predictions are the same.
   auto preds_h = preds.ConstHostVector();
diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc
index 2aa1b8f47847..e5ce75585c30 100644
--- a/tests/cpp/tree/test_quantile_hist.cc
+++ b/tests/cpp/tree/test_quantile_hist.cc
@@ -36,7 +36,7 @@ void TestPartitioner(bst_target_t n_targets) {
   std::vector<CPUExpandEntry> candidates{{0, 0}};
   candidates.front().split.loss_chg = 0.4;
 
-  auto cuts = common::SketchOnDMatrix(Xy.get(), 64, ctx.Threads());
+  auto cuts = common::SketchOnDMatrix(&ctx, Xy.get(), 64);
 
   for (auto const& page : Xy->GetBatches<SparsePage>()) {
     GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads());
diff --git a/tests/cpp/tree/test_regen.cc b/tests/cpp/tree/test_regen.cc
index b766e0775891..d0fe5b449ee2 100644
--- a/tests/cpp/tree/test_regen.cc
+++ b/tests/cpp/tree/test_regen.cc
@@ -15,16 +15,17 @@ class DMatrixForTest : public data::SimpleDMatrix {
  public:
   using SimpleDMatrix::SimpleDMatrix;
 
-  BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam& param) override {
+  BatchSet<GHistIndexMatrix> GetGradientIndex(Context const* ctx,
+                                              const BatchParam& param) override {
     auto backup = this->gradient_index_;
-    auto iter = SimpleDMatrix::GetGradientIndex(param);
+    auto iter = SimpleDMatrix::GetGradientIndex(ctx, param);
     n_regen_ += (backup != this->gradient_index_);
     return iter;
   }
 
-  BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) override {
+  BatchSet<EllpackPage> GetEllpackBatches(Context const* ctx, const BatchParam& param) override {
     auto backup = this->ellpack_page_;
-    auto iter = SimpleDMatrix::GetEllpackBatches(param);
+    auto iter = SimpleDMatrix::GetEllpackBatches(ctx, param);
     n_regen_ += (backup != this->ellpack_page_);
     return iter;
   }
@@ -50,8 +51,8 @@ class RegenTest : public ::testing::Test {
     HostDeviceVector<float> storage;
     auto dense = RandomDataGenerator{kRows, kCols, 0.5}.GenerateArrayInterface(&storage);
     auto adapter = data::ArrayAdapter(StringView{dense});
-    p_fmat_ = std::shared_ptr<DMatrix>(new DMatrixForTest{
-        &adapter, std::numeric_limits<float>::quiet_NaN(), AllThreadsForTest()});
+    p_fmat_ = std::shared_ptr<DMatrix>(
+        new DMatrixForTest{&adapter, std::numeric_limits<float>::quiet_NaN(), AllThreadsForTest()});
 
     p_fmat_->Info().labels.Reshape(256, 1);
     auto labels = p_fmat_->Info().labels.Data();
@@ -74,7 +75,7 @@ class RegenTest : public ::testing::Test {
     auto for_test = dynamic_cast<DMatrixForTest *>(p_fmat_.get());
     CHECK(for_test);
     auto backup = for_test->NumRegen();
-    for_test->GetBatches(BatchParam{});
+    for_test->GetBatches(p_fmat_->Ctx(), BatchParam{});
     CHECK_EQ(for_test->NumRegen(), backup);
 
     if (reset) {
diff --git a/tests/python-gpu/test_device_quantile_dmatrix.py b/tests/python-gpu/test_device_quantile_dmatrix.py
index 3cd65e30fe8f..c5b7e4fc5d7e 100644
--- a/tests/python-gpu/test_device_quantile_dmatrix.py
+++ b/tests/python-gpu/test_device_quantile_dmatrix.py
@@ -18,6 +18,7 @@ class TestQuantileDMatrix:
     @pytest.mark.skipif(**tm.no_cupy())
     def test_dmatrix_feature_weights(self) -> None:
         import cupy as cp
+
         rng = cp.random.RandomState(1994)
         data = rng.randn(5, 5)
         m = xgb.DMatrix(data)
@@ -26,23 +27,91 @@ def test_dmatrix_feature_weights(self) -> None:
         m.set_info(feature_weights=feature_weights)
 
         cp.testing.assert_array_equal(
-            cp.array(m.get_float_info('feature_weights')),
-            feature_weights.astype(np.float32))
+            cp.array(m.get_float_info("feature_weights")),
+            feature_weights.astype(np.float32),
+        )
 
     @pytest.mark.skipif(**tm.no_cupy())
     def test_dmatrix_cupy_init(self) -> None:
         import cupy as cp
+
         data = cp.random.randn(5, 5)
         xgb.QuantileDMatrix(data, cp.ones(5, dtype=np.float64))
 
+    @pytest.mark.parametrize(
+        "on_device,tree_method",
+        [(True, "hist"), (False, "gpu_hist"), (False, "hist"), (True, "gpu_hist")],
+    )
+    def test_initialization(self, on_device: bool, tree_method: str) -> None:
+        n_samples, n_features, max_bin = 64, 3, 16
+        X, y, w = tm.make_batches(
+            n_samples,
+            n_features=n_features,
+            n_batches=1,
+            use_cupy=on_device,
+        )
+
+        # Init SparsePage
+        Xy = xgb.DMatrix(X[0], y[0], weight=w[0])
+        # Init GIDX/Ellpack
+        xgb.train(
+            {"tree_method": tree_method, "max_bin": max_bin},
+            Xy,
+            num_boost_round=1,
+        )
+        # query cuts from GIDX/Ellpack
+        qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy)
+        tm.predictor_equal(Xy, qXy)
+        with pytest.raises(ValueError, match="Inconsistent"):
+            # max_bin changed.
+            xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin - 1, ref=Xy)
+
+        # No error, the DMatrix can be modified for a different training session.
+        xgb.train(
+            {"tree_method": tree_method, "max_bin": max_bin - 1},
+            Xy,
+            num_boost_round=1,
+        )
+
+        # Init Ellpack/GIDX
+        Xy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin)
+        # Init GIDX/Ellpack
+        xgb.train(
+            {"tree_method": tree_method, "max_bin": max_bin},
+            Xy,
+            num_boost_round=1,
+        )
+        # query cuts from GIDX/Ellpack
+        qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy)
+        tm.predictor_equal(Xy, qXy)
+        with pytest.raises(ValueError, match="Inconsistent"):
+            # max_bin changed.
+            xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin - 1, ref=Xy)
+
+        Xy = xgb.DMatrix(X[0], y[0], weight=w[0])
+        booster0 = xgb.train(
+            {"tree_method": "hist", "max_bin": max_bin, "max_depth": 4},
+            Xy,
+            num_boost_round=1,
+        )
+        booster1 = xgb.train(
+            {"tree_method": "gpu_hist", "max_bin": max_bin, "max_depth": 4},
+            Xy,
+            num_boost_round=1,
+        )
+        qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy)
+        predt0 = booster0.predict(qXy)
+        predt1 = booster1.predict(qXy)
+        np.testing.assert_allclose(predt0, predt1)
+
     @pytest.mark.skipif(**tm.no_cupy())
     @pytest.mark.parametrize(
-        "tree_method,max_bin", [
-            ("hist", 16), ("gpu_hist", 16), ("hist", 64), ("gpu_hist", 64)
-        ]
+        "tree_method,max_bin",
+        [("hist", 16), ("gpu_hist", 16), ("hist", 64), ("gpu_hist", 64)],
     )
     def test_interoperability(self, tree_method: str, max_bin: int) -> None:
         import cupy as cp
+
         n_samples = 64
         n_features = 3
         X, y, w = tm.make_batches(
@@ -75,6 +144,7 @@ def test_interoperability(self, tree_method: str, max_bin: int) -> None:
     @pytest.mark.skipif(**tm.no_cupy())
     def test_metainfo(self) -> None:
         import cupy as cp
+
         rng = cp.random.RandomState(1994)
         rows = 10
@@ -98,6 +168,7 @@ def test_metainfo(self) -> None:
     @pytest.mark.skipif(**tm.no_cudf())
     def test_ref_dmatrix(self) -> None:
         import cupy as cp
+
         rng = cp.random.RandomState(1994)
         self.cputest.run_ref_dmatrix(rng, "gpu_hist", False)
@@ -158,5 +229,6 @@ def test_ltr(self) -> None:
     @pytest.mark.skipif(**tm.no_cupy())
     def test_check_inf(self) -> None:
         import cupy as cp
+
         rng = cp.random.default_rng(1994)
         check_inf(rng)
diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py
index ea8d5dcb5389..75e403dbe91b 100644
--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@@ -153,12 +153,18 @@ def test_invalid_category(self):
         tm.dataset_strategy
     )
     @settings(deadline=None, max_examples=20, print_blob=True)
-    def test_gpu_hist_device_dmatrix(self, param, num_rounds, dataset):
+    def test_gpu_hist_device_dmatrix(
+        self, param: dict, num_rounds: int, dataset: tm.TestDataset
+    ) -> None:
         # We cannot handle empty dataset yet
         assume(len(dataset.y) > 0)
         param['tree_method'] = 'gpu_hist'
         param = dataset.set_params(param)
-        result = train_result(param, dataset.get_device_dmat(), num_rounds)
+        result = train_result(
+            param,
+            dataset.get_device_dmat(max_bin=param.get("max_bin", None)),
+            num_rounds
+        )
         note(result)
         assert tm.non_increasing(result['train'][dataset.metric], tolerance=1e-3)