diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc index 7107cb2dee7f..dfd80cb68c13 100644 --- a/src/common/hist_util.cc +++ b/src/common/hist_util.cc @@ -187,15 +187,14 @@ class GHistBuildingManager { }; template -void RowsWiseBuildHistKernel(Span gpair, - const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat, - GHistRow hist) { +void RowsWiseBuildHistKernel(Span gpair, Span row_indices, + const GHistIndexMatrix &gmat, GHistRow hist) { constexpr bool kAnyMissing = BuildingManager::kAnyMissing; constexpr bool kFirstPage = BuildingManager::kFirstPage; using BinIdxType = typename BuildingManager::BinIdxType; - const size_t size = row_indices.Size(); - const size_t *rid = row_indices.begin; + const size_t size = row_indices.size(); + bst_idx_t const *rid = row_indices.data(); auto const *p_gpair = reinterpret_cast(gpair.data()); const BinIdxType *gradient_index = gmat.index.data(); @@ -216,9 +215,9 @@ void RowsWiseBuildHistKernel(Span gpair, return kFirstPage ? ridx : (ridx - base_rowid); }; - CHECK_NE(row_indices.Size(), 0); + CHECK_NE(row_indices.size(), 0); const size_t n_features = - get_row_ptr(row_indices.begin[0] + 1) - get_row_ptr(row_indices.begin[0]); + get_row_ptr(row_indices.data()[0] + 1) - get_row_ptr(row_indices.data()[0]); auto hist_data = reinterpret_cast(hist.data()); const uint32_t two{2}; // Each element from 'gpair' and 'hist' contains // 2 FP values: gradient and hessian. @@ -264,14 +263,13 @@ void RowsWiseBuildHistKernel(Span gpair, } template -void ColsWiseBuildHistKernel(Span gpair, - const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat, - GHistRow hist) { +void ColsWiseBuildHistKernel(Span gpair, Span row_indices, + const GHistIndexMatrix &gmat, GHistRow hist) { constexpr bool kAnyMissing = BuildingManager::kAnyMissing; constexpr bool kFirstPage = BuildingManager::kFirstPage; using BinIdxType = typename BuildingManager::BinIdxType; - const size_t size = row_indices.Size(); - const size_t *rid = row_indices.begin; + const size_t size = row_indices.size(); + bst_idx_t const *rid = row_indices.data(); auto const *pgh = reinterpret_cast(gpair.data()); const BinIdxType *gradient_index = gmat.index.data(); @@ -315,31 +313,31 @@ void ColsWiseBuildHistKernel(Span gpair, } template -void BuildHistDispatch(Span gpair, const RowSetCollection::Elem row_indices, +void BuildHistDispatch(Span gpair, Span row_indices, const GHistIndexMatrix &gmat, GHistRow hist) { if (BuildingManager::kReadByColumn) { ColsWiseBuildHistKernel(gpair, row_indices, gmat, hist); } else { - const size_t nrows = row_indices.Size(); + const size_t nrows = row_indices.size(); const size_t no_prefetch_size = Prefetch::NoPrefetchSize(nrows); // if need to work with all rows from bin-matrix (e.g. root node) const bool contiguousBlock = - (row_indices.begin[nrows - 1] - row_indices.begin[0]) == (nrows - 1); + (row_indices.begin()[nrows - 1] - row_indices.begin()[0]) == (nrows - 1); if (contiguousBlock) { - // contiguous memory access, built-in HW prefetching is enough - if (row_indices.Size() == 0) { + if (row_indices.empty()) { return; } + // contiguous memory access, built-in HW prefetching is enough RowsWiseBuildHistKernel(gpair, row_indices, gmat, hist); } else { - const RowSetCollection::Elem span1(row_indices.begin, row_indices.end - no_prefetch_size); - if (span1.Size() != 0) { + auto span1 = row_indices.subspan(0, row_indices.size() - no_prefetch_size); + if (!span1.empty()) { RowsWiseBuildHistKernel(gpair, span1, gmat, hist); } // no prefetching to avoid loading extra memory - const RowSetCollection::Elem span2(row_indices.end - no_prefetch_size, row_indices.end); - if (span2.Size() != 0) { + auto span2 = row_indices.subspan(row_indices.size() - no_prefetch_size); + if (!span2.empty()) { RowsWiseBuildHistKernel(gpair, span2, gmat, hist); } } @@ -347,7 +345,7 @@ void BuildHistDispatch(Span gpair, const RowSetCollection::E } template -void BuildHist(Span gpair, const RowSetCollection::Elem row_indices, +void BuildHist(Span gpair, Span row_indices, const GHistIndexMatrix &gmat, GHistRow hist, bool force_read_by_column) { /* force_read_by_column is used for testing the columnwise building of histograms. * default force_read_by_column = false @@ -365,13 +363,11 @@ void BuildHist(Span gpair, const RowSetCollection::Elem row_ }); } -template void BuildHist(Span gpair, - const RowSetCollection::Elem row_indices, +template void BuildHist(Span gpair, Span row_indices, const GHistIndexMatrix &gmat, GHistRow hist, bool force_read_by_column); -template void BuildHist(Span gpair, - const RowSetCollection::Elem row_indices, +template void BuildHist(Span gpair, Span row_indices, const GHistIndexMatrix &gmat, GHistRow hist, bool force_read_by_column); } // namespace xgboost::common diff --git a/src/common/hist_util.h b/src/common/hist_util.h index 2e24f68ffb0a..559093bb5a3f 100644 --- a/src/common/hist_util.h +++ b/src/common/hist_util.h @@ -635,7 +635,7 @@ class ParallelGHistBuilder { // construct a histogram via histogram aggregation template -void BuildHist(Span gpair, const RowSetCollection::Elem row_indices, +void BuildHist(Span gpair, Span row_indices, const GHistIndexMatrix& gmat, GHistRow hist, bool force_read_by_column = false); } // namespace common } // namespace xgboost diff --git a/src/common/partition_builder.h b/src/common/partition_builder.h index 53e11bd91d1a..98c876e849a0 100644 --- a/src/common/partition_builder.h +++ b/src/common/partition_builder.h @@ -1,5 +1,5 @@ /** - * Copyright 2021-2023 by Contributors + * Copyright 2021-2024, XGBoost Contributors * \file row_set.h * \brief Quick Utility to compute subset of rows * \author Philip Cho, Tianqi Chen @@ -16,7 +16,6 @@ #include #include -#include "../tree/hist/expand_entry.h" #include "categorical.h" #include "column_matrix.h" #include "xgboost/context.h" @@ -54,23 +53,23 @@ class PartitionBuilder { // Handle dense columns // Analog of std::stable_partition, but in no-inplace manner template - inline std::pair PartitionKernel(ColumnType* p_column, - common::Span row_indices, - common::Span left_part, - common::Span right_part, - size_t base_rowid, Predicate&& pred) { + std::pair PartitionKernel(ColumnType* p_column, + common::Span row_indices, + common::Span left_part, + common::Span right_part, + bst_idx_t base_rowid, Predicate&& pred) { auto& column = *p_column; - size_t* p_left_part = left_part.data(); - size_t* p_right_part = right_part.data(); - size_t nleft_elems = 0; - size_t nright_elems = 0; + bst_idx_t* p_left_part = left_part.data(); + bst_idx_t* p_right_part = right_part.data(); + bst_idx_t nleft_elems = 0; + bst_idx_t nright_elems = 0; auto p_row_indices = row_indices.data(); auto n_samples = row_indices.size(); for (size_t i = 0; i < n_samples; ++i) { auto rid = p_row_indices[i]; - const int32_t bin_id = column[rid - base_rowid]; + bst_bin_t const bin_id = column[rid - base_rowid]; if (any_missing && bin_id == ColumnType::kMissingId) { if (default_left) { p_left_part[nleft_elems++] = rid; @@ -90,14 +89,14 @@ class PartitionBuilder { } template - inline std::pair PartitionRangeKernel(common::Span ridx, - common::Span left_part, - common::Span right_part, + inline std::pair PartitionRangeKernel(common::Span ridx, + common::Span left_part, + common::Span right_part, Pred pred) { - size_t* p_left_part = left_part.data(); - size_t* p_right_part = right_part.data(); - size_t nleft_elems = 0; - size_t nright_elems = 0; + bst_idx_t* p_left_part = left_part.data(); + bst_idx_t* p_right_part = right_part.data(); + bst_idx_t nleft_elems = 0; + bst_idx_t nright_elems = 0; for (auto row_id : ridx) { if (pred(row_id)) { p_left_part[nleft_elems++] = row_id; @@ -112,10 +111,10 @@ class PartitionBuilder { void Partition(const size_t node_in_set, std::vector const& nodes, const common::Range1d range, const bst_bin_t split_cond, GHistIndexMatrix const& gmat, const common::ColumnMatrix& column_matrix, - const RegTree& tree, const size_t* rid) { - common::Span rid_span(rid + range.begin(), rid + range.end()); - common::Span left = GetLeftBuffer(node_in_set, range.begin(), range.end()); - common::Span right = GetRightBuffer(node_in_set, range.begin(), range.end()); + const RegTree& tree, bst_idx_t const* rid) { + common::Span rid_span{rid + range.begin(), rid + range.end()}; + common::Span left = GetLeftBuffer(node_in_set, range.begin(), range.end()); + common::Span right = GetRightBuffer(node_in_set, range.begin(), range.end()); std::size_t nid = nodes[node_in_set].nid; bst_feature_t fid = tree.SplitIndex(nid); bool default_left = tree.DefaultLeft(nid); @@ -184,8 +183,9 @@ class PartitionBuilder { } template - void MaskKernel(ColumnType* p_column, common::Span row_indices, size_t base_rowid, - BitVector* decision_bits, BitVector* missing_bits, Predicate&& pred) { + void MaskKernel(ColumnType* p_column, common::Span row_indices, + bst_idx_t base_rowid, BitVector* decision_bits, BitVector* missing_bits, + Predicate&& pred) { auto& column = *p_column; for (auto const row_id : row_indices) { auto const bin_id = column[row_id - base_rowid]; @@ -205,9 +205,9 @@ class PartitionBuilder { template void MaskRows(const size_t node_in_set, std::vector const& nodes, const common::Range1d range, bst_bin_t split_cond, GHistIndexMatrix const& gmat, - const common::ColumnMatrix& column_matrix, const RegTree& tree, const size_t* rid, - BitVector* decision_bits, BitVector* missing_bits) { - common::Span rid_span(rid + range.begin(), rid + range.end()); + const common::ColumnMatrix& column_matrix, const RegTree& tree, + bst_idx_t const* rid, BitVector* decision_bits, BitVector* missing_bits) { + common::Span rid_span{rid + range.begin(), rid + range.end()}; std::size_t nid = nodes[node_in_set].nid; bst_feature_t fid = tree.SplitIndex(nid); bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical; @@ -263,11 +263,11 @@ class PartitionBuilder { template void PartitionByMask(const size_t node_in_set, std::vector const& nodes, const common::Range1d range, GHistIndexMatrix const& gmat, - const RegTree& tree, const size_t* rid, BitVector const& decision_bits, + const RegTree& tree, bst_idx_t const* rid, BitVector const& decision_bits, BitVector const& missing_bits) { - common::Span rid_span(rid + range.begin(), rid + range.end()); - common::Span left = GetLeftBuffer(node_in_set, range.begin(), range.end()); - common::Span right = GetRightBuffer(node_in_set, range.begin(), range.end()); + common::Span rid_span(rid + range.begin(), rid + range.end()); + common::Span left = GetLeftBuffer(node_in_set, range.begin(), range.end()); + common::Span right = GetRightBuffer(node_in_set, range.begin(), range.end()); std::size_t nid = nodes[node_in_set].nid; bool default_left = tree.DefaultLeft(nid); @@ -299,12 +299,12 @@ class PartitionBuilder { } } - common::Span GetLeftBuffer(int nid, size_t begin, size_t end) { + common::Span GetLeftBuffer(int nid, size_t begin, size_t end) { const size_t task_idx = GetTaskIdx(nid, begin); return { mem_blocks_.at(task_idx)->Left(), end - begin }; } - common::Span GetRightBuffer(int nid, size_t begin, size_t end) { + common::Span GetRightBuffer(int nid, size_t begin, size_t end) { const size_t task_idx = GetTaskIdx(nid, begin); return { mem_blocks_.at(task_idx)->Right(), end - begin }; } @@ -346,14 +346,14 @@ class PartitionBuilder { } } - void MergeToArray(int nid, size_t begin, size_t* rows_indexes) { + void MergeToArray(bst_node_t nid, size_t begin, bst_idx_t* rows_indexes) { size_t task_idx = GetTaskIdx(nid, begin); - size_t* left_result = rows_indexes + mem_blocks_[task_idx]->n_offset_left; - size_t* right_result = rows_indexes + mem_blocks_[task_idx]->n_offset_right; + bst_idx_t* left_result = rows_indexes + mem_blocks_[task_idx]->n_offset_left; + bst_idx_t* right_result = rows_indexes + mem_blocks_[task_idx]->n_offset_right; - const size_t* left = mem_blocks_[task_idx]->Left(); - const size_t* right = mem_blocks_[task_idx]->Right(); + bst_idx_t const* left = mem_blocks_[task_idx]->Left(); + bst_idx_t const* right = mem_blocks_[task_idx]->Right(); std::copy_n(left, mem_blocks_[task_idx]->n_left, left_result); std::copy_n(right, mem_blocks_[task_idx]->n_right, right_result); @@ -377,10 +377,10 @@ class PartitionBuilder { return; } CHECK(tree.IsLeaf(node.node_id)); - if (node.begin) { // guard for empty node. - size_t ptr_offset = node.end - p_begin; + if (node.begin()) { // guard for empty node. + size_t ptr_offset = node.end() - p_begin; CHECK_LE(ptr_offset, row_set.Data()->size()) << node.node_id; - for (auto idx = node.begin; idx != node.end; ++idx) { + for (auto idx = node.begin(); idx != node.end(); ++idx) { h_pos[*idx] = sampledp(*idx) ? ~node.node_id : node.node_id; } } @@ -395,16 +395,16 @@ class PartitionBuilder { size_t n_offset_left; size_t n_offset_right; - size_t* Left() { + bst_idx_t* Left() { return &left_data_[0]; } - size_t* Right() { + bst_idx_t* Right() { return &right_data_[0]; } private: - size_t left_data_[BlockSize]; - size_t right_data_[BlockSize]; + bst_idx_t left_data_[BlockSize]; + bst_idx_t right_data_[BlockSize]; }; std::vector> left_right_nodes_sizes_; std::vector blocks_offsets_; diff --git a/src/common/row_set.h b/src/common/row_set.h index acb39730539b..8df2a7a36839 100644 --- a/src/common/row_set.h +++ b/src/common/row_set.h @@ -31,15 +31,29 @@ class RowSetCollection { * associated with a particular node in a decision tree. */ struct Elem { - std::size_t const* begin{nullptr}; - std::size_t const* end{nullptr}; + private: + bst_idx_t* begin_{nullptr}; + bst_idx_t* end_{nullptr}; + + public: bst_node_t node_id{-1}; // id of node associated with this instance set; -1 means uninitialized Elem() = default; - Elem(std::size_t const* begin, std::size_t const* end, bst_node_t node_id = -1) - : begin(begin), end(end), node_id(node_id) {} + Elem(bst_idx_t* begin, bst_idx_t* end, bst_node_t node_id = -1) + : begin_(begin), end_(end), node_id(node_id) {} + + // Disable copy ctor to avoid casting away the constness via copy. + Elem(Elem const& that) = delete; + Elem& operator=(Elem const& that) = delete; + Elem(Elem&& that) = default; + Elem& operator=(Elem&& that) = default; - std::size_t Size() const { return end - begin; } + [[nodiscard]] std::size_t Size() const { return std::distance(begin(), end()); } + + [[nodiscard]] bst_idx_t const* begin() const { return this->begin_; } // NOLINT + [[nodiscard]] bst_idx_t const* end() const { return this->end_; } // NOLINT + [[nodiscard]] bst_idx_t* begin() { return this->begin_; } // NOLINT + [[nodiscard]] bst_idx_t* end() { return this->end_; } // NOLINT }; [[nodiscard]] std::vector::const_iterator begin() const { // NOLINT @@ -71,55 +85,57 @@ class RowSetCollection { CHECK(elem_of_each_node_.empty()); if (row_indices_.empty()) { // edge case: empty instance set - constexpr std::size_t* kBegin = nullptr; - constexpr std::size_t* kEnd = nullptr; + constexpr bst_idx_t* kBegin = nullptr; + constexpr bst_idx_t* kEnd = nullptr; static_assert(kEnd - kBegin == 0); elem_of_each_node_.emplace_back(kBegin, kEnd, 0); return; } - const std::size_t* begin = dmlc::BeginPtr(row_indices_); - const std::size_t* end = dmlc::BeginPtr(row_indices_) + row_indices_.size(); + bst_idx_t* begin = row_indices_.data(); + bst_idx_t* end = row_indices_.data() + row_indices_.size(); elem_of_each_node_.emplace_back(begin, end, 0); } - [[nodiscard]] std::vector* Data() { return &row_indices_; } - [[nodiscard]] std::vector const* Data() const { return &row_indices_; } + [[nodiscard]] std::vector* Data() { return &row_indices_; } + [[nodiscard]] std::vector const* Data() const { return &row_indices_; } // split rowset into two void AddSplit(bst_node_t node_id, bst_node_t left_node_id, bst_node_t right_node_id, bst_idx_t n_left, bst_idx_t n_right) { - const Elem e = elem_of_each_node_[node_id]; + Elem& e = elem_of_each_node_[node_id]; - std::size_t* all_begin{nullptr}; - std::size_t* begin{nullptr}; - if (e.begin == nullptr) { + bst_idx_t* all_begin{nullptr}; + bst_idx_t* begin{nullptr}; + bst_idx_t* end{nullptr}; + if (e.begin() == nullptr) { CHECK_EQ(n_left, 0); CHECK_EQ(n_right, 0); } else { all_begin = row_indices_.data(); - begin = all_begin + (e.begin - all_begin); + begin = all_begin + (e.begin() - all_begin); + end = elem_of_each_node_[node_id].end(); } CHECK_EQ(n_left + n_right, e.Size()); - CHECK_LE(begin + n_left, e.end); - CHECK_EQ(begin + n_left + n_right, e.end); + CHECK_LE(begin + n_left, e.end()); + CHECK_EQ(begin + n_left + n_right, e.end()); if (left_node_id >= static_cast(elem_of_each_node_.size())) { - elem_of_each_node_.resize(left_node_id + 1, Elem{nullptr, nullptr, -1}); + elem_of_each_node_.resize(left_node_id + 1); } if (right_node_id >= static_cast(elem_of_each_node_.size())) { - elem_of_each_node_.resize(right_node_id + 1, Elem{nullptr, nullptr, -1}); + elem_of_each_node_.resize(right_node_id + 1); } elem_of_each_node_[left_node_id] = Elem{begin, begin + n_left, left_node_id}; - elem_of_each_node_[right_node_id] = Elem{begin + n_left, e.end, right_node_id}; + elem_of_each_node_[right_node_id] = Elem{begin + n_left, end, right_node_id}; elem_of_each_node_[node_id] = Elem{nullptr, nullptr, -1}; } private: // stores the row indexes in the set - std::vector row_indices_; + std::vector row_indices_; // vector: node_id -> elements std::vector elem_of_each_node_; }; diff --git a/src/tree/common_row_partitioner.h b/src/tree/common_row_partitioner.h index c3065ad5f135..cd267673b66c 100644 --- a/src/tree/common_row_partitioner.h +++ b/src/tree/common_row_partitioner.h @@ -7,7 +7,7 @@ #define XGBOOST_TREE_COMMON_ROW_PARTITIONER_H_ #include // for all_of, fill -#include // for uint32_t +#include // for uint32_t, int32_t #include // for numeric_limits #include // for vector @@ -18,7 +18,7 @@ #include "../common/partition_builder.h" // for PartitionBuilder #include "../common/row_set.h" // for RowSetCollection #include "../common/threading_utils.h" // for ParallelFor2d -#include "xgboost/base.h" // for bst_row_t +#include "xgboost/base.h" // for bst_idx_t #include "xgboost/collective/result.h" // for Success, SafeColl #include "xgboost/context.h" // for Context #include "xgboost/linalg.h" // for TensorView @@ -46,7 +46,7 @@ class ColumnSplitHelper { void Partition(Context const* ctx, common::BlockedSpace2d const& space, std::int32_t n_threads, GHistIndexMatrix const& gmat, common::ColumnMatrix const& column_matrix, std::vector const& nodes, - std::vector const& split_conditions, RegTree const* p_tree) { + std::vector const& split_conditions, RegTree const* p_tree) { // When data is split by column, we don't have all the feature values in the local worker, so // we first collect all the decisions and whether the feature is missing into bit vectors. std::fill(decision_storage_.begin(), decision_storage_.end(), 0); @@ -56,7 +56,7 @@ class ColumnSplitHelper { bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0; partition_builder_->MaskRows( node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree, - (*row_set_collection_)[nid].begin, &decision_bits_, &missing_bits_); + (*row_set_collection_)[nid].begin(), &decision_bits_, &missing_bits_); }); // Then aggregate the bit vectors across all the workers. @@ -74,7 +74,7 @@ class ColumnSplitHelper { const size_t task_id = partition_builder_->GetTaskIdx(node_in_set, begin); partition_builder_->AllocateForTask(task_id); partition_builder_->PartitionByMask(node_in_set, nodes, r, gmat, *p_tree, - (*row_set_collection_)[nid].begin, decision_bits_, + (*row_set_collection_)[nid].begin(), decision_bits_, missing_bits_); }); } @@ -98,10 +98,10 @@ class CommonRowPartitioner { bool is_col_split) : base_rowid{_base_rowid}, is_col_split_{is_col_split} { row_set_collection_.Clear(); - std::vector& row_indices = *row_set_collection_.Data(); + std::vector& row_indices = *row_set_collection_.Data(); row_indices.resize(num_row); - std::size_t* p_row_indices = row_indices.data(); + bst_idx_t* p_row_indices = row_indices.data(); common::Iota(ctx, p_row_indices, p_row_indices + row_indices.size(), base_rowid); row_set_collection_.Init(); @@ -112,7 +112,7 @@ class CommonRowPartitioner { template void FindSplitConditions(const std::vector& nodes, const RegTree& tree, - const GHistIndexMatrix& gmat, std::vector* split_conditions) { + const GHistIndexMatrix& gmat, std::vector* split_conditions) { auto const& ptrs = gmat.cut.Ptrs(); auto const& vals = gmat.cut.Values(); @@ -197,7 +197,7 @@ class CommonRowPartitioner { // 1. Find split condition for each split size_t n_nodes = nodes.size(); - std::vector split_conditions; + std::vector split_conditions; if (column_matrix.IsInitialized()) { split_conditions.resize(n_nodes); FindSplitConditions(nodes, *p_tree, gmat, &split_conditions); @@ -206,8 +206,8 @@ class CommonRowPartitioner { // 2.1 Create a blocked space of size SUM(samples in each node) common::BlockedSpace2d space( n_nodes, - [&](size_t node_in_set) { - int32_t nid = nodes[node_in_set].nid; + [&](std::size_t node_in_set) { + auto nid = nodes[node_in_set].nid; return row_set_collection_[nid].Size(); }, kPartitionBlockSize); @@ -236,7 +236,7 @@ class CommonRowPartitioner { bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0; partition_builder_.template Partition( node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree, - row_set_collection_[nid].begin); + row_set_collection_[nid].begin()); }); } @@ -248,8 +248,7 @@ class CommonRowPartitioner { // with updated row-indexes for each tree-node common::ParallelFor2d(space, ctx->Threads(), [&](size_t node_in_set, common::Range1d r) { const int32_t nid = nodes[node_in_set].nid; - partition_builder_.MergeToArray(node_in_set, r.begin(), - const_cast(row_set_collection_[nid].begin)); + partition_builder_.MergeToArray(node_in_set, r.begin(), row_set_collection_[nid].begin()); }); // 5. Add info about splits into row_set_collection_ diff --git a/src/tree/hist/evaluate_splits.h b/src/tree/hist/evaluate_splits.h index ba673d85fc60..a260784ad1db 100644 --- a/src/tree/hist/evaluate_splits.h +++ b/src/tree/hist/evaluate_splits.h @@ -739,7 +739,7 @@ void UpdatePredictionCacheImpl(Context const *ctx, RegTree const *p_last_tree, if (!tree[nidx].IsDeleted() && tree[nidx].IsLeaf()) { auto const &rowset = part[nidx]; auto leaf_value = tree[nidx].LeafValue(); - for (const size_t *it = rowset.begin + r.begin(); it < rowset.begin + r.end(); ++it) { + for (auto const *it = rowset.begin() + r.begin(); it < rowset.begin() + r.end(); ++it) { out_preds(*it) += leaf_value; } } @@ -774,7 +774,8 @@ void UpdatePredictionCacheImpl(Context const *ctx, RegTree const *p_last_tree, if (tree.IsLeaf(nidx)) { auto const &rowset = part[nidx]; auto leaf_value = mttree->LeafValue(nidx); - for (std::size_t const *it = rowset.begin + r.begin(); it < rowset.begin + r.end(); ++it) { + for (bst_idx_t const *it = rowset.begin() + r.begin(); it < rowset.begin() + r.end(); + ++it) { for (std::size_t i = 0; i < n_targets; ++i) { out_preds(*it, i) += leaf_value(i); } diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index e28cae16597c..1e9dc9c7d53c 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -76,13 +76,13 @@ class HistogramBuilder { common::ParallelFor2d(space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) { const auto tid = static_cast(omp_get_thread_num()); bst_node_t const nidx = nodes_to_build[nid_in_set]; - auto elem = row_set_collection[nidx]; + auto const& elem = row_set_collection[nidx]; auto start_of_row_set = std::min(r.begin(), elem.Size()); auto end_of_row_set = std::min(r.end(), elem.Size()); - auto rid_set = common::RowSetCollection::Elem(elem.begin + start_of_row_set, - elem.begin + end_of_row_set, nidx); + auto rid_set = common::Span{elem.begin() + start_of_row_set, + elem.begin() + end_of_row_set}; auto hist = buffer_.GetInitializedHist(tid, nid_in_set); - if (rid_set.Size() != 0) { + if (rid_set.size() != 0) { common::BuildHist(gpair_h, rid_set, gidx, hist, force_read_by_column); } }); diff --git a/tests/cpp/common/test_partition_builder.cc b/tests/cpp/common/test_partition_builder.cc index 08dd345f261f..36fb7a8d9870 100644 --- a/tests/cpp/common/test_partition_builder.cc +++ b/tests/cpp/common/test_partition_builder.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020-2023 by XGBoost contributors + * Copyright 2020-2024, XGBoost contributors */ #include @@ -58,7 +58,7 @@ TEST(PartitionBuilder, BasicTest) { } builder.CalculateRowOffsets(); - std::vector v(*std::max_element(tasks.begin(), tasks.end()) * kBlockSize); + std::vector v(*std::max_element(tasks.begin(), tasks.end()) * kBlockSize); for(size_t nid = 0; nid < kNodes; ++nid) { diff --git a/tests/cpp/tree/hist/test_evaluate_splits.cc b/tests/cpp/tree/hist/test_evaluate_splits.cc index 329379b5b4d6..b7aae1b57e5b 100644 --- a/tests/cpp/tree/hist/test_evaluate_splits.cc +++ b/tests/cpp/tree/hist/test_evaluate_splits.cc @@ -45,7 +45,7 @@ void TestEvaluateSplits(bool force_read_by_column) { // dense, no missing values GHistIndexMatrix gmat(&ctx, dmat.get(), kMaxBins, 0.5, false); common::RowSetCollection row_set_collection; - std::vector &row_indices = *row_set_collection.Data(); + std::vector &row_indices = *row_set_collection.Data(); row_indices.resize(kRows); std::iota(row_indices.begin(), row_indices.end(), 0); row_set_collection.Init(); @@ -53,7 +53,9 @@ void TestEvaluateSplits(bool force_read_by_column) { HistMakerTrainParam hist_param; hist.Reset(gmat.cut.Ptrs().back(), hist_param.max_cached_hist_node); hist.AllocateHistograms({0}); - common::BuildHist(row_gpairs, row_set_collection[0], gmat, hist[0], force_read_by_column); + auto const &elem = row_set_collection[0]; + common::BuildHist(row_gpairs, common::Span{elem.begin(), elem.end()}, gmat, hist[0], + force_read_by_column); // Compute total gradient for all data points GradientPairPrecise total_gpair; diff --git a/tests/cpp/tree/hist/test_histogram.cc b/tests/cpp/tree/hist/test_histogram.cc index 740175c577c5..88c8d1cf6e64 100644 --- a/tests/cpp/tree/hist/test_histogram.cc +++ b/tests/cpp/tree/hist/test_histogram.cc @@ -14,7 +14,6 @@ #include // for max #include // for size_t #include // for int32_t, uint32_t -#include // for function #include // for back_inserter #include // for numeric_limits #include // for shared_ptr, allocator, unique_ptr @@ -108,7 +107,7 @@ void TestSyncHist(bool is_distributed) { common::RowSetCollection row_set_collection; { row_set_collection.Clear(); - std::vector &row_indices = *row_set_collection.Data(); + std::vector &row_indices = *row_set_collection.Data(); row_indices.resize(kNRows); std::iota(row_indices.begin(), row_indices.end(), 0); row_set_collection.Init(); @@ -251,7 +250,7 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_ common::RowSetCollection row_set_collection; row_set_collection.Clear(); - std::vector &row_indices = *row_set_collection.Data(); + std::vector &row_indices = *row_set_collection.Data(); row_indices.resize(kNRows); std::iota(row_indices.begin(), row_indices.end(), 0); row_set_collection.Init(); @@ -345,7 +344,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) { common::RowSetCollection row_set_collection; row_set_collection.Clear(); - std::vector &row_indices = *row_set_collection.Data(); + std::vector &row_indices = *row_set_collection.Data(); row_indices.resize(kRows); std::iota(row_indices.begin(), row_indices.end(), 0); row_set_collection.Init(); diff --git a/tests/cpp/tree/test_approx.cc b/tests/cpp/tree/test_approx.cc index b2949e5952a2..d647d3a970bf 100644 --- a/tests/cpp/tree/test_approx.cc +++ b/tests/cpp/tree/test_approx.cc @@ -3,7 +3,6 @@ */ #include -#include "../../../src/common/numeric.h" #include "../../../src/tree/common_row_partitioner.h" #include "../collective/test_worker.h" // for TestDistributedGlobal #include "../helpers.h" @@ -54,20 +53,23 @@ TEST(Approx, Partitioner) { GetSplit(&tree, split_value, &candidates); partitioner.UpdatePosition(&ctx, page, candidates, &tree); - auto left_nidx = tree[RegTree::kRoot].LeftChild(); - auto elem = partitioner[left_nidx]; - ASSERT_LT(elem.Size(), n_samples); - ASSERT_GT(elem.Size(), 1); - for (auto it = elem.begin; it != elem.end; ++it) { - auto value = page.cut.Values().at(page.index[*it]); - ASSERT_LE(value, split_value); + { + auto left_nidx = tree[RegTree::kRoot].LeftChild(); + auto const& elem = partitioner[left_nidx]; + ASSERT_LT(elem.Size(), n_samples); + ASSERT_GT(elem.Size(), 1); + for (auto& it : elem) { + auto value = page.cut.Values().at(page.index[it]); + ASSERT_LE(value, split_value); + } } - - auto right_nidx = tree[RegTree::kRoot].RightChild(); - elem = partitioner[right_nidx]; - for (auto it = elem.begin; it != elem.end; ++it) { - auto value = page.cut.Values().at(page.index[*it]); - ASSERT_GT(value, split_value) << *it; + { + auto right_nidx = tree[RegTree::kRoot].RightChild(); + auto const& elem = partitioner[right_nidx]; + for (auto& it : elem) { + auto value = page.cut.Values().at(page.index[it]); + ASSERT_GT(value, split_value) << it; + } } } } @@ -99,23 +101,25 @@ void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared RegTree tree; GetSplit(&tree, mid_value, &candidates); partitioner.UpdatePosition(&ctx, page, candidates, &tree); - - auto left_nidx = tree[RegTree::kRoot].LeftChild(); - auto elem = partitioner[left_nidx]; - ASSERT_LT(elem.Size(), n_samples); - ASSERT_GT(elem.Size(), 1); - auto expected_elem = expected_mid_partitioner[left_nidx]; - ASSERT_EQ(elem.Size(), expected_elem.Size()); - for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) { - ASSERT_EQ(*it, *eit); + { + auto left_nidx = tree[RegTree::kRoot].LeftChild(); + auto const& elem = partitioner[left_nidx]; + ASSERT_LT(elem.Size(), n_samples); + ASSERT_GT(elem.Size(), 1); + auto const& expected_elem = expected_mid_partitioner[left_nidx]; + ASSERT_EQ(elem.Size(), expected_elem.Size()); + for (auto it = elem.begin(), eit = expected_elem.begin(); it != elem.end(); ++it, ++eit) { + ASSERT_EQ(*it, *eit); + } } - - auto right_nidx = tree[RegTree::kRoot].RightChild(); - elem = partitioner[right_nidx]; - expected_elem = expected_mid_partitioner[right_nidx]; - ASSERT_EQ(elem.Size(), expected_elem.Size()); - for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) { - ASSERT_EQ(*it, *eit); + { + auto right_nidx = tree[RegTree::kRoot].RightChild(); + auto const& elem = partitioner[right_nidx]; + auto const& expected_elem = expected_mid_partitioner[right_nidx]; + ASSERT_EQ(elem.Size(), expected_elem.Size()); + for (auto it = elem.begin(), eit = expected_elem.begin(); it != elem.end(); ++it, ++eit) { + ASSERT_EQ(*it, *eit); + } } } } diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index ce637caa4d46..29ae02f8d2b2 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -5,7 +5,6 @@ #include #include -#include #include // for size_t #include #include @@ -68,21 +67,24 @@ void TestPartitioner(bst_target_t n_targets) { } else { GetMultiSplitForTest(&tree, split_value, &candidates); } - auto left_nidx = tree.LeftChild(RegTree::kRoot); partitioner.UpdatePosition(&ctx, gmat, column_indices, candidates, &tree); - - auto elem = partitioner[left_nidx]; - ASSERT_LT(elem.Size(), n_samples); - ASSERT_GT(elem.Size(), 1); - for (auto it = elem.begin; it != elem.end; ++it) { - auto value = gmat.cut.Values().at(gmat.index[*it]); - ASSERT_LE(value, split_value); + { + auto left_nidx = tree.LeftChild(RegTree::kRoot); + auto const& elem = partitioner[left_nidx]; + ASSERT_LT(elem.Size(), n_samples); + ASSERT_GT(elem.Size(), 1); + for (auto& it : elem) { + auto value = gmat.cut.Values().at(gmat.index[it]); + ASSERT_LE(value, split_value); + } } - auto right_nidx = tree.RightChild(RegTree::kRoot); - elem = partitioner[right_nidx]; - for (auto it = elem.begin; it != elem.end; ++it) { - auto value = gmat.cut.Values().at(gmat.index[*it]); - ASSERT_GT(value, split_value); + { + auto right_nidx = tree.RightChild(RegTree::kRoot); + auto const& elem = partitioner[right_nidx]; + for (auto& it : elem) { + auto value = gmat.cut.Values().at(gmat.index[it]); + ASSERT_GT(value, split_value); + } } } } @@ -138,21 +140,24 @@ void VerifyColumnSplitPartitioner(bst_target_t n_targets, size_t n_samples, auto left_nidx = tree.LeftChild(RegTree::kRoot); partitioner.UpdatePosition(&ctx, gmat, column_indices, candidates, &tree); - auto elem = partitioner[left_nidx]; - ASSERT_LT(elem.Size(), n_samples); - ASSERT_GT(elem.Size(), 1); - auto expected_elem = expected_mid_partitioner[left_nidx]; - ASSERT_EQ(elem.Size(), expected_elem.Size()); - for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) { - ASSERT_EQ(*it, *eit); + { + auto const& elem = partitioner[left_nidx]; + ASSERT_LT(elem.Size(), n_samples); + ASSERT_GT(elem.Size(), 1); + auto const& expected_elem = expected_mid_partitioner[left_nidx]; + ASSERT_EQ(elem.Size(), expected_elem.Size()); + for (auto it = elem.begin(), eit = expected_elem.begin(); it != elem.end(); ++it, ++eit) { + ASSERT_EQ(*it, *eit); + } } - - auto right_nidx = tree.RightChild(RegTree::kRoot); - elem = partitioner[right_nidx]; - expected_elem = expected_mid_partitioner[right_nidx]; - ASSERT_EQ(elem.Size(), expected_elem.Size()); - for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) { - ASSERT_EQ(*it, *eit); + { + auto right_nidx = tree.RightChild(RegTree::kRoot); + auto const& elem = partitioner[right_nidx]; + auto const& expected_elem = expected_mid_partitioner[right_nidx]; + ASSERT_EQ(elem.Size(), expected_elem.Size()); + for (auto it = elem.begin(), eit = expected_elem.begin(); it != elem.end(); ++it, ++eit) { + ASSERT_EQ(*it, *eit); + } } } }