Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid the use of size_t in the partitioner. #10541

Merged
merged 4 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 22 additions & 26 deletions src/common/hist_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -187,15 +187,14 @@ class GHistBuildingManager {
};

template <bool do_prefetch, class BuildingManager>
void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
GHistRow hist) {
void RowsWiseBuildHistKernel(Span<GradientPair const> gpair, Span<bst_idx_t const> row_indices,
const GHistIndexMatrix &gmat, GHistRow hist) {
constexpr bool kAnyMissing = BuildingManager::kAnyMissing;
constexpr bool kFirstPage = BuildingManager::kFirstPage;
using BinIdxType = typename BuildingManager::BinIdxType;

const size_t size = row_indices.Size();
const size_t *rid = row_indices.begin;
const size_t size = row_indices.size();
bst_idx_t const *rid = row_indices.data();
auto const *p_gpair = reinterpret_cast<const float *>(gpair.data());
const BinIdxType *gradient_index = gmat.index.data<BinIdxType>();

Expand All @@ -216,9 +215,9 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
return kFirstPage ? ridx : (ridx - base_rowid);
};

CHECK_NE(row_indices.Size(), 0);
CHECK_NE(row_indices.size(), 0);
const size_t n_features =
get_row_ptr(row_indices.begin[0] + 1) - get_row_ptr(row_indices.begin[0]);
get_row_ptr(row_indices.data()[0] + 1) - get_row_ptr(row_indices.data()[0]);
auto hist_data = reinterpret_cast<double *>(hist.data());
const uint32_t two{2}; // Each element from 'gpair' and 'hist' contains
// 2 FP values: gradient and hessian.
Expand Down Expand Up @@ -264,14 +263,13 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
}

template <class BuildingManager>
void ColsWiseBuildHistKernel(Span<GradientPair const> gpair,
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
GHistRow hist) {
void ColsWiseBuildHistKernel(Span<GradientPair const> gpair, Span<bst_idx_t const> row_indices,
const GHistIndexMatrix &gmat, GHistRow hist) {
constexpr bool kAnyMissing = BuildingManager::kAnyMissing;
constexpr bool kFirstPage = BuildingManager::kFirstPage;
using BinIdxType = typename BuildingManager::BinIdxType;
const size_t size = row_indices.Size();
const size_t *rid = row_indices.begin;
const size_t size = row_indices.size();
bst_idx_t const *rid = row_indices.data();
auto const *pgh = reinterpret_cast<const float *>(gpair.data());
const BinIdxType *gradient_index = gmat.index.data<BinIdxType>();

Expand Down Expand Up @@ -315,39 +313,39 @@ void ColsWiseBuildHistKernel(Span<GradientPair const> gpair,
}

template <class BuildingManager>
void BuildHistDispatch(Span<GradientPair const> gpair, const RowSetCollection::Elem row_indices,
void BuildHistDispatch(Span<GradientPair const> gpair, Span<bst_idx_t const> row_indices,
const GHistIndexMatrix &gmat, GHistRow hist) {
if (BuildingManager::kReadByColumn) {
ColsWiseBuildHistKernel<BuildingManager>(gpair, row_indices, gmat, hist);
} else {
const size_t nrows = row_indices.Size();
const size_t nrows = row_indices.size();
const size_t no_prefetch_size = Prefetch::NoPrefetchSize(nrows);
// if need to work with all rows from bin-matrix (e.g. root node)
const bool contiguousBlock =
(row_indices.begin[nrows - 1] - row_indices.begin[0]) == (nrows - 1);
(row_indices.begin()[nrows - 1] - row_indices.begin()[0]) == (nrows - 1);

if (contiguousBlock) {
// contiguous memory access, built-in HW prefetching is enough
if (row_indices.Size() == 0) {
if (row_indices.empty()) {
return;
}
// contiguous memory access, built-in HW prefetching is enough
RowsWiseBuildHistKernel<false, BuildingManager>(gpair, row_indices, gmat, hist);
} else {
const RowSetCollection::Elem span1(row_indices.begin, row_indices.end - no_prefetch_size);
if (span1.Size() != 0) {
auto span1 = row_indices.subspan(0, row_indices.size() - no_prefetch_size);
if (!span1.empty()) {
RowsWiseBuildHistKernel<true, BuildingManager>(gpair, span1, gmat, hist);
}
// no prefetching to avoid loading extra memory
const RowSetCollection::Elem span2(row_indices.end - no_prefetch_size, row_indices.end);
if (span2.Size() != 0) {
auto span2 = row_indices.subspan(row_indices.size() - no_prefetch_size);
if (!span2.empty()) {
RowsWiseBuildHistKernel<false, BuildingManager>(gpair, span2, gmat, hist);
}
}
}
}

template <bool any_missing>
void BuildHist(Span<GradientPair const> gpair, const RowSetCollection::Elem row_indices,
void BuildHist(Span<GradientPair const> gpair, Span<bst_idx_t const> row_indices,
const GHistIndexMatrix &gmat, GHistRow hist, bool force_read_by_column) {
/* force_read_by_column is used for testing the columnwise building of histograms.
* default force_read_by_column = false
Expand All @@ -365,13 +363,11 @@ void BuildHist(Span<GradientPair const> gpair, const RowSetCollection::Elem row_
});
}

template void BuildHist<true>(Span<GradientPair const> gpair,
const RowSetCollection::Elem row_indices,
template void BuildHist<true>(Span<GradientPair const> gpair, Span<bst_idx_t const> row_indices,
const GHistIndexMatrix &gmat, GHistRow hist,
bool force_read_by_column);

template void BuildHist<false>(Span<GradientPair const> gpair,
const RowSetCollection::Elem row_indices,
template void BuildHist<false>(Span<GradientPair const> gpair, Span<bst_idx_t const> row_indices,
const GHistIndexMatrix &gmat, GHistRow hist,
bool force_read_by_column);
} // namespace xgboost::common
2 changes: 1 addition & 1 deletion src/common/hist_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,7 @@ class ParallelGHistBuilder {

// construct a histogram via histogram aggregation
template <bool any_missing>
void BuildHist(Span<GradientPair const> gpair, const RowSetCollection::Elem row_indices,
void BuildHist(Span<GradientPair const> gpair, Span<bst_idx_t const> row_indices,
const GHistIndexMatrix& gmat, GHistRow hist, bool force_read_by_column = false);
} // namespace common
} // namespace xgboost
Expand Down
92 changes: 46 additions & 46 deletions src/common/partition_builder.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2021-2023 by Contributors
* Copyright 2021-2024, XGBoost Contributors
* \file row_set.h
* \brief Quick Utility to compute subset of rows
* \author Philip Cho, Tianqi Chen
Expand All @@ -16,7 +16,6 @@
#include <utility>
#include <vector>

#include "../tree/hist/expand_entry.h"
#include "categorical.h"
#include "column_matrix.h"
#include "xgboost/context.h"
Expand Down Expand Up @@ -54,23 +53,23 @@ class PartitionBuilder {
// Handle dense columns
// Analog of std::stable_partition, but in no-inplace manner
template <bool default_left, bool any_missing, typename ColumnType, typename Predicate>
inline std::pair<size_t, size_t> PartitionKernel(ColumnType* p_column,
common::Span<const size_t> row_indices,
common::Span<size_t> left_part,
common::Span<size_t> right_part,
size_t base_rowid, Predicate&& pred) {
std::pair<size_t, size_t> PartitionKernel(ColumnType* p_column,
common::Span<const bst_idx_t> row_indices,
common::Span<bst_idx_t> left_part,
common::Span<bst_idx_t> right_part,
bst_idx_t base_rowid, Predicate&& pred) {
auto& column = *p_column;
size_t* p_left_part = left_part.data();
size_t* p_right_part = right_part.data();
size_t nleft_elems = 0;
size_t nright_elems = 0;
bst_idx_t* p_left_part = left_part.data();
bst_idx_t* p_right_part = right_part.data();
bst_idx_t nleft_elems = 0;
bst_idx_t nright_elems = 0;

auto p_row_indices = row_indices.data();
auto n_samples = row_indices.size();

for (size_t i = 0; i < n_samples; ++i) {
auto rid = p_row_indices[i];
const int32_t bin_id = column[rid - base_rowid];
bst_bin_t const bin_id = column[rid - base_rowid];
if (any_missing && bin_id == ColumnType::kMissingId) {
if (default_left) {
p_left_part[nleft_elems++] = rid;
Expand All @@ -90,14 +89,14 @@ class PartitionBuilder {
}

template <typename Pred>
inline std::pair<size_t, size_t> PartitionRangeKernel(common::Span<const size_t> ridx,
common::Span<size_t> left_part,
common::Span<size_t> right_part,
inline std::pair<size_t, size_t> PartitionRangeKernel(common::Span<const bst_idx_t> ridx,
common::Span<bst_idx_t> left_part,
common::Span<bst_idx_t> right_part,
Pred pred) {
size_t* p_left_part = left_part.data();
size_t* p_right_part = right_part.data();
size_t nleft_elems = 0;
size_t nright_elems = 0;
bst_idx_t* p_left_part = left_part.data();
bst_idx_t* p_right_part = right_part.data();
bst_idx_t nleft_elems = 0;
bst_idx_t nright_elems = 0;
for (auto row_id : ridx) {
if (pred(row_id)) {
p_left_part[nleft_elems++] = row_id;
Expand All @@ -112,10 +111,10 @@ class PartitionBuilder {
void Partition(const size_t node_in_set, std::vector<ExpandEntry> const& nodes,
const common::Range1d range, const bst_bin_t split_cond,
GHistIndexMatrix const& gmat, const common::ColumnMatrix& column_matrix,
const RegTree& tree, const size_t* rid) {
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
common::Span<size_t> left = GetLeftBuffer(node_in_set, range.begin(), range.end());
common::Span<size_t> right = GetRightBuffer(node_in_set, range.begin(), range.end());
const RegTree& tree, bst_idx_t const* rid) {
common::Span<bst_idx_t const> rid_span{rid + range.begin(), rid + range.end()};
common::Span<bst_idx_t> left = GetLeftBuffer(node_in_set, range.begin(), range.end());
common::Span<bst_idx_t> right = GetRightBuffer(node_in_set, range.begin(), range.end());
std::size_t nid = nodes[node_in_set].nid;
bst_feature_t fid = tree.SplitIndex(nid);
bool default_left = tree.DefaultLeft(nid);
Expand Down Expand Up @@ -184,8 +183,9 @@ class PartitionBuilder {
}

template <bool any_missing, typename ColumnType, typename Predicate>
void MaskKernel(ColumnType* p_column, common::Span<const size_t> row_indices, size_t base_rowid,
BitVector* decision_bits, BitVector* missing_bits, Predicate&& pred) {
void MaskKernel(ColumnType* p_column, common::Span<bst_idx_t const> row_indices,
bst_idx_t base_rowid, BitVector* decision_bits, BitVector* missing_bits,
Predicate&& pred) {
auto& column = *p_column;
for (auto const row_id : row_indices) {
auto const bin_id = column[row_id - base_rowid];
Expand All @@ -205,9 +205,9 @@ class PartitionBuilder {
template <typename BinIdxType, bool any_missing, bool any_cat, typename ExpandEntry>
void MaskRows(const size_t node_in_set, std::vector<ExpandEntry> const& nodes,
const common::Range1d range, bst_bin_t split_cond, GHistIndexMatrix const& gmat,
const common::ColumnMatrix& column_matrix, const RegTree& tree, const size_t* rid,
BitVector* decision_bits, BitVector* missing_bits) {
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
const common::ColumnMatrix& column_matrix, const RegTree& tree,
bst_idx_t const* rid, BitVector* decision_bits, BitVector* missing_bits) {
common::Span<bst_idx_t const> rid_span{rid + range.begin(), rid + range.end()};
std::size_t nid = nodes[node_in_set].nid;
bst_feature_t fid = tree.SplitIndex(nid);
bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical;
Expand Down Expand Up @@ -263,11 +263,11 @@ class PartitionBuilder {
template <typename ExpandEntry>
void PartitionByMask(const size_t node_in_set, std::vector<ExpandEntry> const& nodes,
const common::Range1d range, GHistIndexMatrix const& gmat,
const RegTree& tree, const size_t* rid, BitVector const& decision_bits,
const RegTree& tree, bst_idx_t const* rid, BitVector const& decision_bits,
BitVector const& missing_bits) {
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
common::Span<size_t> left = GetLeftBuffer(node_in_set, range.begin(), range.end());
common::Span<size_t> right = GetRightBuffer(node_in_set, range.begin(), range.end());
common::Span<bst_idx_t const> rid_span(rid + range.begin(), rid + range.end());
common::Span<bst_idx_t> left = GetLeftBuffer(node_in_set, range.begin(), range.end());
common::Span<bst_idx_t> right = GetRightBuffer(node_in_set, range.begin(), range.end());
std::size_t nid = nodes[node_in_set].nid;
bool default_left = tree.DefaultLeft(nid);

Expand Down Expand Up @@ -299,12 +299,12 @@ class PartitionBuilder {
}
}

common::Span<size_t> GetLeftBuffer(int nid, size_t begin, size_t end) {
common::Span<bst_idx_t> GetLeftBuffer(int nid, size_t begin, size_t end) {
const size_t task_idx = GetTaskIdx(nid, begin);
return { mem_blocks_.at(task_idx)->Left(), end - begin };
}

common::Span<size_t> GetRightBuffer(int nid, size_t begin, size_t end) {
common::Span<bst_idx_t> GetRightBuffer(int nid, size_t begin, size_t end) {
const size_t task_idx = GetTaskIdx(nid, begin);
return { mem_blocks_.at(task_idx)->Right(), end - begin };
}
Expand Down Expand Up @@ -346,14 +346,14 @@ class PartitionBuilder {
}
}

void MergeToArray(int nid, size_t begin, size_t* rows_indexes) {
void MergeToArray(bst_node_t nid, size_t begin, bst_idx_t* rows_indexes) {
size_t task_idx = GetTaskIdx(nid, begin);

size_t* left_result = rows_indexes + mem_blocks_[task_idx]->n_offset_left;
size_t* right_result = rows_indexes + mem_blocks_[task_idx]->n_offset_right;
bst_idx_t* left_result = rows_indexes + mem_blocks_[task_idx]->n_offset_left;
bst_idx_t* right_result = rows_indexes + mem_blocks_[task_idx]->n_offset_right;

const size_t* left = mem_blocks_[task_idx]->Left();
const size_t* right = mem_blocks_[task_idx]->Right();
bst_idx_t const* left = mem_blocks_[task_idx]->Left();
bst_idx_t const* right = mem_blocks_[task_idx]->Right();

std::copy_n(left, mem_blocks_[task_idx]->n_left, left_result);
std::copy_n(right, mem_blocks_[task_idx]->n_right, right_result);
Expand All @@ -377,10 +377,10 @@ class PartitionBuilder {
return;
}
CHECK(tree.IsLeaf(node.node_id));
if (node.begin) { // guard for empty node.
size_t ptr_offset = node.end - p_begin;
if (node.begin()) { // guard for empty node.
size_t ptr_offset = node.end() - p_begin;
CHECK_LE(ptr_offset, row_set.Data()->size()) << node.node_id;
for (auto idx = node.begin; idx != node.end; ++idx) {
for (auto idx = node.begin(); idx != node.end(); ++idx) {
h_pos[*idx] = sampledp(*idx) ? ~node.node_id : node.node_id;
}
}
Expand All @@ -395,16 +395,16 @@ class PartitionBuilder {
size_t n_offset_left;
size_t n_offset_right;

size_t* Left() {
bst_idx_t* Left() {
return &left_data_[0];
}

size_t* Right() {
bst_idx_t* Right() {
return &right_data_[0];
}
private:
size_t left_data_[BlockSize];
size_t right_data_[BlockSize];
bst_idx_t left_data_[BlockSize];
bst_idx_t right_data_[BlockSize];
};
std::vector<std::pair<size_t, size_t>> left_right_nodes_sizes_;
std::vector<size_t> blocks_offsets_;
Expand Down
Loading
Loading