Make the span class trivially copyable. (#10878)
trivialfis authored Oct 9, 2024
1 parent 56d155c commit f7cb8e0
Showing 15 changed files with 25 additions and 36 deletions.
17 changes: 6 additions & 11 deletions include/xgboost/span.h
@@ -433,7 +433,7 @@ class Span {
using const_reverse_iterator = const std::reverse_iterator<const_iterator>; // NOLINT

// constructors
-constexpr Span() __span_noexcept = default;
+constexpr Span() = default;

XGBOOST_DEVICE Span(pointer _ptr, index_type _count) :
size_(_count), data_(_ptr) {
@@ -480,16 +480,11 @@ class Span {
__span_noexcept : size_(_other.size()),
data_(_other.data()) {}

-XGBOOST_DEVICE constexpr Span(const Span& _other)
-__span_noexcept : size_(_other.size()), data_(_other.data()) {}

-XGBOOST_DEVICE Span& operator=(const Span& _other) __span_noexcept {
-size_ = _other.size();
-data_ = _other.data();
-return *this;
-}

-XGBOOST_DEVICE ~Span() __span_noexcept {}; // NOLINT
+constexpr Span(Span const& _other) noexcept(true) = default;
+constexpr Span& operator=(Span const& _other) noexcept(true) = default;
+constexpr Span(Span&& _other) noexcept(true) = default;
+constexpr Span& operator=(Span&& _other) noexcept(true) = default;
+~Span() noexcept(true) = default;

XGBOOST_DEVICE constexpr iterator begin() const __span_noexcept { // NOLINT
return {this, 0};
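A note on what the defaulted special members above buy: a type whose copy/move constructors, copy/move assignment operators, and destructor are all trivial is trivially copyable, so it may legally be copied byte-for-byte, which is what happens when a kernel argument or a by-value lambda capture is materialized on the device. The sketch below is a deliberately simplified stand-in, not the real xgboost::common::Span, and only demonstrates the property:

```cpp
#include <cstddef>
#include <cstring>
#include <type_traits>

// Simplified pointer+size view with every special member defaulted, analogous
// in spirit to the Span change above (names here are illustrative only).
template <typename T>
class SpanSketch {
 public:
  constexpr SpanSketch() = default;
  constexpr SpanSketch(T* ptr, std::size_t size) : size_{size}, data_{ptr} {}
  constexpr SpanSketch(SpanSketch const&) = default;
  constexpr SpanSketch& operator=(SpanSketch const&) = default;
  ~SpanSketch() = default;

  constexpr T* data() const { return data_; }
  constexpr std::size_t size() const { return size_; }

 private:
  std::size_t size_{0};
  T* data_{nullptr};
};

static_assert(std::is_trivially_copyable_v<SpanSketch<int>>);

int main() {
  int values[3] = {1, 2, 3};
  SpanSketch<int> src{values, 3};
  SpanSketch<int> dst;
  // A plain byte copy is well defined precisely because the type is trivially copyable.
  std::memcpy(&dst, &src, sizeof(src));
  return dst.size() == 3 ? 0 : 1;
}
```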
1 change: 0 additions & 1 deletion src/c_api/c_api.cc
@@ -1098,7 +1098,6 @@ XGB_DLL int XGBoosterTrainOneIter(BoosterHandle handle, DMatrixHandle dtrain, in
auto ctx = learner->Ctx();
if (!grad_is_cuda) {
gpair.Reshape(i_grad.Shape(0), i_grad.Shape(1));
-auto const shape = gpair.Shape();
auto h_gpair = gpair.HostView();
DispatchDType(i_grad, DeviceOrd::CPU(), [&](auto &&t_grad) {
DispatchDType(i_hess, DeviceOrd::CPU(), [&](auto &&t_hess) {
4 changes: 1 addition & 3 deletions src/common/hist_util.cu
@@ -180,10 +180,9 @@ void RemoveDuplicatedCategories(Context const* ctx, MetaInfo const& info,
sorted_entries.resize(n_uniques);

// Renew the column scan and cut scan based on categorical data.
-auto d_old_column_sizes_scan = dh::ToSpan(column_sizes_scan);
dh::caching_device_vector<SketchContainer::OffsetT> new_cuts_size(info.num_col_ + 1);
CHECK_EQ(new_column_scan.size(), new_cuts_size.size());
-dh::LaunchN(new_column_scan.size(),
+dh::LaunchN(new_column_scan.size(), ctx->CUDACtx()->Stream(),
[=, d_new_cuts_size = dh::ToSpan(new_cuts_size),
d_old_column_sizes_scan = dh::ToSpan(column_sizes_scan),
d_new_columns_ptr = dh::ToSpan(new_column_scan)] __device__(size_t idx) {
@@ -277,7 +276,6 @@ void ProcessWeightedBatch(Context const* ctx, const SparsePage& page, MetaInfo c
HostDeviceVector<float>* p_out_weight) {
if (hessian.empty()) {
if (info.IsRanking() && !info.weights_.Empty()) {
-common::Span<float const> group_weight = info.weights_.ConstDeviceSpan();
dh::device_vector<bst_group_t> group_ptr(info.group_ptr_);
auto d_group_ptr = dh::ToSpan(group_ptr);
CHECK_GE(d_group_ptr.size(), 2) << "Must have at least 1 group for ranking.";
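Besides dropping the unused span, the hunk above now passes ctx->CUDACtx()->Stream() into dh::LaunchN so the kernel is ordered on the context's stream rather than the legacy default stream. Roughly, a LaunchN-style helper looks like the sketch below; this is an assumed shape for illustration, not the actual signature in src/common/device_helpers.cuh. Note that the functor travels to the kernel by value, which is one practical reason the spans it captures benefit from being trivially copyable:

```cu
#include <cstddef>
#include <cstdint>
#include <cuda_runtime.h>

// Grid-stride loop: each thread handles indices i, i + stride, i + 2*stride, ...
template <typename Fn>
__global__ void LaunchNKernel(std::size_t n, Fn fn) {
  std::size_t stride = static_cast<std::size_t>(gridDim.x) * blockDim.x;
  for (std::size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride) {
    fn(i);
  }
}

// Hypothetical LaunchN-like wrapper: enqueue the kernel on the caller's stream.
template <typename Fn>
void LaunchNSketch(std::size_t n, cudaStream_t stream, Fn fn) {
  if (n == 0) {
    return;
  }
  constexpr std::uint32_t kBlockThreads = 256;
  auto n_blocks = static_cast<std::uint32_t>((n + kBlockThreads - 1) / kBlockThreads);
  LaunchNKernel<<<n_blocks, kBlockThreads, 0, stream>>>(n, fn);
}
```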
1 change: 0 additions & 1 deletion src/gbm/gbtree.cc
@@ -167,7 +167,6 @@ void CopyGradient(Context const* ctx, linalg::Matrix<GradientPair> const* in_gpa
GPUCopyGradient(ctx, in_gpair, group_id, out_gpair);
} else {
auto const& in = *in_gpair;
-auto target_gpair = in.Slice(linalg::All(), group_id);
auto h_tmp = out_gpair->HostView();
auto h_in = in.HostView().Slice(linalg::All(), group_id);
CHECK_EQ(h_tmp.Size(), h_in.Size());
2 changes: 0 additions & 2 deletions src/metric/rank_metric.cc
@@ -329,7 +329,6 @@ class EvalPrecision : public EvalRankWithCache<ltr::PreCache> {

auto gptr = p_cache->DataGroupPtr(ctx_);
auto h_label = info.labels.HostView().Slice(linalg::All(), 0);
-auto h_predt = linalg::MakeTensorView(ctx_, &predt, predt.Size());
auto rank_idx = p_cache->SortedIdx(ctx_, predt.ConstHostSpan());

auto weight = common::MakeOptionalWeights(ctx_, info.weights_);
@@ -433,7 +432,6 @@ class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {

auto gptr = p_cache->DataGroupPtr(ctx_);
auto h_label = info.labels.HostView().Slice(linalg::All(), 0);
-auto h_predt = linalg::MakeTensorView(ctx_, &predt, predt.Size());

auto map_gloc = p_cache->Map(ctx_);
std::fill_n(map_gloc.data(), map_gloc.size(), 0.0);
3 changes: 0 additions & 3 deletions src/objective/lambdarank_obj.cu
@@ -488,9 +488,6 @@ void LambdaRankGetGradientPairwise(Context const* ctx, std::int32_t iter,
info.labels.SetDevice(device);
predt.SetDevice(device);

-auto d_predt = predt.ConstDeviceSpan();
-auto const d_sorted_idx = p_cache->SortedIdx(ctx, d_predt);

auto delta = [] XGBOOST_DEVICE(float, float, std::size_t, std::size_t, bst_group_t) {
return 1.0;
};
5 changes: 3 additions & 2 deletions src/tree/gpu_hist/evaluate_splits.cu
@@ -7,6 +7,7 @@
#include "../../collective/allgather.h"
#include "../../collective/communicator-inl.h" // for GetWorldSize, GetRank
#include "../../common/categorical.h"
#include "../../common/cuda_context.cuh" // for CUDAContext
#include "evaluate_splits.cuh"
#include "expand_entry.cuh"

@@ -472,8 +473,8 @@ void GPUHistEvaluator::EvaluateSplits(Context const *ctx, const std::vector<bst_

GPUExpandEntry GPUHistEvaluator::EvaluateSingleSplit(Context const *ctx, EvaluateSplitInputs input,
EvaluateSplitSharedInputs shared_inputs) {
-dh::device_vector<EvaluateSplitInputs> inputs(1);
-dh::safe_cuda(cudaMemcpyAsync(inputs.data().get(), &input, sizeof(input), cudaMemcpyDefault));
+dh::CachingDeviceUVector<EvaluateSplitInputs> inputs(1);
+dh::safe_cuda(cudaMemcpyAsync(inputs.data(), &input, sizeof(input), cudaMemcpyDefault));

dh::TemporaryArray<GPUExpandEntry> out_entries(1);
this->EvaluateSplits(ctx, {input.nidx}, input.feature_set.size(), dh::ToSpan(inputs),
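The .data().get() → .data() change follows from the container swap: thrust::device_vector<T>::data() returns a thrust::device_ptr<T> that must be unwrapped with .get() before the CUDA runtime can use it, whereas dh::CachingDeviceUVector is assumed here (from the call site) to hand back a raw T* directly. The thrust side in isolation:

```cpp
#include <cstddef>
#include <cuda_runtime.h>
#include <thrust/device_vector.h>

// device_vector::data() yields thrust::device_ptr<int>; .get() extracts the raw
// device pointer that cudaMemcpyAsync expects. Error checking omitted for brevity.
void CopyToDevice(thrust::device_vector<int>* out, int const* host, std::size_t n,
                  cudaStream_t stream) {
  int* raw = out->data().get();
  cudaMemcpyAsync(raw, host, n * sizeof(int), cudaMemcpyDefault, stream);
}
```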
1 change: 0 additions & 1 deletion src/tree/gpu_hist/evaluate_splits.cuh
@@ -10,7 +10,6 @@
#include "../split_evaluator.h"
#include "../updater_gpu_common.cuh"
#include "expand_entry.cuh"
#include "histogram.cuh"

namespace xgboost {
namespace common {
3 changes: 1 addition & 2 deletions src/tree/gpu_hist/evaluator.cu
@@ -7,6 +7,7 @@
#include <thrust/logical.h> // thrust::any_of
#include <thrust/sort.h> // thrust::stable_sort

#include "../../common/cuda_context.cuh" // for CUDAContext
#include "../../common/device_helpers.cuh"
#include "../../common/hist_util.h" // common::HistogramCuts
#include "evaluate_splits.cuh"
@@ -53,9 +54,7 @@ void GPUHistEvaluator::Reset(Context const *ctx, common::HistogramCuts const &cu
* cache feature index binary search result
*/
feature_idx_.resize(cat_sorted_idx_.size());
-auto d_fidxes = dh::ToSpan(feature_idx_);
auto it = thrust::make_counting_iterator(0ul);
-auto values = cuts.cut_values_.ConstDeviceSpan();
thrust::transform(ctx->CUDACtx()->CTP(), it, it + feature_idx_.size(), feature_idx_.begin(),
[=] XGBOOST_DEVICE(size_t i) {
auto fidx = dh::SegmentId(ptrs, i);
2 changes: 1 addition & 1 deletion tests/cpp/common/test_host_device_vector.cu
@@ -146,7 +146,7 @@ TEST(HostDeviceVector, SetDevice) {

vec.SetDevice(device);
ASSERT_EQ(vec.Size(), h_vec.size());
-auto span = vec.DeviceSpan(); // sync to device
+vec.DeviceSpan(); // sync to device

vec.SetDevice(DeviceOrd::CPU()); // pull back to cpu.
ASSERT_EQ(vec.Size(), h_vec.size());
5 changes: 2 additions & 3 deletions tests/cpp/common/test_linalg.cc
@@ -117,9 +117,8 @@ TEST(Linalg, TensorView) {
{
// Don't assign the initial dimension, tensor should be able to deduce the correct dim
// for Slice.
-auto t = MakeTensorView(&ctx, data, 2, 3, 4);
-auto s = t.Slice(1, 2, All());
-static_assert(decltype(s)::kDimension == 1);
+static_assert(decltype(MakeTensorView(&ctx, data, 2, 3, 4).Slice(1, 2, All()))::kDimension ==
+              1);
}
{
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
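Folding the expression into decltype keeps the dimension check entirely at compile time, leaving no named temporaries for the compiler to flag as unused. The same idiom in isolation, with made-up names (ViewSketch and MakeViewSketch are not XGBoost APIs):

```cpp
#include <cstddef>

// Slicing drops one dimension; the check below queries the deduced type's
// static member without constructing anything at run time.
template <std::size_t kDim>
struct ViewSketch {
  static constexpr std::size_t kDimension = kDim;
  ViewSketch<kDim - 1> Slice(std::size_t) const { return {}; }
};

ViewSketch<3> MakeViewSketch();  // declaration only; decltype never evaluates the call

static_assert(decltype(MakeViewSketch().Slice(0))::kDimension == 2);
```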
9 changes: 9 additions & 0 deletions tests/cpp/common/test_span.cc
@@ -11,6 +11,15 @@
#include "../../../src/common/transform_iterator.h" // for MakeIndexTransformIter

namespace xgboost::common {
+namespace {
+using ST = common::Span<int, dynamic_extent>;
+static_assert(std::is_trivially_copyable_v<ST>);
+static_assert(std::is_trivially_move_assignable_v<ST>);
+static_assert(std::is_trivially_move_constructible_v<ST>);
+static_assert(std::is_trivially_copy_assignable_v<ST>);
+static_assert(std::is_trivially_copy_constructible_v<ST>);
+} // namespace

TEST(Span, TestStatus) {
int status = 1;
TestTestStatus {&status}();
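These trait checks are the point of the commit: with the special members defaulted, every assertion holds, whereas the previous user-provided copy constructor, copy assignment, and destructor made Span non-trivial. A standalone contrast, with illustrative struct names:

```cpp
#include <type_traits>

// Defaulted copy operations stay trivial; a user-provided copy constructor,
// even one that copies member-by-member, makes the type non-trivial.
struct Defaulted {
  int* p{nullptr};
  Defaulted() = default;
  Defaulted(Defaulted const&) = default;
};

struct UserProvided {
  int* p{nullptr};
  UserProvided() = default;
  UserProvided(UserProvided const& o) : p{o.p} {}  // user-provided -> non-trivial
};

static_assert(std::is_trivially_copy_constructible_v<Defaulted>);
static_assert(std::is_trivially_copyable_v<Defaulted>);
static_assert(!std::is_trivially_copy_constructible_v<UserProvided>);
static_assert(!std::is_trivially_copyable_v<UserProvided>);
```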
4 changes: 2 additions & 2 deletions tests/cpp/data/test_extmem_quantile_dmatrix.cu
@@ -23,8 +23,8 @@ class ExtMemQuantileDMatrixGpu : public ::testing::TestWithParam<std::tuple<floa
ASSERT_EQ(orig_cuts.Ptrs(), sparse_cuts.Ptrs());

std::vector<common::CompressedByteT> h_orig, h_sparse;
-auto orig_acc = orig.Impl()->GetHostAccessor(ctx, &h_orig, {});
-auto sparse_acc = sparse.Impl()->GetHostAccessor(ctx, &h_sparse, {});
+[[maybe_unused]] auto orig_acc = orig.Impl()->GetHostAccessor(ctx, &h_orig, {});
+[[maybe_unused]] auto sparse_acc = sparse.Impl()->GetHostAccessor(ctx, &h_sparse, {});
ASSERT_EQ(h_orig.size(), h_sparse.size());

auto equal = std::equal(h_orig.cbegin(), h_orig.cend(), h_sparse.cbegin());
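[[maybe_unused]] keeps the two accessor calls, whose side effect of filling h_orig and h_sparse is what the assertions rely on, while suppressing unused-variable warnings for the otherwise unread results. The same pattern in a self-contained form (FillAndCount is a hypothetical stand-in):

```cpp
#include <cstddef>
#include <vector>

// Hypothetical helper: the useful work is the side effect on *out.
std::size_t FillAndCount(std::vector<int>* out) {
  out->assign(4, 0);
  return out->size();
}

int main() {
  std::vector<int> buf;
  // The call is kept for its side effect; [[maybe_unused]] silences the
  // unused-variable warning for the unreferenced result under -Wall -Werror.
  [[maybe_unused]] auto n = FillAndCount(&buf);
  return buf.size() == 4 ? 0 : 1;
}
```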
2 changes: 0 additions & 2 deletions tests/cpp/data/test_simple_dmatrix.cu
@@ -360,8 +360,6 @@ TEST(SimpleDMatrix, FromCupySparse){
auto& batch = *dmat.GetBatches<SparsePage>().begin();
auto page = batch.GetView();

-auto inst0 = page[0];
-auto inst1 = page[1];
EXPECT_EQ(page[0].size(), 1);
EXPECT_EQ(page[1].size(), 1);
EXPECT_EQ(page[0][0].fvalue, 0.0f);
2 changes: 0 additions & 2 deletions tests/cpp/tree/gpu_hist/test_histogram.cu
@@ -104,7 +104,6 @@ void TestBuildHist(bool use_shared_memory_histograms) {
auto ctx = MakeCUDACtx(0);

auto page = BuildEllpackPage(&ctx, kNRows, kNCols);
-BatchParam batch_param{};

xgboost::SimpleLCG gen;
xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
@@ -448,7 +447,6 @@ class HistogramExternalMemoryTest : public ::testing::TestWithParam<std::tuple<f
auto impl = page.Impl();
if (k == 0) {
// Initialization
-auto d_matrix = impl->GetDeviceAccessor(&ctx);
fg = std::make_unique<FeatureGroups>(impl->Cuts());
auto init = GradientPairInt64{0, 0};
multi_hist = decltype(multi_hist)(impl->Cuts().TotalBins(), init);
