Skip to content

[WIP] Remove global random engine. #10354

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions include/xgboost/base.h
Original file line number Diff line number Diff line change
@@ -31,13 +31,6 @@
#define XGBOOST_LOG_WITH_TIME 1
#endif // XGBOOST_LOG_WITH_TIME

/*!
* \brief Whether to customize global PRNG.
*/
#ifndef XGBOOST_CUSTOMIZE_GLOBAL_PRNG
#define XGBOOST_CUSTOMIZE_GLOBAL_PRNG 0
#endif // XGBOOST_CUSTOMIZE_GLOBAL_PRNG

/*!
* \brief Check if alignas(*) keyword is supported. (g++ 4.8 or higher)
*/
16 changes: 12 additions & 4 deletions include/xgboost/context.h
Original file line number Diff line number Diff line change
@@ -15,8 +15,11 @@
#include <type_traits> // for invoke_result_t, is_same_v, underlying_type_t

namespace xgboost {

class Json;
struct CUDAContext;
namespace common {
class RandomEngine;
} // namespace common

// symbolic names
struct DeviceSym {
@@ -46,9 +49,7 @@ struct DeviceOrd {
[[nodiscard]] bool IsSyclDefault() const { return device == kSyclDefault; }
[[nodiscard]] bool IsSyclCPU() const { return device == kSyclCPU; }
[[nodiscard]] bool IsSyclGPU() const { return device == kSyclGPU; }
[[nodiscard]] bool IsSycl() const { return (IsSyclDefault() ||
IsSyclCPU() ||
IsSyclGPU()); }
[[nodiscard]] bool IsSycl() const { return (IsSyclDefault() || IsSyclCPU() || IsSyclGPU()); }

constexpr DeviceOrd() = default;
constexpr DeviceOrd(Type type, bst_d_ordinal_t ord) : device{type}, ordinal{ord} {}
@@ -296,6 +297,11 @@ struct Context : public XGBoostParameter<Context> {
.describe("Enable checking whether parameters are used or not.");
}

[[nodiscard]] auto& Rng() const { return *rng_; }

void SaveConfig(Json* out) const;
void LoadConfig(Json const& in);

private:
void SetDeviceOrdinal(Args const& kwargs);
Context& SetDevice(DeviceOrd d) {
@@ -307,6 +313,8 @@ struct Context : public XGBoostParameter<Context> {
// shared_ptr is used instead of unique_ptr as with unique_ptr it's difficult to define
// p_impl while trying to hide CUDA code from the host compiler.
mutable std::shared_ptr<CUDAContext> cuctx_;
// mutable for random engine. The rng is shared by child contexts, if there are any.
mutable std::shared_ptr<common::RandomEngine> rng_;
// cached value for CFS CPU limit. (used in containerized env)
std::int32_t cfs_cpu_count_; // NOLINT
};
14 changes: 1 addition & 13 deletions src/common/common.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2015-2023 by Contributors
* Copyright 2015-2024 by Contributors
*/
#include "common.h"

@@ -9,19 +9,7 @@
#include <cstdio> // for snprintf, size_t
#include <string> // for string

#include "./random.h" // for GlobalRandomEngine, GlobalRandom

namespace xgboost::common {
/*! \brief thread local entry for random. */
struct RandomThreadLocalEntry {
/*! \brief the random engine instance. */
GlobalRandomEngine engine;
};

using RandomThreadLocalStore = dmlc::ThreadLocalStore<RandomThreadLocalEntry>;

GlobalRandomEngine &GlobalRandom() { return RandomThreadLocalStore::Get()->engine; }

void EscapeU8(std::string const &string, std::string *p_buffer) {
auto &buffer = *p_buffer;
for (size_t i = 0; i < string.length(); i++) {
6 changes: 3 additions & 3 deletions src/common/random.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2023, XGBoost Contributors
* Copyright 2023-2024, XGBoost Contributors
*/
#include <thrust/shuffle.h> // for shuffle

@@ -19,7 +19,7 @@ void WeightedSamplingWithoutReplacement(Context const *ctx, common::Span<bst_fea
common::Span<float const> weights,
common::Span<bst_feature_t> results,
HostDeviceVector<bst_feature_t> *sorted_idx,
GlobalRandomEngine *grng) {
RandomEngine *grng) {
CUDAContext const *cuctx = ctx->CUDACtx();
CHECK_EQ(array.size(), weights.size());
// Sampling keys
@@ -61,7 +61,7 @@ void SampleFeature(Context const *ctx, bst_feature_t n_features,
std::shared_ptr<HostDeviceVector<bst_feature_t>> p_new_features,
HostDeviceVector<float> const &feature_weights,
HostDeviceVector<float> *weight_buffer,
HostDeviceVector<bst_feature_t> *idx_buffer, GlobalRandomEngine *grng) {
HostDeviceVector<bst_feature_t> *idx_buffer, RandomEngine *grng) {
CUDAContext const *cuctx = ctx->CUDACtx();
auto &new_features = *p_new_features;
new_features.SetDevice(ctx->Device());
78 changes: 11 additions & 67 deletions src/common/random.h
Original file line number Diff line number Diff line change
@@ -7,80 +7,24 @@
#ifndef XGBOOST_COMMON_RANDOM_H_
#define XGBOOST_COMMON_RANDOM_H_

#include <xgboost/logging.h>

#include <algorithm>
#include <functional>
#include <limits>
#include <map>
#include <memory>
#include <numeric>
#include <random>
#include <utility>
#include <vector>

#include "../collective/broadcast.h" // for Broadcast
#include "../collective/communicator-inl.h"
#include "algorithm.h" // ArgSort
#include "common.h"
#include "xgboost/context.h" // Context
#include "xgboost/host_device_vector.h"
#include "xgboost/linalg.h"
#include "../collective/broadcast.h" // for Broadcast
#include "algorithm.h" // ArgSort
#include "xgboost/context.h" // Context
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/linalg.h" // for MakeVec
#include "xgboost/logging.h"

namespace xgboost::common {
/*!
* \brief Define mt19937 as default type Random Engine.
*/
using RandomEngine = std::mt19937;

#if defined(XGBOOST_CUSTOMIZE_GLOBAL_PRNG) && XGBOOST_CUSTOMIZE_GLOBAL_PRNG == 1
/*!
* \brief A customized random engine, used to plug in a PRNG from other systems.
* The implementation of this library is not provided by xgboost core library.
* Instead the other library can implement this class, which will be used as GlobalRandomEngine
* Used when XGBOOST_CUSTOMIZE_GLOBAL_PRNG == 1; this is switched off by default.
*/
class CustomGlobalRandomEngine {
public:
/*! \brief The result type */
using result_type = uint32_t;
/*! \brief The minimum of random numbers generated */
inline static constexpr result_type min() {
return 0;
}
/*! \brief The maximum random numbers generated */
inline static constexpr result_type max() {
return std::numeric_limits<result_type>::max();
}
/*!
* \brief seed function, to be implemented
* \param val The value of the seed.
*/
void seed(result_type val);
/*!
* \return next random number.
*/
result_type operator()();
};

/*!
* \brief global random engine
*/
typedef CustomGlobalRandomEngine GlobalRandomEngine;

#else
/*!
* \brief global random engine
*/
using GlobalRandomEngine = RandomEngine;
#endif // XGBOOST_CUSTOMIZE_GLOBAL_PRNG

/*!
* \brief global singleton of a random engine.
* This random engine is thread-local and
* only visible to current thread.
*/
GlobalRandomEngine& GlobalRandom(); // NOLINT(*)
class RandomEngine : public std::mt19937 {};

/*
* Original paper:
@@ -96,7 +40,7 @@ std::vector<T> WeightedSamplingWithoutReplacement(Context const* ctx, std::vecto
CHECK_EQ(array.size(), weights.size());
std::vector<float> keys(weights.size());
std::uniform_real_distribution<float> dist;
auto& rng = GlobalRandom();
auto& rng = ctx->Rng();
for (size_t i = 0; i < array.size(); ++i) {
auto w = std::max(weights.at(i), kRtEps);
auto u = dist(rng);
@@ -120,7 +64,7 @@ void SampleFeature(Context const* ctx, bst_feature_t n_features,
std::shared_ptr<HostDeviceVector<bst_feature_t>> p_new_features,
HostDeviceVector<float> const& feature_weights,
HostDeviceVector<float>* weight_buffer,
HostDeviceVector<bst_feature_t>* idx_buffer, GlobalRandomEngine* grng);
HostDeviceVector<bst_feature_t>* idx_buffer, RandomEngine* grng);

void InitFeatureSet(Context const* ctx,
std::shared_ptr<HostDeviceVector<bst_feature_t>> p_features);
@@ -140,7 +84,7 @@ class ColumnSampler {
float colsample_bylevel_{1.0f};
float colsample_bytree_{1.0f};
float colsample_bynode_{1.0f};
GlobalRandomEngine rng_;
RandomEngine rng_;
Context const* ctx_;

// Used for weighted sampling.
@@ -230,7 +174,7 @@ class ColumnSampler {
};

inline auto MakeColumnSampler(Context const* ctx) {
std::uint32_t seed = common::GlobalRandomEngine()();
std::uint32_t seed = ctx->Rng()();
auto rc = collective::Broadcast(ctx, linalg::MakeVec(&seed, 1), 0);
collective::SafeColl(rc);
auto cs = std::make_shared<common::ColumnSampler>(seed);
30 changes: 27 additions & 3 deletions src/context.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2014-2023 by XGBoost Contributors
* Copyright 2014-2024, XGBoost Contributors
*
* \brief Context object used for controlling runtime parameters.
*/
@@ -8,21 +8,27 @@
#include <algorithm> // for find_if
#include <charconv> // for from_chars
#include <iterator> // for distance
#include <locale> // for locale
#include <optional> // for optional
#include <regex> // for regex_replace, regex_match
#include <sstream> // for stringstream

#include "common/common.h" // AssertGPUSupport
#include "common/error_msg.h" // WarnDeprecatedGPUId
#include "common/threading_utils.h"
#include "xgboost/json.h" // for Json
#include "xgboost/string_view.h"
#include "common/random.h" // for RandomEngine

namespace xgboost {

DMLC_REGISTER_PARAMETER(Context);

std::int64_t constexpr Context::kDefaultSeed;

Context::Context() : cfs_cpu_count_{common::GetCfsCPUCount()} {}
// Construct a Context that owns its own PRNG (replacing the removed
// thread-local global engine). Seeding with kDefaultSeed makes a freshly
// constructed Context deterministic; LearnerConfiguration later re-seeds
// the engine from the user-supplied `seed` parameter.
Context::Context()
: rng_{std::make_shared<common::RandomEngine>()}, cfs_cpu_count_{common::GetCfsCPUCount()} {
rng_->seed(kDefaultSeed);
}

namespace {
inline constexpr char const* kDevice = "device";
@@ -219,6 +225,24 @@ void Context::Init(Args const& kwargs) {
}
}

// Serialize the context parameters plus the PRNG state into `out`, so that
// a save/load round trip reproduces the same random sequence.
void Context::SaveConfig(Json* out) const {
(*out) = ToJson(*this);
// std::mt19937's stream insertion writes the engine state as formatted
// integers, which is locale-sensitive; a fixed locale is imbued so the
// text matches what LoadConfig() expects regardless of the global locale.
// NOTE(review): std::locale{"en_US.UTF8"} throws if that locale is not
// installed on the host -- TODO confirm this is acceptable on all targets.
std::stringstream ss;
ss.imbue(std::locale{"en_US.UTF8"});
ss << this->Rng();
(*out)["rng"] = ss.str();
}

// Restore the context parameters and PRNG state previously written by
// SaveConfig().
void Context::LoadConfig(Json const& in) {
FromJson(in, this);
std::stringstream ss;
// Must use the same fixed locale as SaveConfig() for a faithful round trip.
ss.imbue(std::locale{"en_US.UTF8"});
// NOTE(review): `in["rng"]` requires the "rng" key to exist; models saved
// before this change have a "generic_param" without it -- verify backward
// compatibility for loading old models.
ss << get<String const>(in["rng"]);
ss >> this->Rng();
// Make sure the GPU ID is valid in the new environment before running configure.
this->ConfigureGpuId(false);
}

void Context::ConfigureGpuId(bool require_gpu) {
if (this->IsCPU() && require_gpu) {
this->UpdateAllowUnknown(Args{{kDevice, DeviceSym::CUDA()}});
5 changes: 2 additions & 3 deletions src/gbm/gbtree.cc
Original file line number Diff line number Diff line change
@@ -10,8 +10,7 @@
#include <dmlc/parameter.h>

#include <algorithm> // for equal
#include <cinttypes> // for uint32_t
#include <limits>
#include <cstdint> // for uint32_t
#include <memory>
#include <string>
#include <utility>
@@ -928,7 +927,7 @@ class Dart : public GBTree {
idx_drop_.clear();

std::uniform_real_distribution<> runif(0.0, 1.0);
auto& rnd = common::GlobalRandom();
auto& rnd = ctx_->Rng();
bool skip = false;
if (dparam_.skip_drop > 0.0) skip = (runif(rnd) < dparam_.skip_drop);
// sample some trees to drop
17 changes: 7 additions & 10 deletions src/learner.cc
Original file line number Diff line number Diff line change
@@ -23,13 +23,11 @@
#include <limits> // for numeric_limits
#include <memory> // for allocator, unique_ptr, shared_ptr, operator==
#include <mutex> // for mutex, lock_guard
#include <set> // for set
#include <sstream> // for operator<<, basic_ostream, basic_ostream::opera...
#include <stack> // for stack
#include <string> // for basic_string, char_traits, operator<, string
#include <system_error> // for errc
#include <tuple> // for get
#include <unordered_map> // for operator!=, unordered_map
#include <utility> // for pair, as_const, move, swap
#include <vector> // for vector

@@ -41,7 +39,7 @@
#include "common/error_msg.h" // for MaxFeatureSize, WarnOldSerialization, ...
#include "common/io.h" // for PeekableInStream, ReadAll, FixedSizeStream, Mem...
#include "common/observer.h" // for TrainingObserver
#include "common/random.h" // for GlobalRandom
#include "common/random.h" // for RandomEngine
#include "common/timer.h" // for Monitor
#include "common/version.h" // for Version
#include "dmlc/endian.h" // for ByteSwap, DMLC_IO_NO_ENDIAN_SWAP
@@ -476,7 +474,7 @@ class LearnerConfiguration : public Learner {

// set seed only before the model is initialized
if (!initialized || ctx_.seed != old_seed) {
common::GlobalRandom().seed(ctx_.seed);
ctx_.Rng().seed(ctx_.seed);
}

// must precede configure gbm since num_features is required for gbm
@@ -556,9 +554,7 @@ class LearnerConfiguration : public Learner {
}
}

FromJson(learner_parameters.at("generic_param"), &ctx_);
// make sure the GPU ID is valid in new environment before start running configure.
ctx_.ConfigureGpuId(false);
ctx_.LoadConfig(learner_parameters.at("generic_param"));

this->need_configuration_ = true;
}
@@ -588,7 +584,8 @@ class LearnerConfiguration : public Learner {
}
learner_parameters["metrics"] = Array(std::move(metrics));

learner_parameters["generic_param"] = ToJson(ctx_);
learner_parameters["generic_param"] = Object{};
ctx_.SaveConfig(&learner_parameters["generic_param"]);
}

void SetParam(const std::string& key, const std::string& value) override {
@@ -1271,7 +1268,7 @@ class LearnerImpl : public LearnerIO {
this->InitBaseScore(train.get());

if (ctx_.seed_per_iteration) {
common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);
ctx_.Rng().seed(ctx_.seed * kRandSeedMagic + iter);
}

this->ValidateDMatrix(train.get(), true);
@@ -1298,7 +1295,7 @@ class LearnerImpl : public LearnerIO {
this->Configure();

if (ctx_.seed_per_iteration) {
common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);
ctx_.Rng().seed(ctx_.seed * kRandSeedMagic + iter);
}

this->ValidateDMatrix(train.get(), true);
8 changes: 4 additions & 4 deletions src/linear/coordinate_common.h
Original file line number Diff line number Diff line change
@@ -278,13 +278,13 @@ class CyclicFeatureSelector : public FeatureSelector {
class ShuffleFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
void Setup(Context const *, const gbm::GBLinearModel &model, const std::vector<GradientPair> &,
void Setup(Context const *ctx, const gbm::GBLinearModel &model, const std::vector<GradientPair> &,
DMatrix *, float, float, int) override {
if (feat_index_.size() == 0) {
feat_index_.resize(model.learner_model_param->num_feature);
std::iota(feat_index_.begin(), feat_index_.end(), 0);
}
std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom());
std::shuffle(feat_index_.begin(), feat_index_.end(), ctx->Rng());
}

int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int,
@@ -303,9 +303,9 @@ class ShuffleFeatureSelector : public FeatureSelector {
class RandomFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
int NextFeature(Context const *, int, const gbm::GBLinearModel &model, int,
int NextFeature(Context const *ctx, int, const gbm::GBLinearModel &model, int,
const std::vector<GradientPair> &, DMatrix *, float, float) override {
return common::GlobalRandom()() % model.learner_model_param->num_feature;
return ctx->Rng()() % model.learner_model_param->num_feature;
}
};

14 changes: 7 additions & 7 deletions src/tree/gpu_hist/gradient_based_sampler.cu
Original file line number Diff line number Diff line change
@@ -187,7 +187,7 @@ GradientBasedSample UniformSampling::Sample(Context const* ctx, common::Span<Gra
auto cuctx = ctx->CUDACtx();
thrust::replace_if(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair),
thrust::counting_iterator<std::size_t>(0),
BernoulliTrial(common::GlobalRandom()(), subsample_), GradientPair());
BernoulliTrial(ctx->Rng()(), subsample_), GradientPair());
auto page = (*dmat->GetBatches<EllpackPage>(ctx, batch_param_).begin()).Impl();
return {dmat->Info().num_row_, page, gpair};
}
@@ -206,7 +206,7 @@ GradientBasedSample ExternalMemoryUniformSampling::Sample(Context const* ctx,
// Set gradient pair to 0 with p = 1 - subsample
thrust::replace_if(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair),
thrust::counting_iterator<std::size_t>(0),
BernoulliTrial(common::GlobalRandom()(), subsample_), GradientPair{});
BernoulliTrial(ctx->Rng()(), subsample_), GradientPair{});

// Count the sampled rows.
size_t sample_rows =
@@ -260,7 +260,7 @@ GradientBasedSample GradientBasedSampling::Sample(Context const* ctx,
thrust::transform(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair),
thrust::counting_iterator<size_t>(0), dh::tbegin(gpair),
PoissonSampling(dh::ToSpan(threshold_), threshold_index,
RandomWeight(common::GlobalRandom()())));
RandomWeight(ctx->Rng()())));
return {n_rows, page, gpair};
}

@@ -282,10 +282,10 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* c
gpair, dh::ToSpan(threshold_), dh::ToSpan(grad_sum_), n_rows * subsample_);

// Perform Poisson sampling in place.
thrust::transform(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair),
thrust::counting_iterator<size_t>(0), dh::tbegin(gpair),
PoissonSampling(dh::ToSpan(threshold_), threshold_index,
RandomWeight(common::GlobalRandom()())));
thrust::transform(
cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair), thrust::counting_iterator<size_t>(0),
dh::tbegin(gpair),
PoissonSampling(dh::ToSpan(threshold_), threshold_index, RandomWeight(ctx->Rng()())));

// Count the sampled rows.
size_t sample_rows = thrust::count_if(dh::tbegin(gpair), dh::tend(gpair), IsNonZero());
27 changes: 14 additions & 13 deletions src/tree/hist/evaluate_splits.h
Original file line number Diff line number Diff line change
@@ -12,19 +12,20 @@
#include <utility> // for move
#include <vector> // for vector

#include "../../collective/allgather.h"
#include "../../common/categorical.h" // for CatBitField
#include "../../common/hist_util.h" // for GHistRow, HistogramCuts
#include "../../common/linalg_op.h" // for cbegin, cend, begin
#include "../../common/random.h" // for ColumnSampler
#include "../constraints.h" // for FeatureInteractionConstraintHost
#include "../param.h" // for TrainParam
#include "../split_evaluator.h" // for TreeEvaluator
#include "expand_entry.h" // for MultiExpandEntry
#include "hist_cache.h" // for BoundedHistCollection
#include "xgboost/base.h" // for bst_node_t, bst_target_t, bst_feature_t
#include "xgboost/context.h" // for Context
#include "xgboost/linalg.h" // for Constants, Vector
#include "../../collective/allgather.h" // for VectorAllgatherV
#include "../../collective/communicator-inl.h" // for GetWorldSize
#include "../../common/categorical.h" // for CatBitField
#include "../../common/hist_util.h" // for GHistRow, HistogramCuts
#include "../../common/linalg_op.h" // for cbegin, cend, begin
#include "../../common/random.h" // for ColumnSampler
#include "../constraints.h" // for FeatureInteractionConstraintHost
#include "../param.h" // for TrainParam
#include "../split_evaluator.h" // for TreeEvaluator
#include "expand_entry.h" // for MultiExpandEntry
#include "hist_cache.h" // for BoundedHistCollection
#include "xgboost/base.h" // for bst_node_t, bst_target_t, bst_feature_t
#include "xgboost/context.h" // for Context
#include "xgboost/linalg.h" // for Constants, Vector

namespace xgboost::tree {
/**
28 changes: 9 additions & 19 deletions src/tree/hist/sampler.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
* Copyright 2020-2024, XGBoost Contributors
*/
#ifndef XGBOOST_TREE_HIST_SAMPLER_H_
#define XGBOOST_TREE_HIST_SAMPLER_H_
@@ -8,12 +8,12 @@
#include <cstdint> // std::uint64_t
#include <random> // bernoulli_distribution, linear_congruential_engine

#include "../../common/random.h" // GlobalRandom
#include "../param.h" // TrainParam
#include "xgboost/base.h" // GradientPair
#include "xgboost/context.h" // Context
#include "xgboost/data.h" // MetaInfo
#include "xgboost/linalg.h" // TensorView
#include "../../common/random.h" // for RandomEngine
#include "../param.h" // for TrainParam
#include "xgboost/base.h" // for GradientPair
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/linalg.h" // for TensorView

namespace xgboost {
namespace tree {
@@ -55,18 +55,9 @@ inline void SampleGradient(Context const* ctx, TrainParam param,
return;
}
bst_idx_t n_samples = out.Shape(0);
auto& rnd = common::GlobalRandom();
auto& rng = ctx->Rng();

#if XGBOOST_CUSTOMIZE_GLOBAL_PRNG
std::bernoulli_distribution coin_flip(param.subsample);
CHECK_EQ(out.Shape(1), 1) << "Multi-target with sampling for R is not yet supported.";
for (size_t i = 0; i < n_samples; ++i) {
if (!(out(i, 0).GetHess() >= 0.0f && coin_flip(rnd)) || out(i, 0).GetGrad() == 0.0f) {
out(i, 0) = GradientPair(0);
}
}
#else
std::uint64_t initial_seed = rnd();
std::uint64_t initial_seed = rng();

auto n_threads = static_cast<size_t>(ctx->Threads());
std::size_t const discard_size = n_samples / n_threads;
@@ -102,7 +93,6 @@ inline void SampleGradient(Context const* ctx, TrainParam param,
});
}
exc.Rethrow();
#endif // XGBOOST_CUSTOMIZE_GLOBAL_PRNG
}
} // namespace tree
} // namespace xgboost
7 changes: 4 additions & 3 deletions src/tree/updater_colmaker.cc
Original file line number Diff line number Diff line change
@@ -8,7 +8,8 @@
#include <cmath>
#include <vector>

#include "../common/error_msg.h" // for NoCategorical
#include "../collective/communicator-inl.h" // for IsDistributed
#include "../common/error_msg.h" // for NoCategorical
#include "../common/random.h"
#include "constraints.h"
#include "param.h"
@@ -224,8 +225,8 @@ class ColMaker: public TreeUpdater {
<< "Only uniform sampling is supported, "
<< "gradient-based sampling is only support by GPU Hist.";
std::bernoulli_distribution coin_flip(param_.subsample);
auto& rnd = common::GlobalRandom();
for (size_t ridx = 0; ridx < position_.size(); ++ridx) {
auto &rnd = ctx_->Rng();
for (bst_idx_t ridx = 0; ridx < position_.size(); ++ridx) {
if (gpair[ridx].GetHess() < 0.0f) continue;
if (!coin_flip(rnd)) position_[ridx] = ~position_[ridx];
}
4 changes: 2 additions & 2 deletions src/tree/updater_gpu_hist.cu
Original file line number Diff line number Diff line change
@@ -867,7 +867,7 @@ class GPUHistMaker : public TreeUpdater {
info_ = &dmat->Info();

// Synchronise the column sampling seed
uint32_t column_sampling_seed = common::GlobalRandom()();
std::uint32_t column_sampling_seed = ctx_->Rng()();
auto rc = collective::Broadcast(
ctx_, linalg::MakeVec(&column_sampling_seed, sizeof(column_sampling_seed)), 0);
SafeColl(rc);
@@ -1011,7 +1011,7 @@ class GPUGlobalApproxMaker : public TreeUpdater {

monitor_.Start(__func__);
CHECK(ctx_->IsCUDA()) << error::InvalidCUDAOrdinal();
uint32_t column_sampling_seed = common::GlobalRandom()();
std::uint32_t column_sampling_seed = ctx_->Rng()();
this->column_sampler_ = std::make_shared<common::ColumnSampler>(column_sampling_seed);

p_last_fmat_ = p_fmat;
2 changes: 1 addition & 1 deletion tests/cpp/test_learner.cc
Original file line number Diff line number Diff line change
@@ -399,7 +399,7 @@ TEST(Learner, ConstantSeed) {
learner->Configure(); // seed the global random

std::uniform_real_distribution<float> dist;
auto& rng = common::GlobalRandom();
auto& rng = learner->Ctx()->Rng();
float v_0 = dist(rng);

learner->SetParam("", "");
11 changes: 5 additions & 6 deletions tests/cpp/test_serialization.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2023, XGBoost Contributors
* Copyright (c) 2019-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h>
@@ -148,13 +148,12 @@ void TestLearnerSerialization(Args args, FeatureMap const& fmap, std::shared_ptr
std::string continued_model;
{
// Continue the previous training with another kIters
std::unique_ptr<dmlc::Stream> fi(
dmlc::Stream::Create(fname.c_str(), "r"));
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r"));
std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};
learner->Load(fi.get());
learner->Configure();

// verify the loaded model doesn't change.
// Verify the loaded model doesn't change.
std::string serialised_model_tmp;
common::MemoryBufferStream mem_out(&serialised_model_tmp);
learner->Save(&mem_out);
@@ -484,7 +483,7 @@ class LogitSerializationTest : public SerializationTest {
auto& h_labels = p_dmat->Info().labels.Data()->HostVector();

std::bernoulli_distribution flip(0.5);
auto& rnd = common::GlobalRandom();
auto& rnd = p_dmat->Ctx()->Rng();
rnd.seed(0);

for (auto& v : h_labels) { v = flip(rnd); }
@@ -608,7 +607,7 @@ class MultiClassesSerializationTest : public SerializationTest {
auto &h_labels = p_dmat->Info().labels.Data()->HostVector();

std::uniform_int_distribution<size_t> categorical(0, kClasses - 1);
auto& rnd = common::GlobalRandom();
auto& rnd = p_dmat->Ctx()->Rng();
rnd.seed(0);

for (auto& v : h_labels) { v = categorical(rnd); }
28 changes: 16 additions & 12 deletions tests/cpp/tree/test_gpu_hist.cu
Original file line number Diff line number Diff line change
@@ -258,7 +258,6 @@ TEST(GpuHist, UniformSampling) {
constexpr size_t kRows = 4096;
constexpr size_t kCols = 2;
constexpr float kSubsample = 0.9999;
common::GlobalRandom().seed(1994);

// Create an in-memory DMatrix.
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, 0, true));
@@ -274,8 +273,11 @@ TEST(GpuHist, UniformSampling) {
// Build another tree using sampling.
RegTree tree_sampling;
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, DeviceOrd::CUDA(0));
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample, "uniform",
kRows);
{
auto ctx = MakeCUDACtx(0);
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample, "uniform",
kRows);
}

// Make sure the predictions are the same.
auto preds_h = preds.ConstHostVector();
@@ -289,7 +291,6 @@ TEST(GpuHist, GradientBasedSampling) {
constexpr size_t kRows = 4096;
constexpr size_t kCols = 2;
constexpr float kSubsample = 0.9999;
common::GlobalRandom().seed(1994);

// Create an in-memory DMatrix.
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, 0, true));
@@ -306,8 +307,11 @@ TEST(GpuHist, GradientBasedSampling) {
// Build another tree using sampling.
RegTree tree_sampling;
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, DeviceOrd::CUDA(0));
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
"gradient_based", kRows);
{
auto ctx = MakeCUDACtx(0);
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
"gradient_based", kRows);
}

// Make sure the predictions are the same.
auto preds_h = preds.ConstHostVector();
@@ -358,7 +362,6 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
constexpr size_t kPageSize = 1024;
constexpr float kSubsample = 0.5;
const std::string kSamplingMethod = "gradient_based";
common::GlobalRandom().seed(0);

dmlc::TemporaryDirectory tmpdir;

@@ -374,18 +377,19 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
gpair.Data()->Copy(GenerateRandomGradients(kRows));

// Build a tree using the in-memory DMatrix.
auto rng = common::GlobalRandom();

RegTree tree;
HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod, kRows);

// Build another tree using multiple ELLPACK pages.
common::GlobalRandom() = rng;

RegTree tree_ext;
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, DeviceOrd::CUDA(0));
UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, kSubsample,
kSamplingMethod, kRows);
{
Context ctx(MakeCUDACtx(0));
UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, kSubsample,
kSamplingMethod, kRows);
}

// Make sure the predictions are the same.
auto preds_h = preds.ConstHostVector();