Remove omp_get_max_threads in gbm and linear. #7537

Merged (2 commits, Jan 4, 2022)
2 changes: 1 addition & 1 deletion include/xgboost/gbm.h
@@ -38,7 +38,7 @@ class PredictionContainer;
*/
class GradientBooster : public Model, public Configurable {
protected:
GenericParameter const* generic_param_;
GenericParameter const* ctx_;

public:
/*! \brief virtual destructor */
2 changes: 1 addition & 1 deletion include/xgboost/linear_updater.h
@@ -29,7 +29,7 @@ class GBLinearModel;
*/
class LinearUpdater : public Configurable {
protected:
GenericParameter const* learner_param_;
GenericParameter const* ctx_;

public:
/*! \brief virtual destructor */
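Both base classes now hold the learner context under one name, ctx_. This is the hook the rest of the PR builds on: instead of asking OpenMP for omp_get_max_threads(), callers read the thread count configured on the learner. Below is a minimal sketch of that pattern; only the ctx_ member and the GenericParameter::Threads() accessor come from this diff, while the derived updater and its method are invented for illustration.

#include <cstdint>

// Stand-in for xgboost::GenericParameter; only the accessor this PR relies on is sketched.
struct GenericParameter {
  std::int32_t nthread{0};
  // Resolved thread count (the real accessor also handles nthread <= 0).
  std::int32_t Threads() const { return nthread; }
};

class LinearUpdater {
 protected:
  GenericParameter const* ctx_{nullptr};  // renamed from learner_param_ / generic_param_
};

// Hypothetical subclass showing the intended call pattern.
class ExampleUpdater : public LinearUpdater {
 public:
  std::int32_t ThreadsForUpdate() const {
    // Before this PR: omp_get_max_threads() (ambient OpenMP state).
    // After: the count carried by the learner context.
    return ctx_->Threads();
  }
};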
4 changes: 2 additions & 2 deletions src/gbm/gblinear.cc
@@ -86,7 +86,7 @@ class GBLinear : public GradientBooster {
}
param_.UpdateAllowUnknown(cfg);
param_.CheckGPUSupport();
updater_.reset(LinearUpdater::Create(param_.updater, generic_param_));
updater_.reset(LinearUpdater::Create(param_.updater, ctx_));
updater_->Configure(cfg);
monitor_.Init("GBLinear");
}
@@ -120,7 +120,7 @@ class GBLinear : public GradientBooster {
CHECK_EQ(get<String>(in["name"]), "gblinear");
FromJson(in["gblinear_train_param"], &param_);
param_.CheckGPUSupport();
updater_.reset(LinearUpdater::Create(param_.updater, generic_param_));
updater_.reset(LinearUpdater::Create(param_.updater, ctx_));
this->updater_->LoadConfig(in["updater"]);
}
void SaveConfig(Json* p_out) const override {
2 changes: 1 addition & 1 deletion src/gbm/gbm.cc
@@ -26,7 +26,7 @@ GradientBooster* GradientBooster::Create(
LOG(FATAL) << "Unknown gbm type " << name;
}
auto p_bst = (e->body)(learner_model_param);
p_bst->generic_param_ = generic_param;
p_bst->ctx_ = generic_param;
return p_bst;
}
} // namespace xgboost
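The factory assigns the context pointer exactly once, at creation time, so a booster never has to consult global OpenMP state afterwards. For context, here is one plausible way a configured nthread could be resolved against the OpenMP limit; this is a guess at the intent, not the actual body of GenericParameter::Threads():

#include <omp.h>

#include <algorithm>
#include <cstdint>

// Hypothetical resolution of a user-configured thread count: nthread <= 0 means
// "use everything available", while a positive value is capped by the OpenMP maximum.
std::int32_t ResolveThreads(std::int32_t configured_nthread) {
  std::int32_t max_threads = omp_get_max_threads();
  return configured_nthread > 0 ? std::min(configured_nthread, max_threads)
                                : max_threads;
}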
37 changes: 17 additions & 20 deletions src/gbm/gbtree.cc
@@ -49,14 +49,14 @@ void GBTree::Configure(const Args& cfg) {
// configure predictors
if (!cpu_predictor_) {
cpu_predictor_ = std::unique_ptr<Predictor>(
Predictor::Create("cpu_predictor", this->generic_param_));
Predictor::Create("cpu_predictor", this->ctx_));
}
cpu_predictor_->Configure(cfg);
#if defined(XGBOOST_USE_CUDA)
auto n_gpus = common::AllVisibleGPUs();
if (!gpu_predictor_ && n_gpus != 0) {
gpu_predictor_ = std::unique_ptr<Predictor>(
Predictor::Create("gpu_predictor", this->generic_param_));
Predictor::Create("gpu_predictor", this->ctx_));
}
if (n_gpus != 0) {
gpu_predictor_->Configure(cfg);
@@ -201,16 +201,16 @@ void GPUCopyGradient(HostDeviceVector<GradientPair> const *in_gpair,
}
#endif

void CopyGradient(HostDeviceVector<GradientPair> const *in_gpair,
void CopyGradient(HostDeviceVector<GradientPair> const* in_gpair, int32_t n_threads,
bst_group_t n_groups, bst_group_t group_id,
HostDeviceVector<GradientPair> *out_gpair) {
HostDeviceVector<GradientPair>* out_gpair) {
if (in_gpair->DeviceIdx() != GenericParameter::kCpuId) {
GPUCopyGradient(in_gpair, n_groups, group_id, out_gpair);
} else {
std::vector<GradientPair> &tmp_h = out_gpair->HostVector();
auto nsize = static_cast<bst_omp_uint>(out_gpair->Size());
const auto &gpair_h = in_gpair->ConstHostVector();
common::ParallelFor(nsize, [&](bst_omp_uint i) {
common::ParallelFor(nsize, n_threads, [&](bst_omp_uint i) {
tmp_h[i] = gpair_h[i * n_groups + group_id];
});
}
@@ -228,7 +228,7 @@ void GBTree::DoBoost(DMatrix* p_fmat,
// break a lots of existing code.
auto device = tparam_.tree_method != TreeMethod::kGPUHist
? GenericParameter::kCpuId
: generic_param_->gpu_id;
: ctx_->gpu_id;
auto out = linalg::TensorView<float, 2>{
device == GenericParameter::kCpuId ? predt->predictions.HostSpan()
: predt->predictions.DeviceSpan(),
@@ -255,7 +255,7 @@
in_gpair->DeviceIdx());
bool update_predict = true;
for (int gid = 0; gid < ngroup; ++gid) {
CopyGradient(in_gpair, ngroup, gid, &tmp);
CopyGradient(in_gpair, ctx_->Threads(), ngroup, gid, &tmp);
std::vector<std::unique_ptr<RegTree> > ret;
BoostNewTrees(&tmp, p_fmat, gid, &ret);
const size_t num_new_trees = ret.size();
@@ -310,7 +310,7 @@ void GBTree::InitUpdater(Args const& cfg) {
// create new updaters
for (const std::string& pstr : ups) {
std::unique_ptr<TreeUpdater> up(
TreeUpdater::Create(pstr.c_str(), generic_param_, model_.learner_model_param->task));
TreeUpdater::Create(pstr.c_str(), ctx_, model_.learner_model_param->task));
up->Configure(cfg);
updaters_.push_back(std::move(up));
}
@@ -396,7 +396,7 @@ void GBTree::LoadConfig(Json const& in) {
updaters_.clear();
for (auto const& kv : j_updaters) {
std::unique_ptr<TreeUpdater> up(
TreeUpdater::Create(kv.first, generic_param_, model_.learner_model_param->task));
TreeUpdater::Create(kv.first, ctx_, model_.learner_model_param->task));
up->LoadConfig(kv.second);
updaters_.push_back(std::move(up));
}
@@ -562,7 +562,7 @@ GBTree::GetPredictor(HostDeviceVector<float> const *out_pred,
auto on_device = is_ellpack || is_from_device;

// Use GPU Predictor if data is already on device and gpu_id is set.
if (on_device && generic_param_->gpu_id >= 0) {
if (on_device && ctx_->gpu_id >= 0) {
#if defined(XGBOOST_USE_CUDA)
CHECK_GE(common::AllVisibleGPUs(), 1) << "No visible GPU is found for XGBoost.";
CHECK(gpu_predictor_);
@@ -728,8 +728,8 @@ class Dart : public GBTree {
auto n_groups = model_.learner_model_param->num_output_group;

PredictionCacheEntry predts; // temporary storage for prediction
if (generic_param_->gpu_id != GenericParameter::kCpuId) {
predts.predictions.SetDevice(generic_param_->gpu_id);
if (ctx_->gpu_id != GenericParameter::kCpuId) {
predts.predictions.SetDevice(ctx_->gpu_id);
}
predts.predictions.Resize(p_fmat->Info().num_row_ * n_groups, 0);

@@ -758,11 +758,10 @@ class Dart : public GBTree {
} else {
auto &h_out_predts = p_out_preds->predictions.HostVector();
auto &h_predts = predts.predictions.HostVector();
#pragma omp parallel for
for (omp_ulong ridx = 0; ridx < p_fmat->Info().num_row_; ++ridx) {
common::ParallelFor(p_fmat->Info().num_row_, ctx_->Threads(), [&](auto ridx) {
const size_t offset = ridx * n_groups + group;
h_out_predts[offset] += (h_predts[offset] * w);
}
});
}
}
}
@@ -846,13 +845,11 @@ class Dart : public GBTree {
if (device == GenericParameter::kCpuId) {
auto &h_predts = predts.predictions.HostVector();
auto &h_out_predts = out_preds->predictions.HostVector();
#pragma omp parallel for
for (omp_ulong ridx = 0; ridx < n_rows; ++ridx) {
common::ParallelFor(n_rows, ctx_->Threads(), [&](auto ridx) {
const size_t offset = ridx * n_groups + group;
// Need to remove the base margin from individual tree.
h_out_predts[offset] +=
(h_predts[offset] - model_.learner_model_param->base_score) * w;
}
h_out_predts[offset] += (h_predts[offset] - model_.learner_model_param->base_score) * w;
});
} else {
out_preds->predictions.SetDevice(device);
predts.predictions.SetDevice(device);
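The Dart prediction loops above replace bare #pragma omp parallel for, which implicitly runs with omp_get_max_threads() threads, with common::ParallelFor taking an explicit thread count. The real common::ParallelFor also handles exception propagation and scheduling policies, which this sketch omits; the simplified stand-in below only shows how the explicit n_threads is threaded through:

#include <cstddef>
#include <cstdint>
#include <vector>

// Simplified stand-in for common::ParallelFor with an explicit thread count.
template <typename Index, typename Func>
void ParallelFor(Index n, std::int32_t n_threads, Func&& fn) {
  std::int64_t end = static_cast<std::int64_t>(n);
#pragma omp parallel for schedule(static) num_threads(n_threads)
  for (std::int64_t i = 0; i < end; ++i) {
    fn(static_cast<Index>(i));
  }
}

int main() {
  std::vector<float> out(1000, 0.0f), in(4000, 1.0f);
  std::int32_t n_threads = 4;           // would come from ctx_->Threads()
  std::size_t n_groups = 4, group = 1;  // strided gather, as in CopyGradient
  ParallelFor(out.size(), n_threads, [&](std::size_t i) {
    out[i] = in[i * n_groups + group];
  });
  return 0;
}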
5 changes: 2 additions & 3 deletions src/gbm/gbtree.h
@@ -413,10 +413,9 @@ class GBTree : public GradientBooster {
p_fmat, out_contribs, model_, tree_end, nullptr, approximate);
}

std::vector<std::string> DumpModel(const FeatureMap& fmap,
bool with_stats,
std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
std::string format) const override {
return model_.DumpModel(fmap, with_stats, format);
return model_.DumpModel(fmap, with_stats, this->ctx_->Threads(), format);
}

protected:
7 changes: 3 additions & 4 deletions src/gbm/gbtree_model.h
@@ -109,12 +109,11 @@ struct GBTreeModel : public Model {
void SaveModel(Json* p_out) const override;
void LoadModel(Json const& p_out) override;

std::vector<std::string> DumpModel(const FeatureMap &fmap, bool with_stats,
std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats, int32_t n_threads,
std::string format) const {
std::vector<std::string> dump(trees.size());
common::ParallelFor(static_cast<omp_ulong>(trees.size()), [&](size_t i) {
dump[i] = trees[i]->DumpModel(fmap, with_stats, format);
});
common::ParallelFor(trees.size(), n_threads,
[&](size_t i) { dump[i] = trees[i]->DumpModel(fmap, with_stats, format); });
return dump;
}
void CommitModel(std::vector<std::unique_ptr<RegTree> >&& new_trees,
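DumpModel now takes the thread count as an explicit parameter because GBTreeModel, unlike the booster, does not hold a context pointer; the override in gbtree.h forwards this->ctx_->Threads(). A compact sketch of that split with simplified stand-in types (only the explicit n_threads parameter mirrors the diff):

#include <cstdint>
#include <string>
#include <vector>

struct Tree {  // stand-in for RegTree
  std::string DumpModel(bool with_stats, const std::string& format) const {
    return format == "json" ? "{}" : "tree";
  }
};

struct Model {  // stand-in for GBTreeModel: no context, so the count is a parameter
  std::vector<Tree> trees;
  std::vector<std::string> DumpModel(bool with_stats, std::int32_t n_threads,
                                     const std::string& format) const {
    std::vector<std::string> dump(trees.size());
    std::int64_t n = static_cast<std::int64_t>(trees.size());
#pragma omp parallel for schedule(static) num_threads(n_threads)
    for (std::int64_t i = 0; i < n; ++i) {
      dump[i] = trees[i].DumpModel(with_stats, format);
    }
    return dump;
  }
};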
91 changes: 45 additions & 46 deletions src/linear/coordinate_common.h
@@ -149,21 +149,21 @@ GetGradientParallel(GenericParameter const *ctx, int group_idx, int num_group,
*/
inline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_group,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat) {
double sum_grad = 0.0, sum_hess = 0.0;
DMatrix *p_fmat, int32_t n_threads) {
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
dmlc::OMPException exc;
#pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess)
for (bst_omp_uint i = 0; i < ndata; ++i) {
exc.Run([&]() {
auto &p = gpair[i * num_group + group_idx];
if (p.GetHess() >= 0.0f) {
sum_grad += p.GetGrad();
sum_hess += p.GetHess();
}
});
}
exc.Rethrow();
std::vector<double> sum_grad_tloc(n_threads, 0);
std::vector<double> sum_hess_tloc(n_threads, 0);

common::ParallelFor(ndata, n_threads, [&](auto i) {
auto tid = omp_get_thread_num();
auto &p = gpair[i * num_group + group_idx];
if (p.GetHess() >= 0.0f) {
sum_grad_tloc[tid] += p.GetGrad();
sum_hess_tloc[tid] += p.GetHess();
}
});
double sum_grad = std::accumulate(sum_grad_tloc.cbegin(), sum_grad_tloc.cend(), 0.0);
double sum_hess = std::accumulate(sum_hess_tloc.cbegin(), sum_hess_tloc.cend(), 0.0);
return std::make_pair(sum_grad, sum_hess);
}

@@ -179,23 +179,18 @@ inline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_
*/
inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
float dw, std::vector<GradientPair> *in_gpair,
DMatrix *p_fmat) {
DMatrix *p_fmat, int32_t n_threads) {
if (dw == 0.0f) return;
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
auto page = batch.GetView();
auto col = page[fidx];
// update grad value
const auto num_row = static_cast<bst_omp_uint>(col.size());
dmlc::OMPException exc;
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < num_row; ++j) {
exc.Run([&]() {
GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx];
if (p.GetHess() < 0.0f) return;
p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0);
});
}
exc.Rethrow();
common::ParallelFor(num_row, n_threads, [&](auto j) {
GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx];
if (p.GetHess() < 0.0f) return;
p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0);
});
}
}

@@ -209,30 +204,29 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
* \param p_fmat The input feature matrix.
*/
inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias,
std::vector<GradientPair> *in_gpair,
DMatrix *p_fmat) {
std::vector<GradientPair> *in_gpair, DMatrix *p_fmat,
int32_t n_threads) {
if (dbias == 0.0f) return;
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
dmlc::OMPException exc;
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
exc.Run([&]() {
GradientPair &g = (*in_gpair)[i * num_group + group_idx];
if (g.GetHess() < 0.0f) return;
g += GradientPair(g.GetHess() * dbias, 0);
});
}
exc.Rethrow();
common::ParallelFor(ndata, n_threads, [&](auto i) {
GradientPair &g = (*in_gpair)[i * num_group + group_idx];
if (g.GetHess() < 0.0f) return;
g += GradientPair(g.GetHess() * dbias, 0);
});
}

/**
* \brief Abstract class for stateful feature selection or ordering
* in coordinate descent algorithms.
*/
class FeatureSelector {
protected:
int32_t n_threads_{-1};

public:
explicit FeatureSelector(int32_t n_threads) : n_threads_{n_threads} {}
/*! \brief factory method */
static FeatureSelector *Create(int choice);
static FeatureSelector *Create(int choice, int32_t n_threads);
/*! \brief virtual destructor */
virtual ~FeatureSelector() = default;
/**
@@ -274,6 +268,7 @@ class FeatureSelector {
*/
class CyclicFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
int NextFeature(int iteration, const gbm::GBLinearModel &model,
int , const std::vector<GradientPair> &,
DMatrix *, float, float) override {
@@ -287,6 +282,7 @@ class CyclicFeatureSelector : public FeatureSelector {
*/
class ShuffleFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
void Setup(const gbm::GBLinearModel &model,
const std::vector<GradientPair>&,
DMatrix *, float, float, int) override {
@@ -313,6 +309,7 @@ class ShuffleFeatureSelector : public FeatureSelector {
*/
class RandomFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
int NextFeature(int, const gbm::GBLinearModel &model,
int, const std::vector<GradientPair> &,
DMatrix *, float, float) override {
@@ -331,6 +328,7 @@ class RandomFeatureSelector : public FeatureSelector {
*/
class GreedyFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
void Setup(const gbm::GBLinearModel &model,
const std::vector<GradientPair> &,
DMatrix *, float, float, int param) override {
@@ -360,7 +358,7 @@ class GreedyFeatureSelector : public FeatureSelector {
std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
auto page = batch.GetView();
common::ParallelFor(nfeat, [&](bst_omp_uint i) {
common::ParallelFor(nfeat, this->n_threads_, [&](bst_omp_uint i) {
const auto col = page[i];
const bst_uint ndata = col.size();
auto &sums = gpair_sums_[group_idx * nfeat + i];
@@ -407,6 +405,7 @@
*/
class ThriftyFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
void Setup(const gbm::GBLinearModel &model,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda, int param) override {
@@ -426,7 +425,7 @@ class ThriftyFeatureSelector : public FeatureSelector {
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
auto page = batch.GetView();
// column-parallel is usually fastaer than row-parallel
common::ParallelFor(nfeat, [&](bst_omp_uint i) {
common::ParallelFor(nfeat, this->n_threads_, [&](auto i) {
const auto col = page[i];
const bst_uint ndata = col.size();
for (bst_uint gid = 0u; gid < ngroup; ++gid) {
@@ -483,18 +482,18 @@ class ThriftyFeatureSelector : public FeatureSelector {
std::vector<std::pair<double, double>> gpair_sums_;
};

inline FeatureSelector *FeatureSelector::Create(int choice) {
inline FeatureSelector *FeatureSelector::Create(int choice, int32_t n_threads) {
switch (choice) {
case kCyclic:
return new CyclicFeatureSelector();
return new CyclicFeatureSelector(n_threads);
case kShuffle:
return new ShuffleFeatureSelector();
return new ShuffleFeatureSelector(n_threads);
case kThrifty:
return new ThriftyFeatureSelector();
return new ThriftyFeatureSelector(n_threads);
case kGreedy:
return new GreedyFeatureSelector();
return new GreedyFeatureSelector(n_threads);
case kRandom:
return new RandomFeatureSelector();
return new RandomFeatureSelector(n_threads);
default:
LOG(FATAL) << "unknown coordinate selector: " << choice;
}
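GetBiasGradientParallel drops the OpenMP reduction(+ : sum_grad, sum_hess) clause in favor of per-thread partial sums that are combined with std::accumulate afterwards, so the reduction runs with exactly the n_threads supplied by the caller. A stripped-down sketch of the pattern; GradientPair here is a simplified stand-in and the function name is illustrative:

#include <omp.h>

#include <cstdint>
#include <numeric>
#include <utility>
#include <vector>

struct GradientPair {  // simplified stand-in
  float grad{0.f}, hess{0.f};
  float GetGrad() const { return grad; }
  float GetHess() const { return hess; }
};

std::pair<double, double> SumGradients(const std::vector<GradientPair>& gpair,
                                       std::int32_t n_threads) {
  std::vector<double> grad_tloc(n_threads, 0.0), hess_tloc(n_threads, 0.0);
  std::int64_t n = static_cast<std::int64_t>(gpair.size());
#pragma omp parallel for schedule(static) num_threads(n_threads)
  for (std::int64_t i = 0; i < n; ++i) {
    auto tid = omp_get_thread_num();   // index into this thread's partial sum
    if (gpair[i].GetHess() >= 0.0f) {  // skip invalidated pairs, as in the diff
      grad_tloc[tid] += gpair[i].GetGrad();
      hess_tloc[tid] += gpair[i].GetHess();
    }
  }
  double sum_grad = std::accumulate(grad_tloc.cbegin(), grad_tloc.cend(), 0.0);
  double sum_hess = std::accumulate(hess_tloc.cbegin(), hess_tloc.cend(), 0.0);
  return {sum_grad, sum_hess};
}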
2 changes: 1 addition & 1 deletion src/linear/linear_updater.cc
@@ -17,7 +17,7 @@ LinearUpdater* LinearUpdater::Create(const std::string& name, GenericParameter c
LOG(FATAL) << "Unknown linear updater " << name;
}
auto p_linear = (e->body)();
p_linear->learner_param_ = lparam;
p_linear->ctx_ = lparam;
return p_linear;
}

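FeatureSelector and its subclasses now receive the thread count at construction, each subclass simply inheriting the constructor with using FeatureSelector::FeatureSelector;. The coordinate updaters that call FeatureSelector::Create are not part of this diff, but with ctx_ on LinearUpdater they are the natural place to pass ctx_->Threads(). A self-contained sketch of the constructor-injection pattern with trimmed-down names:

#include <cstdint>
#include <memory>

enum SelectorChoice : int { kCyclic = 0, kShuffle = 1 };

class FeatureSelector {
 protected:
  std::int32_t n_threads_{-1};  // used by selectors that run ParallelFor internally

 public:
  explicit FeatureSelector(std::int32_t n_threads) : n_threads_{n_threads} {}
  virtual ~FeatureSelector() = default;
  static FeatureSelector* Create(int choice, std::int32_t n_threads);
};

class CyclicFeatureSelector : public FeatureSelector {
 public:
  using FeatureSelector::FeatureSelector;  // inherit the n_threads constructor
};

class ShuffleFeatureSelector : public FeatureSelector {
 public:
  using FeatureSelector::FeatureSelector;
};

FeatureSelector* FeatureSelector::Create(int choice, std::int32_t n_threads) {
  switch (choice) {
    case kCyclic:
      return new CyclicFeatureSelector(n_threads);
    case kShuffle:
      return new ShuffleFeatureSelector(n_threads);
    default:
      return nullptr;
  }
}

int main() {
  // In the real updater the count would come from ctx_->Threads().
  std::unique_ptr<FeatureSelector> selector{FeatureSelector::Create(kCyclic, 4)};
  return selector ? 0 : 1;
}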