Pass shared pointer instead of raw pointer to Learner. (#5302)
Extracted from #5220.
trivialfis committed Feb 11, 2020
1 parent 2e0067e commit 29eeea7
Showing 12 changed files with 97 additions and 73 deletions.
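At a glance: Learner's public entry points (UpdateOneIter, BoostOneIter, EvalOneIter, Predict) now take std::shared_ptr<DMatrix> instead of raw DMatrix*, so the learner shares ownership of the data it trains on and caches against. A minimal sketch of the updated call pattern; the file path and round count are illustrative, not part of this commit:

```cpp
#include <memory>
#include <vector>

#include <xgboost/data.h>
#include <xgboost/learner.h>

using xgboost::DMatrix;
using xgboost::Learner;

void TrainSketch() {
  // DMatrix::Load returns a raw pointer; the shared_ptr takes ownership.
  std::shared_ptr<DMatrix> dtrain{
      DMatrix::Load("train.libsvm", /*silent=*/true, /*load_row_split=*/false)};
  // The cache vector passed to Create also holds shared_ptrs.
  std::unique_ptr<Learner> learner{Learner::Create({dtrain})};
  for (int iter = 0; iter < 10; ++iter) {
    // Before this commit: learner->UpdateOneIter(iter, dtrain.get());
    learner->UpdateOneIter(iter, dtrain);
  }
}
```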
10 changes: 5 additions & 5 deletions include/xgboost/learner.h
@@ -1,5 +1,5 @@
/*!
- * Copyright 2015-2019 by Contributors
+ * Copyright 2015-2020 by Contributors
* \file learner.h
* \brief Learner interface that integrates objective, gbm and evaluation together.
* This is the user facing XGBoost training module.
@@ -59,7 +59,7 @@ class Learner : public Model, public Configurable, public rabit::Serializable {
* \param iter current iteration number
* \param train reference to the data matrix.
*/
-  virtual void UpdateOneIter(int iter, DMatrix* train) = 0;
+  virtual void UpdateOneIter(int iter, std::shared_ptr<DMatrix> train) = 0;
/*!
* \brief Do customized gradient boosting with in_gpair.
* in_gpair can be mutated after this call.
@@ -68,7 +68,7 @@ class Learner : public Model, public Configurable, public rabit::Serializable {
* \param in_gpair The input gradient statistics.
*/
virtual void BoostOneIter(int iter,
-                            DMatrix* train,
+                            std::shared_ptr<DMatrix> train,
HostDeviceVector<GradientPair>* in_gpair) = 0;
/*!
* \brief evaluate the model for specific iteration using the configured metrics.
@@ -78,7 +78,7 @@ class Learner : public Model, public Configurable, public rabit::Serializable {
* \return a string corresponding to the evaluation result
*/
virtual std::string EvalOneIter(int iter,
-                                  const std::vector<DMatrix*>& data_sets,
+                                  const std::vector<std::shared_ptr<DMatrix>>& data_sets,
const std::vector<std::string>& data_names) = 0;
/*!
* \brief get prediction given the model.
@@ -92,7 +92,7 @@ class Learner : public Model, public Configurable, public rabit::Serializable {
* \param approx_contribs whether to approximate the feature contributions for speed
* \param pred_interactions whether to compute the feature pair contributions
*/
-  virtual void Predict(DMatrix* data,
+  virtual void Predict(std::shared_ptr<DMatrix> data,
bool output_margin,
HostDeviceVector<bst_float> *out_preds,
unsigned ntree_limit = 0,
12 changes: 6 additions & 6 deletions src/c_api/c_api.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2019 by Contributors
+// Copyright (c) 2014-2020 by Contributors
#include <dmlc/thread_local.h>
#include <rabit/rabit.h>
#include <rabit/c_api.h>
@@ -498,7 +498,7 @@ XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle,
auto *dtr =
static_cast<std::shared_ptr<DMatrix>*>(dtrain);

-  bst->UpdateOneIter(iter, dtr->get());
+  bst->UpdateOneIter(iter, *dtr);
API_END();
}

@@ -519,7 +519,7 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
tmp_gpair_h[i] = GradientPair(grad[i], hess[i]);
}

-  bst->BoostOneIter(0, dtr->get(), &tmp_gpair);
+  bst->BoostOneIter(0, *dtr, &tmp_gpair);
API_END();
}

@@ -533,11 +533,11 @@ XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
API_BEGIN();
CHECK_HANDLE();
auto* bst = static_cast<Learner*>(handle);
-  std::vector<DMatrix*> data_sets;
+  std::vector<std::shared_ptr<DMatrix>> data_sets;
std::vector<std::string> data_names;

for (xgboost::bst_ulong i = 0; i < len; ++i) {
-    data_sets.push_back(static_cast<std::shared_ptr<DMatrix>*>(dmats[i])->get());
+    data_sets.push_back(*static_cast<std::shared_ptr<DMatrix>*>(dmats[i]));
data_names.emplace_back(evnames[i]);
}

@@ -560,7 +560,7 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
auto *bst = static_cast<Learner*>(handle);
HostDeviceVector<bst_float> tmp_preds;
bst->Predict(
-      static_cast<std::shared_ptr<DMatrix>*>(dmat)->get(),
+      *static_cast<std::shared_ptr<DMatrix>*>(dmat),
(option_mask & 1) != 0,
&tmp_preds, ntree_limit,
static_cast<bool>(training),
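In the C API, a DMatrixHandle is a void* pointing at a heap-allocated std::shared_ptr<DMatrix>, so unwrapping a handle switches from ->get() (borrow a raw pointer) to a plain dereference (copy the shared_ptr and bump its reference count). A sketch of the convention; Unwrap is a hypothetical helper, not part of this commit:

```cpp
#include <memory>

#include <xgboost/c_api.h>
#include <xgboost/data.h>

// Hypothetical helper: a DMatrixHandle is a void* to a heap-allocated
// std::shared_ptr<DMatrix>, so dereferencing it copies the shared_ptr.
std::shared_ptr<xgboost::DMatrix> Unwrap(DMatrixHandle handle) {
  auto* p = static_cast<std::shared_ptr<xgboost::DMatrix>*>(handle);
  return *p;  // the Learner now shares ownership with the handle
}
```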
14 changes: 7 additions & 7 deletions src/cli_main.cc
@@ -1,5 +1,5 @@
/*!
- * Copyright 2014-2019 by Contributors
+ * Copyright 2014-2020 by Contributors
* \file cli_main.cc
* \brief The command line interface program of xgboost.
* This file is not included in dynamic library.
@@ -165,20 +165,20 @@ void CLITrain(const CLIParam& param) {
param.dsplit == 2));
std::vector<std::shared_ptr<DMatrix> > deval;
std::vector<std::shared_ptr<DMatrix> > cache_mats;
-  std::vector<DMatrix*> eval_datasets;
+  std::vector<std::shared_ptr<DMatrix>> eval_datasets;
cache_mats.push_back(dtrain);
for (size_t i = 0; i < param.eval_data_names.size(); ++i) {
deval.emplace_back(
std::shared_ptr<DMatrix>(DMatrix::Load(
param.eval_data_paths[i],
ConsoleLogger::GlobalVerbosity() > ConsoleLogger::DefaultVerbosity(),
param.dsplit == 2)));
-    eval_datasets.push_back(deval.back().get());
+    eval_datasets.push_back(deval.back());
cache_mats.push_back(deval.back());
}
std::vector<std::string> eval_data_names = param.eval_data_names;
if (param.eval_train) {
-    eval_datasets.push_back(dtrain.get());
+    eval_datasets.push_back(dtrain);
eval_data_names.emplace_back("train");
}
// initialize the learner.
@@ -203,7 +203,7 @@ void CLITrain(const CLIParam& param) {
double elapsed = dmlc::GetTime() - start;
if (version % 2 == 0) {
LOG(INFO) << "boosting round " << i << ", " << elapsed << " sec elapsed";
-      learner->UpdateOneIter(i, dtrain.get());
+      learner->UpdateOneIter(i, dtrain);
if (learner->AllowLazyCheckPoint()) {
rabit::LazyCheckPoint(learner.get());
} else {
@@ -305,7 +305,7 @@ void CLIPredict(const CLIParam& param) {
CHECK_NE(param.test_path, "NULL")
<< "Test dataset parameter test:data must be specified.";
// load data
-  std::unique_ptr<DMatrix> dtest(
+  std::shared_ptr<DMatrix> dtest(
DMatrix::Load(
param.test_path,
ConsoleLogger::GlobalVerbosity() > ConsoleLogger::DefaultVerbosity(),
@@ -321,7 +321,7 @@

LOG(INFO) << "start prediction...";
HostDeviceVector<bst_float> preds;
-  learner->Predict(dtest.get(), param.pred_margin, &preds, param.ntree_limit);
+  learner->Predict(dtest, param.pred_margin, &preds, param.ntree_limit);
LOG(CONSOLE) << "writing prediction to " << param.name_pred;

std::unique_ptr<dmlc::Stream> fo(
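The CLI collects its evaluation sets as shared_ptrs too, so one vector both feeds EvalOneIter and keeps the matrices alive. A sketch of the evaluation call under that convention, assuming the learner and matrices were created as in CLITrain above:

```cpp
#include <memory>
#include <string>
#include <vector>

#include <xgboost/data.h>
#include <xgboost/learner.h>

// Sketch; `learner`, `dtrain`, and `dtest` are assumed to exist already.
std::string EvalSketch(xgboost::Learner* learner,
                       std::shared_ptr<xgboost::DMatrix> dtrain,
                       std::shared_ptr<xgboost::DMatrix> dtest) {
  std::vector<std::shared_ptr<xgboost::DMatrix>> eval_sets{dtrain, dtest};
  std::vector<std::string> eval_names{"train", "test"};
  // Returns one tab-separated line, e.g. "[0]\ttrain-rmse:...\ttest-rmse:...".
  return learner->EvalOneIter(/*iter=*/0, eval_sets, eval_names);
}
```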
3 changes: 1 addition & 2 deletions src/common/device_helpers.cuh
@@ -1,5 +1,5 @@
/*!
- * Copyright 2017-2019 XGBoost contributors
+ * Copyright 2017-2020 XGBoost contributors
*/
#pragma once
#include <thrust/device_ptr.h>
@@ -9,7 +9,6 @@
#include <thrust/system_error.h>
#include <thrust/logical.h>

-#include <omp.h>
#include <rabit/rabit.h>
#include <cub/cub.cuh>
#include <cub/util_allocator.cuh>
19 changes: 13 additions & 6 deletions src/common/observer.h
@@ -1,11 +1,12 @@
/*!
- * Copyright 2019 XGBoost contributors
+ * Copyright 2019-2020 XGBoost contributors
* \file observer.h
*/
#ifndef XGBOOST_COMMON_OBSERVER_H_
#define XGBOOST_COMMON_OBSERVER_H_

#include <iostream>
+#include <limits>
#include <string>
#include <vector>

@@ -63,7 +64,8 @@ class TrainingObserver {
}
/*\brief Observe data hosted by `std::vector'. */
template <typename T>
-  void Observe(std::vector<T> const& h_vec, std::string name) const {
+  void Observe(std::vector<T> const& h_vec, std::string name,
+               size_t n = std::numeric_limits<std::size_t>::max()) const {
if (XGBOOST_EXPECT(!observe_, true)) { return; }
OBSERVER_PRINT << "Procedure: " << name << OBSERVER_ENDL;

@@ -72,20 +74,25 @@
if (i % 8 == 0) {
OBSERVER_PRINT << OBSERVER_NEWLINE;
}
+      if ((i + 1) == n) {
+        break;
+      }
}
OBSERVER_PRINT << OBSERVER_ENDL;
}
/*\brief Observe data hosted by `HostDeviceVector'. */
template <typename T>
-  void Observe(HostDeviceVector<T> const& vec, std::string name) const {
+  void Observe(HostDeviceVector<T> const& vec, std::string name,
+               size_t n = std::numeric_limits<std::size_t>::max()) const {
if (XGBOOST_EXPECT(!observe_, true)) { return; }
auto const& h_vec = vec.HostVector();
-    this->Observe(h_vec, name);
+    this->Observe(h_vec, name, n);
}
template <typename T>
-  void Observe(HostDeviceVector<T>* vec, std::string name) const {
+  void Observe(HostDeviceVector<T>* vec, std::string name,
+               size_t n = std::numeric_limits<std::size_t>::max()) const {
if (XGBOOST_EXPECT(!observe_, true)) { return; }
-    this->Observe(*vec, name);
+    this->Observe(*vec, name, n);
}

/*\brief Observe objects with `XGBoostParamer' type. */
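The Observe overloads gain an optional cap n, defaulting to std::numeric_limits<std::size_t>::max() (print everything), so callers can dump only a prefix of a large vector. A sketch of the intended use, assuming the observer was enabled at build time:

```cpp
#include <vector>

#include "common/observer.h"  // path relative to src/, shown for illustration

// Sketch: print at most the first 16 gradient values instead of all of them.
void DumpGradients(std::vector<float> const& gradients) {
  xgboost::TrainingObserver::Instance().Observe(gradients, "Gradients", 16);
}
```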
7 changes: 5 additions & 2 deletions src/gbm/gblinear.cc
@@ -1,5 +1,5 @@
/*!
- * Copyright 2014 by Contributors
+ * Copyright 2014-2020 by Contributors
* \file gblinear.cc
* \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net
* the update rule is parallel coordinate descent (shotgun)
@@ -239,7 +239,7 @@ class GBLinear : public GradientBooster {
void PredictBatchInternal(DMatrix *p_fmat,
std::vector<bst_float> *out_preds) {
monitor_.Start("PredictBatchInternal");
-    model_.LazyInitModel();
+    model_.LazyInitModel();
std::vector<bst_float> &preds = *out_preds;
const auto& base_margin = p_fmat->Info().base_margin_.ConstHostVector();
// start collecting the prediction
@@ -250,6 +250,9 @@
// k is number of group
// parallel over local batch
const auto nsize = static_cast<omp_ulong>(batch.Size());
+      if (base_margin.size() != 0) {
+        CHECK_EQ(base_margin.size(), nsize * ngroup);
+      }
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < nsize; ++i) {
const size_t ridx = batch.base_rowid + i;
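The new check makes the base-margin contract explicit: when a margin is supplied it must hold one value per (row, group) pair, e.g. 100 rows with 3 output groups require exactly 300 entries. A one-line sketch of the invariant being enforced:

```cpp
#include <cstddef>

// Sketch of the invariant behind the CHECK_EQ above: base_margin is
// either empty or sized nsize * ngroup (one entry per row per group).
bool BaseMarginOk(std::size_t margin_size, std::size_t nsize, std::size_t ngroup) {
  return margin_size == 0 || margin_size == nsize * ngroup;
}
```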
36 changes: 18 additions & 18 deletions src/learner.cc
@@ -1,5 +1,5 @@
/*!
- * Copyright 2014-2019 by Contributors
+ * Copyright 2014-2020 by Contributors
* \file learner.cc
* \brief Implementation of learning algorithm.
* \author Tianqi Chen
@@ -691,45 +691,45 @@ class LearnerImpl : public Learner {
return gbm_->DumpModel(fmap, with_stats, format);
}

-  void UpdateOneIter(int iter, DMatrix* train) override {
+  void UpdateOneIter(int iter, std::shared_ptr<DMatrix> train) override {
monitor_.Start("UpdateOneIter");
TrainingObserver::Instance().Update(iter);
this->Configure();
if (generic_parameters_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(generic_parameters_.seed * kRandSeedMagic + iter);
}
this->CheckDataSplitMode();
-    this->ValidateDMatrix(train);
+    this->ValidateDMatrix(train.get());

monitor_.Start("PredictRaw");
-    this->PredictRaw(train, &preds_[train], true);
+    this->PredictRaw(train.get(), &preds_[train.get()], true);
monitor_.Stop("PredictRaw");
-    TrainingObserver::Instance().Observe(preds_[train], "Predictions");
+    TrainingObserver::Instance().Observe(preds_[train.get()], "Predictions");

monitor_.Start("GetGradient");
-    obj_->GetGradient(preds_[train], train->Info(), iter, &gpair_);
+    obj_->GetGradient(preds_[train.get()], train->Info(), iter, &gpair_);
monitor_.Stop("GetGradient");
TrainingObserver::Instance().Observe(gpair_, "Gradients");

-    gbm_->DoBoost(train, &gpair_, obj_.get());
+    gbm_->DoBoost(train.get(), &gpair_, obj_.get());
monitor_.Stop("UpdateOneIter");
}

-  void BoostOneIter(int iter, DMatrix* train,
+  void BoostOneIter(int iter, std::shared_ptr<DMatrix> train,
HostDeviceVector<GradientPair>* in_gpair) override {
monitor_.Start("BoostOneIter");
this->Configure();
if (generic_parameters_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(generic_parameters_.seed * kRandSeedMagic + iter);
}
this->CheckDataSplitMode();
-    this->ValidateDMatrix(train);
+    this->ValidateDMatrix(train.get());

-    gbm_->DoBoost(train, in_gpair);
+    gbm_->DoBoost(train.get(), in_gpair);
monitor_.Stop("BoostOneIter");
}

-  std::string EvalOneIter(int iter, const std::vector<DMatrix*>& data_sets,
+  std::string EvalOneIter(int iter, const std::vector<std::shared_ptr<DMatrix>>& data_sets,
const std::vector<std::string>& data_names) override {
monitor_.Start("EvalOneIter");
this->Configure();
@@ -741,9 +741,9 @@ class LearnerImpl : public Learner {
metrics_.back()->Configure({cfg_.begin(), cfg_.end()});
}
for (size_t i = 0; i < data_sets.size(); ++i) {
-      DMatrix * dmat = data_sets[i];
+      DMatrix * dmat = data_sets[i].get();
this->ValidateDMatrix(dmat);
-      this->PredictRaw(data_sets[i], &preds_[dmat], false);
+      this->PredictRaw(dmat, &preds_[dmat], false);
obj_->EvalTransform(&preds_[dmat]);
for (auto& ev : metrics_) {
os << '\t' << data_names[i] << '-' << ev->Name() << ':'
@@ -805,7 +805,7 @@
return generic_parameters_;
}

-  void Predict(DMatrix* data, bool output_margin,
+  void Predict(std::shared_ptr<DMatrix> data, bool output_margin,
HostDeviceVector<bst_float>* out_preds, unsigned ntree_limit,
bool training,
bool pred_leaf, bool pred_contribs, bool approx_contribs,
@@ -816,14 +816,14 @@
this->Configure();
CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time.";
if (pred_contribs) {
-      gbm_->PredictContribution(data, &out_preds->HostVector(), ntree_limit, approx_contribs);
+      gbm_->PredictContribution(data.get(), &out_preds->HostVector(), ntree_limit, approx_contribs);
} else if (pred_interactions) {
-      gbm_->PredictInteractionContributions(data, &out_preds->HostVector(), ntree_limit,
+      gbm_->PredictInteractionContributions(data.get(), &out_preds->HostVector(), ntree_limit,
approx_contribs);
} else if (pred_leaf) {
-      gbm_->PredictLeaf(data, &out_preds->HostVector(), ntree_limit);
+      gbm_->PredictLeaf(data.get(), &out_preds->HostVector(), ntree_limit);
} else {
-      this->PredictRaw(data, out_preds, training, ntree_limit);
+      this->PredictRaw(data.get(), out_preds, training, ntree_limit);
if (!output_margin) {
obj_->PredTransform(out_preds);
}
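Only the public entry points take shared_ptr; LearnerImpl still hands raw pointers to the booster, objective, and metrics, and keeps its prediction cache keyed by DMatrix*. A hypothetical mirror of that boundary pattern (CacheSketch is illustrative, not the real class):

```cpp
#include <map>
#include <memory>

#include <xgboost/data.h>
#include <xgboost/host_device_vector.h>

// Hypothetical mirror of LearnerImpl: shared ownership at the API boundary,
// raw pointers as stable cache keys for internal plumbing.
class CacheSketch {
 public:
  void Update(std::shared_ptr<xgboost::DMatrix> train) {
    // .get() only when crossing into internal components; the shared_ptr
    // guarantees the matrix outlives this cache entry.
    auto& cached = preds_[train.get()];
    (void)cached;  // ... filled with raw predictions in the real code ...
  }

 private:
  std::map<xgboost::DMatrix*, xgboost::HostDeviceVector<float>> preds_;
};
```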
8 changes: 5 additions & 3 deletions tests/cpp/c_api/test_c_api.cc
@@ -1,4 +1,6 @@
-// Copyright (c) 2019 by Contributors
+/*!
+ * Copyright 2019-2020 XGBoost contributors
+ */
#include <gtest/gtest.h>
#include <xgboost/version_config.h>
#include <xgboost/c_api.h>
@@ -92,7 +94,7 @@ TEST(c_api, ConfigIO) {
std::shared_ptr<Learner> learner { Learner::Create(mat) };

BoosterHandle handle = learner.get();
-  learner->UpdateOneIter(0, p_dmat.get());
+  learner->UpdateOneIter(0, p_dmat);

char const* out[1];
bst_ulong len {0};
@@ -127,7 +129,7 @@ TEST(c_api, JsonModelIO) {

std::shared_ptr<Learner> learner { Learner::Create(mat) };

-  learner->UpdateOneIter(0, p_dmat.get());
+  learner->UpdateOneIter(0, p_dmat);
BoosterHandle handle = learner.get();

std::string modelfile_0 = tempdir.path + "/model_0.json";
(4 more changed files not shown)
