Skip to content

Commit

Permalink
Set device in device dmatrix. (#5596)
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis committed Apr 25, 2020
1 parent 3728855 commit 844d7c1
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 5 deletions.
2 changes: 1 addition & 1 deletion include/xgboost/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ class MetaInfo {
/*!
* \brief Validate all metainfo.
*/
void Validate() const;
void Validate(int32_t device) const;

MetaInfo Slice(common::Span<int32_t const> ridxs) const;
/*!
Expand Down
16 changes: 15 additions & 1 deletion src/data/data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t
}
}

void MetaInfo::Validate() const {
void MetaInfo::Validate(int32_t device) const {
if (group_ptr_.size() != 0 && weights_.Size() != 0) {
CHECK_EQ(group_ptr_.size(), weights_.Size() + 1)
<< "Size of weights must equal to number of groups when ranking "
Expand All @@ -350,30 +350,44 @@ void MetaInfo::Validate() const {
<< "Invalid group structure. Number of rows obtained from groups "
"doesn't equal to actual number of rows given by data.";
}
// Device-consistency guard applied to each non-empty meta-info vector below.
// `device` is the value passed to Validate(). The check passes when the
// vector's DeviceIdx() is GenericParameter::kCpuId, when `device` itself is
// GenericParameter::kCpuId, or when the two indices are equal; otherwise CHECK
// fails and reports both indices.
auto check_device = [device](HostDeviceVector<float> const &v) {
CHECK(v.DeviceIdx() == GenericParameter::kCpuId ||
device == GenericParameter::kCpuId ||
v.DeviceIdx() == device)
<< "Data is resided on a different device than `gpu_id`. "
<< "Device that data is on: " << v.DeviceIdx() << ", "
<< "`gpu_id` for XGBoost: " << device;
};

if (weights_.Size() != 0) {
CHECK_EQ(weights_.Size(), num_row_)
<< "Size of weights must equal to number of rows.";
check_device(weights_);
return;
}
if (labels_.Size() != 0) {
CHECK_EQ(labels_.Size(), num_row_)
<< "Size of labels must equal to number of rows.";
check_device(labels_);
return;
}
if (labels_lower_bound_.Size() != 0) {
CHECK_EQ(labels_lower_bound_.Size(), num_row_)
<< "Size of label_lower_bound must equal to number of rows.";
check_device(labels_lower_bound_);
return;
}
if (labels_upper_bound_.Size() != 0) {
CHECK_EQ(labels_upper_bound_.Size(), num_row_)
<< "Size of label_upper_bound must equal to number of rows.";
check_device(labels_upper_bound_);
return;
}
CHECK_LE(num_nonzero_, num_col_ * num_row_);
if (base_margin_.Size() != 0) {
CHECK_EQ(base_margin_.Size() % num_row_, 0)
<< "Size of base margin must be a multiple of number of rows.";
check_device(base_margin_);
}
}

Expand Down
1 change: 1 addition & 0 deletions src/data/device_dmatrix.cu
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ template <typename AdapterT>
DeviceDMatrix::DeviceDMatrix(AdapterT* adapter, float missing, int nthread, int max_bin) {
common::HistogramCuts cuts =
common::AdapterDeviceSketch(adapter, max_bin, missing);
dh::safe_cuda(cudaSetDevice(adapter->DeviceIdx()));
auto& batch = adapter->Value();
// Work out how many valid entries we have in each row
dh::caching_device_vector<size_t> row_counts(adapter->NumRows() + 1, 0);
Expand Down
1 change: 1 addition & 0 deletions src/data/simple_dmatrix.cu
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ void CopyDataRowMajor(AdapterT* adapter, common::Span<Entry> data,
// be supported in future. Does not currently support inferring row/column size
template <typename AdapterT>
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
dh::safe_cuda(cudaSetDevice(adapter->DeviceIdx()));
CHECK(adapter->NumRows() != kAdapterUnknownSize);
CHECK(adapter->NumColumns() != kAdapterUnknownSize);

Expand Down
2 changes: 1 addition & 1 deletion src/learner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1052,7 +1052,7 @@ class LearnerImpl : public LearnerIO {

void ValidateDMatrix(DMatrix* p_fmat) const {
MetaInfo const& info = p_fmat->Info();
info.Validate();
info.Validate(generic_parameters_.gpu_id);

auto const row_based_split = [this]() {
return tparam_.dsplit == DataSplitMode::kRow ||
Expand Down
12 changes: 10 additions & 2 deletions tests/cpp/data/test_metainfo.cc
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,17 @@ TEST(MetaInfo, Validate) {
info.num_col_ = 3;
std::vector<xgboost::bst_group_t> groups (11);
info.SetInfo("group", groups.data(), xgboost::DataType::kUInt32, 11);
EXPECT_THROW(info.Validate(), dmlc::Error);
EXPECT_THROW(info.Validate(0), dmlc::Error);

std::vector<float> labels(info.num_row_ + 1);
info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_ + 1);
EXPECT_THROW(info.Validate(), dmlc::Error);
EXPECT_THROW(info.Validate(0), dmlc::Error);

#if defined(XGBOOST_USE_CUDA)
info.group_ptr_.clear();
labels.resize(info.num_row_);
info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_);
info.labels_.SetDevice(0);
EXPECT_THROW(info.Validate(1), dmlc::Error);
#endif // defined(XGBOOST_USE_CUDA)
}
11 changes: 11 additions & 0 deletions tests/python-gpu/test_from_cupy.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,14 @@ def test_dlpack_device_dmat(self):
n = 100
X = cp.random.random((n, 2))
xgb.DeviceQuantileDMatrix(X.toDlpack())

# Test marked `mgpu`: device 0 is selected through cupy before building a
# DeviceQuantileDMatrix, then training is requested with `gpu_id` 1. The call
# to xgb.train is expected to raise XGBoostError.
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_specified_device(self):
import cupy as cp
cp.cuda.runtime.setDevice(0)
dtrain = dmatrix_from_cupy(
np.float32, xgb.DeviceQuantileDMatrix, np.nan)
with pytest.raises(xgb.core.XGBoostError):
xgb.train({'tree_method': 'gpu_hist', 'gpu_id': 1},
dtrain, num_boost_round=10)
1 change: 1 addition & 0 deletions tests/python-gpu/test_gpu_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def test_sklearn(self):
@pytest.mark.skipif(**tm.no_cupy())
def test_inplace_predict_cupy(self):
import cupy as cp
cp.cuda.runtime.setDevice(0)
rows = 1000
cols = 10
cp_rng = cp.random.RandomState(1994)
Expand Down

0 comments on commit 844d7c1

Please sign in to comment.