Commit

Add test.
trivialfis committed May 31, 2023
1 parent c17de65 commit edba724
Showing 9 changed files with 97 additions and 20 deletions.
12 changes: 12 additions & 0 deletions src/common/error_msg.h
@@ -6,6 +6,11 @@
#ifndef XGBOOST_COMMON_ERROR_MSG_H_
#define XGBOOST_COMMON_ERROR_MSG_H_

#include <cinttypes> // for uint64_t
#include <limits> // for numeric_limits

#include "xgboost/base.h" // for bst_feature_t
#include "xgboost/logging.h"
#include "xgboost/string_view.h" // for StringView

namespace xgboost::error {
@@ -35,5 +40,12 @@ constexpr StringView InconsistentMaxBin() {
}

constexpr StringView UnknownDevice() { return "Unknown device type."; }

inline void MaxFeatureSize(std::uint64_t n_features) {
  auto max_n_features = std::numeric_limits<bst_feature_t>::max();
  CHECK_LE(n_features, max_n_features)
      << "Unfortunately, XGBoost does not support data matrices with "
      << std::numeric_limits<bst_feature_t>::max() << " features or greater";
}
} // namespace xgboost::error
#endif // XGBOOST_COMMON_ERROR_MSG_H_
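
Aside (not part of the commit): the new error::MaxFeatureSize helper centralizes the feature-count guard so a call site only passes a column count; the diff below wires it into the ArrayAdapter constructor and learner.cc. A minimal sketch of the call pattern, where ValidateColumns is a hypothetical wrapper and the include path is illustrative:

#include <cstdint>  // for uint64_t

#include "../common/error_msg.h"  // for MaxFeatureSize (path as seen from src/data)

namespace xgboost {
// Hypothetical call site: guard the column count before building a matrix.
void ValidateColumns(std::uint64_t n_features) {
  // Fails the CHECK_LE with the "does not support data matrices with ... features
  // or greater" message once n_features exceeds bst_feature_t's maximum.
  error::MaxFeatureSize(n_features);
}
}  // namespace xgboost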
10 changes: 6 additions & 4 deletions src/data/adapter.h
@@ -7,7 +7,7 @@
#include <dmlc/data.h>

#include <algorithm>
#include <cstddef> // std::size_t
#include <cstddef> // for size_t
#include <functional>
#include <limits>
#include <map>
@@ -17,6 +17,7 @@
#include <vector>

#include "../c_api/c_api_error.h"
#include "../common/error_msg.h" // for MaxFeatureSize
#include "../common/math.h"
#include "array_interface.h"
#include "arrow-cdi.h"
@@ -299,10 +300,11 @@ class ArrayAdapter : public detail::SingleBatchDataIter<ArrayAdapterBatch> {
    auto j = Json::Load(array_interface);
    array_interface_ = ArrayInterface<2>(get<Object const>(j));
    batch_ = ArrayAdapterBatch{array_interface_};
    error::MaxFeatureSize(this->NumColumns());
  }
  ArrayAdapterBatch const& Value() const override { return batch_; }
  size_t NumRows() const { return array_interface_.Shape(0); }
  size_t NumColumns() const { return array_interface_.Shape(1); }
  [[nodiscard]] ArrayAdapterBatch const& Value() const override { return batch_; }
  [[nodiscard]] std::size_t NumRows() const { return array_interface_.Shape(0); }
  [[nodiscard]] std::size_t NumColumns() const { return array_interface_.Shape(1); }

 private:
  ArrayAdapterBatch batch_;
4 changes: 2 additions & 2 deletions src/data/proxy_dmatrix.cc
@@ -6,8 +6,8 @@
#include "proxy_dmatrix.h"

namespace xgboost::data {
void DMatrixProxy::SetArrayData(char const *c_interface) {
  std::shared_ptr<ArrayAdapter> adapter{new ArrayAdapter(StringView{c_interface})};
void DMatrixProxy::SetArrayData(StringView interface_str) {
  std::shared_ptr<ArrayAdapter> adapter{new ArrayAdapter{interface_str}};
  this->batch_ = adapter;
  this->Info().num_col_ = adapter->NumColumns();
  this->Info().num_row_ = adapter->NumRows();
2 changes: 1 addition & 1 deletion src/data/proxy_dmatrix.h
@@ -62,7 +62,7 @@ class DMatrixProxy : public DMatrix {
#endif // defined(XGBOOST_USE_CUDA)
  }

  void SetArrayData(char const* c_interface);
  void SetArrayData(StringView interface_str);
  void SetCSRData(char const* c_indptr, char const* c_indices, char const* c_values,
                  bst_feature_t n_features, bool on_host);

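The signature change above means callers hand SetArrayData the array-interface JSON as a StringView rather than a raw char const*. A brief usage sketch (illustrative only, not part of the commit; it mirrors the call made in the new test further down):

#include <xgboost/string_view.h>  // for StringView

#include <memory>  // for make_shared
#include <string>  // for string

#include "../../src/data/proxy_dmatrix.h"  // for DMatrixProxy (include path is illustrative)

namespace xgboost {
// Hypothetical helper: feed an external array (as array-interface JSON) to a proxy
// DMatrix for in-place prediction, without copying the data into a real DMatrix.
void FeedProxy(std::string const& array_interface_json) {
  auto proxy = std::make_shared<data::DMatrixProxy>();
  proxy->SetArrayData(StringView{array_interface_json});
}
}  // namespace xgboost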
4 changes: 0 additions & 4 deletions src/gbm/gbtree.cc
@@ -616,10 +616,6 @@ void GBTree::InplacePredict(std::shared_ptr<DMatrix> p_m, float missing,
  CHECK(configured_);
  auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
  CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
  // fixme: create an option to allow copying data.
  // fixme: should we cache the result?
  // - We cache the result if the input is DMatrix, otherwise no.
  // - scikit-learn needs cache too.
  if (p_m->Ctx()->Device() != this->ctx_->Device()) {
    LOG(WARNING) << "Falling back to prediction using DMatrix due to mismatched devices. XGBoost "
                 << "is running on: " << this->ctx_->DeviceName()
5 changes: 2 additions & 3 deletions src/learner.cc
@@ -40,6 +40,7 @@
#include "common/api_entry.h" // for XGBAPIThreadLocalEntry
#include "common/charconv.h" // for to_chars, to_chars_result, NumericLimits, from_...
#include "common/common.h" // for ToString, Split
#include "common/error_msg.h" // for MaxFeatureSize
#include "common/io.h" // for PeekableInStream, ReadAll, FixedSizeStream, Mem...
#include "common/observer.h" // for TrainingObserver
#include "common/random.h" // for GlobalRandom
@@ -763,9 +764,7 @@ class LearnerConfiguration : public Learner {
      CHECK(matrix.first.ptr);
      CHECK(!matrix.second.ref.expired());
      const uint64_t num_col = matrix.first.ptr->Info().num_col_;
      CHECK_LE(num_col, static_cast<uint64_t>(std::numeric_limits<unsigned>::max()))
          << "Unfortunately, XGBoost does not support data matrices with "
          << std::numeric_limits<unsigned>::max() << " features or greater";
      error::MaxFeatureSize(num_col);
      num_feature = std::max(num_feature, static_cast<uint32_t>(num_col));
    }

15 changes: 9 additions & 6 deletions tests/cpp/gbm/test_gbtree.cc
@@ -1,17 +1,20 @@
/*!
 * Copyright 2019-2022 XGBoost contributors
/**
 * Copyright 2019-2023, XGBoost contributors
 */
#include <gtest/gtest.h>
#include <xgboost/context.h>
#include <xgboost/host_device_vector.h> // for HostDeviceVector
#include <xgboost/learner.h> // for Learner

#include "../../../src/data/adapter.h"
#include "../../../src/data/proxy_dmatrix.h"
#include <limits> // for numeric_limits
#include <memory> // for shared_ptr
#include <string> // for string

#include "../../../src/data/proxy_dmatrix.h" // for DMatrixProxy
#include "../../../src/gbm/gbtree.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
#include "xgboost/base.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/learner.h"
#include "xgboost/predictor.h"

namespace xgboost {
63 changes: 63 additions & 0 deletions tests/cpp/gbm/test_gbtree.cu
@@ -0,0 +1,63 @@
#include <xgboost/context.h> // for Context
#include <xgboost/learner.h> // for Learner
#include <xgboost/string_view.h> // for StringView

#include <limits> // for numeric_limits
#include <memory> // for shared_ptr
#include <string> // for string

#include "../../../src/data/adapter.h" // for ArrayAdapter
#include "../../../src/data/proxy_dmatrix.h" // for DMatrixProxy
#include "../helpers.h" // for RandomDataGenerator

namespace xgboost {
void TestInplaceFallback(std::string tree_method) {
  bst_row_t n_samples{1024};
  bst_feature_t n_features{32};
  HostDeviceVector<float> X_storage;
  auto X = RandomDataGenerator{n_samples, n_features, 0.0}.GenerateArrayInterface(&X_storage);
  HostDeviceVector<float> y_storage;
  auto y = RandomDataGenerator{n_samples, 1u, 0.0}.GenerateArrayInterface(&y_storage);

  auto X_adapter = data::ArrayAdapter{StringView{X}};

  Context ctx;
  std::shared_ptr<DMatrix> Xy{
      DMatrix::Create(&X_adapter, std::numeric_limits<float>::quiet_NaN(), ctx.Threads())};
  Xy->SetInfo("label", y);

  std::unique_ptr<Learner> learner{Learner::Create({Xy})};
  learner->SetParam("tree_method", tree_method);
  for (std::int32_t i = 0; i < 3; ++i) {
    learner->UpdateOneIter(i, Xy);
  }

  std::shared_ptr<DMatrix> p_m{new data::DMatrixProxy};
  auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_m);
  proxy->SetArrayData(StringView{X});

  HostDeviceVector<float>* out_predt{nullptr};

  ::testing::internal::CaptureStderr();
  learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
                          &out_predt, 0, 0);
  auto output = testing::internal::GetCapturedStderr();
  ASSERT_NE(output.find("Falling back"), std::string::npos);

  learner->SetParam("tree_method", "hist");
  learner->SetParam("gpu_id", "-1");
  learner->Configure();
  HostDeviceVector<float>* out_predt_1{nullptr};

  ::testing::internal::CaptureStderr();
  learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
                          &out_predt_1, 0, 0);
  output = testing::internal::GetCapturedStderr();

  ASSERT_TRUE(output.empty());

  ASSERT_EQ(out_predt->ConstHostVector(), out_predt_1->ConstHostVector());
}

TEST(GBTree, InplacePredictFallback) { TestInplaceFallback("gpu_hist"); }
} // namespace xgboost
2 changes: 2 additions & 0 deletions tests/cpp/predictor/test_predictor.cc
@@ -8,9 +8,11 @@
#include <xgboost/data.h> // for DMatrix, BatchIterator, BatchSet, MetaInfo
#include <xgboost/host_device_vector.h> // for HostDeviceVector
#include <xgboost/predictor.h> // for PredictionCacheEntry, Predictor, Predic...
#include <xgboost/string_view.h> // for StringView

#include <algorithm> // for max
#include <limits> // for numeric_limits
#include <memory> // for shared_ptr
#include <unordered_map> // for unordered_map

#include "../../../src/common/bitfield.h" // for LBitField32
