diff --git a/src/common/error_msg.h b/src/common/error_msg.h index aa7e3f194871..e690a12f33a2 100644 --- a/src/common/error_msg.h +++ b/src/common/error_msg.h @@ -6,6 +6,11 @@ #ifndef XGBOOST_COMMON_ERROR_MSG_H_ #define XGBOOST_COMMON_ERROR_MSG_H_ +#include // for uint64_t +#include // for numeric_limits + +#include "xgboost/base.h" // for bst_feature_t +#include "xgboost/logging.h" #include "xgboost/string_view.h" // for StringView namespace xgboost::error { @@ -35,5 +40,12 @@ constexpr StringView InconsistentMaxBin() { } constexpr StringView UnknownDevice() { return "Unknown device type."; } + +inline void MaxFeatureSize(std::uint64_t n_features) { + auto max_n_features = std::numeric_limits::max(); + CHECK_LE(n_features, max_n_features) + << "Unfortunately, XGBoost does not support data matrices with " + << std::numeric_limits::max() << " features or greater"; +} } // namespace xgboost::error #endif // XGBOOST_COMMON_ERROR_MSG_H_ diff --git a/src/data/adapter.h b/src/data/adapter.h index b027084aaefc..3ce347e8d859 100644 --- a/src/data/adapter.h +++ b/src/data/adapter.h @@ -7,7 +7,7 @@ #include #include -#include // std::size_t +#include // for size_t #include #include #include @@ -17,6 +17,7 @@ #include #include "../c_api/c_api_error.h" +#include "../common/error_msg.h" // for MaxFeatureSize #include "../common/math.h" #include "array_interface.h" #include "arrow-cdi.h" @@ -299,10 +300,11 @@ class ArrayAdapter : public detail::SingleBatchDataIter { auto j = Json::Load(array_interface); array_interface_ = ArrayInterface<2>(get(j)); batch_ = ArrayAdapterBatch{array_interface_}; + error::MaxFeatureSize(this->NumColumns()); } - ArrayAdapterBatch const& Value() const override { return batch_; } - size_t NumRows() const { return array_interface_.Shape(0); } - size_t NumColumns() const { return array_interface_.Shape(1); } + [[nodiscard]] ArrayAdapterBatch const& Value() const override { return batch_; } + [[nodiscard]] std::size_t NumRows() const { return array_interface_.Shape(0); } + [[nodiscard]] std::size_t NumColumns() const { return array_interface_.Shape(1); } private: ArrayAdapterBatch batch_; diff --git a/src/data/proxy_dmatrix.cc b/src/data/proxy_dmatrix.cc index e5d506088af0..e0a28142dd16 100644 --- a/src/data/proxy_dmatrix.cc +++ b/src/data/proxy_dmatrix.cc @@ -6,8 +6,8 @@ #include "proxy_dmatrix.h" namespace xgboost::data { -void DMatrixProxy::SetArrayData(char const *c_interface) { - std::shared_ptr adapter{new ArrayAdapter(StringView{c_interface})}; +void DMatrixProxy::SetArrayData(StringView interface_str) { + std::shared_ptr adapter{new ArrayAdapter{interface_str}}; this->batch_ = adapter; this->Info().num_col_ = adapter->NumColumns(); this->Info().num_row_ = adapter->NumRows(); diff --git a/src/data/proxy_dmatrix.h b/src/data/proxy_dmatrix.h index 396fdced8216..b1646927024a 100644 --- a/src/data/proxy_dmatrix.h +++ b/src/data/proxy_dmatrix.h @@ -62,7 +62,7 @@ class DMatrixProxy : public DMatrix { #endif // defined(XGBOOST_USE_CUDA) } - void SetArrayData(char const* c_interface); + void SetArrayData(StringView interface_str); void SetCSRData(char const* c_indptr, char const* c_indices, char const* c_values, bst_feature_t n_features, bool on_host); diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc index 76e923c5743c..31b91399dfbd 100644 --- a/src/gbm/gbtree.cc +++ b/src/gbm/gbtree.cc @@ -616,10 +616,6 @@ void GBTree::InplacePredict(std::shared_ptr p_m, float missing, CHECK(configured_); auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end); CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees."; - // fixme: create an option to allow copying data. - // fixme: should we cache the result? - // - We cache the result if the input is DMatrix, otherwise no. - // - scikit-learn needs cache too. if (p_m->Ctx()->Device() != this->ctx_->Device()) { LOG(WARNING) << "Falling back to prediction using DMatrix due to mismatched devices. XGBoost " << "is running on: " << this->ctx_->DeviceName() diff --git a/src/learner.cc b/src/learner.cc index 78297404b73b..223531014b92 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -40,6 +40,7 @@ #include "common/api_entry.h" // for XGBAPIThreadLocalEntry #include "common/charconv.h" // for to_chars, to_chars_result, NumericLimits, from_... #include "common/common.h" // for ToString, Split +#include "common/error_msg.h" // for MaxFeatureSize #include "common/io.h" // for PeekableInStream, ReadAll, FixedSizeStream, Mem... #include "common/observer.h" // for TrainingObserver #include "common/random.h" // for GlobalRandom @@ -763,9 +764,7 @@ class LearnerConfiguration : public Learner { CHECK(matrix.first.ptr); CHECK(!matrix.second.ref.expired()); const uint64_t num_col = matrix.first.ptr->Info().num_col_; - CHECK_LE(num_col, static_cast(std::numeric_limits::max())) - << "Unfortunately, XGBoost does not support data matrices with " - << std::numeric_limits::max() << " features or greater"; + error::MaxFeatureSize(num_col); num_feature = std::max(num_feature, static_cast(num_col)); } diff --git a/tests/cpp/gbm/test_gbtree.cc b/tests/cpp/gbm/test_gbtree.cc index cfe22f5089f0..046ee1630abe 100644 --- a/tests/cpp/gbm/test_gbtree.cc +++ b/tests/cpp/gbm/test_gbtree.cc @@ -1,17 +1,20 @@ -/*! - * Copyright 2019-2022 XGBoost contributors +/** + * Copyright 2019-2023, XGBoost contributors */ #include #include +#include // for HostDeviceVector +#include // for Learner -#include "../../../src/data/adapter.h" -#include "../../../src/data/proxy_dmatrix.h" +#include // for numeric_limits +#include // for shared_ptr +#include // for string + +#include "../../../src/data/proxy_dmatrix.h" // for DMatrixProxy #include "../../../src/gbm/gbtree.h" #include "../filesystem.h" // dmlc::TemporaryDirectory #include "../helpers.h" #include "xgboost/base.h" -#include "xgboost/host_device_vector.h" -#include "xgboost/learner.h" #include "xgboost/predictor.h" namespace xgboost { diff --git a/tests/cpp/gbm/test_gbtree.cu b/tests/cpp/gbm/test_gbtree.cu new file mode 100644 index 000000000000..d46fedcb4b16 --- /dev/null +++ b/tests/cpp/gbm/test_gbtree.cu @@ -0,0 +1,63 @@ +#include // for Context +#include // for Learner +#include // for StringView + +#include // for numeric_limits +#include // for shared_ptr +#include // for string + +#include "../../../src/data/adapter.h" // for ArrayAdapter +#include "../../../src/data/proxy_dmatrix.h" // for DMatrixProxy +#include "../helpers.h" // for RandomDataGenerator + +namespace xgboost { +void TestInplaceFallback(std::string tree_method) { + bst_row_t n_samples{1024}; + bst_feature_t n_features{32}; + HostDeviceVector X_storage; + auto X = RandomDataGenerator{n_samples, n_features, 0.0}.GenerateArrayInterface(&X_storage); + HostDeviceVector y_storage; + auto y = RandomDataGenerator{n_samples, 1u, 0.0}.GenerateArrayInterface(&y_storage); + + auto X_adapter = data::ArrayAdapter{StringView{X}}; + + Context ctx; + std::shared_ptr Xy{ + DMatrix::Create(&X_adapter, std::numeric_limits::quiet_NaN(), ctx.Threads())}; + Xy->SetInfo("label", y); + + std::unique_ptr learner{Learner::Create({Xy})}; + learner->SetParam("tree_method", tree_method); + for (std::int32_t i = 0; i < 3; ++i) { + learner->UpdateOneIter(i, Xy); + } + + std::shared_ptr p_m{new data::DMatrixProxy}; + auto proxy = std::dynamic_pointer_cast(p_m); + proxy->SetArrayData(StringView{X}); + + HostDeviceVector* out_predt{nullptr}; + + ::testing::internal::CaptureStderr(); + learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits::quiet_NaN(), + &out_predt, 0, 0); + auto output = testing::internal::GetCapturedStderr(); + ASSERT_NE(output.find("Falling back"), std::string::npos); + + learner->SetParam("tree_method", "hist"); + learner->SetParam("gpu_id", "-1"); + learner->Configure(); + HostDeviceVector* out_predt_1{nullptr}; + + ::testing::internal::CaptureStderr(); + learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits::quiet_NaN(), + &out_predt_1, 0, 0); + output = testing::internal::GetCapturedStderr(); + + ASSERT_TRUE(output.empty()); + + ASSERT_EQ(out_predt->ConstHostVector(), out_predt_1->ConstHostVector()); +} + +TEST(GBTree, InplacePredictFallback) { TestInplaceFallback("gpu_hist"); } +} // namespace xgboost diff --git a/tests/cpp/predictor/test_predictor.cc b/tests/cpp/predictor/test_predictor.cc index c82fb3d17824..bb4f760e095b 100644 --- a/tests/cpp/predictor/test_predictor.cc +++ b/tests/cpp/predictor/test_predictor.cc @@ -8,9 +8,11 @@ #include // for DMatrix, BatchIterator, BatchSet, MetaInfo #include // for HostDeviceVector #include // for PredictionCacheEntry, Predictor, Predic... +#include // for StringView #include // for max #include // for numeric_limits +#include // for shared_ptr #include // for unordered_map #include "../../../src/common/bitfield.h" // for LBitField32