Implement multi-target for hist.
Initial commit.

Predictor.

Compile.

fixes.

Cleanup.

Moving code around.

Start working on cat features.

Start working on model IO.

Fix.

Revert.

cleanup.

Rebase.

Reverse cleanup.

rename.

Fix rebase.

small cleanup.

inc

Merge it into reg tree.

Strategy.

Extract the cat matrix.

Use array in predictor.

Use array in scalar.

Merge two kernels.

QDM.

inplace predict.

cleanup.

naming.

cleanup.

cleanup.

sampler.

copy.

cleanup.

compile test.

Hide the tree.

Hide from the partitioner.

Hide init root.

layer to trees.

check.

Remove old sampling func.

leaf partition.

use linalg.

remove grad stats.

ro5

reverse.

Don't support prediction cache for now.

col sampler.

Cleanup.

Cleanup.

Cleanup histogram.

t

Cleanup evaluation.

ic.

Cleanup.

start working on io.

is valid.

basic io.

dispatch.

Basic IO.

Cleanup node sum.

cleanup.

Extract the updater.

Merge it into quantile hist.

cleanup.

Cleanup.

restore checks.

Cleanup.

remove num_target.

fix tests.

Fix.

fixes.

Type deduction.

R package.

Predict leaf.

Predict leaf.

cleanup.

Add a test to sampling.

check.

cleanup.

cleanup.

parallel.

Cleanup

Fix root.

column-major.

fewer right.

Cleanup.

Initial work on merging the updaters.

Fix.

Merge update tree.

Consistent naming.

HD.

Unify sampling.

Fix build.

Fix build.

CUDA build.

Fix GPU SHAP tests.

fix.
trivialfis committed Jan 19, 2023
1 parent 26c9882 commit d87affb
Showing 65 changed files with 2,570 additions and 1,252 deletions.
1 change: 1 addition & 0 deletions R-package/src/Makevars.in
@@ -59,6 +59,7 @@ OBJECTS= \
   $(PKGROOT)/src/tree/fit_stump.o \
   $(PKGROOT)/src/tree/tree_model.o \
   $(PKGROOT)/src/tree/tree_updater.o \
+  $(PKGROOT)/src/tree/multi_target_tree_model.o \
   $(PKGROOT)/src/tree/updater_approx.o \
   $(PKGROOT)/src/tree/updater_colmaker.o \
   $(PKGROOT)/src/tree/updater_prune.o \
1 change: 1 addition & 0 deletions R-package/src/Makevars.win
@@ -58,6 +58,7 @@ OBJECTS= \
   $(PKGROOT)/src/tree/param.o \
   $(PKGROOT)/src/tree/fit_stump.o \
   $(PKGROOT)/src/tree/tree_model.o \
+  $(PKGROOT)/src/tree/multi_target_tree_model.o \
   $(PKGROOT)/src/tree/tree_updater.o \
   $(PKGROOT)/src/tree/updater_approx.o \
   $(PKGROOT)/src/tree/updater_colmaker.o \
17 changes: 14 additions & 3 deletions demo/guide-python/multioutput_regression.py
@@ -44,10 +44,19 @@ def rmse_model(plot_result: bool):
     """Draw a circle with 2-dim coordinate as target variables."""
     X, y = gen_circle()
     # Train a regressor on it
-    reg = xgb.XGBRegressor(tree_method="hist", n_estimators=64)
+    reg = xgb.XGBRegressor(
+        tree_method="hist",
+        n_estimators=16,
+        n_jobs=16,
+        max_depth=8,
+        multi_strategy="mono",
+        subsample=0.6,
+    )
     reg.fit(X, y, eval_set=[(X, y)])
+    # reg.save_model("model.json")
 
     y_predt = reg.predict(X)
+    # print("y_predt:", y_predt, y)
     if plot_result:
         plot_predt(y, y_predt, "multi")
 
@@ -81,13 +90,15 @@ def rmse(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:
     X, y = gen_circle()
     Xy = xgb.DMatrix(X, y)
     results: Dict[str, Dict[str, List[float]]] = {}
-    # Make sure the `num_target` is passed to XGBoost when custom objective is used.
+    # Make sure the `num_class` is passed to XGBoost when custom objective is used.
     # When builtin objective is used, XGBoost can figure out the number of targets
     # automatically.
     booster = xgb.train(
         {
             "tree_method": "hist",
-            "num_target": y.shape[1],
+            "num_class": y.shape[1],
+            "multi_strategy": "mono",
+            "objective": "reg:squarederror",  # fixme
         },
         dtrain=Xy,
         num_boost_round=100,
30 changes: 17 additions & 13 deletions include/xgboost/base.h
@@ -1,5 +1,5 @@
 /*!
- * Copyright (c) 2015 by Contributors
+ * Copyright (c) 2015-2023 by Contributors
  * \file base.h
  * \brief defines configuration macros of xgboost.
  */
@@ -10,6 +10,7 @@
 #include <dmlc/omp.h>
 #include <cmath>
 #include <iostream>
+#include <type_traits>
 #include <vector>
 #include <string>
 #include <utility>
@@ -110,19 +111,19 @@
 namespace xgboost {
 
 /*! \brief unsigned integer type used for feature index. */
-using bst_uint = uint32_t;  // NOLINT
+using bst_uint = std::uint32_t;  // NOLINT
 /*! \brief integer type. */
-using bst_int = int32_t;  // NOLINT
+using bst_int = std::int32_t;  // NOLINT
 /*! \brief unsigned long integers */
-using bst_ulong = uint64_t;  // NOLINT
+using bst_ulong = std::uint64_t;  // NOLINT
 /*! \brief float type, used for storing statistics */
 using bst_float = float;  // NOLINT
 /*! \brief Categorical value type. */
-using bst_cat_t = int32_t;  // NOLINT
+using bst_cat_t = std::int32_t;  // NOLINT
 /*! \brief Type for data column (feature) index. */
-using bst_feature_t = uint32_t;  // NOLINT
+using bst_feature_t = std::uint32_t;  // NOLINT
 /*! \brief Type for histogram bin index. */
-using bst_bin_t = int32_t;  // NOLINT
+using bst_bin_t = std::int32_t;  // NOLINT
 /*! \brief Type for data row index.
  *
  * Be careful `std::size_t' is implementation-defined.  Meaning that the binary
@@ -131,11 +132,11 @@ using bst_bin_t = int32_t;  // NOLINT
  */
 using bst_row_t = std::size_t;  // NOLINT
 /*! \brief Type for tree node index. */
-using bst_node_t = int32_t;  // NOLINT
+using bst_node_t = std::int32_t;  // NOLINT
 /*! \brief Type for ranking group index. */
-using bst_group_t = uint32_t;  // NOLINT
-/*! \brief Type for indexing target variables. */
-using bst_target_t = std::size_t;  // NOLINT
+using bst_group_t = std::uint32_t;  // NOLINT
+/*! \brief Type for indexing into output targets. */
+using bst_target_t = std::uint32_t;  // NOLINT
 
 namespace detail {
 /*! \brief Implementation of gradient statistics pair. Template specialisation
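
An aside on the bst_target_t change above: the surrounding comment already warns that `std::size_t' is implementation-defined, so a type that participates in the model's binary layout wants a fixed width. A minimal standalone sketch (not part of this commit) of the portability point:

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  // std::size_t is implementation-defined: typically 8 bytes on 64-bit
  // platforms and 4 on 32-bit ones, so its binary layout is not portable.
  std::printf("sizeof(std::size_t)   = %zu\n", sizeof(std::size_t));
  // std::uint32_t is exactly 4 bytes everywhere, which keeps structures
  // that reach the serialized model format stable across platforms.
  std::printf("sizeof(std::uint32_t) = %zu\n", sizeof(std::uint32_t));
  return 0;
}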
@@ -171,11 +172,14 @@ class GradientPairInternal {
   }
 
   // Copy constructor if of same value type, marked as default to be trivially_copyable
-  GradientPairInternal(const GradientPairInternal<T> &g) = default;
+  GradientPairInternal(GradientPairInternal const &g) = default;
+  GradientPairInternal(GradientPairInternal &&g) = default;
+  GradientPairInternal &operator=(GradientPairInternal const &that) = default;
+  GradientPairInternal &operator=(GradientPairInternal &&that) = default;
 
   // Copy constructor if different value type - use getters and setters to
   // perform conversion
-  template <typename T2>
+  template <typename T2, std::enable_if_t<!std::is_same<T, T2>::value>* = nullptr>
   XGBOOST_DEVICE explicit GradientPairInternal(const GradientPairInternal<T2> &g) {
     SetGrad(g.GetGrad());
     SetHess(g.GetHess());
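
A note on the std::enable_if_t guard added above: without it, the converting constructor template instantiated with T2 == T would compete with the defaulted copy constructor during overload resolution. Below is a minimal standalone sketch of the same pattern; Pair and its members are illustrative stand-ins, not the actual GradientPairInternal:

#include <cstdio>
#include <type_traits>

template <typename T>
class Pair {
  T grad_{0}, hess_{0};

 public:
  Pair() = default;
  Pair(T g, T h) : grad_{g}, hess_{h} {}

  // All five special members stay defaulted, so Pair<T> remains
  // trivially copyable.
  Pair(Pair const&) = default;
  Pair(Pair&&) = default;
  Pair& operator=(Pair const&) = default;
  Pair& operator=(Pair&&) = default;

  // Converting constructor. The enable_if guard removes this overload
  // when T2 == T, so it can never compete with the copy constructor.
  template <typename T2, std::enable_if_t<!std::is_same<T, T2>::value>* = nullptr>
  explicit Pair(Pair<T2> const& that)
      : grad_{static_cast<T>(that.GetGrad())}, hess_{static_cast<T>(that.GetHess())} {}

  T GetGrad() const { return grad_; }
  T GetHess() const { return hess_; }
};

int main() {
  Pair<double> pd{1.5, 2.5};
  Pair<float> pf{pd};    // different type: uses the guarded template constructor
  Pair<float> copy{pf};  // same type: uses the trivial copy constructor
  std::printf("%f %f\n", copy.GetGrad(), copy.GetHess());
  static_assert(std::is_trivially_copyable<Pair<float>>::value, "stays trivial");
  return 0;
}

Keeping the special members defaulted (plausibly what the squashed commit message "ro5" refers to: the rule of five) preserves the trivially-copyable property that the in-diff comment calls out.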
33 changes: 26 additions & 7 deletions include/xgboost/learner.h
@@ -16,6 +16,7 @@
 #include <xgboost/predictor.h>
 #include <xgboost/task.h>
 
+#include <cstddef>
 #include <map>
 #include <memory>
 #include <string>
@@ -162,6 +163,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
    */
   virtual int32_t BoostedRounds() const = 0;
   virtual uint32_t Groups() const = 0;
+  virtual bst_target_t Targets() const = 0;
 
   void LoadModel(Json const& in) override = 0;
   void SaveModel(Json* out) const override = 0;
@@ -305,11 +307,21 @@ struct LearnerModelParam {
   linalg::Tensor<float, 1> base_score_;
 
  public:
-  /* \brief number of features */
-  uint32_t num_feature { 0 };
-  /* \brief number of classes, if it is multi-class classification */
-  uint32_t num_output_group { 0 };
-  /* \brief Current task, determined by objective. */
+  /**
+   * \brief The number of features.
+   */
+  bst_feature_t num_feature{0};
+  /**
+   * \brief The number of classes or targets if the current strategy is composite.
+   */
+  uint32_t num_output_group{0};
+  /**
+   * \brief The number of output targets.
+   */
+  bst_target_t num_target{0};
+  /**
+   * \brief Current task, determined by objective.
+   */
   ObjInfo task{ObjInfo::kRegression};
 
   LearnerModelParam() = default;
@@ -319,13 +331,20 @@
                     linalg::Tensor<float, 1> base_margin, ObjInfo t);
   LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t);
   LearnerModelParam(bst_feature_t n_features, linalg::Tensor<float, 1> base_margin,
-                    uint32_t n_groups)
-      : base_score_{std::move(base_margin)}, num_feature{n_features}, num_output_group{n_groups} {}
+                    uint32_t n_groups, bst_target_t n_targets)
+      : base_score_{std::move(base_margin)},
+        num_feature{n_features},
+        num_output_group{n_groups},
+        num_target{n_targets} {}
 
   linalg::TensorView<float const, 1> BaseScore(Context const* ctx) const;
   linalg::TensorView<float const, 1> BaseScore(int32_t device) const;
 
   void Copy(LearnerModelParam const& that);
+  bool IsVectorLeaf() const { return num_output_group == 1 && num_target > 1; }
+  bst_target_t OutputLength() const {
+    return this->IsVectorLeaf() ? this->num_target : this->num_output_group;
+  }
 
   /* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
   bool Initialized() const { return num_feature != 0 && num_output_group != 0; }
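
The IsVectorLeaf()/OutputLength() pair added above is the crux of the multi-target design: a model either keeps one group of trees per class or target (num_output_group > 1, the pre-existing strategy) or grows single trees whose leaves emit one value per target (num_target > 1, the new strategy exercised by the demo's multi_strategy="mono" setting). A standalone sketch of that dispatch with illustrative field values; ModelShape is hypothetical, not the real LearnerModelParam:

#include <cstdint>
#include <cstdio>

// Hypothetical mirror of the two LearnerModelParam fields, for illustration.
struct ModelShape {
  std::uint32_t num_output_group{0};  // separate tree group per class/target
  std::uint32_t num_target{0};        // targets handled inside a single tree

  // Vector leaf: a single group whose trees emit one value per target.
  bool IsVectorLeaf() const { return num_output_group == 1 && num_target > 1; }
  // Either strategy yields the same prediction width per row.
  std::uint32_t OutputLength() const {
    return IsVectorLeaf() ? num_target : num_output_group;
  }
};

int main() {
  // One tree group per target, scalar leaves.
  ModelShape composite{3, 1};
  // One tree group total, vector leaves carrying 3 targets each.
  ModelShape vector_leaf{1, 3};
  // Both shapes produce 3 outputs per row; they differ in how trees are grown.
  std::printf("%u %u\n", static_cast<unsigned>(composite.OutputLength()),
              static_cast<unsigned>(vector_leaf.OutputLength()));
  return 0;
}

Either way, callers can size per-row prediction buffers from OutputLength() alone, without caring which strategy produced the model.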