From ec2a7025a9834d9891891d4d87c54838685cd922 Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 22 Jul 2022 11:06:22 +0800 Subject: [PATCH 001/133] Calculate `base_score` based on input labels. Custom objective. Fixes. Use a tensor in learner. fixes. Fix. Lint. Remove. Cache the model. Empty dmatrix. Revert unnecessary changes. Fix. Add serialization test. CPU build. revert. Better average. Move configuration. Check for model initialized. Merge dispatching into median. Split up the configuration. Add a quick test. check. test. Don't change. check. check. cleanup. Add init estimation for LS. --- include/xgboost/learner.h | 30 +- include/xgboost/linalg.h | 56 +++- include/xgboost/objective.h | 12 +- include/xgboost/predictor.h | 7 +- src/common/common.h | 16 +- src/common/linalg_op.h | 26 ++ src/common/stats.cu | 63 ++++ src/common/stats.h | 86 ++++++ src/common/threading_utils.h | 3 + src/data/array_interface.h | 4 +- src/gbm/gblinear.cc | 17 +- src/gbm/gbtree.cc | 19 +- src/gbm/gbtree.cu | 11 +- src/learner.cc | 344 +++++++++++++++------- src/objective/adaptive.h | 1 + src/objective/objective.cc | 9 +- src/objective/regression_obj.cu | 42 ++- src/predictor/cpu_predictor.cc | 12 +- src/predictor/gpu_predictor.cu | 36 +-- src/predictor/predictor.cc | 7 +- tests/cpp/common/test_stats.cc | 12 + tests/cpp/common/test_stats.cu | 15 + tests/cpp/gbm/test_gblinear.cc | 12 +- tests/cpp/gbm/test_gbtree.cc | 46 +-- tests/cpp/helpers.h | 11 + tests/cpp/linear/test_linear.cc | 10 +- tests/cpp/linear/test_linear.cu | 12 +- tests/cpp/predictor/test_cpu_predictor.cc | 25 +- tests/cpp/predictor/test_gpu_predictor.cu | 69 ++--- tests/cpp/predictor/test_predictor.cc | 21 +- tests/cpp/predictor/test_predictor.h | 8 +- tests/cpp/test_learner.cc | 78 ++++- tests/cpp/test_serialization.cc | 39 +++ tests/python-gpu/test_gpu_updaters.py | 5 + tests/python/test_model_compatibility.py | 26 +- tests/python/test_updaters.py | 71 ++++- 36 files changed, 933 insertions(+), 328 deletions(-) create mode 100644 src/common/stats.cu diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h index 51fefac1365f..34ae5a4d53bb 100644 --- a/include/xgboost/learner.h +++ b/include/xgboost/learner.h @@ -8,10 +8,9 @@ #ifndef XGBOOST_LEARNER_H_ #define XGBOOST_LEARNER_H_ -#include #include #include -#include +#include // Context #include #include #include @@ -274,7 +273,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable { /** * \brief Return the context object of this Booster. */ - virtual GenericParameter const* Ctx() const = 0; + virtual Context const* Ctx() const = 0; /*! * \brief Get configuration arguments currently stored by the learner * \return Key-value pairs representing configuration arguments @@ -289,7 +288,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable { /*! \brief The evaluation metrics used to evaluate the model. */ std::vector > metrics_; /*! \brief Training parameter. */ - GenericParameter generic_parameters_; + Context ctx_; }; struct LearnerModelParamLegacy; @@ -298,8 +297,14 @@ struct LearnerModelParamLegacy; * \brief Basic Model Parameters, used to describe the booster. */ struct LearnerModelParam { - /* \brief global bias */ - bst_float base_score { 0.5f }; + private: + /** + * \brief Global bias, this is just a scalar value but can be extended to vector when we + * support multi-class and multi-target. 
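+   * For now the tensor always holds a single element (shape {1}). A minimal
+   * usage sketch, assuming an initialized `LearnerModelParam mp`:
+   *
+   *   auto view = mp.BaseScore(Context::kCpuId);  // TensorView<float const, 1>
+   *   float margin = view(0);                     // the transformed global bias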
+ */ + linalg::Tensor base_score_; + + public: /* \brief number of features */ uint32_t num_feature { 0 }; /* \brief number of classes, if it is multi-class classification */ @@ -310,7 +315,18 @@ struct LearnerModelParam { LearnerModelParam() = default; // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep // this one as an immutable copy. - LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin, ObjInfo t); + LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param, + linalg::Tensor base_margin, ObjInfo t); + LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t); + LearnerModelParam(bst_feature_t n_features, linalg::Tensor base_margin, + uint32_t n_groups) + : base_score_{std::move(base_margin)}, num_feature{n_features}, num_output_group{n_groups} {} + + linalg::TensorView BaseScore(Context const* ctx) const; + linalg::TensorView BaseScore(int32_t device) const; + + void Copy(LearnerModelParam const& that); + /* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */ bool Initialized() const { return num_feature != 0; } }; diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index 944903ac83e5..3897e89ea1ce 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -16,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -213,6 +215,22 @@ LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t) { constexpr auto kSize = std::tuple_size::value; return Apply(std::forward(f), std::forward(t), std::make_index_sequence{}); } + +/** + * C++ 17 conjunction + */ +template +struct Conjunction : std::true_type {}; +template +struct Conjunction : B1 {}; +template +struct Conjunction : std::conditional_t, B1> {}; + +template +using IsAllIntegral = Conjunction>...>; + +template +using EnableIfIntegral = std::enable_if_t::value>; } // namespace detail /** @@ -406,7 +424,7 @@ class TensorView { * * \endcode */ - template + template * = nullptr> LINALG_HD T &operator()(Index &&...index) { static_assert(sizeof...(index) <= kDim, "Invalid index."); size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward(index)...); @@ -416,7 +434,7 @@ class TensorView { /** * \brief Index the tensor to obtain a scalar value. */ - template + template * = nullptr> LINALG_HD T const &operator()(Index &&...index) const { static_assert(sizeof...(index) <= kDim, "Invalid index."); size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward(index)...); @@ -656,7 +674,7 @@ class Tensor { } if (device >= 0) { data_.SetDevice(device); - data_.DevicePointer(); // Pull to device; + data_.ConstDevicePointer(); // Pull to device; } CHECK_EQ(data_.Size(), detail::CalcSize(shape_)); } @@ -702,12 +720,29 @@ class Tensor { } template - explicit Tensor(std::initializer_list data, I const (&shape)[D], int32_t device) { + explicit Tensor(std::initializer_list data, I const (&shape)[D], + int32_t device = Context::kCpuId) { auto &h_vec = data_.HostVector(); h_vec = data; // shape this->Initialize(shape, device); } + /** + * \brief Index operator. Not thread safe, should not be used in performance critical + * region. For more efficient indexing, consider getting a view first. + */ + template + T &operator()(Index &&...idx) { + return this->HostView()(std::forward(idx)...); + } + /** + * \brief Index operator. Not thread safe, should not be used in performance critical + * region. 
For more efficient indexing, consider getting a view first. + */ + template + T const &operator()(Index &&...idx) const { + return this->HostView()(std::forward(idx)...); + } /** * \brief Get a \ref TensorView for this tensor. @@ -761,7 +796,7 @@ class Tensor { * * If the total size is changed, then data in this tensor is no longer valid. */ - template + template * = nullptr> void Reshape(S &&...s) { static_assert(sizeof...(S) <= kDim, "Invalid shape."); detail::ReshapeImpl<0>(shape_, std::forward(s)...); @@ -777,15 +812,20 @@ class Tensor { * * If the total size is changed, then data in this tensor is no longer valid. */ - template - void Reshape(size_t (&shape)[D]) { + template + void Reshape(common::Span shape) { static_assert(D <= kDim, "Invalid shape."); - std::copy(shape, shape + D, this->shape_); + std::copy(shape.data(), shape.data() + D, this->shape_); std::fill(shape_ + D, shape_ + kDim, 1); auto n = detail::CalcSize(shape_); data_.Resize(n); } + template + void Reshape(size_t (&shape)[D]) { + this->Reshape(common::Span{shape}); + } + /** * \brief Set device ordinal for this tensor. */ diff --git a/include/xgboost/objective.h b/include/xgboost/objective.h index d30f81379f5a..0c0d502bdbfb 100644 --- a/include/xgboost/objective.h +++ b/include/xgboost/objective.h @@ -27,7 +27,10 @@ class RegTree; /*! \brief interface of objective function */ class ObjFunction : public Configurable { protected: - GenericParameter const* ctx_; + Context const* ctx_; + + public: + static constexpr float DefaultBaseScore() { return 0.5f; } public: /*! \brief virtual destructor */ @@ -75,6 +78,13 @@ class ObjFunction : public Configurable { virtual bst_float ProbToMargin(bst_float base_score) const { return base_score; } + /** + * \brief Make initialize estimation of prediction. + * + * \param info MetaInfo that contains label. + * \param base_score Output estimation. + */ + virtual void InitEstimation(MetaInfo const& info, linalg::Tensor* base_score) const; /*! * \brief Return task of this objective. */ diff --git a/include/xgboost/predictor.h b/include/xgboost/predictor.h index 33c695bc19bf..877ff462bf24 100644 --- a/include/xgboost/predictor.h +++ b/include/xgboost/predictor.h @@ -102,13 +102,10 @@ class PredictionContainer { */ class Predictor { protected: - /* - * \brief Runtime parameters. - */ - GenericParameter const* ctx_; + Context const* ctx_; public: - explicit Predictor(GenericParameter const* ctx) : ctx_{ctx} {} + explicit Predictor(Context const* ctx) : ctx_{ctx} {} virtual ~Predictor() = default; diff --git a/src/common/common.h b/src/common/common.h index 0f21739876b2..c09b75fddaa3 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -195,6 +195,10 @@ class IndexTransformIter { } value_type operator*() const { return fn_(iter_); } + value_type operator[](size_t i) const { + auto iter = *this + i; + return *iter; + } auto operator-(IndexTransformIter const &that) const { return iter_ - that.iter_; } bool operator==(IndexTransformIter const &that) const { return iter_ == that.iter_; } @@ -265,6 +269,7 @@ struct OptionalWeights { explicit OptionalWeights(float w) : dft{w} {} XGBOOST_DEVICE float operator[](size_t i) const { return weights.empty() ? dft : weights[i]; } + auto Empty() const { return weights.empty(); } }; /** @@ -276,7 +281,7 @@ XGBOOST_DEVICE size_t LastOf(size_t group, Indexable const &indptr) { } /** - * @brief A CRTP (curiously recurring template pattern) helper function. + * \brief A CRTP (curiously recurring template pattern) helper function. 
* * https://www.fluentcpp.com/2017/05/19/crtp-helper/ * @@ -284,7 +289,7 @@ XGBOOST_DEVICE size_t LastOf(size_t group, Indexable const &indptr) { * 1. Makes "crtp" explicit in the inheritance structure of a CRTP base class. * 2. Avoids having to `static_cast` in a lot of places. * - * @tparam T The derived class in a CRTP hierarchy. + * \tparam T The derived class in a CRTP hierarchy. */ template struct Crtp { @@ -292,6 +297,13 @@ struct Crtp { T const &Underlying() const { return static_cast(*this); } }; +/** + * \brief C++17 std::as_const + */ +template +typename std::add_const::type &AsConst(T &v) noexcept { // NOLINT(runtime/references) + return v; +} } // namespace common } // namespace xgboost #endif // XGBOOST_COMMON_COMMON_H_ diff --git a/src/common/linalg_op.h b/src/common/linalg_op.h index 05f050772ccc..0de173c8e73f 100644 --- a/src/common/linalg_op.h +++ b/src/common/linalg_op.h @@ -4,6 +4,7 @@ #ifndef XGBOOST_COMMON_LINALG_OP_H_ #define XGBOOST_COMMON_LINALG_OP_H_ #include +#include // std::int32_t #include "common.h" #include "threading_utils.h" @@ -59,6 +60,31 @@ void ElementWiseKernel(GenericParameter const* ctx, linalg::TensorView t, ElementWiseKernelHost(t, ctx->Threads(), fn); } #endif // !defined(XGBOOST_USE_CUDA) + +template +auto cbegin(TensorView v) { // NOLINT + auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t const& { + return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); + }); + return it; +} + +template +auto cend(TensorView v) { // NOLINT + return cbegin(v) + v.Size(); +} + +template +auto begin(TensorView v) { // NOLINT + auto it = common::MakeIndexTransformIter( + [&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); }); + return it; +} + +template +auto end(TensorView v) { // NOLINT + return begin(v) + v.Size(); +} } // namespace linalg } // namespace xgboost #endif // XGBOOST_COMMON_LINALG_OP_H_ diff --git a/src/common/stats.cu b/src/common/stats.cu new file mode 100644 index 000000000000..bb70c3a557be --- /dev/null +++ b/src/common/stats.cu @@ -0,0 +1,63 @@ +/*! 
+ * Copyright 2022 by XGBoost Contributors + */ + +#include // thrust::make_counting_iterator + +#include "common.h" // common::OptionalWeights +#include "device_helpers.cuh" // dh::MakeTransformIterator, tcbegin, tcend +#include "stats.cuh" // common::SegmentedQuantile, common::SegmentedWeightedQuantile +#include "xgboost/generic_parameters.h" // Context +#include "xgboost/host_device_vector.h" // HostDeviceVector +#include "xgboost/linalg.h" // linalg::TensorView, UnravelIndex, Apply + +namespace xgboost { +namespace common { +namespace cuda { +float Median(Context const* ctx, linalg::TensorView t, + common::OptionalWeights weights) { + HostDeviceVector segments{0, t.Size()}; + segments.SetDevice(ctx->gpu_id); + auto d_segments = segments.ConstDeviceSpan(); + auto val_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) { + return linalg::detail::Apply(t, linalg::UnravelIndex(i, t.Shape())); + }); + + HostDeviceVector quantile{0}; + quantile.SetDevice(ctx->gpu_id); + if (weights.Empty()) { + common::SegmentedQuantile(ctx, 0.5, dh::tcbegin(d_segments), dh::tcend(d_segments), val_it, + val_it + t.Size(), &quantile); + } else { + CHECK_NE(t.Shape(1), 0); + auto w_it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(size_t i) { + auto sample_idx = i / t.Shape(1); + return weights[sample_idx]; + }); + common::SegmentedWeightedQuantile(ctx, 0.5, dh::tcbegin(d_segments), dh::tcend(d_segments), + val_it, val_it + t.Size(), w_it, w_it + t.Size(), &quantile); + } + CHECK_EQ(quantile.Size(), 1); + return quantile.HostVector().front(); +} + +float Mean(Context const* ctx, linalg::TensorView t, + common::OptionalWeights weights, size_t n) { + dh::safe_cuda(cudaSetDevice(ctx->gpu_id)); + double size = n == 0 ? 
t.Size() : n; + CHECK_NE(size, 0); + auto val_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) { + auto idx = linalg::UnravelIndex(i, t.Shape()); + auto ridx = std::get<0>(idx); + return linalg::detail::Apply(t, std::move(idx)) * weights[ridx] / size; + }); + dh::XGBCachingDeviceAllocator alloc; + auto mean = thrust::reduce(thrust::cuda::par(alloc), val_it, val_it + t.Size(), 0.0f); + return mean; +} +} // namespace cuda +} // namespace common +} // namespace xgboost diff --git a/src/common/stats.h b/src/common/stats.h index 4ad9e4aa770a..6d23944f252e 100644 --- a/src/common/stats.h +++ b/src/common/stats.h @@ -9,6 +9,8 @@ #include #include "common.h" +#include "linalg_op.h" +#include "xgboost/generic_parameters.h" #include "xgboost/linalg.h" namespace xgboost { @@ -90,6 +92,90 @@ float WeightedQuantile(double alpha, Iter begin, Iter end, WeightIter weights) { idx = std::min(idx, static_cast(n - 1)); return val(idx); } + +namespace cuda { +float Median(Context const* ctx, linalg::TensorView t, + common::OptionalWeights weights); +#if !defined(XGBOOST_USE_CUDA) +inline float Median(Context const*, linalg::TensorView, common::OptionalWeights) { + common::AssertGPUSupport(); + return 0; +} +#endif // !defined(XGBOOST_USE_CUDA) + +float Mean(Context const* ctx, linalg::TensorView t, + common::OptionalWeights weights, size_t n); + +#if !defined(XGBOOST_USE_CUDA) +inline float Mean(Context const*, linalg::TensorView, common::OptionalWeights, + size_t n) { + common::AssertGPUSupport(); + return 0; +} +#endif // !defined(XGBOOST_USE_CUDA) +} // namespace cuda + +inline float Median(Context const* ctx, linalg::Tensor const& t, + HostDeviceVector const& weights) { + CHECK_EQ(t.Shape(1), 0) << "Matrix is not yet supported."; + if (!ctx->IsCPU()) { + weights.SetDevice(ctx->gpu_id); + auto opt_weights = OptionalWeights(weights.ConstDeviceSpan()); + auto t_v = t.View(ctx->gpu_id); + return cuda::Median(ctx, t_v, opt_weights); + } + + auto opt_weights = OptionalWeights(weights.ConstHostSpan()); + auto t_v = t.HostView(); + auto iter = common::MakeIndexTransformIter( + [&](size_t i) { return linalg::detail::Apply(t_v, linalg::UnravelIndex(i, t_v.Shape())); }); + float q{0}; + if (opt_weights.Empty()) { + q = common::Quantile(0.5, iter, iter + t_v.Size()); + } else { + CHECK_NE(t_v.Shape(1), 0); + auto w_it = common::MakeIndexTransformIter([&](size_t i) { + auto sample_idx = i / t_v.Shape(1); + return opt_weights[sample_idx]; + }); + q = common::WeightedQuantile(0.5, iter, iter + t_v.Size(), w_it); + } + return q; +} + +/** + * \brief Calculate mean or partial mean. When n is specified to be non-zero, we use n as + * the total number of elements instead of the size of t. + */ +inline float Mean(Context const* ctx, linalg::Tensor const& t, + HostDeviceVector const& weights, size_t n = 0) { + if (!weights.Empty()) { + CHECK_EQ(weights.Size(), t.Shape(0)) << "Weight is assigned for each row."; + } + if (!ctx->IsCPU()) { + weights.SetDevice(ctx->gpu_id); + auto opt_weights = OptionalWeights(weights.ConstDeviceSpan()); + auto t_v = t.View(ctx->gpu_id); + cuda::Mean(ctx, t_v, opt_weights, n); + } + + auto opt_weights = OptionalWeights(weights.ConstHostSpan()); + auto t_v = t.HostView(); + + MemStackAllocator mean_tloc(ctx->Threads(), 0.0f); + auto iter = common::MakeIndexTransformIter( + [&](size_t i) { return linalg::detail::Apply(t_v, linalg::UnravelIndex(i, t_v.Shape())); }); + + double size = n == 0 ? 
t_v.Size() : n; + CHECK_NE(size, 0); + ParallelFor(t_v.Size(), ctx->Threads(), [&](auto i) { + auto tidx = omp_get_thread_num(); + auto ridx = std::get<0>(linalg::UnravelIndex(i, t_v.Shape())); + mean_tloc[tidx] += iter[i] * opt_weights[ridx] / size; + }); + auto mean = std::accumulate(mean_tloc.cbegin(), mean_tloc.cend(), 0.0f); + return mean; +} } // namespace common } // namespace xgboost #endif // XGBOOST_COMMON_STATS_H_ diff --git a/src/common/threading_utils.h b/src/common/threading_utils.h index 9d4149d7d07b..98955a446777 100644 --- a/src/common/threading_utils.h +++ b/src/common/threading_utils.h @@ -278,6 +278,9 @@ class MemStackAllocator { T& operator[](size_t i) { return ptr_[i]; } T const& operator[](size_t i) const { return ptr_[i]; } + auto cbegin() const { return ptr_; } // NOLINT + auto cend() const { return ptr_ + required_size_; } // NOLINT + private: T* ptr_ = nullptr; size_t required_size_; diff --git a/src/data/array_interface.h b/src/data/array_interface.h index c646654bef3f..e90473458e1c 100644 --- a/src/data/array_interface.h +++ b/src/data/array_interface.h @@ -345,8 +345,8 @@ struct ToDType { }; #if !defined(XGBOOST_USE_CUDA) -inline void ArrayInterfaceHandler::SyncCudaStream(int64_t stream) { common::AssertGPUSupport(); } -inline bool ArrayInterfaceHandler::IsCudaPtr(void const *ptr) { return false; } +inline void ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); } +inline bool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; } #endif // !defined(XGBOOST_USE_CUDA) /** diff --git a/src/gbm/gblinear.cc b/src/gbm/gblinear.cc index 35de4c70d604..c8cdfeb476b1 100644 --- a/src/gbm/gblinear.cc +++ b/src/gbm/gblinear.cc @@ -161,9 +161,10 @@ class GBLinear : public GradientBooster { uint32_t layer_begin, uint32_t) override { LinearCheckLayer(layer_begin); const int ngroup = model_.learner_model_param->num_output_group; + + auto base_score = learner_model_param_->BaseScore(ctx_); for (int gid = 0; gid < ngroup; ++gid) { - this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, - learner_model_param_->base_score); + this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, base_score(0)); } } @@ -184,6 +185,7 @@ class GBLinear : public GradientBooster { contribs.resize(p_fmat->Info().num_row_ * ncolumns * ngroup); // make sure contributions is zeroed, we could be reusing a previously allocated one std::fill(contribs.begin(), contribs.end(), 0); + auto base_score = learner_model_param_->BaseScore(ctx_); // start collecting the contributions for (const auto &batch : p_fmat->GetBatches()) { // parallel over local batch @@ -202,8 +204,8 @@ class GBLinear : public GradientBooster { } // add base margin to BIAS p_contribs[ncolumns - 1] = - model_.Bias()[gid] + ((base_margin.Size() != 0) ? base_margin(row_idx, gid) - : learner_model_param_->base_score); + model_.Bias()[gid] + + ((base_margin.Size() != 0) ? 
base_margin(row_idx, gid) : base_score(0)); } }); } @@ -268,10 +270,12 @@ class GBLinear : public GradientBooster { monitor_.Start("PredictBatchInternal"); model_.LazyInitModel(); std::vector &preds = *out_preds; - auto base_margin = p_fmat->Info().base_margin_.View(GenericParameter::kCpuId); + auto base_margin = p_fmat->Info().base_margin_.View(Context::kCpuId); // start collecting the prediction const int ngroup = model_.learner_model_param->num_output_group; preds.resize(p_fmat->Info().num_row_ * ngroup); + + auto base_score = learner_model_param_->BaseScore(Context::kCpuId); for (const auto &page : p_fmat->GetBatches()) { auto const& batch = page.GetView(); // output convention: nrow * k, where nrow is number of rows @@ -285,8 +289,7 @@ class GBLinear : public GradientBooster { const size_t ridx = page.base_rowid + i; // loop over output groups for (int gid = 0; gid < ngroup; ++gid) { - float margin = - (base_margin.Size() != 0) ? base_margin(ridx, gid) : learner_model_param_->base_score; + float margin = (base_margin.Size() != 0) ? base_margin(ridx, gid) : base_score(0); this->Pred(batch[i], &preds[ridx * ngroup], gid, margin); } }); diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc index 9d1d5404409e..a4106888f240 100644 --- a/src/gbm/gbtree.cc +++ b/src/gbm/gbtree.cc @@ -638,13 +638,12 @@ void GPUDartPredictInc(common::Span out_predts, } #endif -void GPUDartInplacePredictInc(common::Span out_predts, - common::Span predts, float tree_w, - size_t n_rows, float base_score, - bst_group_t n_groups, - bst_group_t group) +void GPUDartInplacePredictInc(common::Span /*out_predts*/, common::Span /*predts*/, + float /*tree_w*/, size_t /*n_rows*/, + linalg::TensorView /*base_score*/, + bst_group_t /*n_groups*/, bst_group_t /*group*/) #if defined(XGBOOST_USE_CUDA) -; // NOLINT + ; // NOLINT #else { common::AssertGPUSupport(); @@ -850,15 +849,17 @@ class Dart : public GBTree { size_t n_rows = p_fmat->Info().num_row_; if (predts.predictions.DeviceIdx() != Context::kCpuId) { p_out_preds->predictions.SetDevice(predts.predictions.DeviceIdx()); + auto base_score = model_.learner_model_param->BaseScore(predts.predictions.DeviceIdx()); GPUDartInplacePredictInc(p_out_preds->predictions.DeviceSpan(), - predts.predictions.DeviceSpan(), w, n_rows, - model_.learner_model_param->base_score, n_groups, group); + predts.predictions.DeviceSpan(), w, n_rows, base_score, n_groups, + group); } else { + auto base_score = model_.learner_model_param->BaseScore(Context::kCpuId); auto& h_predts = predts.predictions.HostVector(); auto& h_out_predts = p_out_preds->predictions.HostVector(); common::ParallelFor(n_rows, ctx_->Threads(), [&](auto ridx) { const size_t offset = ridx * n_groups + group; - h_out_predts[offset] += (h_predts[offset] - model_.learner_model_param->base_score) * w; + h_out_predts[offset] += (h_predts[offset] - base_score(0)) * w; }); } } diff --git a/src/gbm/gbtree.cu b/src/gbm/gbtree.cu index 0b81fff23e5c..12109782d59b 100644 --- a/src/gbm/gbtree.cu +++ b/src/gbm/gbtree.cu @@ -31,13 +31,14 @@ void GPUDartPredictInc(common::Span out_predts, }); } -void GPUDartInplacePredictInc(common::Span out_predts, - common::Span predts, float tree_w, - size_t n_rows, float base_score, - bst_group_t n_groups, bst_group_t group) { +void GPUDartInplacePredictInc(common::Span out_predts, common::Span predts, + float tree_w, size_t n_rows, + linalg::TensorView base_score, bst_group_t n_groups, + bst_group_t group) { + CHECK_EQ(base_score.Size(), 1); dh::LaunchN(n_rows, [=] XGBOOST_DEVICE(size_t ridx) { const size_t 
offset = ridx * n_groups + group; - out_predts[offset] += (predts[offset] - base_score) * tree_w; + out_predts[offset] += (predts[offset] - base_score(0)) * tree_w; }); } } // namespace gbm diff --git a/src/learner.cc b/src/learner.cc index 0dbf3631a499..d0ecfcb345b0 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -4,47 +4,48 @@ * \brief Implementation of learning algorithm. * \author Tianqi Chen */ +#include "xgboost/learner.h" + +#include #include #include #include -#include -#include #include +#include #include -#include +#include // std::numeric_limits #include +#include #include -#include #include +#include #include #include -#include "dmlc/any.h" +#include "common/charconv.h" +#include "common/common.h" +#include "common/io.h" +#include "common/linalg_op.h" +#include "common/observer.h" +#include "common/random.h" +#include "common/threading_utils.h" +#include "common/timer.h" +#include "common/version.h" #include "xgboost/base.h" #include "xgboost/c_api.h" #include "xgboost/data.h" -#include "xgboost/model.h" -#include "xgboost/predictor.h" #include "xgboost/feature_map.h" #include "xgboost/gbm.h" #include "xgboost/generic_parameters.h" #include "xgboost/host_device_vector.h" #include "xgboost/json.h" -#include "xgboost/learner.h" #include "xgboost/logging.h" #include "xgboost/metric.h" +#include "xgboost/model.h" #include "xgboost/objective.h" #include "xgboost/parameter.h" - -#include "common/common.h" -#include "common/io.h" -#include "common/observer.h" -#include "common/random.h" -#include "common/timer.h" -#include "common/charconv.h" -#include "common/version.h" -#include "common/threading_utils.h" +#include "xgboost/predictor.h" namespace { @@ -85,26 +86,29 @@ struct LearnerModelParamLegacy : public dmlc::Parameter uint32_t minor_version; uint32_t num_target{1}; + + int32_t base_score_estimated{0}; /*! \brief reserved field */ - int reserved[26]; + int reserved[25]; /*! \brief constructor */ LearnerModelParamLegacy() { std::memset(this, 0, sizeof(LearnerModelParamLegacy)); - base_score = 0.5f; + base_score = ObjFunction::DefaultBaseScore(); num_target = 1; major_version = std::get<0>(Version::Self()); minor_version = std::get<1>(Version::Self()); + base_score_estimated = 0; static_assert(sizeof(LearnerModelParamLegacy) == 136, "Do not change the size of this struct, as it will break binary IO."); } + // Skip other legacy fields. 
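+  // `base_score` is written with to_chars and read back with from_chars below,
+  // so the float survives the JSON round trip without losing precision.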
Json ToJson() const { Object obj; char floats[NumericLimits::kToCharsSize]; auto ret = to_chars(floats, floats + NumericLimits::kToCharsSize, base_score); - CHECK(ret.ec == std::errc()); - obj["base_score"] = - std::string{floats, static_cast(std::distance(floats, ret.ptr))}; + CHECK(ret.ec == std::errc{}); + obj["base_score"] = std::string{floats, static_cast(std::distance(floats, ret.ptr))}; char integers[NumericLimits::kToCharsSize]; ret = to_chars(integers, integers + NumericLimits::kToCharsSize, @@ -136,10 +140,14 @@ struct LearnerModelParamLegacy : public dmlc::Parameter } this->Init(m); + std::string str = get(j_param.at("base_score")); from_chars(str.c_str(), str.c_str() + str.size(), base_score); + // It can only be estimated during the first training, we consider it estimated afterward + base_score_estimated = 1; } - inline LearnerModelParamLegacy ByteSwap() const { + + LearnerModelParamLegacy ByteSwap() const { LearnerModelParamLegacy x = *this; dmlc::ByteSwap(&x.base_score, sizeof(x.base_score), 1); dmlc::ByteSwap(&x.num_feature, sizeof(x.num_feature), 1); @@ -149,14 +157,30 @@ struct LearnerModelParamLegacy : public dmlc::Parameter dmlc::ByteSwap(&x.major_version, sizeof(x.major_version), 1); dmlc::ByteSwap(&x.minor_version, sizeof(x.minor_version), 1); dmlc::ByteSwap(&x.num_target, sizeof(x.num_target), 1); + dmlc::ByteSwap(&x.base_score_estimated, sizeof(x.base_score_estimated), 1); dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0])); return x; } + template + Args UpdateAllowUnknown(Container const& kwargs) { + // Detect whether user has made their own base score. + if (std::find_if(kwargs.cbegin(), kwargs.cend(), + [](auto const& kv) { return kv.first == "base_score"; }) != kwargs.cend()) { + base_score_estimated = true; + } + if (std::find_if(kwargs.cbegin(), kwargs.cend(), [](auto const& kv) { + return kv.first == "base_score_estimated"; + }) != kwargs.cend()) { + LOG(FATAL) << "`base_score_estimated` cannot be specified as hyper-parameter."; + } + return dmlc::Parameter::UpdateAllowUnknown(kwargs); + } + // declare parameters DMLC_DECLARE_PARAMETER(LearnerModelParamLegacy) { DMLC_DECLARE_FIELD(base_score) - .set_default(0.5f) + .set_default(ObjFunction::DefaultBaseScore()) .describe("Global bias of the model."); DMLC_DECLARE_FIELD(num_feature) .set_default(0) @@ -170,12 +194,12 @@ struct LearnerModelParamLegacy : public dmlc::Parameter .set_default(1) .set_lower_bound(1) .describe("Number of target for multi-target regression."); + DMLC_DECLARE_FIELD(base_score_estimated).set_default(0); } }; -LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin, - ObjInfo t) - : base_score{base_margin}, num_feature{user_param.num_feature}, task{t} { +LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t) + : num_feature{user_param.num_feature}, task{t} { auto n_classes = std::max(static_cast(user_param.num_class), 1u); auto n_targets = user_param.num_target; num_output_group = std::max(n_classes, n_targets); @@ -185,6 +209,53 @@ LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, << ", n_targets:" << n_targets; } +LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param, + linalg::Tensor base_margin, ObjInfo t) + : LearnerModelParam{user_param, t} { + std::swap(base_score_, base_margin); + // Make sure read access everywhere for thread-safe prediction. 
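+  // Const views pull a copy to the host (and device) while keeping every copy
+  // readable, so concurrent prediction never triggers a lazy transfer.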
+ common::AsConst(base_score_).HostView(); + if (!ctx->IsCPU()) { + common::AsConst(base_score_).View(ctx->gpu_id); + } + CHECK(common::AsConst(base_score_).Data()->HostCanRead()); +} + +linalg::TensorView LearnerModelParam::BaseScore(int32_t device) const { + // multi-class is not supported yet. + CHECK_EQ(base_score_.Size(), 1); + if (device == Context::kCpuId) { + // Make sure that we won't run it race condition. + CHECK(base_score_.Data()->HostCanRead()); + return base_score_.HostView(); + } + // Make sure that we won't run it race condition. + CHECK(base_score_.Data()->DeviceCanRead()); + auto v = base_score_.View(device); + CHECK(base_score_.Data()->HostCanRead()); // make sure read access is not removed. + return v; +} + +linalg::TensorView LearnerModelParam::BaseScore(Context const* ctx) const { + return this->BaseScore(ctx->gpu_id); +} + +void LearnerModelParam::Copy(LearnerModelParam const& that) { + base_score_.Reshape(that.base_score_.Shape()); + base_score_.Data()->SetDevice(that.base_score_.DeviceIdx()); + base_score_.Data()->Copy(*that.base_score_.Data()); + common::AsConst(base_score_).HostView(); + if (that.base_score_.DeviceIdx() != Context::kCpuId) { + common::AsConst(base_score_).View(that.base_score_.DeviceIdx()); + } + CHECK_EQ(base_score_.Data()->DeviceCanRead(), that.base_score_.Data()->DeviceCanRead()); + CHECK(base_score_.Data()->HostCanRead()); + + num_feature = that.num_feature; + num_output_group = that.num_output_group; + task = that.task; +} + struct LearnerTrainParam : public XGBoostParameter { // data split mode, can be row, col, or none. DataSplitMode dsplit {DataSplitMode::kAuto}; @@ -308,8 +379,61 @@ class LearnerConfiguration : public Learner { LearnerModelParamLegacy mparam_; LearnerModelParam learner_model_param_; LearnerTrainParam tparam_; + // Initial prediction. std::vector metric_names_; + /** + * \brief Calculate the `base_score` based on input data. + * + * \param p_fmat The training DMatrix used to estimate the base score. + */ + void InitBaseScore(DMatrix const* p_fmat) { + // Before 1.0.0, we save `base_score` into binary as a transformed value by objective. + // After 1.0.0 we save the value provided by user and keep it immutable instead. To + // keep the stability, we initialize it in binary LoadModel instead of configuration. + // Under what condition should we omit the transformation: + // + // - base_score is loaded from old binary model. + // + // What are the other possible conditions: + // + // - model loaded from new binary or JSON. + // - model is created from scratch. + // - model is configured second time due to change of parameter + CHECK(obj_); + if (!mparam_.base_score_estimated) { + if (p_fmat) { + // We estimate it from input data. + linalg::Tensor base_score; + obj_->InitEstimation(p_fmat->Info(), &base_score); + mparam_.base_score = base_score(0); + CHECK(!std::isnan(mparam_.base_score)); + } else { + mparam_.base_score = ObjFunction::DefaultBaseScore(); + } + mparam_.base_score_estimated = true; + // Update the shared model parameter + this->ConfigureModelParam(); + } + } + + // Convert mparam to learner_model_param + void ConfigureModelParam() { + this->ConfigureTargets(); + + CHECK(obj_); + auto task = obj_->Task(); + linalg::Tensor base_score({1}, Ctx()->gpu_id); + auto h_base_score = base_score.HostView(); + + // transform to margin + h_base_score(0) = obj_->ProbToMargin(mparam_.base_score); + // move it to model param, which is shared with all other components. 
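+  // (For binary:logistic, the transform above maps the stored probability 0.5
+  // to the margin logit(0.5) = 0.)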
+ learner_model_param_ = LearnerModelParam(Ctx(), mparam_, std::move(base_score), task); + CHECK(learner_model_param_.Initialized()); + CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0); + } + public: explicit LearnerConfiguration(std::vector > cache) : need_configuration_{true} { @@ -329,22 +453,24 @@ class LearnerConfiguration : public Learner { // Configuration before data is known. void Configure() override { // Varient of double checked lock - if (!this->need_configuration_) { return; } + if (!this->need_configuration_) { + return; + } std::lock_guard guard(config_lock_); - if (!this->need_configuration_) { return; } + if (!this->need_configuration_) { + return; + } monitor_.Start("Configure"); auto old_tparam = tparam_; Args args = {cfg_.cbegin(), cfg_.cend()}; tparam_.UpdateAllowUnknown(args); - auto mparam_backup = mparam_; - mparam_.UpdateAllowUnknown(args); - auto initialized = generic_parameters_.GetInitialised(); - auto old_seed = generic_parameters_.seed; - generic_parameters_.UpdateAllowUnknown(args); + auto initialized = ctx_.GetInitialised(); + auto old_seed = ctx_.seed; + ctx_.UpdateAllowUnknown(args); ConsoleLogger::Configure(args); @@ -355,8 +481,8 @@ class LearnerConfiguration : public Learner { } // set seed only before the model is initialized - if (!initialized || generic_parameters_.seed != old_seed) { - common::GlobalRandom().seed(generic_parameters_.seed); + if (!initialized || ctx_.seed != old_seed) { + common::GlobalRandom().seed(ctx_.seed); } // must precede configure gbm since num_features is required for gbm @@ -364,31 +490,15 @@ class LearnerConfiguration : public Learner { args = {cfg_.cbegin(), cfg_.cend()}; // renew this->ConfigureObjective(old_tparam, &args); - auto task = this->ConfigureTargets(); - - // Before 1.0.0, we save `base_score` into binary as a transformed value by objective. - // After 1.0.0 we save the value provided by user and keep it immutable instead. To - // keep the stability, we initialize it in binary LoadModel instead of configuration. - // Under what condition should we omit the transformation: - // - // - base_score is loaded from old binary model. - // - // What are the other possible conditions: - // - // - model loaded from new binary or JSON. - // - model is created from scratch. - // - model is configured second time due to change of parameter - if (!learner_model_param_.Initialized() || mparam_.base_score != mparam_backup.base_score) { - learner_model_param_ = - LearnerModelParam(mparam_, obj_->ProbToMargin(mparam_.base_score), task); - } - + learner_model_param_.task = obj_->Task(); // required by gbm configuration. 
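+  // The rest of the model parameter, including the transformed base_score, is
+  // rebuilt by ConfigureModelParam() below once the GPU ID is configured.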
this->ConfigureGBM(old_tparam, args); - generic_parameters_.ConfigureGpuId(this->gbm_->UseGPU()); + ctx_.ConfigureGpuId(this->gbm_->UseGPU()); + this->ConfigureModelParam(); + this->ConfigureMetrics(args); this->need_configuration_ = false; - if (generic_parameters_.validate_parameters) { + if (ctx_.validate_parameters) { this->ValidateParameters(); } @@ -396,6 +506,11 @@ class LearnerConfiguration : public Learner { monitor_.Stop("Configure"); } + void CheckModelInitialized() const { + CHECK(learner_model_param_.Initialized()) << "Model not yet initialized."; + CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0); + } + virtual PredictionContainer* GetPredictionCache() const { return &((*ThreadLocalPredictionCache::Get())[this]); } @@ -417,7 +532,7 @@ class LearnerConfiguration : public Learner { auto const& objective_fn = learner_parameters.at("objective"); if (!obj_) { - obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_)); + obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_)); } obj_->LoadConfig(objective_fn); learner_model_param_.task = obj_->Task(); @@ -425,7 +540,7 @@ class LearnerConfiguration : public Learner { tparam_.booster = get(gradient_booster["name"]); if (!gbm_) { gbm_.reset(GradientBooster::Create(tparam_.booster, - &generic_parameters_, &learner_model_param_)); + &ctx_, &learner_model_param_)); } gbm_->LoadConfig(gradient_booster); @@ -441,15 +556,15 @@ class LearnerConfiguration : public Learner { } else { metric_names_[i] = get(j_metrics[i]["name"]); } - metrics_[i] = std::unique_ptr(Metric::Create(metric_names_[i], &generic_parameters_)); + metrics_[i] = std::unique_ptr(Metric::Create(metric_names_[i], &ctx_)); if (!old_serialization) { metrics_[i]->LoadConfig(j_metrics[i]); } } - FromJson(learner_parameters.at("generic_param"), &generic_parameters_); + FromJson(learner_parameters.at("generic_param"), &ctx_); // make sure the GPU ID is valid in new environment before start running configure. 
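+  // (`false`: restoring a configuration should not by itself require a GPU.)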
- generic_parameters_.ConfigureGpuId(false); + ctx_.ConfigureGpuId(false); this->need_configuration_ = true; } @@ -478,7 +593,7 @@ class LearnerConfiguration : public Learner { } learner_parameters["metrics"] = Array(std::move(metrics)); - learner_parameters["generic_param"] = ToJson(generic_parameters_); + learner_parameters["generic_param"] = ToJson(ctx_); } void SetParam(const std::string& key, const std::string& value) override { @@ -551,7 +666,7 @@ class LearnerConfiguration : public Learner { return cfg_; } - GenericParameter const* Ctx() const override { return &generic_parameters_; } + Context const* Ctx() const override { return &ctx_; } private: void ValidateParameters() { @@ -654,7 +769,7 @@ class LearnerConfiguration : public Learner { void ConfigureGBM(LearnerTrainParam const& old, Args const& args) { if (gbm_ == nullptr || old.booster != tparam_.booster) { - gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_, + gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_, &learner_model_param_)); } gbm_->Configure(args); @@ -678,7 +793,7 @@ class LearnerConfiguration : public Learner { cfg_["max_delta_step"] = kMaxDeltaStepDefaultValue; } if (obj_ == nullptr || tparam_.objective != old.objective) { - obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_)); + obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_)); } auto& args = *p_args; args = {cfg_.cbegin(), cfg_.cend()}; // renew @@ -691,7 +806,7 @@ class LearnerConfiguration : public Learner { return m->Name() != name; }; if (std::all_of(metrics_.begin(), metrics_.end(), DupCheck)) { - metrics_.emplace_back(std::unique_ptr(Metric::Create(name, &generic_parameters_))); + metrics_.emplace_back(std::unique_ptr(Metric::Create(name, &ctx_))); mparam_.contain_eval_metrics = 1; } } @@ -703,7 +818,7 @@ class LearnerConfiguration : public Learner { /** * Get number of targets from objective function. 
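+   * As a side effect, the target count is also stored in the legacy model
+   * parameter (`num_target`).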
*/ - ObjInfo ConfigureTargets() { + void ConfigureTargets() { CHECK(this->obj_); auto const& cache = this->GetPredictionCache()->Container(); size_t n_targets = 1; @@ -722,7 +837,6 @@ class LearnerConfiguration : public Learner { } else { mparam_.num_target = n_targets; } - return this->obj_->Task(); } }; @@ -754,14 +868,14 @@ class LearnerIO : public LearnerConfiguration { std::string name = get(objective_fn["name"]); tparam_.UpdateAllowUnknown(Args{{"objective", name}}); - obj_.reset(ObjFunction::Create(name, &generic_parameters_)); + obj_.reset(ObjFunction::Create(name, &ctx_)); obj_->LoadConfig(objective_fn); auto const& gradient_booster = learner.at("gradient_booster"); name = get(gradient_booster["name"]); tparam_.UpdateAllowUnknown(Args{{"booster", name}}); gbm_.reset( - GradientBooster::Create(tparam_.booster, &generic_parameters_, &learner_model_param_)); + GradientBooster::Create(tparam_.booster, &ctx_, &learner_model_param_)); gbm_->LoadModel(gradient_booster); auto const& j_attributes = get(learner.at("attributes")); @@ -791,6 +905,7 @@ class LearnerIO : public LearnerConfiguration { void SaveModel(Json* p_out) const override { CHECK(!this->need_configuration_) << "Call Configure before saving model."; + this->CheckModelInitialized(); Version::Save(p_out); Json& out { *p_out }; @@ -826,7 +941,7 @@ class LearnerIO : public LearnerConfiguration { // About to be deprecated by JSON format void LoadModel(dmlc::Stream* fi) override { - generic_parameters_.UpdateAllowUnknown(Args{}); + ctx_.UpdateAllowUnknown(Args{}); tparam_.Init(std::vector>{}); // TODO(tqchen) mark deprecation of old format. common::PeekableInStream fp(fi); @@ -881,8 +996,8 @@ class LearnerIO : public LearnerConfiguration { CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format"; CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format"; - obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_)); - gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_, + obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_)); + gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_, &learner_model_param_)); gbm_->Load(fi); if (mparam_.contain_extra_attrs != 0) { @@ -925,7 +1040,14 @@ class LearnerIO : public LearnerConfiguration { } learner_model_param_ = - LearnerModelParam(mparam_, obj_->ProbToMargin(mparam_.base_score), obj_->Task()); + LearnerModelParam(&ctx_, mparam_, + linalg::Tensor{{std::isnan(mparam_.base_score) + ? std::numeric_limits::quiet_NaN() + : obj_->ProbToMargin(mparam_.base_score)}, + {1}, + Context::kCpuId}, + obj_->Task()); + if (attributes_.find("objective") != attributes_.cend()) { auto obj_str = attributes_.at("objective"); auto j_obj = Json::Load({obj_str.c_str(), obj_str.size()}); @@ -969,6 +1091,8 @@ class LearnerIO : public LearnerConfiguration { // Save model into binary format. The code is about to be deprecated by more robust // JSON serialization format. 
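+  // Saving requires an initialized model: `base_score` must already be
+  // estimated or configured, hence the CheckModelInitialized() call below.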
void SaveModel(dmlc::Stream* fo) const override { + this->CheckModelInitialized(); + LearnerModelParamLegacy mparam = mparam_; // make a copy to potentially modify std::vector > extra_attr; mparam.contain_extra_attrs = 1; @@ -1000,6 +1124,7 @@ class LearnerIO : public LearnerConfiguration { } extra_attr.emplace_back("metrics", os.str()); } + std::string header {"binf"}; fo->Write(header.data(), 4); if (DMLC_IO_NO_ENDIAN_SWAP) { @@ -1022,6 +1147,8 @@ class LearnerIO : public LearnerConfiguration { } void Save(dmlc::Stream* fo) const override { + this->CheckModelInitialized(); + Json memory_snapshot{Object()}; memory_snapshot["Model"] = Object(); auto& model = memory_snapshot["Model"]; @@ -1108,28 +1235,30 @@ class LearnerImpl : public LearnerIO { } } - std::vector DumpModel(const FeatureMap& fmap, - bool with_stats, + std::vector DumpModel(const FeatureMap& fmap, bool with_stats, std::string format) override { this->Configure(); + this->CheckModelInitialized(); + return gbm_->DumpModel(fmap, with_stats, format); } - Learner *Slice(int32_t begin_layer, int32_t end_layer, int32_t step, - bool *out_of_bound) override { + Learner* Slice(int32_t begin_layer, int32_t end_layer, int32_t step, + bool* out_of_bound) override { this->Configure(); + this->CheckModelInitialized(); + CHECK_NE(this->learner_model_param_.num_feature, 0); CHECK_GE(begin_layer, 0); - auto *out_impl = new LearnerImpl({}); - out_impl->learner_model_param_ = this->learner_model_param_; - out_impl->generic_parameters_ = this->generic_parameters_; + auto* out_impl = new LearnerImpl({}); + out_impl->learner_model_param_.Copy(this->learner_model_param_); + out_impl->ctx_ = this->ctx_; auto gbm = std::unique_ptr(GradientBooster::Create( - this->tparam_.booster, &out_impl->generic_parameters_, - &out_impl->learner_model_param_)); + this->tparam_.booster, &out_impl->ctx_, &out_impl->learner_model_param_)); this->gbm_->Slice(begin_layer, end_layer, step, gbm.get(), out_of_bound); out_impl->gbm_ = std::move(gbm); - Json config { Object() }; + Json config{Object()}; this->SaveConfig(&config); out_impl->mparam_ = this->mparam_; out_impl->attributes_ = this->attributes_; @@ -1156,15 +1285,17 @@ class LearnerImpl : public LearnerIO { monitor_.Start("UpdateOneIter"); TrainingObserver::Instance().Update(iter); this->Configure(); - if (generic_parameters_.seed_per_iteration) { - common::GlobalRandom().seed(generic_parameters_.seed * kRandSeedMagic + iter); + this->InitBaseScore(train.get()); + + if (ctx_.seed_per_iteration) { + common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter); } this->CheckDataSplitMode(); this->ValidateDMatrix(train.get(), true); auto local_cache = this->GetPredictionCache(); - auto& predt = local_cache->Cache(train, generic_parameters_.gpu_id); + auto& predt = local_cache->Cache(train, ctx_.gpu_id); monitor_.Start("PredictRaw"); this->PredictRaw(train.get(), &predt, true, 0, 0); @@ -1184,14 +1315,18 @@ class LearnerImpl : public LearnerIO { HostDeviceVector* in_gpair) override { monitor_.Start("BoostOneIter"); this->Configure(); - if (generic_parameters_.seed_per_iteration) { - common::GlobalRandom().seed(generic_parameters_.seed * kRandSeedMagic + iter); + // Should have been set to default in the first prediction. 
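+    // Gradients for BoostOneIter come from the caller (e.g. a custom
+    // objective), so no label-based estimation can happen at this point.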
+ CHECK(mparam_.base_score_estimated); + + if (ctx_.seed_per_iteration) { + common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter); } this->CheckDataSplitMode(); this->ValidateDMatrix(train.get(), true); + auto local_cache = this->GetPredictionCache(); - local_cache->Cache(train, generic_parameters_.gpu_id); + local_cache->Cache(train, ctx_.gpu_id); gbm_->DoBoost(train.get(), in_gpair, &local_cache->Entry(train.get()), obj_.get()); monitor_.Stop("BoostOneIter"); @@ -1202,23 +1337,24 @@ class LearnerImpl : public LearnerIO { const std::vector& data_names) override { monitor_.Start("EvalOneIter"); this->Configure(); + this->CheckModelInitialized(); std::ostringstream os; os.precision(std::numeric_limits::max_digits10); os << '[' << iter << ']' << std::setiosflags(std::ios::fixed); if (metrics_.size() == 0 && tparam_.disable_default_eval_metric <= 0) { - metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &generic_parameters_)); + metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &ctx_)); metrics_.back()->Configure({cfg_.begin(), cfg_.end()}); } auto local_cache = this->GetPredictionCache(); for (size_t i = 0; i < data_sets.size(); ++i) { std::shared_ptr m = data_sets[i]; - auto &predt = local_cache->Cache(m, generic_parameters_.gpu_id); + auto &predt = local_cache->Cache(m, ctx_.gpu_id); this->ValidateDMatrix(m.get(), false); this->PredictRaw(m.get(), &predt, false, 0, 0); - auto &out = output_predictions_.Cache(m, generic_parameters_.gpu_id).predictions; + auto &out = output_predictions_.Cache(m, ctx_.gpu_id).predictions; out.Resize(predt.predictions.Size()); out.Copy(predt.predictions); @@ -1241,6 +1377,9 @@ class LearnerImpl : public LearnerIO { static_cast(pred_interactions) + static_cast(pred_contribs); this->Configure(); + this->InitBaseScore(nullptr); + this->CheckModelInitialized(); + CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time."; if (pred_contribs) { gbm_->PredictContribution(data.get(), out_preds, layer_begin, layer_end, approx_contribs); @@ -1251,10 +1390,10 @@ class LearnerImpl : public LearnerIO { gbm_->PredictLeaf(data.get(), out_preds, layer_begin, layer_end); } else { auto local_cache = this->GetPredictionCache(); - auto& prediction = local_cache->Cache(data, generic_parameters_.gpu_id); + auto& prediction = local_cache->Cache(data, ctx_.gpu_id); this->PredictRaw(data.get(), &prediction, training, layer_begin, layer_end); // Copy the prediction cache to output prediction. 
out_preds comes from C API - out_preds->SetDevice(generic_parameters_.gpu_id); + out_preds->SetDevice(ctx_.gpu_id); out_preds->Resize(prediction.predictions.Size()); out_preds->Copy(prediction.predictions); if (!output_margin) { @@ -1268,8 +1407,10 @@ class LearnerImpl : public LearnerIO { CHECK(!this->need_configuration_); return this->gbm_->BoostedRounds(); } + uint32_t Groups() const override { CHECK(!this->need_configuration_); + this->CheckModelInitialized(); return this->learner_model_param_.num_output_group; } @@ -1281,6 +1422,9 @@ class LearnerImpl : public LearnerIO { HostDeviceVector** out_preds, uint32_t iteration_begin, uint32_t iteration_end) override { this->Configure(); + this->InitBaseScore(nullptr); + this->CheckModelInitialized(); + auto& out_predictions = this->GetThreadLocal().prediction_entry; this->gbm_->InplacePredict(p_m, missing, &out_predictions, iteration_begin, iteration_end); if (type == PredictionType::kValue) { @@ -1296,6 +1440,8 @@ class LearnerImpl : public LearnerIO { void CalcFeatureScore(std::string const& importance_type, common::Span trees, std::vector* features, std::vector* scores) override { this->Configure(); + this->CheckModelInitialized(); + gbm_->FeatureScore(importance_type, trees, features, scores); } @@ -1315,17 +1461,17 @@ class LearnerImpl : public LearnerIO { void PredictRaw(DMatrix *data, PredictionCacheEntry *out_preds, bool training, unsigned layer_begin, unsigned layer_end) const { CHECK(gbm_ != nullptr) << "Predict must happen after Load or configuration"; + this->CheckModelInitialized(); this->ValidateDMatrix(data, false); gbm_->PredictBatch(data, out_preds, training, layer_begin, layer_end); } void ValidateDMatrix(DMatrix* p_fmat, bool is_training) const { MetaInfo const& info = p_fmat->Info(); - info.Validate(generic_parameters_.gpu_id); + info.Validate(ctx_.gpu_id); auto const row_based_split = [this]() { - return tparam_.dsplit == DataSplitMode::kRow || - tparam_.dsplit == DataSplitMode::kAuto; + return tparam_.dsplit == DataSplitMode::kRow || tparam_.dsplit == DataSplitMode::kAuto; }; if (row_based_split()) { if (is_training) { diff --git a/src/objective/adaptive.h b/src/objective/adaptive.h index 85c041347cb9..00d27a57afef 100644 --- a/src/objective/adaptive.h +++ b/src/objective/adaptive.h @@ -7,6 +7,7 @@ #include #include +#include "../common/common.h" #include "rabit/rabit.h" #include "xgboost/generic_parameters.h" #include "xgboost/host_device_vector.h" diff --git a/src/objective/objective.cc b/src/objective/objective.cc index 5991e918d315..5ba5f87fb8c5 100644 --- a/src/objective/objective.cc +++ b/src/objective/objective.cc @@ -1,10 +1,10 @@ /*! - * Copyright 2015 by Contributors + * Copyright 2015-2022 by Contributors * \file objective.cc * \brief Registry of all objective functions. 
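+ *        It also provides the default `ObjFunction::InitEstimation`.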
*/ -#include #include +#include #include @@ -31,6 +31,11 @@ ObjFunction* ObjFunction::Create(const std::string& name, GenericParameter const return pobj; } +void ObjFunction::InitEstimation(MetaInfo const&, linalg::Tensor* base_score) const { + CHECK(base_score); + base_score->Reshape(1); + (*base_score)(0) = DefaultBaseScore(); +} } // namespace xgboost namespace xgboost { diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index ecd906f699a4..4afefe8f7e6f 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -16,6 +16,7 @@ #include "../common/common.h" #include "../common/linalg_op.h" #include "../common/pseudo_huber.h" +#include "../common/stats.h" #include "../common/threading_utils.h" #include "../common/transform.h" #include "./regression_loss.h" @@ -37,14 +38,18 @@ namespace xgboost { namespace obj { namespace { -void CheckRegInputs(MetaInfo const& info, HostDeviceVector const& preds) { +void CheckInitInputs(MetaInfo const& info) { CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels."; - CHECK_EQ(info.labels.Size(), preds.Size()) << "Invalid shape of labels."; if (!info.weights_.Empty()) { CHECK_EQ(info.weights_.Size(), info.num_row_) << "Number of weights should be equal to number of data points."; } } + +void CheckRegInputs(MetaInfo const& info, HostDeviceVector const& preds) { + CheckInitInputs(info); + CHECK_EQ(info.labels.Size(), preds.Size()) << "Invalid shape of labels."; +} } // anonymous namespace #if defined(XGBOOST_USE_CUDA) @@ -162,6 +167,17 @@ class RegLossObj : public ObjFunction { return Loss::ProbToMargin(base_score); } + void InitEstimation(MetaInfo const& info, linalg::Tensor* base_margin) const override { + CheckInitInputs(info); + base_margin->Reshape(1); + auto out = base_margin->HostView(); + std::uint64_t n_samples = info.num_row_; + rabit::Allreduce(&n_samples, 1); + auto mean = common::Mean(ctx_, info.labels, info.weights_, n_samples); + rabit::Allreduce(&mean, 1); + out(0) = mean; + } + void SaveConfig(Json* p_out) const override { auto& out = *p_out; out["name"] = String(Loss::Name()); @@ -698,6 +714,28 @@ class MeanAbsoluteError : public ObjFunction { }); } + void InitEstimation(MetaInfo const& info, linalg::Tensor* base_margin) const override { + CheckInitInputs(info); + base_margin->Reshape(1); + auto out = base_margin->HostView(); + std::int32_t invalid{0}; + if (info.num_row_ == 0) { + out(0) = 0; + invalid++; + } else { + out(0) = common::Median(ctx_, info.labels, info.weights_); + } + + auto world = static_cast(rabit::GetWorldSize()); + rabit::Allreduce(&invalid, 1); // number of empty workers + world -= static_cast(invalid); // number of non-empty workers + + // average base score across all valid workers + rabit::Allreduce(out.Values().data(), out.Values().size()); + std::transform(linalg::cbegin(out), linalg::cend(out), linalg::begin(out), + [world](float v) { return v / world; }); + } + void UpdateTreeLeaf(HostDeviceVector const& position, MetaInfo const& info, HostDeviceVector const& prediction, RegTree* p_tree) const override { if (ctx_->IsCPU()) { diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index 0e213b281231..444d1b089d21 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -429,11 +429,12 @@ class CPUPredictor : public Predictor { } out_preds->resize(model.learner_model_param->num_output_group * (model.param.size_leaf_vector + 1)); + auto base_score = model.learner_model_param->BaseScore(ctx_)(0); 
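+    // BaseScore() returns a rank-1 view over a single element; read the
+    // scalar once here rather than inside the group loop.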
// loop over output groups for (uint32_t gid = 0; gid < model.learner_model_param->num_output_group; ++gid) { - (*out_preds)[gid] = PredValue(inst, model.trees, model.tree_info, gid, - &feat_vecs[0], 0, ntree_limit) + - model.learner_model_param->base_score; + (*out_preds)[gid] = + PredValue(inst, model.trees, model.tree_info, gid, &feat_vecs[0], 0, ntree_limit) + + base_score; } } @@ -504,7 +505,8 @@ class CPUPredictor : public Predictor { common::ParallelFor(ntree_limit, n_threads, [&](bst_omp_uint i) { FillNodeMeanValues(model.trees[i].get(), &(mean_values[i])); }); - auto base_margin = info.base_margin_.View(GenericParameter::kCpuId); + auto base_margin = info.base_margin_.View(Context::kCpuId); + auto base_score = model.learner_model_param->BaseScore(Context::kCpuId)(0); // start collecting the contributions for (const auto &batch : p_fmat->GetBatches()) { auto page = batch.GetView(); @@ -548,7 +550,7 @@ class CPUPredictor : public Predictor { CHECK_EQ(base_margin.Shape(1), ngroup); p_contribs[ncolumns - 1] += base_margin(row_idx, gid); } else { - p_contribs[ncolumns - 1] += model.learner_model_param->base_score; + p_contribs[ncolumns - 1] += base_score; } } }); diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu index 163f7b40f368..2716883303b5 100644 --- a/src/predictor/gpu_predictor.cu +++ b/src/predictor/gpu_predictor.cu @@ -511,7 +511,7 @@ void ExtractPaths( n = d_nodes[n.Parent() + tree_offset]; path_length++; } - return PathInfo{int64_t(idx), path_length, tree_idx}; + return PathInfo{static_cast(idx), path_length, tree_idx}; }); auto end = thrust::copy_if( thrust::cuda::par(alloc), nodes_transform, @@ -859,13 +859,13 @@ class GPUPredictor : public xgboost::Predictor { // Add the base margin term to last column p_fmat->Info().base_margin_.SetDevice(ctx_->gpu_id); const auto margin = p_fmat->Info().base_margin_.Data()->ConstDeviceSpan(); - float base_score = model.learner_model_param->base_score; - dh::LaunchN( - p_fmat->Info().num_row_ * model.learner_model_param->num_output_group, - [=] __device__(size_t idx) { - phis[(idx + 1) * contributions_columns - 1] += - margin.empty() ? base_score : margin[idx]; - }); + + auto base_score = model.learner_model_param->BaseScore(ctx_); + dh::LaunchN(p_fmat->Info().num_row_ * model.learner_model_param->num_output_group, + [=] __device__(size_t idx) { + phis[(idx + 1) * contributions_columns - 1] += + margin.empty() ? base_score(0) : margin[idx]; + }); } void PredictInteractionContributions(DMatrix* p_fmat, @@ -918,17 +918,17 @@ class GPUPredictor : public xgboost::Predictor { // Add the base margin term to last column p_fmat->Info().base_margin_.SetDevice(ctx_->gpu_id); const auto margin = p_fmat->Info().base_margin_.Data()->ConstDeviceSpan(); - float base_score = model.learner_model_param->base_score; + + auto base_score = model.learner_model_param->BaseScore(ctx_); size_t n_features = model.learner_model_param->num_feature; - dh::LaunchN( - p_fmat->Info().num_row_ * model.learner_model_param->num_output_group, - [=] __device__(size_t idx) { - size_t group = idx % ngroup; - size_t row_idx = idx / ngroup; - phis[gpu_treeshap::IndexPhiInteractions( - row_idx, ngroup, group, n_features, n_features, n_features)] += - margin.empty() ? 
base_score : margin[idx]; - }); + dh::LaunchN(p_fmat->Info().num_row_ * model.learner_model_param->num_output_group, + [=] __device__(size_t idx) { + size_t group = idx % ngroup; + size_t row_idx = idx / ngroup; + phis[gpu_treeshap::IndexPhiInteractions(row_idx, ngroup, group, n_features, + n_features, n_features)] += + margin.empty() ? base_score(0) : margin[idx]; + }); } void PredictInstance(const SparsePage::Inst&, diff --git a/src/predictor/predictor.cc b/src/predictor/predictor.cc index 10d006a832d0..5701ed892f23 100644 --- a/src/predictor/predictor.cc +++ b/src/predictor/predictor.cc @@ -80,14 +80,15 @@ void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVectorgpu_id >= 0) { out_preds->SetDevice(ctx_->gpu_id); } - if (base_margin->Size() != 0) { + if (!base_margin->Empty()) { out_preds->Resize(n); ValidateBaseMarginShape(info.base_margin_, info.num_row_, n_classes); out_preds->Copy(*base_margin); } else { - out_preds->Resize(n); // cannot rely on the Resize to fill as it might skip if the size is already correct. - out_preds->Fill(model.learner_model_param->base_score); + out_preds->Resize(n); + auto base_score = model.learner_model_param->BaseScore(Context::kCpuId)(0); + out_preds->Fill(base_score); } } } // namespace xgboost diff --git a/tests/cpp/common/test_stats.cc b/tests/cpp/common/test_stats.cc index 2a1e375c0f20..415bf4fdac5f 100644 --- a/tests/cpp/common/test_stats.cc +++ b/tests/cpp/common/test_stats.cc @@ -54,5 +54,17 @@ TEST(Stats, WeightedQuantile) { q = WeightedQuantile(1.0, beg, end, w); ASSERT_EQ(q, 5); } + +TEST(Stats, Mean) { + Context ctx; + linalg::Tensor arr({1.f, 2.f, 3.f, 4.f}, {2, 2}, Context::kCpuId); + HostDeviceVector weights; + auto mean = Mean(&ctx, arr, weights); + ASSERT_EQ(mean, 2.5); + + weights.Resize(2, 1.0f); + mean = Mean(&ctx, arr, weights); + ASSERT_EQ(mean, 2.5); +} } // namespace common } // namespace xgboost diff --git a/tests/cpp/common/test_stats.cu b/tests/cpp/common/test_stats.cu index eee92921d931..50528269de1c 100644 --- a/tests/cpp/common/test_stats.cu +++ b/tests/cpp/common/test_stats.cu @@ -2,10 +2,12 @@ * Copyright 2022 by XGBoost Contributors */ #include + #include #include #include "../../../src/common/stats.cuh" +#include "../../../src/common/stats.h" #include "xgboost/base.h" #include "xgboost/generic_parameters.h" #include "xgboost/host_device_vector.h" @@ -73,5 +75,18 @@ class StatsGPU : public ::testing::Test { TEST_F(StatsGPU, Quantile) { this->NonWeighted(); } TEST_F(StatsGPU, WeightedQuantile) { this->Weighted(); } + +TEST(Stats, GPUMean) { + Context ctx; + ctx.gpu_id = 0; + linalg::Tensor arr({1.f, 2.f, 3.f, 4.f}, {2, 2}, Context::kCpuId); + HostDeviceVector weights; + auto mean = Mean(&ctx, arr, weights); + ASSERT_EQ(mean, 2.5); + + weights.Resize(2, 1.0f); + mean = Mean(&ctx, arr, weights); + ASSERT_EQ(mean, 2.5); +} } // namespace common } // namespace xgboost diff --git a/tests/cpp/gbm/test_gblinear.cc b/tests/cpp/gbm/test_gblinear.cc index 61d22f5ea1ff..c53bb08f68ef 100644 --- a/tests/cpp/gbm/test_gblinear.cc +++ b/tests/cpp/gbm/test_gblinear.cc @@ -19,15 +19,11 @@ namespace gbm { TEST(GBLinear, JsonIO) { size_t constexpr kRows = 16, kCols = 16; - LearnerModelParam param; - param.num_feature = kCols; - param.num_output_group = 1; + Context ctx; + LearnerModelParam mparam{MakeMP(kCols, .5, 1)}; - GenericParameter gparam; - gparam.Init(Args{}); - - std::unique_ptr gbm { - CreateTrainedGBM("gblinear", Args{}, kRows, kCols, ¶m, &gparam) }; + std::unique_ptr gbm{ + CreateTrainedGBM("gblinear", Args{}, 
kRows, kCols, &mparam, &ctx)}; Json model { Object() }; gbm->SaveModel(&model); ASSERT_TRUE(IsA(model)); diff --git a/tests/cpp/gbm/test_gbtree.cc b/tests/cpp/gbm/test_gbtree.cc index a5c16f7951d7..13ec23c14906 100644 --- a/tests/cpp/gbm/test_gbtree.cc +++ b/tests/cpp/gbm/test_gbtree.cc @@ -18,15 +18,11 @@ namespace xgboost { TEST(GBTree, SelectTreeMethod) { size_t constexpr kCols = 10; - GenericParameter generic_param; - generic_param.UpdateAllowUnknown(Args{}); - LearnerModelParam mparam; - mparam.base_score = 0.5; - mparam.num_feature = kCols; - mparam.num_output_group = 1; + Context ctx; + LearnerModelParam mparam{MakeMP(kCols, .5, 1)}; std::unique_ptr p_gbm { - GradientBooster::Create("gbtree", &generic_param, &mparam)}; + GradientBooster::Create("gbtree", &ctx, &mparam)}; auto& gbtree = dynamic_cast (*p_gbm); // Test if `tree_method` can be set @@ -45,7 +41,7 @@ TEST(GBTree, SelectTreeMethod) { ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker"); #ifdef XGBOOST_USE_CUDA - generic_param.UpdateAllowUnknown(Args{{"gpu_id", "0"}}); + ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}}); gbtree.Configure({{"tree_method", "gpu_hist"}}); ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist"); gbtree.Configure({{"booster", "dart"}, {"tree_method", "gpu_hist"}}); @@ -55,15 +51,11 @@ TEST(GBTree, SelectTreeMethod) { TEST(GBTree, PredictionCache) { size_t constexpr kRows = 100, kCols = 10; - GenericParameter generic_param; - generic_param.UpdateAllowUnknown(Args{}); - LearnerModelParam mparam; - mparam.base_score = 0.5; - mparam.num_feature = kCols; - mparam.num_output_group = 1; + Context ctx; + LearnerModelParam mparam{MakeMP(kCols, .5, 1)}; std::unique_ptr p_gbm { - GradientBooster::Create("gbtree", &generic_param, &mparam)}; + GradientBooster::Create("gbtree", &ctx, &mparam)}; auto& gbtree = dynamic_cast (*p_gbm); gbtree.Configure({{"tree_method", "hist"}}); @@ -176,16 +168,11 @@ TEST(GBTree, ChoosePredictor) { TEST(GBTree, JsonIO) { size_t constexpr kRows = 16, kCols = 16; - LearnerModelParam mparam; - mparam.num_feature = kCols; - mparam.num_output_group = 1; - mparam.base_score = 0.5; - - GenericParameter gparam; - gparam.Init(Args{}); + Context ctx; + LearnerModelParam mparam{MakeMP(kCols, .5, 1)}; std::unique_ptr gbm { - CreateTrainedGBM("gbtree", Args{}, kRows, kCols, &mparam, &gparam) }; + CreateTrainedGBM("gbtree", Args{}, kRows, kCols, &mparam, &ctx) }; Json model {Object()}; model["model"] = Object(); @@ -215,16 +202,11 @@ TEST(GBTree, JsonIO) { TEST(Dart, JsonIO) { size_t constexpr kRows = 16, kCols = 16; - LearnerModelParam mparam; - mparam.num_feature = kCols; - mparam.base_score = 0.5; - mparam.num_output_group = 1; - - GenericParameter gparam; - gparam.Init(Args{}); + Context ctx; + LearnerModelParam mparam{MakeMP(kCols, .5, 1)}; - std::unique_ptr gbm { - CreateTrainedGBM("dart", Args{}, kRows, kCols, &mparam, &gparam) }; + std::unique_ptr gbm{ + CreateTrainedGBM("dart", Args{}, kRows, kCols, &mparam, &ctx)}; Json model {Object()}; model["model"] = Object(); diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h index b79ea27187f5..c7f73495c49f 100644 --- a/tests/cpp/helpers.h +++ b/tests/cpp/helpers.h @@ -451,5 +451,16 @@ class RMMAllocator; using RMMAllocatorPtr = std::unique_ptr; RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv); +/* + * \brief Make learner model param + */ +inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint32_t n_groups, + int32_t device = Context::kCpuId) { + size_t shape[1]{1}; + LearnerModelParam mparam(n_features, 
linalg::Tensor{{base_score}, shape, device}, + n_groups); + return mparam; +} + } // namespace xgboost #endif diff --git a/tests/cpp/linear/test_linear.cc b/tests/cpp/linear/test_linear.cc index f021641a2b23..779c20940598 100644 --- a/tests/cpp/linear/test_linear.cc +++ b/tests/cpp/linear/test_linear.cc @@ -18,10 +18,7 @@ TEST(Linear, Shotgun) { auto p_fmat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX); - LearnerModelParam mparam; - mparam.num_feature = kCols; - mparam.num_output_group = 1; - mparam.base_score = 0.5; + LearnerModelParam mparam{MakeMP(kCols, .5, 1)}; { auto updater = std::unique_ptr( @@ -54,10 +51,7 @@ TEST(Linear, coordinate) { auto p_fmat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX); - LearnerModelParam mparam; - mparam.num_feature = kCols; - mparam.num_output_group = 1; - mparam.base_score = 0.5; + LearnerModelParam mparam{MakeMP(kCols, .5, 1)}; auto updater = std::unique_ptr( xgboost::LinearUpdater::Create("coord_descent", &lparam)); diff --git a/tests/cpp/linear/test_linear.cu b/tests/cpp/linear/test_linear.cu index c2eea45d166c..193e9b4b21eb 100644 --- a/tests/cpp/linear/test_linear.cu +++ b/tests/cpp/linear/test_linear.cu @@ -13,15 +13,11 @@ TEST(Linear, GPUCoordinate) { size_t constexpr kCols = 10; auto mat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); - auto lparam = CreateEmptyGenericParam(GPUIDX); - - LearnerModelParam mparam; - mparam.num_feature = kCols; - mparam.num_output_group = 1; - mparam.base_score = 0.5; + auto ctx = CreateEmptyGenericParam(GPUIDX); + LearnerModelParam mparam{MakeMP(kCols, .5, 1)}; auto updater = std::unique_ptr( - xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam)); + xgboost::LinearUpdater::Create("gpu_coord_descent", &ctx)); updater->Configure({{"eta", "1."}}); xgboost::HostDeviceVector gpair( mat->Info().num_row_, xgboost::GradientPair(-5, 1.0)); @@ -36,4 +32,4 @@ TEST(Linear, GPUCoordinate) { TEST(GPUCoordinate, JsonIO) { TestUpdaterJsonIO("gpu_coord_descent"); } -} // namespace xgboost \ No newline at end of file +} // namespace xgboost diff --git a/tests/cpp/predictor/test_cpu_predictor.cc b/tests/cpp/predictor/test_cpu_predictor.cc index 8ba270083c74..8db605be3bcc 100644 --- a/tests/cpp/predictor/test_cpu_predictor.cc +++ b/tests/cpp/predictor/test_cpu_predictor.cc @@ -21,14 +21,11 @@ TEST(CpuPredictor, Basic) { size_t constexpr kRows = 5; size_t constexpr kCols = 5; - LearnerModelParam param; - param.num_feature = kCols; - param.base_score = 0.0; - param.num_output_group = 1; + LearnerModelParam mparam{MakeMP(kCols, .0, 1)}; GenericParameter ctx; ctx.UpdateAllowUnknown(Args{}); - gbm::GBTreeModel model = CreateTestModel(¶m, &ctx); + gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx); auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); @@ -104,14 +101,11 @@ TEST(CpuPredictor, ExternalMemory) { std::unique_ptr cpu_predictor = std::unique_ptr(Predictor::Create("cpu_predictor", &lparam)); - LearnerModelParam param; - param.base_score = 0; - param.num_feature = dmat->Info().num_col_; - param.num_output_group = 1; + LearnerModelParam mparam{MakeMP(dmat->Info().num_col_, .0, 1)}; GenericParameter ctx; ctx.UpdateAllowUnknown(Args{}); - gbm::GBTreeModel model = CreateTestModel(¶m, &ctx); + gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx); // Test predict batch PredictionCacheEntry out_predictions; @@ -201,16 +195,11 @@ 
TEST(CpuPredictor, InplacePredict) { void TestUpdatePredictionCache(bool use_subsampling) { size_t constexpr kRows = 64, kCols = 16, kClasses = 4; - LearnerModelParam mparam; - mparam.num_feature = kCols; - mparam.num_output_group = kClasses; - mparam.base_score = 0; - - GenericParameter gparam; - gparam.Init(Args{}); + LearnerModelParam mparam{MakeMP(kCols, .0, kClasses)}; + Context ctx; std::unique_ptr gbm; - gbm.reset(static_cast(GradientBooster::Create("gbtree", &gparam, &mparam))); + gbm.reset(static_cast(GradientBooster::Create("gbtree", &ctx, &mparam))); std::map cfg; cfg["tree_method"] = "hist"; cfg["predictor"] = "cpu_predictor"; diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu index 8dacadac5403..2a0b69cbd629 100644 --- a/tests/cpp/predictor/test_gpu_predictor.cu +++ b/tests/cpp/predictor/test_gpu_predictor.cu @@ -1,5 +1,5 @@ /*! - * Copyright 2017-2020 XGBoost contributors + * Copyright 2017-2022 XGBoost contributors */ #include #include @@ -34,14 +34,10 @@ TEST(GPUPredictor, Basic) { int n_row = i, n_col = i; auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix(); - LearnerModelParam param; - param.num_feature = n_col; - param.num_output_group = 1; - param.base_score = 0.5; - - GenericParameter ctx; - ctx.UpdateAllowUnknown(Args{}); - gbm::GBTreeModel model = CreateTestModel(¶m, &ctx); + Context ctx; + ctx.gpu_id = 0; + LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)}; + gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx); // Test predict batch PredictionCacheEntry gpu_out_predictions; @@ -93,15 +89,12 @@ TEST(GPUPredictor, ExternalMemoryTest) { std::unique_ptr(Predictor::Create("gpu_predictor", &lparam)); gpu_predictor->Configure({}); - LearnerModelParam param; - param.num_feature = 5; const int n_classes = 3; - param.num_output_group = n_classes; - param.base_score = 0.5; + Context ctx; + ctx.gpu_id = 0; + LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.gpu_id)}; - GenericParameter ctx; - ctx.UpdateAllowUnknown(Args{}); - gbm::GBTreeModel model = CreateTestModel(¶m, &ctx, n_classes); + gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, n_classes); std::vector> dmats; dmats.push_back(CreateSparsePageDMatrix(400)); @@ -171,15 +164,10 @@ TEST(GpuPredictor, LesserFeatures) { TEST(GPUPredictor, ShapStump) { cudaSetDevice(0); - LearnerModelParam param; - param.num_feature = 1; - param.num_output_group = 1; - param.base_score = 0.5; - - GenericParameter ctx; - ctx.UpdateAllowUnknown(Args{}); - - gbm::GBTreeModel model(¶m, &ctx); + Context ctx; + ctx.gpu_id = 0; + LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)}; + gbm::GBTreeModel model(&mparam, &ctx); std::vector> trees; trees.push_back(std::unique_ptr(new RegTree)); @@ -193,24 +181,20 @@ TEST(GPUPredictor, ShapStump) { auto dmat = RandomDataGenerator(3, 1, 0).GenerateDMatrix(); gpu_predictor->PredictContribution(dmat.get(), &predictions, model); auto& phis = predictions.HostVector(); + auto base_score = mparam.BaseScore(Context::kCpuId)(0); EXPECT_EQ(phis[0], 0.0); - EXPECT_EQ(phis[1], param.base_score); + EXPECT_EQ(phis[1], base_score); EXPECT_EQ(phis[2], 0.0); - EXPECT_EQ(phis[3], param.base_score); + EXPECT_EQ(phis[3], base_score); EXPECT_EQ(phis[4], 0.0); - EXPECT_EQ(phis[5], param.base_score); + EXPECT_EQ(phis[5], base_score); } TEST(GPUPredictor, Shap) { - LearnerModelParam param; - param.num_feature = 1; - param.num_output_group = 1; - param.base_score = 0.5; - - GenericParameter ctx; - ctx.UpdateAllowUnknown(Args{}); - - 
gbm::GBTreeModel model(¶m, &ctx); + Context ctx; + ctx.gpu_id = 0; + LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)}; + gbm::GBTreeModel model(&mparam, &ctx); std::vector> trees; trees.push_back(std::unique_ptr(new RegTree)); @@ -258,14 +242,9 @@ TEST(GPUPredictor, PredictLeafBasic) { std::unique_ptr(Predictor::Create("gpu_predictor", &lparam)); gpu_predictor->Configure({}); - LearnerModelParam param; - param.num_feature = kCols; - param.base_score = 0.0; - param.num_output_group = 1; - - GenericParameter ctx; - ctx.UpdateAllowUnknown(Args{}); - gbm::GBTreeModel model = CreateTestModel(¶m, &ctx); + LearnerModelParam mparam{MakeMP(kCols, .0, 1)}; + Context ctx; + gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx); HostDeviceVector leaf_out_predictions; gpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model); diff --git a/tests/cpp/predictor/test_predictor.cc b/tests/cpp/predictor/test_predictor.cc index 34c4d48e6dc1..64d2b9a81ea2 100644 --- a/tests/cpp/predictor/test_predictor.cc +++ b/tests/cpp/predictor/test_predictor.cc @@ -210,11 +210,7 @@ void TestCategoricalPrediction(std::string name) { size_t constexpr kCols = 10; PredictionCacheEntry out_predictions; - LearnerModelParam param; - param.num_feature = kCols; - param.num_output_group = 1; - param.base_score = 0.5; - + LearnerModelParam mparam{MakeMP(kCols, .5, 1)}; uint32_t split_ind = 3; bst_cat_t split_cat = 4; float left_weight = 1.3f; @@ -222,7 +218,7 @@ void TestCategoricalPrediction(std::string name) { GenericParameter ctx; ctx.UpdateAllowUnknown(Args{}); - gbm::GBTreeModel model(¶m, &ctx); + gbm::GBTreeModel model(&mparam, &ctx); GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight); ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}}); @@ -237,27 +233,24 @@ void TestCategoricalPrediction(std::string name) { predictor->InitOutPredictions(m->Info(), &out_predictions.predictions, model); predictor->PredictBatch(m.get(), &out_predictions, model, 0); + auto score = mparam.BaseScore(Context::kCpuId)(0); ASSERT_EQ(out_predictions.predictions.Size(), 1ul); ASSERT_EQ(out_predictions.predictions.HostVector()[0], - right_weight + param.base_score); // go to right for matching cat + right_weight + score); // go to right for matching cat row[split_ind] = split_cat + 1; m = GetDMatrixFromData(row, 1, kCols); out_predictions.version = 0; predictor->InitOutPredictions(m->Info(), &out_predictions.predictions, model); predictor->PredictBatch(m.get(), &out_predictions, model, 0); - ASSERT_EQ(out_predictions.predictions.HostVector()[0], - left_weight + param.base_score); + ASSERT_EQ(out_predictions.predictions.HostVector()[0], left_weight + score); } void TestCategoricalPredictLeaf(StringView name) { size_t constexpr kCols = 10; PredictionCacheEntry out_predictions; - LearnerModelParam param; - param.num_feature = kCols; - param.num_output_group = 1; - param.base_score = 0.5; + LearnerModelParam mparam{MakeMP(kCols, .5, 1)}; uint32_t split_ind = 3; bst_cat_t split_cat = 4; @@ -267,7 +260,7 @@ void TestCategoricalPredictLeaf(StringView name) { GenericParameter ctx; ctx.UpdateAllowUnknown(Args{}); - gbm::GBTreeModel model(¶m, &ctx); + gbm::GBTreeModel model(&mparam, &ctx); GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight); ctx.gpu_id = 0; diff --git a/tests/cpp/predictor/test_predictor.h b/tests/cpp/predictor/test_predictor.h index 1ff96096c533..81ee249e250e 100644 --- a/tests/cpp/predictor/test_predictor.h +++ b/tests/cpp/predictor/test_predictor.h @@ -12,11 +12,7 @@ void 
TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols, std::shared_ptr p_hist) { constexpr size_t kClasses { 3 }; - LearnerModelParam param; - param.num_feature = cols; - param.num_output_group = kClasses; - param.base_score = 0.5; - + LearnerModelParam mparam{MakeMP(cols, .5, kClasses)}; auto lparam = CreateEmptyGenericParam(0); std::unique_ptr predictor = @@ -25,7 +21,7 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols, GenericParameter ctx; ctx.UpdateAllowUnknown(Args{}); - gbm::GBTreeModel model = CreateTestModel(¶m, &ctx, kClasses); + gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, kClasses); { auto p_precise = RandomDataGenerator(rows, cols, 0).GenerateDMatrix(); diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index 4a8214e9c5cd..49c1d9537426 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -3,8 +3,10 @@ */ #include #include +#include // ObjFunction #include +#include // std::stof, std::string #include #include @@ -206,8 +208,7 @@ TEST(Learner, MultiThreadedPredict) { p_dmat->Info().labels.Reshape(kRows); CHECK_NE(p_dmat->Info().num_col_, 0); - std::shared_ptr p_data{ - RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix()}; + std::shared_ptr p_data{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix()}; CHECK_NE(p_data->Info().num_col_, 0); std::shared_ptr learner{Learner::Create({p_dmat})}; @@ -448,4 +449,77 @@ TEST(Learner, MultiTarget) { EXPECT_THROW({ learner->Configure(); }, dmlc::Error); } } + +/** + * Test the model initialization sequence is correctly performed. + */ +TEST(Learner, InitEstimation) { + size_t constexpr kCols = 10; + auto Xy = RandomDataGenerator{10, kCols, 0}.GenerateDMatrix(true); + + { + std::unique_ptr learner{Learner::Create({Xy})}; + learner->SetParam("objective", "reg:absoluteerror"); + learner->Configure(); + HostDeviceVector predt; + learner->Predict(Xy, false, &predt, 0, 0); + + auto h_predt = predt.ConstHostSpan(); + for (auto v : h_predt) { + ASSERT_EQ(v, ObjFunction::DefaultBaseScore()); + } + Json config{Object{}}; + learner->SaveConfig(&config); + auto base_score = + std::stof(get(config["learner"]["learner_model_param"]["base_score"])); + // No base score is estimated yet. 
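+    // Configure() alone must not trigger the estimation; only UpdateOneIter()
+    // in the next block should change the stored base_score.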
+ ASSERT_EQ(base_score, ObjFunction::DefaultBaseScore()); + } + + { + std::unique_ptr learner{Learner::Create({Xy})}; + learner->SetParam("objective", "reg:absoluteerror"); + learner->UpdateOneIter(0, Xy); + + HostDeviceVector predt; + learner->Predict(Xy, false, &predt, 0, 0); + auto h_predt = predt.ConstHostSpan(); + for (auto v : h_predt) { + ASSERT_NE(v, ObjFunction::DefaultBaseScore()); + } + + Json config{Object{}}; + learner->SaveConfig(&config); + auto base_score = + std::stof(get(config["learner"]["learner_model_param"]["base_score"])); + ASSERT_NE(base_score, ObjFunction::DefaultBaseScore()); + + ASSERT_THROW( + { + learner->SetParam("base_score_estimated", "1"); + learner->Configure(); + }, + dmlc::Error); + } + + { + std::unique_ptr learner{Learner::Create({Xy})}; + learner->SetParam("objective", "reg:absoluteerror"); + learner->SetParam("base_score", "1.3"); + learner->Configure(); + HostDeviceVector predt; + learner->Predict(Xy, false, &predt, 0, 0); + auto h_predt = predt.ConstHostSpan(); + for (auto v : h_predt) { + ASSERT_FLOAT_EQ(v, 1.3); + } + learner->UpdateOneIter(0, Xy); + Json config{Object{}}; + learner->SaveConfig(&config); + auto base_score = + std::stof(get(config["learner"]["learner_model_param"]["base_score"])); + // no change + ASSERT_FLOAT_EQ(base_score, 1.3); + } +} } // namespace xgboost diff --git a/tests/cpp/test_serialization.cc b/tests/cpp/test_serialization.cc index d80a7442202e..15765f09f29d 100644 --- a/tests/cpp/test_serialization.cc +++ b/tests/cpp/test_serialization.cc @@ -418,6 +418,45 @@ TEST_F(SerializationTest, GPUCoordDescent) { } #endif // defined(XGBOOST_USE_CUDA) +class L1SerializationTest : public SerializationTest {}; + +TEST_F(L1SerializationTest, Exact) { + TestLearnerSerialization({{"booster", "gbtree"}, + {"objective", "reg:absoluteerror"}, + {"seed", "0"}, + {"max_depth", "2"}, + {"tree_method", "exact"}}, + fmap_, p_dmat_); +} + +TEST_F(L1SerializationTest, Approx) { + TestLearnerSerialization({{"booster", "gbtree"}, + {"objective", "reg:absoluteerror"}, + {"seed", "0"}, + {"max_depth", "2"}, + {"tree_method", "approx"}}, + fmap_, p_dmat_); +} + +TEST_F(L1SerializationTest, Hist) { + TestLearnerSerialization({{"booster", "gbtree"}, + {"objective", "reg:absoluteerror"}, + {"seed", "0"}, + {"max_depth", "2"}, + {"tree_method", "hist"}}, + fmap_, p_dmat_); +} + +#if defined(XGBOOST_USE_CUDA) +TEST_F(L1SerializationTest, GpuHist) { + TestLearnerSerialization({{"booster", "gbtree"}, + {"objective", "reg:absoluteerror"}, + {"seed", "0"}, + {"max_depth", "2"}, + {"tree_method", "gpu_hist"}}, + fmap_, p_dmat_); +} +#endif // defined(XGBOOST_USE_CUDA) class LogitSerializationTest : public SerializationTest { protected: diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index d0e7c5bc883d..7f29a92e6101 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -208,3 +208,8 @@ def test_specified_gpu_id_gpu_update(self, dataset, gpu_id): param = dataset.set_params(param) result = train_result(param, dataset.get_dmat(), 10) assert tm.non_increasing(result['train'][dataset.metric]) + + @pytest.mark.skipif(**tm.no_sklearn()) + @pytest.mark.parametrize("weighted", [True, False]) + def test_adaptive(self, weighted) -> None: + self.cputest.run_adaptive("gpu_hist", weighted) diff --git a/tests/python/test_model_compatibility.py b/tests/python/test_model_compatibility.py index 6f9a184922ab..88549e1f2acb 100644 --- a/tests/python/test_model_compatibility.py +++ 
b/tests/python/test_model_compatibility.py @@ -102,34 +102,38 @@ def run_scikit_model_check(name, path): @pytest.mark.skipif(**tm.no_sklearn()) def test_model_compatibility(): - '''Test model compatibility, can only be run on CI as others don't + """Test model compatibility, can only be run on CI as others don't have the credentials. - ''' + """ path = os.path.dirname(os.path.abspath(__file__)) - path = os.path.join(path, 'models') + path = os.path.join(path, "models") - zip_path, _ = urllib.request.urlretrieve('https://xgboost-ci-jenkins-artifacts.s3-us-west-2' + - '.amazonaws.com/xgboost_model_compatibility_test.zip') - with zipfile.ZipFile(zip_path, 'r') as z: - z.extractall(path) + if not os.path.exists(path): + zip_path, _ = urllib.request.urlretrieve( + "https://xgboost-ci-jenkins-artifacts.s3-us-west-2" + + ".amazonaws.com/xgboost_model_compatibility_test.zip" + ) + with zipfile.ZipFile(zip_path, "r") as z: + z.extractall(path) models = [ - os.path.join(root, f) for root, subdir, files in os.walk(path) + os.path.join(root, f) + for root, subdir, files in os.walk(path) for f in files - if f != 'version' + if f != "version" ] assert models for path in models: name = os.path.basename(path) - if name.startswith('xgboost-'): + if name.startswith("xgboost-"): booster = xgboost.Booster(model_file=path) run_booster_check(booster, name) # Do full serialization. booster = copy.copy(booster) run_booster_check(booster, name) - elif name.startswith('xgboost_scikit'): + elif name.startswith("xgboost_scikit"): run_scikit_model_check(name, path) else: assert False diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py index 3e43b98ff113..e28f173860e7 100644 --- a/tests/python/test_updaters.py +++ b/tests/python/test_updaters.py @@ -1,4 +1,4 @@ -from random import choice +import json from string import ascii_lowercase from typing import Dict, Any import testing as tm @@ -397,3 +397,72 @@ def test_categorical_ames_housing( def test_categorical_missing(self, rows, cols, cats): self.run_categorical_missing(rows, cols, cats, "approx") self.run_categorical_missing(rows, cols, cats, "hist") + + def run_adaptive(self, tree_method, weighted) -> None: + rng = np.random.RandomState(1994) + from sklearn.datasets import make_regression + from sklearn.utils import stats + + n_samples = 256 + X, y = make_regression(n_samples, 16, random_state=rng) + if weighted: + w = rng.normal(size=n_samples) + w -= w.min() + Xy = xgb.DMatrix(X, y, weight=w) + base_score = stats._weighted_percentile(y, w, percentile=50) + else: + Xy = xgb.DMatrix(X, y) + base_score = np.median(y) + + booster_0 = xgb.train( + { + "tree_method": tree_method, + "base_score": base_score, + "objective": "reg:absoluteerror", + }, + Xy, + num_boost_round=1, + ) + booster_1 = xgb.train( + {"tree_method": tree_method, "objective": "reg:absoluteerror"}, + Xy, + num_boost_round=1, + ) + config_0 = json.loads(booster_0.save_config()) + config_1 = json.loads(booster_1.save_config()) + + def get_score(config: Dict) -> float: + return float(config["learner"]["learner_model_param"]["base_score"]) + + assert get_score(config_0) == get_score(config_1) + + raw_booster = booster_1.save_raw(raw_format="deprecated") + booster_2 = xgb.Booster(model_file=raw_booster) + config_2 = json.loads(booster_2.save_config()) + assert get_score(config_1) == get_score(config_2) + + raw_booster = booster_1.save_raw(raw_format="ubj") + booster_2 = xgb.Booster(model_file=raw_booster) + config_2 = json.loads(booster_2.save_config()) + assert get_score(config_1) == 
get_score(config_2) + + booster_0 = xgb.train( + { + "tree_method": tree_method, + "base_score": base_score + 1.0, + "objective": "reg:absoluteerror", + }, + Xy, + num_boost_round=1, + ) + config_0 = json.loads(booster_0.save_config()) + np.testing.assert_allclose(get_score(config_0), get_score(config_1) + 1) + + @pytest.mark.skipif(**tm.no_sklearn()) + @pytest.mark.parametrize( + "tree_method,weighted", [ + ("approx", False), ("hist", False), ("approx", True), ("hist", True) + ] + ) + def test_adaptive(self, tree_method, weighted) -> None: + self.run_adaptive(tree_method, weighted) From 86f5a14cd13ff2e6d32487d5edd0e0e7748050f7 Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 21 Sep 2022 05:15:46 +0800 Subject: [PATCH 002/133] Move. --- amalgamation/xgboost-all0.cc | 1 + src/common/stats.cc | 44 +++++++++++++++++++++++++++++++++ src/common/stats.cu | 4 +-- src/common/stats.h | 48 ++++++++---------------------------- 4 files changed, 57 insertions(+), 40 deletions(-) create mode 100644 src/common/stats.cc diff --git a/amalgamation/xgboost-all0.cc b/amalgamation/xgboost-all0.cc index 3bc15d05388d..d1e17e2d49aa 100644 --- a/amalgamation/xgboost-all0.cc +++ b/amalgamation/xgboost-all0.cc @@ -86,6 +86,7 @@ #include "../src/common/pseudo_huber.cc" #include "../src/common/quantile.cc" #include "../src/common/random.cc" +#include "../src/common/stats.cc" #include "../src/common/survival_util.cc" #include "../src/common/threading_utils.cc" #include "../src/common/timer.cc" diff --git a/src/common/stats.cc b/src/common/stats.cc new file mode 100644 index 000000000000..dfde09986eb5 --- /dev/null +++ b/src/common/stats.cc @@ -0,0 +1,44 @@ +#include "stats.h" + +#include // std::accumulate + +#include "common.h" // OptionalWeights, MakeIndexTransformIter +#include "threading_utils.h" // ParallelFor, MemStackAllocator +#include "xgboost/generic_parameters.h" // Context +#include "xgboost/host_device_vector.h" // HostDeviceVector +#include "xgboost/linalg.h" // Tensor, UnravelIndex, Apply +#include "xgboost/logging.h" // CHECK_EQ + +namespace xgboost { +namespace common { +float Mean(Context const* ctx, linalg::Tensor const& t, + HostDeviceVector const& weights) { + if (!weights.Empty()) { + CHECK_EQ(weights.Size(), t.Shape(0)) << "Weight is assigned for each row."; + } + if (!ctx->IsCPU()) { + weights.SetDevice(ctx->gpu_id); + auto opt_weights = OptionalWeights(weights.ConstDeviceSpan()); + auto t_v = t.View(ctx->gpu_id); + cuda::Mean(ctx, t_v, opt_weights); + } + + auto opt_weights = OptionalWeights(weights.ConstHostSpan()); + auto t_v = t.HostView(); + + MemStackAllocator mean_tloc(ctx->Threads(), 0.0f); + auto iter = common::MakeIndexTransformIter( + [&](size_t i) { return linalg::detail::Apply(t_v, linalg::UnravelIndex(i, t_v.Shape())); }); + + double size = t_v.Shape(0); + CHECK_NE(size, 0); + ParallelFor(t_v.Size(), ctx->Threads(), [&](auto i) { + auto tidx = omp_get_thread_num(); + auto ridx = std::get<0>(linalg::UnravelIndex(i, t_v.Shape())); + mean_tloc[tidx] += iter[i] * opt_weights[ridx] / size; + }); + auto mean = std::accumulate(mean_tloc.cbegin(), mean_tloc.cend(), 0.0f); + return mean; +} +} // namespace common +} // namespace xgboost diff --git a/src/common/stats.cu b/src/common/stats.cu index bb70c3a557be..ecd75afcd9a5 100644 --- a/src/common/stats.cu +++ b/src/common/stats.cu @@ -44,9 +44,9 @@ float Median(Context const* ctx, linalg::TensorView t, } float Mean(Context const* ctx, linalg::TensorView t, - common::OptionalWeights weights, size_t n) { + common::OptionalWeights weights) { 
dh::safe_cuda(cudaSetDevice(ctx->gpu_id)); - double size = n == 0 ? t.Size() : n; + double size = t.Shape(0); CHECK_NE(size, 0); auto val_it = dh::MakeTransformIterator( thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) { diff --git a/src/common/stats.h b/src/common/stats.h index 8b54866353bb..f79df1b6ccbc 100644 --- a/src/common/stats.h +++ b/src/common/stats.h @@ -8,9 +8,10 @@ #include #include -#include "common.h" // AssertGPUSupport -#include "xgboost/generic_parameters.h" +#include "common.h" // AssertGPUSupport, OptionalWeights +#include "xgboost/generic_parameters.h" // Context #include "xgboost/linalg.h" +#include "xgboost/logging.h" // CHECK_GE namespace xgboost { namespace common { @@ -96,18 +97,16 @@ namespace cuda { float Median(Context const* ctx, linalg::TensorView t, common::OptionalWeights weights); #if !defined(XGBOOST_USE_CUDA) -inline float Median(Context const*, linalg::TensorView, common::OptionalWeights) { +inline float Median(Context const*, linalg::TensorView, OptionalWeights) { common::AssertGPUSupport(); return 0; } #endif // !defined(XGBOOST_USE_CUDA) -float Mean(Context const* ctx, linalg::TensorView t, - common::OptionalWeights weights, size_t n); +float Mean(Context const* ctx, linalg::TensorView t, OptionalWeights weights); #if !defined(XGBOOST_USE_CUDA) -inline float Mean(Context const*, linalg::TensorView, common::OptionalWeights, - size_t n) { +inline float Mean(Context const*, linalg::TensorView, OptionalWeights) { AssertGPUSupport(); return 0; } @@ -143,38 +142,11 @@ inline float Median(Context const* ctx, linalg::Tensor const& t, } /** - * \brief Calculate mean or partial mean. When n is specified to be non-zero, we use n as - * the total number of elements instead of the size of t. + * \brief Calculate mean or partial mean. Weight is per-sample, which means if weight is + * not empty then it should contain 1 element for each row in t. */ -inline float Mean(Context const* ctx, linalg::Tensor const& t, - HostDeviceVector const& weights, size_t n = 0) { - if (!weights.Empty()) { - CHECK_EQ(weights.Size(), t.Shape(0)) << "Weight is assigned for each row."; - } - if (!ctx->IsCPU()) { - weights.SetDevice(ctx->gpu_id); - auto opt_weights = OptionalWeights(weights.ConstDeviceSpan()); - auto t_v = t.View(ctx->gpu_id); - cuda::Mean(ctx, t_v, opt_weights, n); - } - - auto opt_weights = OptionalWeights(weights.ConstHostSpan()); - auto t_v = t.HostView(); - - MemStackAllocator mean_tloc(ctx->Threads(), 0.0f); - auto iter = common::MakeIndexTransformIter( - [&](size_t i) { return linalg::detail::Apply(t_v, linalg::UnravelIndex(i, t_v.Shape())); }); - - double size = n == 0 ? t_v.Size() : n; - CHECK_NE(size, 0); - ParallelFor(t_v.Size(), ctx->Threads(), [&](auto i) { - auto tidx = omp_get_thread_num(); - auto ridx = std::get<0>(linalg::UnravelIndex(i, t_v.Shape())); - mean_tloc[tidx] += iter[i] * opt_weights[ridx] / size; - }); - auto mean = std::accumulate(mean_tloc.cbegin(), mean_tloc.cend(), 0.0f); - return mean; -} +float Mean(Context const* ctx, linalg::Tensor const& t, + HostDeviceVector const& weights); } // namespace common } // namespace xgboost #endif // XGBOOST_COMMON_STATS_H_ From 0f0fbb67bb9cceb087e099f5ccfdaacf63bb5863 Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 21 Sep 2022 05:16:59 +0800 Subject: [PATCH 003/133] move. 
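Move the CPU implementation of Median out of the header and into stats.cc,
keeping only the declaration in stats.h. Call sites are unchanged; a minimal
usage sketch (illustrative only, assuming a configured Context `ctx` and the
MetaInfo `info` of a DMatrix in scope):

    #include "../common/stats.h"

    // Dispatches to cuda_impl::Median when ctx.gpu_id >= 0, otherwise runs
    // the CPU (optionally weighted) quantile path.
    float intercept = xgboost::common::Median(&ctx, info.labels, info.weights_);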
--- src/common/stats.cc | 29 +++++++++++++++++++++++++++++ src/common/stats.h | 32 +++----------------------------- tests/cpp/common/test_stats.cc | 29 ++++++++++++++++++----------- tests/cpp/common/test_stats.cu | 13 ------------- 4 files changed, 50 insertions(+), 53 deletions(-) diff --git a/src/common/stats.cc b/src/common/stats.cc index dfde09986eb5..bcc9254b97cb 100644 --- a/src/common/stats.cc +++ b/src/common/stats.cc @@ -11,6 +11,35 @@ namespace xgboost { namespace common { + +float Median(Context const* ctx, linalg::Tensor const& t, + HostDeviceVector const& weights) { + CHECK_EQ(t.Shape(1), 0) << "Matrix is not yet supported."; + if (!ctx->IsCPU()) { + weights.SetDevice(ctx->gpu_id); + auto opt_weights = OptionalWeights(weights.ConstDeviceSpan()); + auto t_v = t.View(ctx->gpu_id); + return cuda::Median(ctx, t_v, opt_weights); + } + + auto opt_weights = OptionalWeights(weights.ConstHostSpan()); + auto t_v = t.HostView(); + auto iter = common::MakeIndexTransformIter( + [&](size_t i) { return linalg::detail::Apply(t_v, linalg::UnravelIndex(i, t_v.Shape())); }); + float q{0}; + if (opt_weights.Empty()) { + q = common::Quantile(0.5, iter, iter + t_v.Size()); + } else { + CHECK_NE(t_v.Shape(1), 0); + auto w_it = common::MakeIndexTransformIter([&](size_t i) { + auto sample_idx = i / t_v.Shape(1); + return opt_weights[sample_idx]; + }); + q = common::WeightedQuantile(0.5, iter, iter + t_v.Size(), w_it); + } + return q; +} + float Mean(Context const* ctx, linalg::Tensor const& t, HostDeviceVector const& weights) { if (!weights.Empty()) { diff --git a/src/common/stats.h b/src/common/stats.h index f79df1b6ccbc..145aa9c50527 100644 --- a/src/common/stats.h +++ b/src/common/stats.h @@ -94,8 +94,7 @@ float WeightedQuantile(double alpha, Iter begin, Iter end, WeightIter weights) { } namespace cuda { -float Median(Context const* ctx, linalg::TensorView t, - common::OptionalWeights weights); +float Median(Context const* ctx, linalg::TensorView t, OptionalWeights weights); #if !defined(XGBOOST_USE_CUDA) inline float Median(Context const*, linalg::TensorView, OptionalWeights) { common::AssertGPUSupport(); @@ -113,33 +112,8 @@ inline float Mean(Context const*, linalg::TensorView, OptionalWe #endif // !defined(XGBOOST_USE_CUDA) } // namespace cuda -inline float Median(Context const* ctx, linalg::Tensor const& t, - HostDeviceVector const& weights) { - CHECK_EQ(t.Shape(1), 0) << "Matrix is not yet supported."; - if (!ctx->IsCPU()) { - weights.SetDevice(ctx->gpu_id); - auto opt_weights = OptionalWeights(weights.ConstDeviceSpan()); - auto t_v = t.View(ctx->gpu_id); - return cuda::Median(ctx, t_v, opt_weights); - } - - auto opt_weights = OptionalWeights(weights.ConstHostSpan()); - auto t_v = t.HostView(); - auto iter = common::MakeIndexTransformIter( - [&](size_t i) { return linalg::detail::Apply(t_v, linalg::UnravelIndex(i, t_v.Shape())); }); - float q{0}; - if (opt_weights.Empty()) { - q = common::Quantile(0.5, iter, iter + t_v.Size()); - } else { - CHECK_NE(t_v.Shape(1), 0); - auto w_it = common::MakeIndexTransformIter([&](size_t i) { - auto sample_idx = i / t_v.Shape(1); - return opt_weights[sample_idx]; - }); - q = common::WeightedQuantile(0.5, iter, iter + t_v.Size(), w_it); - } - return q; -} +float Median(Context const* ctx, linalg::Tensor const& t, + HostDeviceVector const& weights); /** * \brief Calculate mean or partial mean. 
Weight is per-sample, which means if weight is diff --git a/tests/cpp/common/test_stats.cc b/tests/cpp/common/test_stats.cc index 5e6df52570db..35aa24285d03 100644 --- a/tests/cpp/common/test_stats.cc +++ b/tests/cpp/common/test_stats.cc @@ -55,6 +55,22 @@ TEST(Stats, WeightedQuantile) { ASSERT_EQ(q, 5); } +TEST(Stats, Median) { + linalg::Tensor values{{.0f, .0f, 1.f, 2.f}, {4}, Context::kCpuId}; + Context ctx; + HostDeviceVector weights; + auto m = Median(&ctx, values, weights); + ASSERT_EQ(m, .5f); + +#if defined(XGBOOST_USE_CUDA) + ctx.gpu_id = 0; + ASSERT_FALSE(ctx.IsCPU()); + m = Median(&ctx, values, weights); + ASSERT_EQ(m, .5f); +#endif // defined(XGBOOST_USE_CUDA) +} + + TEST(Stats, Mean) { Context ctx; linalg::Tensor arr({1.f, 2.f, 3.f, 4.f}, {2, 2}, Context::kCpuId); @@ -65,20 +81,11 @@ TEST(Stats, Mean) { weights.Resize(2, 1.0f); mean = Mean(&ctx, arr, weights); ASSERT_EQ(mean, 2.5); -} - -TEST(Stats, Median) { - linalg::Tensor values{{.0f, .0f, 1.f, 2.f}, {4}, Context::kCpuId}; - Context ctx; - HostDeviceVector weights; - auto m = Median(&ctx, values, weights); - ASSERT_EQ(m, .5f); #if defined(XGBOOST_USE_CUDA) ctx.gpu_id = 0; - ASSERT_FALSE(ctx.IsCPU()); - m = Median(&ctx, values, weights); - ASSERT_EQ(m, .5f); + mean = Mean(&ctx, arr, weights); + ASSERT_EQ(mean, 2.5); #endif // defined(XGBOOST_USE_CUDA) } } // namespace common diff --git a/tests/cpp/common/test_stats.cu b/tests/cpp/common/test_stats.cu index 50528269de1c..4258d4b4ce52 100644 --- a/tests/cpp/common/test_stats.cu +++ b/tests/cpp/common/test_stats.cu @@ -75,18 +75,5 @@ class StatsGPU : public ::testing::Test { TEST_F(StatsGPU, Quantile) { this->NonWeighted(); } TEST_F(StatsGPU, WeightedQuantile) { this->Weighted(); } - -TEST(Stats, GPUMean) { - Context ctx; - ctx.gpu_id = 0; - linalg::Tensor arr({1.f, 2.f, 3.f, 4.f}, {2, 2}, Context::kCpuId); - HostDeviceVector weights; - auto mean = Mean(&ctx, arr, weights); - ASSERT_EQ(mean, 2.5); - - weights.Resize(2, 1.0f); - mean = Mean(&ctx, arr, weights); - ASSERT_EQ(mean, 2.5); -} } // namespace common } // namespace xgboost From ad9728adc41d29a4de4eb1344e46ea1ffd7d21a2 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 22 Sep 2022 08:25:51 +0800 Subject: [PATCH 004/133] impl weighted mean. 
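Estimate the base score as the weighted mean of the labels: sum_i w_i * y_i / n,
where empty weights are treated as all ones by OptionalWeights and the divisor
is the sample count rather than the sum of weights. Each worker reduces its own
shard and the partial results are combined with an allreduce. A single-node
reference for the arithmetic (hypothetical helper, not part of this commit):

    #include <cstddef>
    #include <vector>

    // Computes sum(w[i] * y[i]) / n; with empty weights every w[i] is 1,
    // matching the OptionalWeights default.
    double WeightedMeanSketch(std::vector<double> const& y,
                              std::vector<double> const& w) {
      double sum = 0.0;
      for (std::size_t i = 0; i < y.size(); ++i) {
        sum += y[i] * (w.empty() ? 1.0 : w[i]);
      }
      return sum / static_cast<double>(y.size());
    }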
--- src/common/numeric.cc | 7 ++---- src/common/numeric.cu | 10 ++++---- src/common/numeric.cuh | 23 ++++++++++++++++++ src/common/numeric.h | 22 +++++++++++++++--- src/objective/init_estimation.cu | 24 +++++++++++++++++++ src/objective/init_estimation.h | 40 ++++++++++++++++++++++++++++++++ src/objective/regression_obj.cu | 7 ++---- 7 files changed, 114 insertions(+), 19 deletions(-) create mode 100644 src/common/numeric.cuh create mode 100644 src/objective/init_estimation.cu create mode 100644 src/objective/init_estimation.h diff --git a/src/common/numeric.cc b/src/common/numeric.cc index 9740d6af1f8d..7e2bb07425c4 100644 --- a/src/common/numeric.cc +++ b/src/common/numeric.cc @@ -15,14 +15,11 @@ namespace common { double Reduce(Context const* ctx, HostDeviceVector const& values) { if (ctx->IsCPU()) { auto const& h_values = values.ConstHostVector(); - MemStackAllocator result_tloc(ctx->Threads(), 0); - ParallelFor(h_values.size(), ctx->Threads(), - [&](auto i) { result_tloc[omp_get_thread_num()] += h_values[i]; }); - auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cend(), 0.0); + auto result = cpu_impl::Reduce(ctx, h_values.cbegin(), h_values.cend(), 0.0); static_assert(std::is_same::value, ""); return result; } - return cuda::Reduce(ctx, values); + return cuda_impl::Reduce(ctx, values); } } // namespace common } // namespace xgboost diff --git a/src/common/numeric.cu b/src/common/numeric.cu index faac6ddb56da..6c5f34e8a835 100644 --- a/src/common/numeric.cu +++ b/src/common/numeric.cu @@ -5,21 +5,19 @@ #include // thrust:plus #include "device_helpers.cuh" // dh::Reduce, safe_cuda, dh::XGBCachingDeviceAllocator +#include "numeric.cuh" #include "numeric.h" #include "xgboost/generic_parameters.h" // Context #include "xgboost/host_device_vector.h" // HostDeviceVector namespace xgboost { namespace common { -namespace cuda { +namespace cuda_impl { double Reduce(Context const* ctx, HostDeviceVector const& values) { values.SetDevice(ctx->gpu_id); auto const d_values = values.ConstDeviceSpan(); - dh::XGBCachingDeviceAllocator alloc; - auto res = dh::Reduce(thrust::cuda::par(alloc), d_values.data(), - d_values.data() + d_values.size(), 0.0, thrust::plus{}); - return res; + return Reduce(ctx, d_values.cbegin(), d_values.cend(), 0.0); } -} // namespace cuda +} // namespace cuda_impl } // namespace common } // namespace xgboost diff --git a/src/common/numeric.cuh b/src/common/numeric.cuh new file mode 100644 index 000000000000..9bf774a62308 --- /dev/null +++ b/src/common/numeric.cuh @@ -0,0 +1,23 @@ +/*! 
+ * Copyright 2022 by XGBoost Contributors + */ +#ifndef XGBOOST_COMMON_NUMERIC_CUH_ +#define XGBOOST_COMMON_NUMERIC_CUH_ +#include // Context + +#include "device_helpers.cuh" // Reduce +#include "numeric.h" + +namespace xgboost { +namespace common { +namespace cuda_impl { +template +V Reduce(Context const* /*ctx unused*/, It first, It second, V const& init) { + dh::XGBCachingDeviceAllocator alloc; + auto res = dh::Reduce(thrust::cuda::par(alloc), first, second, init, thrust::plus{}); + return res; +} +} // namespace cuda_impl +} // namespace common +} // namespace xgboost +#endif diff --git a/src/common/numeric.h b/src/common/numeric.h index cff3e8a12121..2f7325a15024 100644 --- a/src/common/numeric.h +++ b/src/common/numeric.h @@ -93,7 +93,7 @@ void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) { exc.Rethrow(); } -namespace cuda { +namespace cuda_impl { double Reduce(Context const* ctx, HostDeviceVector const& values); #if !defined(XGBOOST_USE_CUDA) inline double Reduce(Context const*, HostDeviceVector const&) { @@ -101,11 +101,27 @@ inline double Reduce(Context const*, HostDeviceVector const&) { return 0; } #endif // !defined(XGBOOST_USE_CUDA) -} // namespace cuda +} // namespace cuda_impl + /** - * \brief Reduction with summation. + * \brief Reduction. */ double Reduce(Context const* ctx, HostDeviceVector const& values); + +/** + * \brief Reduction with iterator. + */ +namespace cpu_impl { +template +double Reduce(Context const* ctx, It first, It second, V const& init) { + size_t n = std::distance(first, second); + common::MemStackAllocator result_tloc(ctx->Threads(), init); + common::ParallelFor(n, ctx->Threads(), + [&](auto i) { result_tloc[omp_get_thread_num()] += first[i]; }); + auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cend(), init); + return result; +} +} // namespace cpu_impl } // namespace common } // namespace xgboost diff --git a/src/objective/init_estimation.cu b/src/objective/init_estimation.cu new file mode 100644 index 000000000000..3f361fdd6ee7 --- /dev/null +++ b/src/objective/init_estimation.cu @@ -0,0 +1,24 @@ +#include // thrust::make_counting_iterator + +#include "../common/device_helpers.cuh" // dh::MakeTransformIterator +#include "../common/numeric.cuh" // Reduce +#include "init_estimation.h" + +namespace xgboost { +namespace obj { +namespace cuda_impl { +double WeightedMean(Context const* ctx, MetaInfo const& info) { + std::uint64_t n_samples = info.num_row_; + auto y = info.labels.View(ctx->gpu_id); + auto w = common::OptionalWeights{info.weights_.ConstHostSpan()}; + auto it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) -> double { + size_t r, c; + std::tie(r, c) = linalg::UnravelIndex(i, y.Shape()); + return y(r, c) * w[r] / static_cast(n_samples); + }); + return common::cuda_impl::Reduce(ctx, it, it + y.Size(), 0.0); +} +} // namespace cuda_impl +} // namespace obj +} // namespace xgboost diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h new file mode 100644 index 000000000000..98e4bd57c002 --- /dev/null +++ b/src/objective/init_estimation.h @@ -0,0 +1,40 @@ +/** + * \brief Utilities for estimating initial score. 
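+ *
+ * Currently a weighted mean of the labels, combined across all workers, which
+ * seeds the model's base score before the first boosting round.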
+ */ + +#ifndef XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ +#define XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ + +#include "../common/common.h" // OptionalWeights, MakeIndexTransformIter +#include "../common/numeric.h" // cpu_impl::Reduce +#include "rabit/rabit.h" +#include "xgboost/data.h" // MetaInfo + +namespace xgboost { +namespace obj { +namespace cpu_impl { +inline double WeightedMean(Context const* ctx, MetaInfo const& info) { + std::uint64_t n_samples = info.num_row_; + auto y = info.labels.HostView(); + auto w = common::OptionalWeights{info.weights_.ConstHostSpan()}; + auto it = common::MakeIndexTransformIter([=] XGBOOST_DEVICE(size_t i) -> double { + size_t r, c; + std::tie(r, c) = linalg::UnravelIndex(i, y.Shape()); + return y(r, c) * w[r] / static_cast(n_samples); + }); + auto res = common::cpu_impl::Reduce(ctx, it, it + y.Size(), 0.0); + rabit::Allreduce(&res, 1); + return res; +} +} // namespace cpu_impl + +namespace cuda_impl { +double WeightedMean(Context const* ctx, MetaInfo const& info); +} // namespace cuda_impl + +double WeightedMean(Context const* ctx, MetaInfo const& info) { + return ctx->IsCPU() ? cpu_impl::WeightedMean(ctx, info) : cuda_impl::WeightedMean(ctx, info); +} +} // namespace obj +} // namespace xgboost +#endif // XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 1255f3f020ef..74206999ee45 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -22,6 +22,7 @@ #include "../common/transform.h" #include "./regression_loss.h" #include "adaptive.h" +#include "init_estimation.h" #include "xgboost/base.h" #include "xgboost/data.h" #include "xgboost/generic_parameters.h" @@ -172,11 +173,7 @@ class RegLossObj : public ObjFunction { CheckInitInputs(info); base_margin->Reshape(1); auto out = base_margin->HostView(); - std::uint64_t n_samples = info.num_row_; - rabit::Allreduce(&n_samples, 1); - auto mean = common::Mean(ctx_, info.labels, info.weights_, n_samples); - rabit::Allreduce(&mean, 1); - out(0) = mean; + out(0) = WeightedMean(ctx_, info); } void SaveConfig(Json* p_out) const override { From a9db92783d428af6e7972a6e339ed88ca112229c Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 22 Sep 2022 08:33:04 +0800 Subject: [PATCH 005/133] split up. 
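Split the distributed handling between the two backends: the sample count is
now allreduced in both the CPU and CUDA paths before the division, and the
device lambda unpacks the result of linalg::UnravelIndex with std::get instead
of std::tie. The entry point stays a thin dispatcher, roughly:

    // Sketch of the dispatch kept in init_estimation.h.
    inline double WeightedMean(Context const* ctx, MetaInfo const& info) {
      return ctx->IsCPU() ? cpu_impl::WeightedMean(ctx, info)
                          : cuda_impl::WeightedMean(ctx, info);
    }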
--- src/objective/init_estimation.cu | 5 +++-- src/objective/init_estimation.h | 9 +++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/objective/init_estimation.cu b/src/objective/init_estimation.cu index 3f361fdd6ee7..a6aa5eef89b3 100644 --- a/src/objective/init_estimation.cu +++ b/src/objective/init_estimation.cu @@ -9,12 +9,13 @@ namespace obj { namespace cuda_impl { double WeightedMean(Context const* ctx, MetaInfo const& info) { std::uint64_t n_samples = info.num_row_; + rabit::Allreduce(&n_samples, 1); auto y = info.labels.View(ctx->gpu_id); auto w = common::OptionalWeights{info.weights_.ConstHostSpan()}; auto it = dh::MakeTransformIterator( thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) -> double { - size_t r, c; - std::tie(r, c) = linalg::UnravelIndex(i, y.Shape()); + auto idx = linalg::UnravelIndex(i, y.Shape()); + size_t r{std::get<0>(idx)}, c{std::get<1>(idx)}; return y(r, c) * w[r] / static_cast(n_samples); }); return common::cuda_impl::Reduce(ctx, it, it + y.Size(), 0.0); diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h index 98e4bd57c002..01f510dbd885 100644 --- a/src/objective/init_estimation.h +++ b/src/objective/init_estimation.h @@ -15,9 +15,10 @@ namespace obj { namespace cpu_impl { inline double WeightedMean(Context const* ctx, MetaInfo const& info) { std::uint64_t n_samples = info.num_row_; + rabit::Allreduce(&n_samples, 1); auto y = info.labels.HostView(); auto w = common::OptionalWeights{info.weights_.ConstHostSpan()}; - auto it = common::MakeIndexTransformIter([=] XGBOOST_DEVICE(size_t i) -> double { + auto it = common::MakeIndexTransformIter([&](size_t i) -> double { size_t r, c; std::tie(r, c) = linalg::UnravelIndex(i, y.Shape()); return y(r, c) * w[r] / static_cast(n_samples); @@ -32,7 +33,11 @@ namespace cuda_impl { double WeightedMean(Context const* ctx, MetaInfo const& info); } // namespace cuda_impl -double WeightedMean(Context const* ctx, MetaInfo const& info) { +/** + * \brief Weighted mean for distributed env. Not a general implementation since we have + * 2-dim label with 1-dim weight. + */ +inline double WeightedMean(Context const* ctx, MetaInfo const& info) { return ctx->IsCPU() ? cpu_impl::WeightedMean(ctx, info) : cuda_impl::WeightedMean(ctx, info); } } // namespace obj From d9b6aa009b04a5c37eb4aa46375a7d26dd341e14 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 22 Sep 2022 08:35:13 +0800 Subject: [PATCH 006/133] cleanup. 
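Drop common::Mean and its tests now that obj::WeightedMean supersedes it.
Worth noting: the removed GPU branch computed cuda::Mean but discarded the
result, always falling through to the CPU path, so deleting it also retires
that quirk. Median keeps its declaration in stats.h with the implementation in
stats.cc, and the CUDA helpers move from namespace cuda to cuda_impl.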
--- src/common/stats.cc | 33 +-------------------------------- src/common/stats.cu | 4 ++-- src/common/stats.h | 20 ++------------------ tests/cpp/common/test_stats.cc | 19 ------------------- 4 files changed, 5 insertions(+), 71 deletions(-) diff --git a/src/common/stats.cc b/src/common/stats.cc index bcc9254b97cb..5bf83182d9d4 100644 --- a/src/common/stats.cc +++ b/src/common/stats.cc @@ -11,7 +11,6 @@ namespace xgboost { namespace common { - float Median(Context const* ctx, linalg::Tensor const& t, HostDeviceVector const& weights) { CHECK_EQ(t.Shape(1), 0) << "Matrix is not yet supported."; @@ -19,7 +18,7 @@ float Median(Context const* ctx, linalg::Tensor const& t, weights.SetDevice(ctx->gpu_id); auto opt_weights = OptionalWeights(weights.ConstDeviceSpan()); auto t_v = t.View(ctx->gpu_id); - return cuda::Median(ctx, t_v, opt_weights); + return cuda_impl::Median(ctx, t_v, opt_weights); } auto opt_weights = OptionalWeights(weights.ConstHostSpan()); @@ -39,35 +38,5 @@ float Median(Context const* ctx, linalg::Tensor const& t, } return q; } - -float Mean(Context const* ctx, linalg::Tensor const& t, - HostDeviceVector const& weights) { - if (!weights.Empty()) { - CHECK_EQ(weights.Size(), t.Shape(0)) << "Weight is assigned for each row."; - } - if (!ctx->IsCPU()) { - weights.SetDevice(ctx->gpu_id); - auto opt_weights = OptionalWeights(weights.ConstDeviceSpan()); - auto t_v = t.View(ctx->gpu_id); - cuda::Mean(ctx, t_v, opt_weights); - } - - auto opt_weights = OptionalWeights(weights.ConstHostSpan()); - auto t_v = t.HostView(); - - MemStackAllocator mean_tloc(ctx->Threads(), 0.0f); - auto iter = common::MakeIndexTransformIter( - [&](size_t i) { return linalg::detail::Apply(t_v, linalg::UnravelIndex(i, t_v.Shape())); }); - - double size = t_v.Shape(0); - CHECK_NE(size, 0); - ParallelFor(t_v.Size(), ctx->Threads(), [&](auto i) { - auto tidx = omp_get_thread_num(); - auto ridx = std::get<0>(linalg::UnravelIndex(i, t_v.Shape())); - mean_tloc[tidx] += iter[i] * opt_weights[ridx] / size; - }); - auto mean = std::accumulate(mean_tloc.cbegin(), mean_tloc.cend(), 0.0f); - return mean; -} } // namespace common } // namespace xgboost diff --git a/src/common/stats.cu b/src/common/stats.cu index ecd75afcd9a5..1ef6a34f919c 100644 --- a/src/common/stats.cu +++ b/src/common/stats.cu @@ -13,7 +13,7 @@ namespace xgboost { namespace common { -namespace cuda { +namespace cuda_impl { float Median(Context const* ctx, linalg::TensorView t, common::OptionalWeights weights) { HostDeviceVector segments{0, t.Size()}; @@ -58,6 +58,6 @@ float Mean(Context const* ctx, linalg::TensorView t, auto mean = thrust::reduce(thrust::cuda::par(alloc), val_it, val_it + t.Size(), 0.0f); return mean; } -} // namespace cuda +} // namespace cuda_impl } // namespace common } // namespace xgboost diff --git a/src/common/stats.h b/src/common/stats.h index 145aa9c50527..b15f1f193096 100644 --- a/src/common/stats.h +++ b/src/common/stats.h @@ -93,7 +93,7 @@ float WeightedQuantile(double alpha, Iter begin, Iter end, WeightIter weights) { return val(idx); } -namespace cuda { +namespace cuda_impl { float Median(Context const* ctx, linalg::TensorView t, OptionalWeights weights); #if !defined(XGBOOST_USE_CUDA) inline float Median(Context const*, linalg::TensorView, OptionalWeights) { @@ -101,26 +101,10 @@ inline float Median(Context const*, linalg::TensorView, Optional return 0; } #endif // !defined(XGBOOST_USE_CUDA) - -float Mean(Context const* ctx, linalg::TensorView t, OptionalWeights weights); - -#if !defined(XGBOOST_USE_CUDA) -inline float 
Mean(Context const*, linalg::TensorView, OptionalWeights) { - AssertGPUSupport(); - return 0; -} -#endif // !defined(XGBOOST_USE_CUDA) -} // namespace cuda +} // namespace cuda_impl float Median(Context const* ctx, linalg::Tensor const& t, HostDeviceVector const& weights); - -/** - * \brief Calculate mean or partial mean. Weight is per-sample, which means if weight is - * not empty then it should contain 1 element for each row in t. - */ -float Mean(Context const* ctx, linalg::Tensor const& t, - HostDeviceVector const& weights); } // namespace common } // namespace xgboost #endif // XGBOOST_COMMON_STATS_H_ diff --git a/tests/cpp/common/test_stats.cc b/tests/cpp/common/test_stats.cc index 35aa24285d03..79f38ae6a984 100644 --- a/tests/cpp/common/test_stats.cc +++ b/tests/cpp/common/test_stats.cc @@ -69,24 +69,5 @@ TEST(Stats, Median) { ASSERT_EQ(m, .5f); #endif // defined(XGBOOST_USE_CUDA) } - - -TEST(Stats, Mean) { - Context ctx; - linalg::Tensor arr({1.f, 2.f, 3.f, 4.f}, {2, 2}, Context::kCpuId); - HostDeviceVector weights; - auto mean = Mean(&ctx, arr, weights); - ASSERT_EQ(mean, 2.5); - - weights.Resize(2, 1.0f); - mean = Mean(&ctx, arr, weights); - ASSERT_EQ(mean, 2.5); - -#if defined(XGBOOST_USE_CUDA) - ctx.gpu_id = 0; - mean = Mean(&ctx, arr, weights); - ASSERT_EQ(mean, 2.5); -#endif // defined(XGBOOST_USE_CUDA) -} } // namespace common } // namespace xgboost From b2b9924f0b43e14a7f07552b891c3dd3590b5001 Mon Sep 17 00:00:00 2001 From: fis Date: Mon, 26 Sep 2022 11:12:52 +0800 Subject: [PATCH 007/133] Move transform iterator. --- src/common/common.h | 72 ---------------------------- src/common/linalg_op.h | 3 +- src/common/quantile.cu | 3 +- src/common/stats.cc | 3 +- src/common/transform_iterator.h | 85 +++++++++++++++++++++++++++++++++ src/data/gradient_index.h | 1 + src/objective/adaptive.cc | 1 + src/objective/init_estimation.h | 8 ++-- tests/cpp/common/test_stats.cc | 1 + 9 files changed, 99 insertions(+), 78 deletions(-) create mode 100644 src/common/transform_iterator.h diff --git a/src/common/common.h b/src/common/common.h index c09b75fddaa3..1d51f81facc9 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -164,78 +164,6 @@ class Range { Iterator end_; }; -/** - * \brief Transform iterator that takes an index and calls transform operator. - * - * This is CPU-only right now as taking host device function as operator complicates the - * code. For device side one can use `thrust::transform_iterator` instead. - */ -template -class IndexTransformIter { - size_t iter_{0}; - Fn fn_; - - public: - using iterator_category = std::random_access_iterator_tag; // NOLINT - using value_type = std::result_of_t; // NOLINT - using difference_type = detail::ptrdiff_t; // NOLINT - using reference = std::add_lvalue_reference_t; // NOLINT - using pointer = std::add_pointer_t; // NOLINT - - public: - /** - * \param op Transform operator, takes a size_t index as input. 
- */ - explicit IndexTransformIter(Fn &&op) : fn_{op} {} - IndexTransformIter(IndexTransformIter const &) = default; - IndexTransformIter& operator=(IndexTransformIter&&) = default; - IndexTransformIter& operator=(IndexTransformIter const& that) { - iter_ = that.iter_; - return *this; - } - - value_type operator*() const { return fn_(iter_); } - value_type operator[](size_t i) const { - auto iter = *this + i; - return *iter; - } - - auto operator-(IndexTransformIter const &that) const { return iter_ - that.iter_; } - bool operator==(IndexTransformIter const &that) const { return iter_ == that.iter_; } - bool operator!=(IndexTransformIter const &that) const { return !(*this == that); } - - IndexTransformIter &operator++() { - iter_++; - return *this; - } - IndexTransformIter operator++(int) { - auto ret = *this; - ++(*this); - return ret; - } - IndexTransformIter &operator+=(difference_type n) { - iter_ += n; - return *this; - } - IndexTransformIter &operator-=(difference_type n) { - (*this) += -n; - return *this; - } - IndexTransformIter operator+(difference_type n) const { - auto ret = *this; - return ret += n; - } - IndexTransformIter operator-(difference_type n) const { - auto ret = *this; - return ret -= n; - } -}; - -template -auto MakeIndexTransformIter(Fn&& fn) { - return IndexTransformIter(std::forward(fn)); -} - int AllVisibleGPUs(); inline void AssertGPUSupport() { diff --git a/src/common/linalg_op.h b/src/common/linalg_op.h index 0de173c8e73f..deb9618033b7 100644 --- a/src/common/linalg_op.h +++ b/src/common/linalg_op.h @@ -3,11 +3,12 @@ */ #ifndef XGBOOST_COMMON_LINALG_OP_H_ #define XGBOOST_COMMON_LINALG_OP_H_ -#include #include // std::int32_t +#include #include "common.h" #include "threading_utils.h" +#include "transform_iterator.h" // MakeIndexTransformIter #include "xgboost/generic_parameters.h" #include "xgboost/linalg.h" diff --git a/src/common/quantile.cu b/src/common/quantile.cu index 5f69eafb300e..805bdbeeea7e 100644 --- a/src/common/quantile.cu +++ b/src/common/quantile.cu @@ -17,6 +17,7 @@ #include "hist_util.h" #include "quantile.cuh" #include "quantile.h" +#include "transform_iterator.h" // MakeIndexTransformIter #include "xgboost/span.h" namespace xgboost { @@ -642,7 +643,7 @@ void SketchContainer::MakeCuts(HistogramCuts* p_cuts) { thrust::equal_to{}, [] __device__(auto l, auto r) { return l.value > r.value ? 
l : r; }); dh::CopyDeviceSpanToVector(&max_values, dh::ToSpan(d_max_values)); - auto max_it = common::MakeIndexTransformIter([&](auto i) { + auto max_it = MakeIndexTransformIter([&](auto i) { if (IsCat(h_feature_types, i)) { return max_values[i].value; } diff --git a/src/common/stats.cc b/src/common/stats.cc index 5bf83182d9d4..84b403764047 100644 --- a/src/common/stats.cc +++ b/src/common/stats.cc @@ -2,8 +2,9 @@ #include // std::accumulate -#include "common.h" // OptionalWeights, MakeIndexTransformIter +#include "common.h" // OptionalWeights #include "threading_utils.h" // ParallelFor, MemStackAllocator +#include "transform_iterator.h" // MakeIndexTransformIter #include "xgboost/generic_parameters.h" // Context #include "xgboost/host_device_vector.h" // HostDeviceVector #include "xgboost/linalg.h" // Tensor, UnravelIndex, Apply diff --git a/src/common/transform_iterator.h b/src/common/transform_iterator.h new file mode 100644 index 000000000000..3e6d91f4c3e8 --- /dev/null +++ b/src/common/transform_iterator.h @@ -0,0 +1,85 @@ +#ifndef XGBOOST_COMMON_TRANSFORM_ITERATOR_H_ +#define XGBOOST_COMMON_TRANSFORM_ITERATOR_H_ + +#include // std::size_t +#include // std::random_access_iterator_tag +#include // std::result_of_t, std::add_pointer_t, std::add_lvalue_reference_t + +#include "xgboost/span.h" // ptrdiff_t + +namespace xgboost { +namespace common { +/** + * \brief Transform iterator that takes an index and calls transform operator. + * + * This is CPU-only right now as taking host device function as operator complicates the + * code. For device side one can use `thrust::transform_iterator` instead. + */ +template +class IndexTransformIter { + size_t iter_{0}; + Fn fn_; + + public: + using iterator_category = std::random_access_iterator_tag; // NOLINT + using value_type = std::result_of_t; // NOLINT + using difference_type = detail::ptrdiff_t; // NOLINT + using reference = std::add_lvalue_reference_t; // NOLINT + using pointer = std::add_pointer_t; // NOLINT + + public: + /** + * \param op Transform operator, takes a size_t index as input. 
+ */ + explicit IndexTransformIter(Fn &&op) : fn_{op} {} + IndexTransformIter(IndexTransformIter const &) = default; + IndexTransformIter &operator=(IndexTransformIter &&) = default; + IndexTransformIter &operator=(IndexTransformIter const &that) { + iter_ = that.iter_; + return *this; + } + + value_type operator*() const { return fn_(iter_); } + value_type operator[](size_t i) const { + auto iter = *this + i; + return *iter; + } + + auto operator-(IndexTransformIter const &that) const { return iter_ - that.iter_; } + bool operator==(IndexTransformIter const &that) const { return iter_ == that.iter_; } + bool operator!=(IndexTransformIter const &that) const { return !(*this == that); } + + IndexTransformIter &operator++() { + iter_++; + return *this; + } + IndexTransformIter operator++(int) { + auto ret = *this; + ++(*this); + return ret; + } + IndexTransformIter &operator+=(difference_type n) { + iter_ += n; + return *this; + } + IndexTransformIter &operator-=(difference_type n) { + (*this) += -n; + return *this; + } + IndexTransformIter operator+(difference_type n) const { + auto ret = *this; + return ret += n; + } + IndexTransformIter operator-(difference_type n) const { + auto ret = *this; + return ret -= n; + } +}; + +template +auto MakeIndexTransformIter(Fn &&fn) { + return IndexTransformIter(std::forward(fn)); +} +} // namespace common +} // namespace xgboost +#endif // XGBOOST_COMMON_TRANSFORM_ITERATOR_H_ diff --git a/src/data/gradient_index.h b/src/data/gradient_index.h index 1e58fcb429d1..85a38c8ea63d 100644 --- a/src/data/gradient_index.h +++ b/src/data/gradient_index.h @@ -13,6 +13,7 @@ #include "../common/hist_util.h" #include "../common/numeric.h" #include "../common/threading_utils.h" +#include "../common/transform_iterator.h" // common::MakeIndexTransformIter #include "adapter.h" #include "proxy_dmatrix.h" #include "xgboost/base.h" diff --git a/src/objective/adaptive.cc b/src/objective/adaptive.cc index 6ddf39849949..4beb9d9c365e 100644 --- a/src/objective/adaptive.cc +++ b/src/objective/adaptive.cc @@ -10,6 +10,7 @@ #include "../common/numeric.h" #include "../common/stats.h" #include "../common/threading_utils.h" +#include "../common/transform_iterator.h" // MakeIndexTransformIter #include "xgboost/tree_model.h" namespace xgboost { diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h index 01f510dbd885..563f97a48fa1 100644 --- a/src/objective/init_estimation.h +++ b/src/objective/init_estimation.h @@ -5,10 +5,12 @@ #ifndef XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ #define XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ -#include "../common/common.h" // OptionalWeights, MakeIndexTransformIter -#include "../common/numeric.h" // cpu_impl::Reduce +#include "../common/common.h" // OptionalWeights +#include "../common/numeric.h" // cpu_impl::Reduce +#include "../common/transform_iterator.h" // MakeIndexTransformIter #include "rabit/rabit.h" -#include "xgboost/data.h" // MetaInfo +#include "xgboost/data.h" // MetaInfo +#include "xgboost/linalg.h" // UnravelIndex namespace xgboost { namespace obj { diff --git a/tests/cpp/common/test_stats.cc b/tests/cpp/common/test_stats.cc index 79f38ae6a984..ebd4be985bf3 100644 --- a/tests/cpp/common/test_stats.cc +++ b/tests/cpp/common/test_stats.cc @@ -5,6 +5,7 @@ #include #include "../../../src/common/stats.h" +#include "../../../src/common/transform_iterator.h" // common::MakeIndexTransformIter namespace xgboost { namespace common { From f098799b2808196ba5a956fa620338ffbfdc2c6c Mon Sep 17 00:00:00 2001 From: fis Date: Mon, 26 Sep 
2022 11:32:16 +0800 Subject: [PATCH 008/133] Define for l2 only. --- src/objective/init_estimation.cc | 34 ++++++++++++++++++++++++++++++ src/objective/init_estimation.cu | 10 +++++++++ src/objective/init_estimation.h | 24 ++++----------------- src/objective/regression_loss.h | 36 +++++++++++++++++++++++++++++++- src/objective/regression_obj.cu | 14 +------------ 5 files changed, 84 insertions(+), 34 deletions(-) create mode 100644 src/objective/init_estimation.cc diff --git a/src/objective/init_estimation.cc b/src/objective/init_estimation.cc new file mode 100644 index 000000000000..396defebc07f --- /dev/null +++ b/src/objective/init_estimation.cc @@ -0,0 +1,34 @@ +/** + * Copyright 2022 by XGBoost Contributors + * + * \brief Utilities for estimating initial score. + */ + +#include "init_estimation.h" + +#include "../common/common.h" // OptionalWeights +#include "../common/numeric.h" // cpu_impl::Reduce +#include "../common/transform_iterator.h" // MakeIndexTransformIter +#include "rabit/rabit.h" +#include "xgboost/linalg.h" // UnravelIndex + +namespace xgboost { +namespace obj { +namespace cpu_impl { +double WeightedMean(Context const* ctx, MetaInfo const& info) { + std::uint64_t n_samples = info.num_row_; + rabit::Allreduce(&n_samples, 1); + auto y = info.labels.HostView(); + auto w = common::OptionalWeights{info.weights_.ConstHostSpan()}; + auto it = common::MakeIndexTransformIter([&](size_t i) -> double { + size_t r, c; + std::tie(r, c) = linalg::UnravelIndex(i, y.Shape()); + return y(r, c) * w[r] / static_cast(n_samples); + }); + auto res = common::cpu_impl::Reduce(ctx, it, it + y.Size(), 0.0); + rabit::Allreduce(&res, 1); + return res; +} +} // namespace cpu_impl +} // namespace obj +} // namespace xgboost diff --git a/src/objective/init_estimation.cu b/src/objective/init_estimation.cu index a6aa5eef89b3..aa90b8b9115a 100644 --- a/src/objective/init_estimation.cu +++ b/src/objective/init_estimation.cu @@ -1,8 +1,18 @@ +/** + * Copyright 2022 by XGBoost Contributors + * + * \brief Utilities for estimating initial score. + */ #include // thrust::make_counting_iterator +#include // std::uint64_t + #include "../common/device_helpers.cuh" // dh::MakeTransformIterator #include "../common/numeric.cuh" // Reduce #include "init_estimation.h" +#include "rabit/rabit.h" +#include "xgboost/data.h" // MetaInfo +#include "xgboost/generic_parameters.h" // Context namespace xgboost { namespace obj { diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h index 563f97a48fa1..b36d76954957 100644 --- a/src/objective/init_estimation.h +++ b/src/objective/init_estimation.h @@ -1,34 +1,18 @@ /** + * Copyright 2022 by XGBoost Contributors + * * \brief Utilities for estimating initial score. 
*/ #ifndef XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ #define XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ -#include "../common/common.h" // OptionalWeights -#include "../common/numeric.h" // cpu_impl::Reduce -#include "../common/transform_iterator.h" // MakeIndexTransformIter -#include "rabit/rabit.h" -#include "xgboost/data.h" // MetaInfo -#include "xgboost/linalg.h" // UnravelIndex +#include "xgboost/data.h" // MetaInfo namespace xgboost { namespace obj { namespace cpu_impl { -inline double WeightedMean(Context const* ctx, MetaInfo const& info) { - std::uint64_t n_samples = info.num_row_; - rabit::Allreduce(&n_samples, 1); - auto y = info.labels.HostView(); - auto w = common::OptionalWeights{info.weights_.ConstHostSpan()}; - auto it = common::MakeIndexTransformIter([&](size_t i) -> double { - size_t r, c; - std::tie(r, c) = linalg::UnravelIndex(i, y.Shape()); - return y(r, c) * w[r] / static_cast(n_samples); - }); - auto res = common::cpu_impl::Reduce(ctx, it, it + y.Size(), 0.0); - rabit::Allreduce(&res, 1); - return res; -} +double WeightedMean(Context const* ctx, MetaInfo const& info); } // namespace cpu_impl namespace cuda_impl { diff --git a/src/objective/regression_loss.h b/src/objective/regression_loss.h index f394432a8f28..c397bcbfd135 100644 --- a/src/objective/regression_loss.h +++ b/src/objective/regression_loss.h @@ -5,15 +5,28 @@ #define XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_ #include +#include // MetaInfo +#include // Context +#include // Tensor #include + #include -#include "xgboost/task.h" #include "../common/math.h" +#include "init_estimation.h" // WeightedMean +#include "xgboost/task.h" namespace xgboost { namespace obj { +inline void CheckInitInputs(MetaInfo const& info) { + CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels."; + if (!info.weights_.Empty()) { + CHECK_EQ(info.weights_.Size(), info.num_row_) + << "Number of weights should be equal to number of data points."; + } +} + // common regressions // linear regression struct LinearSquareLoss { @@ -39,6 +52,15 @@ struct LinearSquareLoss { static const char* Name() { return "reg:squarederror"; } static ObjInfo Info() { return {ObjInfo::kRegression, true, false}; } + + static bool InitEstimation(Context const* ctx, MetaInfo const& info, + linalg::Tensor* base_margin) { + CheckInitInputs(info); + base_margin->Reshape(1); + auto out = base_margin->HostView(); + out(0) = WeightedMean(ctx, info); + return true; + } }; struct SquaredLogError { @@ -66,6 +88,10 @@ struct SquaredLogError { static const char* Name() { return "reg:squaredlogerror"; } static ObjInfo Info() { return ObjInfo::kRegression; } + + static bool InitEstimation(Context const*, MetaInfo const&, linalg::Tensor*) { + return false; + } }; // logistic loss for probability regression task @@ -103,6 +129,10 @@ struct LogisticRegression { static const char* Name() { return "reg:logistic"; } static ObjInfo Info() { return ObjInfo::kRegression; } + + static bool InitEstimation(Context const*, MetaInfo const&, linalg::Tensor*) { + return false; + } }; // logistic loss for binary classification task @@ -147,6 +177,10 @@ struct LogisticRaw : public LogisticRegression { static const char* Name() { return "binary:logitraw"; } static ObjInfo Info() { return ObjInfo::kRegression; } + + static bool InitEstimation(Context const*, MetaInfo const&, linalg::Tensor*) { + return false; + } }; } // namespace obj diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 74206999ee45..b7394b652cf7 100644 --- a/src/objective/regression_obj.cu 
+++ b/src/objective/regression_obj.cu @@ -22,7 +22,6 @@ #include "../common/transform.h" #include "./regression_loss.h" #include "adaptive.h" -#include "init_estimation.h" #include "xgboost/base.h" #include "xgboost/data.h" #include "xgboost/generic_parameters.h" @@ -40,14 +39,6 @@ namespace xgboost { namespace obj { namespace { -void CheckInitInputs(MetaInfo const& info) { - CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels."; - if (!info.weights_.Empty()) { - CHECK_EQ(info.weights_.Size(), info.num_row_) - << "Number of weights should be equal to number of data points."; - } -} - void CheckRegInputs(MetaInfo const& info, HostDeviceVector const& preds) { CheckInitInputs(info); CHECK_EQ(info.labels.Size(), preds.Size()) << "Invalid shape of labels."; @@ -170,10 +161,7 @@ class RegLossObj : public ObjFunction { } void InitEstimation(MetaInfo const& info, linalg::Tensor* base_margin) const override { - CheckInitInputs(info); - base_margin->Reshape(1); - auto out = base_margin->HostView(); - out(0) = WeightedMean(ctx_, info); + Loss::InitEstimation(ctx_, info, base_margin); } void SaveConfig(Json* p_out) const override { From 53d69435857da185034726fd9dd8988991d5375e Mon Sep 17 00:00:00 2001 From: fis Date: Mon, 26 Sep 2022 11:34:21 +0800 Subject: [PATCH 009/133] cleanup. --- src/objective/init_estimation.cu | 4 +++- src/objective/init_estimation.h | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/objective/init_estimation.cu b/src/objective/init_estimation.cu index aa90b8b9115a..44c3567a7c03 100644 --- a/src/objective/init_estimation.cu +++ b/src/objective/init_estimation.cu @@ -6,6 +6,7 @@ #include // thrust::make_counting_iterator #include // std::uint64_t +#include // std::size_t #include "../common/device_helpers.cuh" // dh::MakeTransformIterator #include "../common/numeric.cuh" // Reduce @@ -13,6 +14,7 @@ #include "rabit/rabit.h" #include "xgboost/data.h" // MetaInfo #include "xgboost/generic_parameters.h" // Context +#include "xgboost/linalg.h" // UnravelIndex namespace xgboost { namespace obj { @@ -25,7 +27,7 @@ double WeightedMean(Context const* ctx, MetaInfo const& info) { auto it = dh::MakeTransformIterator( thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) -> double { auto idx = linalg::UnravelIndex(i, y.Shape()); - size_t r{std::get<0>(idx)}, c{std::get<1>(idx)}; + std::size_t r{std::get<0>(idx)}, c{std::get<1>(idx)}; return y(r, c) * w[r] / static_cast(n_samples); }); return common::cuda_impl::Reduce(ctx, it, it + y.Size(), 0.0); diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h index b36d76954957..0cf43576a6d0 100644 --- a/src/objective/init_estimation.h +++ b/src/objective/init_estimation.h @@ -7,7 +7,8 @@ #ifndef XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ #define XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ -#include "xgboost/data.h" // MetaInfo +#include "xgboost/data.h" // MetaInfo +#include "xgboost/generic_parameters.h" // Context namespace xgboost { namespace obj { From b71dcca12eda5655b60703f5b9e99a05d8bbfa4b Mon Sep 17 00:00:00 2001 From: fis Date: Mon, 26 Sep 2022 11:53:17 +0800 Subject: [PATCH 010/133] Doc. 
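Document the contract of the iterator-based reductions: init must be the
additive identity, cpu_impl::Reduce should return V rather than always
double, and the final accumulation must only cover the first ctx->Threads()
thread-local slots. A minimal standalone sketch of the pattern, assuming
OpenMP; the names are illustrative, not the XGBoost API:

    #include <cstddef>  // std::ptrdiff_t
    #include <numeric>  // std::accumulate
    #include <vector>

    #include <omp.h>

    template <typename It, typename V>
    V ParallelReduce(int n_threads, It first, It last, V init) {
      std::vector<V> tloc(n_threads, init);  // one accumulator per thread
    #pragma omp parallel for num_threads(n_threads) schedule(static)
      for (std::ptrdiff_t i = 0; i < last - first; ++i) {
        tloc[omp_get_thread_num()] += first[i];
      }
      // init must be the additive identity: every per-thread slot starts at
      // init, and std::accumulate adds it once more here.
      return std::accumulate(tloc.cbegin(), tloc.cend(), init);
    }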
--- src/common/numeric.cu | 4 +--- src/common/numeric.cuh | 3 ++- src/common/numeric.h | 8 ++++---- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/common/numeric.cu b/src/common/numeric.cu index 6c5f34e8a835..36e24d8ac6e6 100644 --- a/src/common/numeric.cu +++ b/src/common/numeric.cu @@ -2,10 +2,8 @@ * Copyright 2022 by XGBoost Contributors */ #include -#include // thrust:plus -#include "device_helpers.cuh" // dh::Reduce, safe_cuda, dh::XGBCachingDeviceAllocator -#include "numeric.cuh" +#include "numeric.cuh" // Reduce #include "numeric.h" #include "xgboost/generic_parameters.h" // Context #include "xgboost/host_device_vector.h" // HostDeviceVector diff --git a/src/common/numeric.cuh b/src/common/numeric.cuh index 9bf774a62308..b6d6565350c2 100644 --- a/src/common/numeric.cuh +++ b/src/common/numeric.cuh @@ -3,6 +3,7 @@ */ #ifndef XGBOOST_COMMON_NUMERIC_CUH_ #define XGBOOST_COMMON_NUMERIC_CUH_ +#include // thrust:plus #include // Context #include "device_helpers.cuh" // Reduce @@ -20,4 +21,4 @@ V Reduce(Context const* /*ctx unused*/, It first, It second, V const& init) { } // namespace cuda_impl } // namespace common } // namespace xgboost -#endif +#endif // XGBOOST_COMMON_NUMERIC_CUH_ diff --git a/src/common/numeric.h b/src/common/numeric.h index 2f7325a15024..b1eb74192e9e 100644 --- a/src/common/numeric.h +++ b/src/common/numeric.h @@ -109,16 +109,16 @@ inline double Reduce(Context const*, HostDeviceVector const&) { double Reduce(Context const* ctx, HostDeviceVector const& values); /** - * \brief Reduction with iterator. + * \brief Reduction with iterator. init must be additive identity. (0 for primitive types) */ namespace cpu_impl { template -double Reduce(Context const* ctx, It first, It second, V const& init) { +V Reduce(Context const* ctx, It first, It second, V const& init) { size_t n = std::distance(first, second); - common::MemStackAllocator result_tloc(ctx->Threads(), init); + common::MemStackAllocator result_tloc(ctx->Threads(), init); common::ParallelFor(n, ctx->Threads(), [&](auto i) { result_tloc[omp_get_thread_num()] += first[i]; }); - auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cend(), init); + auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cbegin() + ctx->Threads(), init); return result; } } // namespace cpu_impl From abf6aeaa38a9da99a2ceb7f1c015a35f54a1facf Mon Sep 17 00:00:00 2001 From: fis Date: Mon, 26 Sep 2022 11:57:15 +0800 Subject: [PATCH 011/133] lint. --- src/common/transform_iterator.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/common/transform_iterator.h b/src/common/transform_iterator.h index 3e6d91f4c3e8..b5549c50afc2 100644 --- a/src/common/transform_iterator.h +++ b/src/common/transform_iterator.h @@ -1,9 +1,13 @@ +/** + * Copyright 2022 by XGBoost Contributors + */ #ifndef XGBOOST_COMMON_TRANSFORM_ITERATOR_H_ #define XGBOOST_COMMON_TRANSFORM_ITERATOR_H_ #include // std::size_t #include // std::random_access_iterator_tag #include // std::result_of_t, std::add_pointer_t, std::add_lvalue_reference_t +#include // std::forward #include "xgboost/span.h" // ptrdiff_t From 7ac09ac38f7dbc251d427fe773a76aeae94ee757 Mon Sep 17 00:00:00 2001 From: fis Date: Mon, 26 Sep 2022 11:59:38 +0800 Subject: [PATCH 012/133] Fix CPU build. 
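Without CUDA the init_estimation.cu translation unit is not compiled, so a
CPU-only build has no definition of cuda_impl::WeightedMean and fails to
link. Add an inline header stub that calls common::AssertGPUSupport() and
returns 0.0, matching the fallback pattern used by the other cuda_impl
helpers.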
--- src/objective/init_estimation.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h index 0cf43576a6d0..b553feb70e62 100644 --- a/src/objective/init_estimation.h +++ b/src/objective/init_estimation.h @@ -7,6 +7,7 @@ #ifndef XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ #define XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ +#include "../common/common.h" // AssertGPUSupport #include "xgboost/data.h" // MetaInfo #include "xgboost/generic_parameters.h" // Context @@ -18,6 +19,12 @@ double WeightedMean(Context const* ctx, MetaInfo const& info); namespace cuda_impl { double WeightedMean(Context const* ctx, MetaInfo const& info); +#if !defined(XGBOOST_USE_CUDA) +inline double WeightedMean(Context const*, MetaInfo const&) { + common::AssertGPUSupport(); + return 0.0; +} +#endif // !defined(XGBOOST_USE_CUDA) } // namespace cuda_impl /** From d524413f927934faf798255a4411ca86619776e9 Mon Sep 17 00:00:00 2001 From: fis Date: Mon, 26 Sep 2022 12:32:14 +0800 Subject: [PATCH 013/133] fix. --- src/objective/regression_obj.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index b7394b652cf7..ffa89d76e48c 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -161,7 +161,9 @@ class RegLossObj : public ObjFunction { } void InitEstimation(MetaInfo const& info, linalg::Tensor* base_margin) const override { - Loss::InitEstimation(ctx_, info, base_margin); + if (!Loss::InitEstimation(ctx_, info, base_margin)) { + ObjFunction::InitEstimation(info, base_margin); + } } void SaveConfig(Json* p_out) const override { From dbc6b3c4cd1eb895c3e36351497f006d7c4eff17 Mon Sep 17 00:00:00 2001 From: fis Date: Mon, 26 Sep 2022 17:05:50 +0800 Subject: [PATCH 014/133] Fix test. --- src/common/stats.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/stats.cc b/src/common/stats.cc index 84b403764047..f0c6bc835345 100644 --- a/src/common/stats.cc +++ b/src/common/stats.cc @@ -14,7 +14,7 @@ namespace xgboost { namespace common { float Median(Context const* ctx, linalg::Tensor const& t, HostDeviceVector const& weights) { - CHECK_EQ(t.Shape(1), 0) << "Matrix is not yet supported."; + CHECK_LE(t.Shape(1), 1) << "Matrix is not yet supported."; if (!ctx->IsCPU()) { weights.SetDevice(ctx->gpu_id); auto opt_weights = OptionalWeights(weights.ConstDeviceSpan()); From ca4b2071a091fedb1b7b5a3f4dd6055851918388 Mon Sep 17 00:00:00 2001 From: fis Date: Mon, 26 Sep 2022 17:07:57 +0800 Subject: [PATCH 015/133] lint. --- src/common/numeric.h | 10 +++++----- src/common/stats.cc | 3 +++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/common/numeric.h b/src/common/numeric.h index b1eb74192e9e..37f294fbb9a3 100644 --- a/src/common/numeric.h +++ b/src/common/numeric.h @@ -103,11 +103,6 @@ inline double Reduce(Context const*, HostDeviceVector const&) { #endif // !defined(XGBOOST_USE_CUDA) } // namespace cuda_impl -/** - * \brief Reduction. - */ -double Reduce(Context const* ctx, HostDeviceVector const& values); - /** * \brief Reduction with iterator. init must be additive identity. (0 for primitive types) */ @@ -122,6 +117,11 @@ V Reduce(Context const* ctx, It first, It second, V const& init) { return result; } } // namespace cpu_impl + +/** + * \brief Reduction on host device vector. 
+ */ +double Reduce(Context const* ctx, HostDeviceVector const& values); } // namespace common } // namespace xgboost diff --git a/src/common/stats.cc b/src/common/stats.cc index f0c6bc835345..b74e4e1cf0bf 100644 --- a/src/common/stats.cc +++ b/src/common/stats.cc @@ -1,3 +1,6 @@ +/*! + * Copyright 2022 by XGBoost Contributors + */ #include "stats.h" #include // std::accumulate From 6b4ad58a0f4cb755969ebde5abb5e1f2d23fe078 Mon Sep 17 00:00:00 2001 From: fis Date: Mon, 26 Sep 2022 17:22:51 +0800 Subject: [PATCH 016/133] ama. --- amalgamation/xgboost-all0.cc | 11 ++++++----- src/objective/regression_loss.h | 15 +++------------ 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/amalgamation/xgboost-all0.cc b/amalgamation/xgboost-all0.cc index d550a16b1853..293b6570b6f6 100644 --- a/amalgamation/xgboost-all0.cc +++ b/amalgamation/xgboost-all0.cc @@ -18,13 +18,14 @@ #include "../src/metric/survival_metric.cc" // objectives -#include "../src/objective/objective.cc" -#include "../src/objective/regression_obj.cc" +#include "../src/objective/adaptive.cc" +#include "../src/objective/aft_obj.cc" +#include "../src/objective/hinge.cc" +#include "../src/objective/init_estimation.cc" #include "../src/objective/multiclass_obj.cc" +#include "../src/objective/objective.cc" #include "../src/objective/rank_obj.cc" -#include "../src/objective/hinge.cc" -#include "../src/objective/aft_obj.cc" -#include "../src/objective/adaptive.cc" +#include "../src/objective/regression_obj.cc" // gbms #include "../src/gbm/gbm.cc" diff --git a/src/objective/regression_loss.h b/src/objective/regression_loss.h index c397bcbfd135..b4a34dbe025b 100644 --- a/src/objective/regression_loss.h +++ b/src/objective/regression_loss.h @@ -96,8 +96,6 @@ struct SquaredLogError { // logistic loss for probability regression task struct LogisticRegression { - // duplication is necessary, as __device__ specifier - // cannot be made conditional on template parameter XGBOOST_DEVICE static bst_float PredTransform(bst_float x) { return common::Sigmoid(x); } XGBOOST_DEVICE static bool CheckLabel(bst_float x) { return x >= 0.0f && x <= 1.0f; } XGBOOST_DEVICE static bst_float FirstOrderGradient(bst_float predt, bst_float label) { @@ -108,22 +106,15 @@ struct LogisticRegression { return fmaxf(predt * (1.0f - predt), eps); } template - static T PredTransform(T x) { return common::Sigmoid(x); } - template - static T FirstOrderGradient(T predt, T label) { return predt - label; } - template - static T SecondOrderGradient(T predt, T label) { - const T eps = T(1e-16f); - return std::max(predt * (T(1.0f) - predt), eps); + static T PredTransform(T x) { + return common::Sigmoid(x); } static bst_float ProbToMargin(bst_float base_score) { CHECK(base_score > 0.0f && base_score < 1.0f) << "base_score must be in (0,1) for logistic loss, got: " << base_score; return -logf(1.0f / base_score - 1.0f); } - static const char* LabelErrorMsg() { - return "label must be in [0,1] for logistic regression"; - } + static const char* LabelErrorMsg() { return "label must be in [0,1] for logistic regression"; } static const char* DefaultEvalMetric() { return "rmse"; } static const char* Name() { return "reg:logistic"; } From 99e0fd376f636ed64383437cb29af76871b16750 Mon Sep 17 00:00:00 2001 From: fis Date: Mon, 26 Sep 2022 17:29:23 +0800 Subject: [PATCH 017/133] revert. 
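Restore the amalgamation include order from master; sorting the list was
unrelated churn. The new init_estimation.cc translation unit is added back
by itself in the next commit.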
--- amalgamation/xgboost-all0.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/amalgamation/xgboost-all0.cc b/amalgamation/xgboost-all0.cc index 293b6570b6f6..d550a16b1853 100644 --- a/amalgamation/xgboost-all0.cc +++ b/amalgamation/xgboost-all0.cc @@ -18,14 +18,13 @@ #include "../src/metric/survival_metric.cc" // objectives -#include "../src/objective/adaptive.cc" -#include "../src/objective/aft_obj.cc" -#include "../src/objective/hinge.cc" -#include "../src/objective/init_estimation.cc" -#include "../src/objective/multiclass_obj.cc" #include "../src/objective/objective.cc" -#include "../src/objective/rank_obj.cc" #include "../src/objective/regression_obj.cc" +#include "../src/objective/multiclass_obj.cc" +#include "../src/objective/rank_obj.cc" +#include "../src/objective/hinge.cc" +#include "../src/objective/aft_obj.cc" +#include "../src/objective/adaptive.cc" // gbms #include "../src/gbm/gbm.cc" From 9a109a82326e239785edad3582d3e6095a3af049 Mon Sep 17 00:00:00 2001 From: fis Date: Mon, 26 Sep 2022 17:29:49 +0800 Subject: [PATCH 018/133] amalgamation. --- amalgamation/xgboost-all0.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/amalgamation/xgboost-all0.cc b/amalgamation/xgboost-all0.cc index d550a16b1853..0ba5f89525f6 100644 --- a/amalgamation/xgboost-all0.cc +++ b/amalgamation/xgboost-all0.cc @@ -25,6 +25,7 @@ #include "../src/objective/hinge.cc" #include "../src/objective/aft_obj.cc" #include "../src/objective/adaptive.cc" +#include "../src/objective/init_estimation.cc" // gbms #include "../src/gbm/gbm.cc" From e8c60a82b54f8a8683a41ff89459bb3c54e8a2f3 Mon Sep 17 00:00:00 2001 From: fis Date: Mon, 26 Sep 2022 18:58:02 +0800 Subject: [PATCH 019/133] Fix tests. --- tests/ci_build/lint_python.py | 1 + tests/python/test_training_continuation.py | 10 +++--- tests/python/test_tree_regularization.py | 40 +++++++++++++--------- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index cdaf6615bf3f..5705f2bdbd3f 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ -126,6 +126,7 @@ def print_summary_map(result_map: Dict[str, Dict[str, int]]) -> int: "tests/python/test_config.py", "tests/python/test_spark/", "tests/python-gpu/test_gpu_spark/", + "tests/python/test_tree_regularization.py", "tests/ci_build/lint_python.py", # demo "demo/guide-python/cat_in_the_dat.py", diff --git a/tests/python/test_training_continuation.py b/tests/python/test_training_continuation.py index 31a408170766..df3ce38e6a8b 100644 --- a/tests/python/test_training_continuation.py +++ b/tests/python/test_training_continuation.py @@ -1,10 +1,11 @@ -import xgboost as xgb -import testing as tm -import numpy as np -import pytest import os import tempfile +import numpy as np +import pytest +import testing as tm + +import xgboost as xgb rng = np.random.RandomState(1337) @@ -15,6 +16,7 @@ class TestTrainingContinuation: def generate_parameters(self): xgb_params_01_binary = { 'nthread': 1, + "objective": "binary:logistic", } xgb_params_02_binary = { diff --git a/tests/python/test_tree_regularization.py b/tests/python/test_tree_regularization.py index 92fa9fb51ff2..c5bace3b61bb 100644 --- a/tests/python/test_tree_regularization.py +++ b/tests/python/test_tree_regularization.py @@ -1,19 +1,21 @@ import numpy as np -import xgboost as xgb - from numpy.testing import assert_approx_equal +import xgboost as xgb + train_data = xgb.DMatrix(np.array([[1]]), label=np.array([1])) class 
TestTreeRegularization: def test_alpha(self): params = { - 'tree_method': 'exact', 'verbosity': 0, - 'objective': 'reg:squarederror', - 'eta': 1, - 'lambda': 0, - 'alpha': 0.1 + "tree_method": "exact", + "verbosity": 0, + "objective": "reg:squarederror", + "eta": 1, + "lambda": 0, + "alpha": 0.1, + "base_score": 0.5, } model = xgb.train(params, train_data, 1) @@ -27,11 +29,13 @@ def test_alpha(self): def test_lambda(self): params = { - 'tree_method': 'exact', 'verbosity': 0, - 'objective': 'reg:squarederror', - 'eta': 1, - 'lambda': 1, - 'alpha': 0 + "tree_method": "exact", + "verbosity": 0, + "objective": "reg:squarederror", + "eta": 1, + "lambda": 1, + "alpha": 0, + "base_score": 0.5, } model = xgb.train(params, train_data, 1) @@ -45,11 +49,13 @@ def test_lambda(self): def test_alpha_and_lambda(self): params = { - 'tree_method': 'exact', 'verbosity': 1, - 'objective': 'reg:squarederror', - 'eta': 1, - 'lambda': 1, - 'alpha': 0.1 + "tree_method": "exact", + "verbosity": 1, + "objective": "reg:squarederror", + "eta": 1, + "lambda": 1, + "alpha": 0.1, + "base_score": 0.5, } model = xgb.train(params, train_data, 1) From bacaee3c50461fcc77fc52bd040d8b420203b2ee Mon Sep 17 00:00:00 2001 From: fis Date: Mon, 26 Sep 2022 22:39:16 +0800 Subject: [PATCH 020/133] Fix demo. --- demo/guide-python/feature_weights.py | 38 +++++++++++++++++----------- tests/ci_build/lint_python.py | 2 ++ 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/demo/guide-python/feature_weights.py b/demo/guide-python/feature_weights.py index 34c8ed44026b..b12edb9415ec 100644 --- a/demo/guide-python/feature_weights.py +++ b/demo/guide-python/feature_weights.py @@ -1,20 +1,22 @@ -''' +""" Demo for using feature weight to change column sampling ======================================================= .. versionadded:: 1.3.0 -''' +""" + +import argparse import numpy as np -import xgboost from matplotlib import pyplot as plt -import argparse + +import xgboost -def main(args): +def main(args: argparse.Namespace) -> None: rng = np.random.RandomState(1994) - kRows = 1000 + kRows = 4196 kCols = 10 X = rng.randn(kRows, kCols) @@ -26,26 +28,32 @@ def main(args): dtrain = xgboost.DMatrix(X, y) dtrain.set_info(feature_weights=fw) - bst = xgboost.train({'tree_method': 'hist', - 'colsample_bynode': 0.2}, - dtrain, num_boost_round=10, - evals=[(dtrain, 'd')]) + # Perform column sampling for each node split evaluation, the sampling process is + # weighted by feature weights. 
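+    # With colsample_bynode=0.2 each split samples two of the ten columns,
+    # and the draw is weighted by `fw`: feature 0 carries zero weight and is
+    # never selected, which the score-map assertions below depend on.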
+ bst = xgboost.train( + {"tree_method": "hist", "colsample_bynode": 0.2}, + dtrain, + num_boost_round=10, + evals=[(dtrain, "d")], + ) feature_map = bst.get_fscore() + # feature zero has 0 weight - assert feature_map.get('f0', None) is None - assert max(feature_map.values()) == feature_map.get('f9') + assert feature_map.get("f0", None) is None + assert max(feature_map.values()) == feature_map.get("f9") if args.plot: xgboost.plot_importance(bst) plt.show() -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - '--plot', + "--plot", type=int, default=1, - help='Set to 0 to disable plotting the evaluation history.') + help="Set to 0 to disable plotting the evaluation history.", + ) args = parser.parse_args() main(args) diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index 5705f2bdbd3f..4e7747c25007 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ -131,6 +131,7 @@ def print_summary_map(result_map: Dict[str, Dict[str, int]]) -> int: # demo "demo/guide-python/cat_in_the_dat.py", "demo/guide-python/categorical.py", + "demo/guide-python/feature_weights.py", "demo/guide-python/spark_estimator_examples.py", ] ): @@ -143,6 +144,7 @@ def print_summary_map(result_map: Dict[str, Dict[str, int]]) -> int: "python-package/xgboost/", "demo/guide-python/external_memory.py", "demo/guide-python/cat_in_the_dat.py", + "demo/guide-python/feature_weights.py", "tests/python/test_data_iterator.py", "tests/python/test_spark/test_data.py", "tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py", From 7bb82cd9380b236723a1c491cd90ad1f6761871c Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 27 Sep 2022 10:45:47 +0800 Subject: [PATCH 021/133] [WIP] try to use decision stump. --- src/objective/init_estimation.cc | 14 ++++++++++++++ src/objective/init_estimation.cu | 12 ++++++++++++ src/objective/init_estimation.h | 10 ++++++++++ src/objective/regression_loss.h | 21 --------------------- src/objective/regression_obj.cu | 22 +++++++++++++++++++--- 5 files changed, 55 insertions(+), 24 deletions(-) diff --git a/src/objective/init_estimation.cc b/src/objective/init_estimation.cc index 396defebc07f..2281d11197ae 100644 --- a/src/objective/init_estimation.cc +++ b/src/objective/init_estimation.cc @@ -29,6 +29,20 @@ double WeightedMean(Context const* ctx, MetaInfo const& info) { rabit::Allreduce(&res, 1); return res; } + +double FitStump(Context const* ctx, HostDeviceVector const& gpair) { + auto const& h_gpair = gpair.ConstHostVector(); + auto it = common::MakeIndexTransformIter([&](auto i) { + auto const& g = h_gpair[i]; + return GradientPairPrecise{g}; + }); + auto sum = common::cpu_impl::Reduce(ctx, it, it + gpair.Size(), GradientPairPrecise{}); + return sum.GetGrad() / std::max(sum.GetHess(), 1e-6); +} } // namespace cpu_impl + +double FitStump(Context const* ctx, HostDeviceVector const& gpair) { + return ctx->IsCPU() ? 
cpu_impl::FitStump(ctx, gpair) : cuda_impl::FitStump(ctx, gpair); +} } // namespace obj } // namespace xgboost diff --git a/src/objective/init_estimation.cu b/src/objective/init_estimation.cu index 44c3567a7c03..6a9cc313bb72 100644 --- a/src/objective/init_estimation.cu +++ b/src/objective/init_estimation.cu @@ -32,6 +32,18 @@ double WeightedMean(Context const* ctx, MetaInfo const& info) { }); return common::cuda_impl::Reduce(ctx, it, it + y.Size(), 0.0); } + +double FitStump(Context const* ctx, HostDeviceVector const& gpair) { + gpair.SetDevice(ctx->gpu_id); + auto const& d_gpair = gpair.ConstDeviceSpan(); + auto it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) -> GradientPairPrecise { + return GradientPairPrecise{d_gpair[i]}; + }); + auto sum = common::cuda_impl::Reduce(ctx, it, it + d_gpair.size(), GradientPairPrecise{}); + return sum.GetGrad() / std::max(sum.GetHess(), 1e-6); +} } // namespace cuda_impl } // namespace obj } // namespace xgboost diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h index b553feb70e62..ee8869431004 100644 --- a/src/objective/init_estimation.h +++ b/src/objective/init_estimation.h @@ -25,6 +25,14 @@ inline double WeightedMean(Context const*, MetaInfo const&) { return 0.0; } #endif // !defined(XGBOOST_USE_CUDA) + +double FitStump(Context const* ctx, HostDeviceVector const& gpair); +#if !defined(XGBOOST_USE_CUDA) +double FitStump(Context const*, HostDeviceVector const&) { + common::AssertGPUSupport(); + return 0.0; +} +#endif // !defined(XGBOOST_USE_CUDA) } // namespace cuda_impl /** @@ -34,6 +42,8 @@ inline double WeightedMean(Context const*, MetaInfo const&) { inline double WeightedMean(Context const* ctx, MetaInfo const& info) { return ctx->IsCPU() ? 
cpu_impl::WeightedMean(ctx, info) : cuda_impl::WeightedMean(ctx, info); } + +double FitStump(Context const* ctx, HostDeviceVector const& gpair); } // namespace obj } // namespace xgboost #endif // XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ diff --git a/src/objective/regression_loss.h b/src/objective/regression_loss.h index b4a34dbe025b..1983c9f74908 100644 --- a/src/objective/regression_loss.h +++ b/src/objective/regression_loss.h @@ -52,15 +52,6 @@ struct LinearSquareLoss { static const char* Name() { return "reg:squarederror"; } static ObjInfo Info() { return {ObjInfo::kRegression, true, false}; } - - static bool InitEstimation(Context const* ctx, MetaInfo const& info, - linalg::Tensor* base_margin) { - CheckInitInputs(info); - base_margin->Reshape(1); - auto out = base_margin->HostView(); - out(0) = WeightedMean(ctx, info); - return true; - } }; struct SquaredLogError { @@ -88,10 +79,6 @@ struct SquaredLogError { static const char* Name() { return "reg:squaredlogerror"; } static ObjInfo Info() { return ObjInfo::kRegression; } - - static bool InitEstimation(Context const*, MetaInfo const&, linalg::Tensor*) { - return false; - } }; // logistic loss for probability regression task @@ -120,10 +107,6 @@ struct LogisticRegression { static const char* Name() { return "reg:logistic"; } static ObjInfo Info() { return ObjInfo::kRegression; } - - static bool InitEstimation(Context const*, MetaInfo const&, linalg::Tensor*) { - return false; - } }; // logistic loss for binary classification task @@ -168,10 +151,6 @@ struct LogisticRaw : public LogisticRegression { static const char* Name() { return "binary:logitraw"; } static ObjInfo Info() { return ObjInfo::kRegression; } - - static bool InitEstimation(Context const*, MetaInfo const&, linalg::Tensor*) { - return false; - } }; } // namespace obj diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index ffa89d76e48c..4ad1f83e4949 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -161,9 +161,16 @@ class RegLossObj : public ObjFunction { } void InitEstimation(MetaInfo const& info, linalg::Tensor* base_margin) const override { - if (!Loss::InitEstimation(ctx_, info, base_margin)) { - ObjFunction::InitEstimation(info, base_margin); - } + HostDeviceVector dummy_predt(info.labels.Size(), 0.0f); + HostDeviceVector gpair(info.labels.Size()); + using Self = std::remove_cv_t>; + Self new_obj; + new_obj.param_ = this->param_; + new_obj.GetGradient(dummy_predt, info, 0, &gpair); + auto intercept = FitStump(ctx_, gpair); + base_margin->Reshape(1); + auto out = base_margin->HostView(); + out(0) = intercept; } void SaveConfig(Json* p_out) const override { @@ -180,6 +187,15 @@ class RegLossObj : public ObjFunction { RegLossParam param_; }; +template <> +void RegLossObj::InitEstimation(MetaInfo const& info, + linalg::Tensor* base_margin) const { + CheckInitInputs(info); + base_margin->Reshape(1); + auto out = base_margin->HostView(); + out(0) = WeightedMean(ctx_, info); +} + // register the objective functions DMLC_REGISTER_PARAMETER(RegLossParam); From 07e120e300550db522065242a43670f9149f81f0 Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 27 Sep 2022 10:58:12 +0800 Subject: [PATCH 022/133] cpu build. 
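The CUDA fallback stub for FitStump is defined in a header, so it has to be
inline: without it, every translation unit that includes init_estimation.h
emits its own definition and CPU-only builds fail to link with duplicate
symbols.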
--- src/objective/init_estimation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h index ee8869431004..69db3c495320 100644 --- a/src/objective/init_estimation.h +++ b/src/objective/init_estimation.h @@ -28,7 +28,7 @@ inline double WeightedMean(Context const*, MetaInfo const&) { double FitStump(Context const* ctx, HostDeviceVector const& gpair); #if !defined(XGBOOST_USE_CUDA) -double FitStump(Context const*, HostDeviceVector const&) { +inline double FitStump(Context const*, HostDeviceVector const&) { common::AssertGPUSupport(); return 0.0; } From 34395329379f2719e1e9cf45f11bfd7660ae2d38 Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 27 Sep 2022 11:02:22 +0800 Subject: [PATCH 023/133] Fix new obj. --- src/objective/regression_obj.cu | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 4ad1f83e4949..9c39df4f69ac 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -163,13 +163,17 @@ class RegLossObj : public ObjFunction { void InitEstimation(MetaInfo const& info, linalg::Tensor* base_margin) const override { HostDeviceVector dummy_predt(info.labels.Size(), 0.0f); HostDeviceVector gpair(info.labels.Size()); - using Self = std::remove_cv_t>; - Self new_obj; - new_obj.param_ = this->param_; - new_obj.GetGradient(dummy_predt, info, 0, &gpair); + + std::unique_ptr new_obj{ObjFunction::Create(Loss::Name(), ctx_)}; + Json config{Object{}}; + this->SaveConfig(&config); + new_obj->LoadConfig(config); + new_obj->GetGradient(dummy_predt, info, 0, &gpair); + auto intercept = FitStump(ctx_, gpair); base_margin->Reshape(1); auto out = base_margin->HostView(); + intercept = Loss::PredTransform(intercept); out(0) = intercept; } From 67feffa352200c571d8c12506d8652f2ebd39d44 Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 27 Sep 2022 11:06:37 +0800 Subject: [PATCH 024/133] Cleanup. 
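Drop the host-side template duplicates of PredTransform, FirstOrderGradient
and SecondOrderGradient from the loss structs. They existed only because a
__device__ specifier cannot be made conditional on a template parameter;
nothing on the host instantiates them any more, so the XGBOOST_DEVICE
overloads are sufficient.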
--- src/objective/regression_loss.h | 42 ++++----------------------------- 1 file changed, 4 insertions(+), 38 deletions(-) diff --git a/src/objective/regression_loss.h b/src/objective/regression_loss.h index 1983c9f74908..fe000d68a36b 100644 --- a/src/objective/regression_loss.h +++ b/src/objective/regression_loss.h @@ -30,22 +30,12 @@ inline void CheckInitInputs(MetaInfo const& info) { // common regressions // linear regression struct LinearSquareLoss { - // duplication is necessary, as __device__ specifier - // cannot be made conditional on template parameter XGBOOST_DEVICE static bst_float PredTransform(bst_float x) { return x; } XGBOOST_DEVICE static bool CheckLabel(bst_float) { return true; } XGBOOST_DEVICE static bst_float FirstOrderGradient(bst_float predt, bst_float label) { return predt - label; } - XGBOOST_DEVICE static bst_float SecondOrderGradient(bst_float, bst_float) { - return 1.0f; - } - template - static T PredTransform(T x) { return x; } - template - static T FirstOrderGradient(T predt, T label) { return predt - label; } - template - static T SecondOrderGradient(T predt, T label) { return T(1.0f); } + XGBOOST_DEVICE static bst_float SecondOrderGradient(bst_float, bst_float) { return 1.0f; } static bst_float ProbToMargin(bst_float base_score) { return base_score; } static const char* LabelErrorMsg() { return ""; } static const char* DefaultEvalMetric() { return "rmse"; } @@ -56,17 +46,14 @@ struct LinearSquareLoss { struct SquaredLogError { XGBOOST_DEVICE static bst_float PredTransform(bst_float x) { return x; } - XGBOOST_DEVICE static bool CheckLabel(bst_float label) { - return label > -1; - } + XGBOOST_DEVICE static bool CheckLabel(bst_float label) { return label > -1; } XGBOOST_DEVICE static bst_float FirstOrderGradient(bst_float predt, bst_float label) { predt = fmaxf(predt, -1 + 1e-6); // ensure correct value for log1p return (std::log1p(predt) - std::log1p(label)) / (predt + 1); } XGBOOST_DEVICE static bst_float SecondOrderGradient(bst_float predt, bst_float label) { predt = fmaxf(predt, -1 + 1e-6); - float res = (-std::log1p(predt) + std::log1p(label) + 1) / - std::pow(predt + 1, 2); + float res = (-std::log1p(predt) + std::log1p(label) + 1) / std::pow(predt + 1, 2); res = fmaxf(res, 1e-6f); return res; } @@ -92,10 +79,6 @@ struct LogisticRegression { const float eps = 1e-16f; return fmaxf(predt * (1.0f - predt), eps); } - template - static T PredTransform(T x) { - return common::Sigmoid(x); - } static bst_float ProbToMargin(bst_float base_score) { CHECK(base_score > 0.0f && base_score < 1.0f) << "base_score must be in (0,1) for logistic loss, got: " << base_score; @@ -118,8 +101,6 @@ struct LogisticClassification : public LogisticRegression { // logistic loss, but predict un-transformed margin struct LogisticRaw : public LogisticRegression { - // duplication is necessary, as __device__ specifier - // cannot be made conditional on template parameter XGBOOST_DEVICE static bst_float PredTransform(bst_float x) { return x; } XGBOOST_DEVICE static bst_float FirstOrderGradient(bst_float predt, bst_float label) { predt = common::Sigmoid(predt); @@ -130,22 +111,7 @@ struct LogisticRaw : public LogisticRegression { predt = common::Sigmoid(predt); return fmaxf(predt * (1.0f - predt), eps); } - template - static T PredTransform(T x) { return x; } - template - static T FirstOrderGradient(T predt, T label) { - predt = common::Sigmoid(predt); - return predt - label; - } - template - static T SecondOrderGradient(T predt, T label) { - const T eps = T(1e-16f); - predt = 
common::Sigmoid(predt); - return std::max(predt * (T(1.0f) - predt), eps); - } - static bst_float ProbToMargin(bst_float base_score) { - return base_score; - } + static bst_float ProbToMargin(bst_float base_score) { return base_score; } static const char* DefaultEvalMetric() { return "logloss"; } static const char* Name() { return "binary:logitraw"; } From 7f4b84b2e1369accb0f0358244296961e90f3351 Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 27 Sep 2022 11:16:13 +0800 Subject: [PATCH 025/133] Fix. --- src/objective/regression_loss.h | 9 --------- src/objective/regression_obj.cu | 19 ++++++++++++++----- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/objective/regression_loss.h b/src/objective/regression_loss.h index fe000d68a36b..ec571e78a9b5 100644 --- a/src/objective/regression_loss.h +++ b/src/objective/regression_loss.h @@ -18,15 +18,6 @@ namespace xgboost { namespace obj { - -inline void CheckInitInputs(MetaInfo const& info) { - CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels."; - if (!info.weights_.Empty()) { - CHECK_EQ(info.weights_.Size(), info.num_row_) - << "Number of weights should be equal to number of data points."; - } -} - // common regressions // linear regression struct LinearSquareLoss { diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 9c39df4f69ac..767e01684637 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -39,6 +39,14 @@ namespace xgboost { namespace obj { namespace { +void CheckInitInputs(MetaInfo const& info) { + CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels."; + if (!info.weights_.Empty()) { + CHECK_EQ(info.weights_.Size(), info.num_row_) + << "Number of weights should be equal to number of data points."; + } +} + void CheckRegInputs(MetaInfo const& info, HostDeviceVector const& preds) { CheckInitInputs(info); CHECK_EQ(info.labels.Size(), preds.Size()) << "Invalid shape of labels."; @@ -161,6 +169,7 @@ class RegLossObj : public ObjFunction { } void InitEstimation(MetaInfo const& info, linalg::Tensor* base_margin) const override { + CheckInitInputs(info); HostDeviceVector dummy_predt(info.labels.Size(), 0.0f); HostDeviceVector gpair(info.labels.Size()); @@ -170,11 +179,11 @@ class RegLossObj : public ObjFunction { new_obj->LoadConfig(config); new_obj->GetGradient(dummy_predt, info, 0, &gpair); - auto intercept = FitStump(ctx_, gpair); + auto score = FitStump(ctx_, gpair); base_margin->Reshape(1); auto out = base_margin->HostView(); - intercept = Loss::PredTransform(intercept); - out(0) = intercept; + score = Loss::PredTransform(score); + out(0) = score; } void SaveConfig(Json* p_out) const override { @@ -192,8 +201,8 @@ class RegLossObj : public ObjFunction { }; template <> -void RegLossObj::InitEstimation(MetaInfo const& info, - linalg::Tensor* base_margin) const { +void RegLossObj::InitEstimation(MetaInfo const& info, + linalg::Tensor* base_margin) const { CheckInitInputs(info); base_margin->Reshape(1); auto out = base_margin->HostView(); From 5a60e1f859c01ac14288bc8b5d7225e39d721641 Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 27 Sep 2022 11:17:58 +0800 Subject: [PATCH 026/133] Lint. 
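On Windows, <windows.h> defines min and max as macros, which breaks the
std::max calls added to these translation units. Define NOMINMAX before any
header is included so the macros never enter the picture.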
--- src/objective/init_estimation.cc | 5 +++++ src/objective/init_estimation.cu | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/src/objective/init_estimation.cc b/src/objective/init_estimation.cc index 2281d11197ae..2b0b2a3fb86c 100644 --- a/src/objective/init_estimation.cc +++ b/src/objective/init_estimation.cc @@ -4,8 +4,13 @@ * \brief Utilities for estimating initial score. */ +#if !defined(NOMINMAX) && defined(_WIN32) +#define NOMINMAX +#endif // !defined(NOMINMAX) #include "init_estimation.h" +#include // std::max + #include "../common/common.h" // OptionalWeights #include "../common/numeric.h" // cpu_impl::Reduce #include "../common/transform_iterator.h" // MakeIndexTransformIter diff --git a/src/objective/init_estimation.cu b/src/objective/init_estimation.cu index 6a9cc313bb72..779b51d59668 100644 --- a/src/objective/init_estimation.cu +++ b/src/objective/init_estimation.cu @@ -3,8 +3,12 @@ * * \brief Utilities for estimating initial score. */ +#if !defined(NOMINMAX) && defined(_WIN32) +#define NOMINMAX +#endif // !defined(NOMINMAX) #include // thrust::make_counting_iterator +#include // std::max #include // std::uint64_t #include // std::size_t From ee30d1384ab4dd17c2d4871498aa8580b77dbefe Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 27 Sep 2022 11:22:52 +0800 Subject: [PATCH 027/133] Skip multi target. --- src/objective/regression_obj.cu | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 767e01684637..132d19627c73 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -170,6 +170,15 @@ class RegLossObj : public ObjFunction { void InitEstimation(MetaInfo const& info, linalg::Tensor* base_margin) const override { CheckInitInputs(info); + base_margin->Reshape(1); + auto out = base_margin->HostView(); + + if (this->Targets(info) > 1) { + // multi-output not yet supported due to constraint in binary model format. + out(0) = DefaultBaseScore(); + return; + } + HostDeviceVector dummy_predt(info.labels.Size(), 0.0f); HostDeviceVector gpair(info.labels.Size()); @@ -180,8 +189,6 @@ class RegLossObj : public ObjFunction { new_obj->GetGradient(dummy_predt, info, 0, &gpair); auto score = FitStump(ctx_, gpair); - base_margin->Reshape(1); - auto out = base_margin->HostView(); score = Loss::PredTransform(score); out(0) = score; } From 0217902f3ea25075b95a3f7614ac9f7b0bcdf66f Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 27 Sep 2022 21:30:17 +0800 Subject: [PATCH 028/133] Fix leaf weight. 
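The stump weight is the Newton step for a single constant leaf. Minimising
the second-order approximation of the objective

    L(w) = \sum_i \left( g_i w + \tfrac{1}{2} h_i w^2 \right)

and solving dL/dw = 0 gives

    w^* = -\frac{\sum_i g_i}{\sum_i h_i}

so the gradient sum must be negated; the previous revision returned +G/H
and stepped in the wrong direction.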
--- src/objective/init_estimation.cc | 2 +- src/objective/init_estimation.cu | 2 +- src/objective/regression_obj.cu | 9 --------- 3 files changed, 2 insertions(+), 11 deletions(-) diff --git a/src/objective/init_estimation.cc b/src/objective/init_estimation.cc index 2b0b2a3fb86c..bf7ead512921 100644 --- a/src/objective/init_estimation.cc +++ b/src/objective/init_estimation.cc @@ -42,7 +42,7 @@ double FitStump(Context const* ctx, HostDeviceVector const& gpair) return GradientPairPrecise{g}; }); auto sum = common::cpu_impl::Reduce(ctx, it, it + gpair.Size(), GradientPairPrecise{}); - return sum.GetGrad() / std::max(sum.GetHess(), 1e-6); + return -sum.GetGrad() / std::max(sum.GetHess(), 1e-6); } } // namespace cpu_impl diff --git a/src/objective/init_estimation.cu b/src/objective/init_estimation.cu index 779b51d59668..b3175d55d419 100644 --- a/src/objective/init_estimation.cu +++ b/src/objective/init_estimation.cu @@ -46,7 +46,7 @@ double FitStump(Context const* ctx, HostDeviceVector const& gpair) return GradientPairPrecise{d_gpair[i]}; }); auto sum = common::cuda_impl::Reduce(ctx, it, it + d_gpair.size(), GradientPairPrecise{}); - return sum.GetGrad() / std::max(sum.GetHess(), 1e-6); + return -sum.GetGrad() / std::max(sum.GetHess(), 1e-6); } } // namespace cuda_impl } // namespace obj diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 132d19627c73..abc559e88c34 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -207,15 +207,6 @@ class RegLossObj : public ObjFunction { RegLossParam param_; }; -template <> -void RegLossObj::InitEstimation(MetaInfo const& info, - linalg::Tensor* base_margin) const { - CheckInitInputs(info); - base_margin->Reshape(1); - auto out = base_margin->HostView(); - out(0) = WeightedMean(ctx_, info); -} - // register the objective functions DMLC_REGISTER_PARAMETER(RegLossParam); From 652f143e763752f36fe10d06f2cd94c0c04c3922 Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 28 Sep 2022 13:54:20 +0800 Subject: [PATCH 029/133] Cleanup weighted mean. 
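With the stump fit in place the dedicated WeightedMean kernels are
redundant for squared error: starting from an all-zero prediction,
GetGradient yields g_i = -w_i y_i and h_i = w_i, so

    -\frac{\sum_i g_i}{\sum_i h_i} = \frac{\sum_i w_i y_i}{\sum_i w_i}

which is exactly the weighted mean that WeightedMean computed. Remove the
now-unused CPU and CUDA implementations.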
--- src/objective/init_estimation.cc | 17 ----------------- src/objective/init_estimation.cu | 16 ---------------- src/objective/init_estimation.h | 20 -------------------- src/objective/regression_loss.h | 8 ++------ src/objective/regression_obj.cu | 1 + 5 files changed, 3 insertions(+), 59 deletions(-) diff --git a/src/objective/init_estimation.cc b/src/objective/init_estimation.cc index bf7ead512921..bb573f7f284f 100644 --- a/src/objective/init_estimation.cc +++ b/src/objective/init_estimation.cc @@ -11,30 +11,13 @@ #include // std::max -#include "../common/common.h" // OptionalWeights #include "../common/numeric.h" // cpu_impl::Reduce #include "../common/transform_iterator.h" // MakeIndexTransformIter #include "rabit/rabit.h" -#include "xgboost/linalg.h" // UnravelIndex namespace xgboost { namespace obj { namespace cpu_impl { -double WeightedMean(Context const* ctx, MetaInfo const& info) { - std::uint64_t n_samples = info.num_row_; - rabit::Allreduce(&n_samples, 1); - auto y = info.labels.HostView(); - auto w = common::OptionalWeights{info.weights_.ConstHostSpan()}; - auto it = common::MakeIndexTransformIter([&](size_t i) -> double { - size_t r, c; - std::tie(r, c) = linalg::UnravelIndex(i, y.Shape()); - return y(r, c) * w[r] / static_cast(n_samples); - }); - auto res = common::cpu_impl::Reduce(ctx, it, it + y.Size(), 0.0); - rabit::Allreduce(&res, 1); - return res; -} - double FitStump(Context const* ctx, HostDeviceVector const& gpair) { auto const& h_gpair = gpair.ConstHostVector(); auto it = common::MakeIndexTransformIter([&](auto i) { diff --git a/src/objective/init_estimation.cu b/src/objective/init_estimation.cu index b3175d55d419..f3971ea161be 100644 --- a/src/objective/init_estimation.cu +++ b/src/objective/init_estimation.cu @@ -16,27 +16,11 @@ #include "../common/numeric.cuh" // Reduce #include "init_estimation.h" #include "rabit/rabit.h" -#include "xgboost/data.h" // MetaInfo #include "xgboost/generic_parameters.h" // Context -#include "xgboost/linalg.h" // UnravelIndex namespace xgboost { namespace obj { namespace cuda_impl { -double WeightedMean(Context const* ctx, MetaInfo const& info) { - std::uint64_t n_samples = info.num_row_; - rabit::Allreduce(&n_samples, 1); - auto y = info.labels.View(ctx->gpu_id); - auto w = common::OptionalWeights{info.weights_.ConstHostSpan()}; - auto it = dh::MakeTransformIterator( - thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) -> double { - auto idx = linalg::UnravelIndex(i, y.Shape()); - std::size_t r{std::get<0>(idx)}, c{std::get<1>(idx)}; - return y(r, c) * w[r] / static_cast(n_samples); - }); - return common::cuda_impl::Reduce(ctx, it, it + y.Size(), 0.0); -} - double FitStump(Context const* ctx, HostDeviceVector const& gpair) { gpair.SetDevice(ctx->gpu_id); auto const& d_gpair = gpair.ConstDeviceSpan(); diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h index 69db3c495320..bb64843f346f 100644 --- a/src/objective/init_estimation.h +++ b/src/objective/init_estimation.h @@ -13,19 +13,7 @@ namespace xgboost { namespace obj { -namespace cpu_impl { -double WeightedMean(Context const* ctx, MetaInfo const& info); -} // namespace cpu_impl - namespace cuda_impl { -double WeightedMean(Context const* ctx, MetaInfo const& info); -#if !defined(XGBOOST_USE_CUDA) -inline double WeightedMean(Context const*, MetaInfo const&) { - common::AssertGPUSupport(); - return 0.0; -} -#endif // !defined(XGBOOST_USE_CUDA) - double FitStump(Context const* ctx, HostDeviceVector const& gpair); #if 
!defined(XGBOOST_USE_CUDA) inline double FitStump(Context const*, HostDeviceVector const&) { @@ -35,14 +23,6 @@ inline double FitStump(Context const*, HostDeviceVector const&) { #endif // !defined(XGBOOST_USE_CUDA) } // namespace cuda_impl -/** - * \brief Weighted mean for distributed env. Not a general implementation since we have - * 2-dim label with 1-dim weight. - */ -inline double WeightedMean(Context const* ctx, MetaInfo const& info) { - return ctx->IsCPU() ? cpu_impl::WeightedMean(ctx, info) : cuda_impl::WeightedMean(ctx, info); -} - double FitStump(Context const* ctx, HostDeviceVector const& gpair); } // namespace obj } // namespace xgboost diff --git a/src/objective/regression_loss.h b/src/objective/regression_loss.h index ec571e78a9b5..1fd1621af376 100644 --- a/src/objective/regression_loss.h +++ b/src/objective/regression_loss.h @@ -5,16 +5,12 @@ #define XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_ #include -#include // MetaInfo -#include // Context -#include // Tensor #include -#include +#include #include "../common/math.h" -#include "init_estimation.h" // WeightedMean -#include "xgboost/task.h" +#include "xgboost/task.h" // ObjInfo namespace xgboost { namespace obj { diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index abc559e88c34..c251d46f14b5 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -22,6 +22,7 @@ #include "../common/transform.h" #include "./regression_loss.h" #include "adaptive.h" +#include "init_estimation.h" // FitStump #include "xgboost/base.h" #include "xgboost/data.h" #include "xgboost/generic_parameters.h" From 1a4ec0de3c9371107437035561b50f1becd9f2bb Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 28 Sep 2022 14:06:52 +0800 Subject: [PATCH 030/133] fix test. --- tests/python/test_early_stopping.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tests/python/test_early_stopping.py b/tests/python/test_early_stopping.py index 29f8fb4b0a2c..4ef9adf0d5a1 100644 --- a/tests/python/test_early_stopping.py +++ b/tests/python/test_early_stopping.py @@ -10,16 +10,12 @@ class TestEarlyStopping: @pytest.mark.skipif(**tm.no_sklearn()) def test_early_stopping_nonparallel(self): from sklearn.datasets import load_digits - try: - from sklearn.model_selection import train_test_split - except ImportError: - from sklearn.cross_validation import train_test_split + from sklearn.model_selection import train_test_split digits = load_digits(n_class=2) X = digits['data'] y = digits['target'] - X_train, X_test, y_train, y_test = train_test_split(X, y, - random_state=0) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) clf1 = xgb.XGBClassifier(learning_rate=0.1) clf1.fit(X_train, y_train, early_stopping_rounds=5, eval_metric="auc", eval_set=[(X_test, y_test)]) @@ -30,9 +26,22 @@ def test_early_stopping_nonparallel(self): assert clf1.best_score == clf2.best_score assert clf1.best_score != 1 # check overfit - clf3 = xgb.XGBClassifier(learning_rate=0.1) - clf3.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc", - eval_set=[(X_test, y_test)]) + clf3 = xgb.XGBClassifier( + learning_rate=0.1, + eval_metric="auc", + early_stopping_rounds=10 + ) + clf3.fit(X_train, y_train, eval_set=[(X_test, y_test)]) + assert 0.53 > clf3.get_params()["base_score"] > 0.5 + + clf3 = xgb.XGBClassifier( + learning_rate=0.1, + base_score=.5, + eval_metric="auc", + early_stopping_rounds=10 + ) + clf3.fit(X_train, y_train, eval_set=[(X_test, y_test)]) + assert 
clf3.best_score == 1 def evalerror(self, preds, dtrain): From f9d911139529a5c5645dc40d6b1334687286b072 Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 28 Sep 2022 14:09:39 +0800 Subject: [PATCH 031/133] fix test. --- tests/python/test_with_sklearn.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 7edc392f061d..0c4403e057c3 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -275,6 +275,7 @@ def test_feature_importances_gain(): random_state=0, tree_method="exact", learning_rate=0.1, importance_type="gain", + base_score=0.5, ).fit(X, y) exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., @@ -297,6 +298,7 @@ def test_feature_importances_gain(): tree_method="exact", learning_rate=0.1, importance_type="gain", + base_score=0.5, ).fit(X, y) np.testing.assert_almost_equal(xgb_model.feature_importances_, exp) @@ -305,6 +307,7 @@ def test_feature_importances_gain(): tree_method="exact", learning_rate=0.1, importance_type="gain", + base_score=0.5, ).fit(X, y) np.testing.assert_almost_equal(xgb_model.feature_importances_, exp) From ef39bf3cc42d296fa371d632d6f419f334facdb1 Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 28 Sep 2022 14:11:04 +0800 Subject: [PATCH 032/133] Fix test. --- tests/python/test_with_sklearn.py | 39 +++++++++++++++++++------------ 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 0c4403e057c3..d84456390d71 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -197,19 +197,22 @@ def test_stacking_classification(): X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) clf.fit(X_train, y_train).score(X_test, y_test) - @pytest.mark.skipif(**tm.no_pandas()) def test_feature_importances_weight(): from sklearn.datasets import load_digits digits = load_digits(n_class=2) - y = digits['target'] - X = digits['data'] + y = digits["target"] + X = digits["data"] + + xgb_model = xgb.XGBClassifier( + random_state=0, + tree_method="exact", + learning_rate=0.1, + importance_type="weight", + base_score=0.5, + ).fit(X, y) - xgb_model = xgb.XGBClassifier(random_state=0, - tree_method="exact", - learning_rate=0.1, - importance_type="weight").fit(X, y) exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.00833333, 0., 0., 0., 0., 0., 0., 0., 0., 0.025, 0.14166667, 0., 0., 0., 0., 0., 0., 0.00833333, 0.25833333, 0., 0., 0., 0., @@ -224,16 +227,22 @@ def test_feature_importances_weight(): import pandas as pd y = pd.Series(digits['target']) X = pd.DataFrame(digits['data']) - xgb_model = xgb.XGBClassifier(random_state=0, - tree_method="exact", - learning_rate=0.1, - importance_type="weight").fit(X, y) + xgb_model = xgb.XGBClassifier( + random_state=0, + tree_method="exact", + learning_rate=0.1, + base_score=.5, + importance_type="weight" + ).fit(X, y) np.testing.assert_almost_equal(xgb_model.feature_importances_, exp) - xgb_model = xgb.XGBClassifier(random_state=0, - tree_method="exact", - learning_rate=0.1, - importance_type="weight").fit(X, y) + xgb_model = xgb.XGBClassifier( + random_state=0, + tree_method="exact", + learning_rate=0.1, + importance_type="weight", + base_score=.5, + ).fit(X, y) np.testing.assert_almost_equal(xgb_model.feature_importances_, exp) with pytest.raises(ValueError): From 40dc08e8594177f8049dcf6dd653372cfbfc5bad Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 28 Sep 2022 14:22:59 +0800 Subject: [PATCH 033/133] Fix 
test. --- tests/python/test_with_sklearn.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index d84456390d71..4ad6bd33a4e4 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -606,18 +606,21 @@ def test_split_value_histograms(): digits_2class = load_digits(n_class=2) - X = digits_2class['data'] - y = digits_2class['target'] + X = digits_2class["data"] + y = digits_2class["target"] dm = xgb.DMatrix(X, label=y) - params = {'max_depth': 6, 'eta': 0.01, 'verbosity': 0, - 'objective': 'binary:logistic'} + params = { + "max_depth": 6, + "eta": 0.01, + "verbosity": 0, + "objective": "binary:logistic", + "base_score": 0.5, + } gbdt = xgb.train(params, dm, num_boost_round=10) - assert gbdt.get_split_value_histogram("not_there", - as_pandas=True).shape[0] == 0 - assert gbdt.get_split_value_histogram("not_there", - as_pandas=False).shape[0] == 0 + assert gbdt.get_split_value_histogram("not_there", as_pandas=True).shape[0] == 0 + assert gbdt.get_split_value_histogram("not_there", as_pandas=False).shape[0] == 0 assert gbdt.get_split_value_histogram("f28", bins=0).shape[0] == 1 assert gbdt.get_split_value_histogram("f28", bins=1).shape[0] == 1 assert gbdt.get_split_value_histogram("f28", bins=2).shape[0] == 2 From aa739fb7f162beeb037c07ec5bf0750ff2a7a03a Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 28 Sep 2022 14:44:58 +0800 Subject: [PATCH 034/133] Save the parameter. --- src/learner.cc | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/learner.cc b/src/learner.cc index 0d69db7642c6..76640498859e 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -87,7 +87,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter uint32_t num_target{1}; - int32_t base_score_estimated{0}; + std::int32_t base_score_estimated{0}; /*! \brief reserved field */ int reserved[25]; /*! \brief constructor */ @@ -104,7 +104,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter // Skip other legacy fields. 
Json ToJson() const { - Object obj; + Json obj{Object{}}; char floats[NumericLimits::kToCharsSize]; auto ret = to_chars(floats, floats + NumericLimits::kToCharsSize, base_score); CHECK(ret.ec == std::errc{}); @@ -119,15 +119,19 @@ struct LearnerModelParamLegacy : public dmlc::Parameter ret = to_chars(integers, integers + NumericLimits::kToCharsSize, static_cast(num_class)); CHECK(ret.ec == std::errc()); - obj["num_class"] = - std::string{integers, static_cast(std::distance(integers, ret.ptr))}; + obj["num_class"] = std::string{integers, static_cast(std::distance(integers, ret.ptr))}; ret = to_chars(integers, integers + NumericLimits::kToCharsSize, static_cast(num_target)); obj["num_target"] = std::string{integers, static_cast(std::distance(integers, ret.ptr))}; - return Json(std::move(obj)); + ret = to_chars(integers, integers + NumericLimits::kToCharsSize, + static_cast(base_score_estimated)); + obj["base_score_estimated"] = + std::string{integers, static_cast(std::distance(integers, ret.ptr))}; + + return obj; } void FromJson(Json const& obj) { auto const& j_param = get(obj); @@ -138,13 +142,15 @@ struct LearnerModelParamLegacy : public dmlc::Parameter if (n_targets_it != j_param.cend()) { m["num_target"] = get(n_targets_it->second); } + auto bse_it = j_param.find("base_score_estimated"); + if (bse_it != j_param.cend()) { + m["base_score_estimated"] = get(bse_it->second); + } this->Init(m); std::string str = get(j_param.at("base_score")); from_chars(str.c_str(), str.c_str() + str.size(), base_score); - // It can only be estimated during the first training, we consider it estimated afterward - base_score_estimated = 1; } LearnerModelParamLegacy ByteSwap() const { From 7685b4591a262df40cd1799e3b86f020edb416ab Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 28 Sep 2022 14:51:36 +0800 Subject: [PATCH 035/133] model schema. --- doc/model.schema | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/model.schema b/doc/model.schema index be8dd06d5a0a..5b855dd4586e 100644 --- a/doc/model.schema +++ b/doc/model.schema @@ -532,7 +532,8 @@ "properties": { "base_score": { "type": "string" }, "num_class": { "type": "string" }, - "num_feature": { "type": "string" } + "num_feature": { "type": "string" }, + "num_target": { "type": "string" } } } }, From b2bb9bb39a7418422449b29efb9e4c22ec76e3d5 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Wed, 5 Oct 2022 19:51:03 +0800 Subject: [PATCH 036/133] distributed. --- src/learner.cc | 5 ++++- src/objective/regression_obj.cu | 13 ++++++++++++- tests/python/test_with_dask.py | 10 ++++++---- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/learner.cc b/src/learner.cc index 76640498859e..f18545760c53 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -26,7 +26,6 @@ #include "common/charconv.h" #include "common/common.h" #include "common/io.h" -#include "common/linalg_op.h" #include "common/observer.h" #include "common/random.h" #include "common/threading_utils.h" @@ -410,6 +409,7 @@ class LearnerConfiguration : public Learner { // - model is configured second time due to change of parameter CHECK(obj_); if (!mparam_.base_score_estimated) { + std::lock_guard guard(config_lock_); if (p_fmat) { // We estimate it from input data. 
linalg::Tensor base_score; @@ -422,6 +422,9 @@ class LearnerConfiguration : public Learner { mparam_.base_score_estimated = true; // Update the shared model parameter this->ConfigureModelParam(); + auto sync_score = mparam_.base_score; + rabit::Broadcast(&sync_score, sizeof(sync_score), 0); + CHECK_EQ(sync_score, mparam_.base_score); } } diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index c251d46f14b5..a6ca58b91bf9 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -191,7 +191,18 @@ class RegLossObj : public ObjFunction { auto score = FitStump(ctx_, gpair); score = Loss::PredTransform(score); - out(0) = score; + + double w{0.0}; + if (info.weights_.Empty()) { + w = static_cast(info.num_row_); + } else { + w = common::Reduce(ctx_, info.weights_); + } + out(0) = w * score; + rabit::Allreduce(out.Values().data(), out.Values().size()); + rabit::Allreduce(&w, 1); + std::transform(linalg::cbegin(out), linalg::cend(out), linalg::begin(out), + [w](float v) { return v / w; }); } void SaveConfig(Json* p_out) const override { diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py index d6eb4f32b9f7..6b61abeba754 100644 --- a/tests/python/test_with_dask.py +++ b/tests/python/test_with_dask.py @@ -481,14 +481,16 @@ def run_boost_from_prediction( X, y, margin = deterministic_repartition(client, X, y, margin) predictions_1: dd.Series = model_1.predict(X, base_margin=margin) - cls_2 = xgb.dask.DaskXGBClassifier( + model_2 = xgb.dask.DaskXGBClassifier( learning_rate=0.3, n_estimators=8, tree_method=tree_method, max_bin=512 ) X, y, _ = deterministic_repartition(client, X, y, None) - cls_2.fit(X=X, y=y) - predictions_2: dd.Series = cls_2.predict(X) + model_2.fit(X=X, y=y) + predictions_2: dd.Series = model_2.predict(X) - assert np.all(predictions_1.compute() == predictions_2.compute()) + predt_1 = predictions_1.compute() + predt_2 = predictions_2.compute() + np.testing.assert_allclose(predt_1, predt_2, atol=1e-5) margined = xgb.dask.DaskXGBClassifier(n_estimators=4) X, y, margin = deterministic_repartition(client, X, y, margin) From 021a281c101c9f9c1c81662d32f790ed621c17d5 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Thu, 6 Oct 2022 12:17:58 +0800 Subject: [PATCH 037/133] Handle empty datasets. 
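When every worker holds an empty DMatrix, the allreduced sum of weights is
zero and the division would poison base_score with NaN/Inf (hence the new
CHECK(!std::isnan(...)) / CHECK(!std::isinf(...)) in the learner). A minimal
sketch of the fallback logic, assuming 0.5 stands in for
ObjFunction::DefaultBaseScore():

    import math

    DEFAULT_BASE_SCORE = 0.5  # assumption: mirrors ObjFunction::DefaultBaseScore()

    def normalize_base_score(weighted_score: float, weight_sum: float) -> float:
        # Inputs are the allreduced partial sums gathered from all workers.
        if math.isclose(weight_sum, 0.0, abs_tol=1e-12):
            # Empty dataset on every worker: skip the estimation entirely.
            return DEFAULT_BASE_SCORE
        return weighted_score / weight_sum
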
--- src/common/linalg_op.h | 4 ++-- src/learner.cc | 2 ++ src/objective/init_estimation.cc | 19 +++++++++++++++++++ src/objective/init_estimation.h | 6 ++++++ src/objective/regression_obj.cu | 15 ++++----------- 5 files changed, 33 insertions(+), 13 deletions(-) diff --git a/src/common/linalg_op.h b/src/common/linalg_op.h index deb9618033b7..0df7804757d2 100644 --- a/src/common/linalg_op.h +++ b/src/common/linalg_op.h @@ -44,12 +44,12 @@ void ElementWiseKernelHost(linalg::TensorView t, int32_t n_threads, Fn&& f #if !defined(XGBOOST_USE_CUDA) template -void ElementWiseKernelDevice(linalg::TensorView t, Fn&& fn, void* s = nullptr) { +void ElementWiseKernelDevice(linalg::TensorView, Fn&&, void* = nullptr) { common::AssertGPUSupport(); } template -void ElementWiseTransformDevice(linalg::TensorView t, Fn&& fn, void* s = nullptr) { +void ElementWiseTransformDevice(linalg::TensorView, Fn&&, void* = nullptr) { common::AssertGPUSupport(); } diff --git a/src/learner.cc b/src/learner.cc index f18545760c53..57e909a237b4 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -426,6 +426,8 @@ class LearnerConfiguration : public Learner { rabit::Broadcast(&sync_score, sizeof(sync_score), 0); CHECK_EQ(sync_score, mparam_.base_score); } + CHECK(!std::isnan(mparam_.base_score)); + CHECK(!std::isinf(mparam_.base_score)); } // Convert mparam to learner_model_param diff --git a/src/objective/init_estimation.cc b/src/objective/init_estimation.cc index bb573f7f284f..69eb590073ee 100644 --- a/src/objective/init_estimation.cc +++ b/src/objective/init_estimation.cc @@ -11,9 +11,13 @@ #include // std::max +#include "../common/math.h" // CloseTo #include "../common/numeric.h" // cpu_impl::Reduce #include "../common/transform_iterator.h" // MakeIndexTransformIter #include "rabit/rabit.h" +#include "xgboost/linalg.h" // TensorView +#include "xgboost/objective.h" // ObjFunction +#include "../common/linalg_op.h" namespace xgboost { namespace obj { @@ -32,5 +36,20 @@ double FitStump(Context const* ctx, HostDeviceVector const& gpair) double FitStump(Context const* ctx, HostDeviceVector const& gpair) { return ctx->IsCPU() ? cpu_impl::FitStump(ctx, gpair) : cuda_impl::FitStump(ctx, gpair); } + +void NormalizeBaseScore(double w, linalg::TensorView in_out) { + // Weighted average base score across all workers + rabit::Allreduce(in_out.Values().data(), in_out.Values().size()); + rabit::Allreduce(&w, 1); + + if (common::CloseTo(w, 0.0)) { + // Mostly for handling empty dataset test. + LOG(WARNING) << "Sum of weights is close to 0.0, skipping base score estimation."; + in_out(0) = ObjFunction::DefaultBaseScore(); + return; + } + std::transform(linalg::cbegin(in_out), linalg::cend(in_out), linalg::begin(in_out), + [w](float v) { return v / w; }); +} } // namespace obj } // namespace xgboost diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h index bb64843f346f..adc430c3b27f 100644 --- a/src/objective/init_estimation.h +++ b/src/objective/init_estimation.h @@ -10,6 +10,7 @@ #include "../common/common.h" // AssertGPUSupport #include "xgboost/data.h" // MetaInfo #include "xgboost/generic_parameters.h" // Context +#include "xgboost/linalg.h" // TensorView namespace xgboost { namespace obj { @@ -24,6 +25,11 @@ inline double FitStump(Context const*, HostDeviceVector const&) { } // namespace cuda_impl double FitStump(Context const* ctx, HostDeviceVector const& gpair); + +/** + * @brief Normalize allreduced base score by sum of weights. 
+ */ +void NormalizeBaseScore(double w, linalg::TensorView in_out); } // namespace obj } // namespace xgboost #endif // XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index a6ca58b91bf9..1b6df8e25812 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -175,7 +175,8 @@ class RegLossObj : public ObjFunction { auto out = base_margin->HostView(); if (this->Targets(info) > 1) { - // multi-output not yet supported due to constraint in binary model format. + // multi-output not yet supported due to constraint in binary model format. (no + // vector in parameter) out(0) = DefaultBaseScore(); return; } @@ -199,10 +200,7 @@ class RegLossObj : public ObjFunction { w = common::Reduce(ctx_, info.weights_); } out(0) = w * score; - rabit::Allreduce(out.Values().data(), out.Values().size()); - rabit::Allreduce(&w, 1); - std::transform(linalg::cbegin(out), linalg::cend(out), linalg::begin(out), - [w](float v) { return v / w; }); + NormalizeBaseScore(w, out); } void SaveConfig(Json* p_out) const override { @@ -760,12 +758,7 @@ class MeanAbsoluteError : public ObjFunction { out(0) = common::Median(ctx_, info.labels, info.weights_) * w; } - // Weighted average base score across all workers - rabit::Allreduce(out.Values().data(), out.Values().size()); - rabit::Allreduce(&w, 1); - - std::transform(linalg::cbegin(out), linalg::cend(out), linalg::begin(out), - [w](float v) { return v / w; }); + NormalizeBaseScore(w, out); } void UpdateTreeLeaf(HostDeviceVector const& position, MetaInfo const& info, From 82281e1cd5b178db1b0c1317bc3026ade656e180 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Thu, 6 Oct 2022 12:24:22 +0800 Subject: [PATCH 038/133] include. --- src/objective/init_estimation.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/objective/init_estimation.cc b/src/objective/init_estimation.cc index 540a1f557cc7..d575ff170165 100644 --- a/src/objective/init_estimation.cc +++ b/src/objective/init_estimation.cc @@ -12,13 +12,12 @@ #include // std::max #include "../collective/communicator-inl.h" +#include "../common/linalg_op.h" // cbegin, cend #include "../common/math.h" // CloseTo #include "../common/numeric.h" // cpu_impl::Reduce #include "../common/transform_iterator.h" // MakeIndexTransformIter -#include "rabit/rabit.h" #include "xgboost/linalg.h" // TensorView #include "xgboost/objective.h" // ObjFunction -#include "../common/linalg_op.h" namespace xgboost { namespace obj { From bb9002b6bd16303e34e162c55f35fd113f00ce3b Mon Sep 17 00:00:00 2001 From: jiamingy Date: Sat, 8 Oct 2022 16:13:48 +0800 Subject: [PATCH 039/133] doc. --- doc/parameter.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/parameter.rst b/doc/parameter.rst index c633d0835d4d..f14c0cc5c3d0 100644 --- a/doc/parameter.rst +++ b/doc/parameter.rst @@ -370,9 +370,11 @@ Specify the learning task and the corresponding learning objective. The objectiv - ``reg:gamma``: gamma regression with log-link. Output is a mean of gamma distribution. It might be useful, e.g., for modeling insurance claims severity, or for any outcome that might be `gamma-distributed `_. - ``reg:tweedie``: Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any outcome that might be `Tweedie-distributed `_. 
-* ``base_score`` [default=0.5] +* ``base_score`` - The initial prediction score of all instances, global bias + - The parameter is automatically estimated for selected objectives before training. To + disable the estimation, specify a real number argument. - For sufficient number of iterations, changing this value will not have too much effect. * ``eval_metric`` [default according to objective] From e414b35efeed59ad1296307f865f44ff2cd33c20 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Sat, 8 Oct 2022 16:17:20 +0800 Subject: [PATCH 040/133] tests. --- R-package/tests/testthat/test_callbacks.R | 2 +- R-package/tests/testthat/test_helpers.R | 21 +++++++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R index 69894bd05181..7014edca765d 100644 --- a/R-package/tests/testthat/test_callbacks.R +++ b/R-package/tests/testthat/test_callbacks.R @@ -322,7 +322,7 @@ test_that("prediction in early-stopping xgb.cv works", { expect_output( cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.1, nrounds = 20, early_stopping_rounds = 5, maximize = FALSE, stratified = FALSE, - prediction = TRUE) + prediction = TRUE, base_score = 0.5) , "Stopping. Best iteration") expect_false(is.null(cv$best_iteration)) diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index fdd0ce02b5a6..e667a4054015 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -22,13 +22,17 @@ label <- df[, ifelse(Improved == "Marked", 1, 0)] # binary nrounds <- 12 -bst.Tree <- xgboost(data = sparse_matrix, label = label, max_depth = 9, - eta = 1, nthread = 2, nrounds = nrounds, verbose = 0, - objective = "binary:logistic", booster = "gbtree") - -bst.GLM <- xgboost(data = sparse_matrix, label = label, - eta = 1, nthread = 1, nrounds = nrounds, verbose = 0, - objective = "binary:logistic", booster = "gblinear") +bst.Tree <- xgboost( + data = sparse_matrix, label = label, max_depth = 9, + eta = 1, nthread = 2, nrounds = nrounds, verbose = 0, + objective = "binary:logistic", booster = "gbtree", base_score = 0.5 +) + +bst.GLM <- xgboost( + data = sparse_matrix, label = label, + eta = 1, nthread = 1, nrounds = nrounds, verbose = 0, + objective = "binary:logistic", booster = "gblinear", base_score = 0.5 +) feature.names <- colnames(sparse_matrix) @@ -345,7 +349,8 @@ test_that("xgb.importance works with and without feature names", { m <- xgboost::xgboost( data = as.matrix(data.frame(x = c(0, 1))), label = c(1, 2), - nrounds = 1 + nrounds = 1, + base_score = 0.5 ) df <- xgb.model.dt.tree(model = m) expect_equal(df$Feature, "Leaf") From cd945e7ff2d979d2ece202fa447ff17d9359fe95 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Sun, 9 Oct 2022 11:37:41 +0800 Subject: [PATCH 041/133] Bring back weighted mean. 
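For losses whose optimum is the weighted label mean, computing the mean
directly avoids a gradient pass. Note the layout the helper assumes: a 2-dim
label matrix with a 1-dim per-row weight vector, and the divisor is the
(allreduced) row count rather than the weight sum. A CPU sketch in numpy
terms, mirroring cpu_impl::WeightedMean:

    import numpy as np

    def weighted_mean(y: np.ndarray, w: np.ndarray) -> float:
        # y: (n_samples, n_targets) labels; w: (n_samples,) weights,
        # broadcast across targets. Distributed mode additionally
        # allreduces n_samples and the final sum.
        n_samples = y.shape[0]
        return float((y * w[:, None]).sum() / n_samples)
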
--- src/objective/init_estimation.cc | 20 +++++++++++++-- src/objective/init_estimation.cu | 14 ++++++++++ src/objective/init_estimation.h | 21 +++++++++++++++ src/objective/regression_obj.cu | 44 +++++++++++++++++--------------- 4 files changed, 77 insertions(+), 22 deletions(-) diff --git a/src/objective/init_estimation.cc b/src/objective/init_estimation.cc index d575ff170165..d1d5380da14e 100644 --- a/src/objective/init_estimation.cc +++ b/src/objective/init_estimation.cc @@ -10,14 +10,15 @@ #include "init_estimation.h" #include // std::max +#include // std::uint64_t #include "../collective/communicator-inl.h" #include "../common/linalg_op.h" // cbegin, cend #include "../common/math.h" // CloseTo #include "../common/numeric.h" // cpu_impl::Reduce #include "../common/transform_iterator.h" // MakeIndexTransformIter -#include "xgboost/linalg.h" // TensorView -#include "xgboost/objective.h" // ObjFunction +#include "xgboost/linalg.h" // TensorView +#include "xgboost/objective.h" // ObjFunction namespace xgboost { namespace obj { @@ -31,6 +32,21 @@ double FitStump(Context const* ctx, HostDeviceVector const& gpair) auto sum = common::cpu_impl::Reduce(ctx, it, it + gpair.Size(), GradientPairPrecise{}); return -sum.GetGrad() / std::max(sum.GetHess(), 1e-6); } + +double WeightedMean(Context const* ctx, MetaInfo const& info) { + std::uint64_t n_samples = info.num_row_; + collective::Allreduce(&n_samples, 1); + auto y = info.labels.HostView(); + auto w = common::OptionalWeights{info.weights_.ConstHostSpan()}; + auto it = common::MakeIndexTransformIter([&](size_t i) -> double { + size_t r, c; + std::tie(r, c) = linalg::UnravelIndex(i, y.Shape()); + return y(r, c) * w[r] / static_cast(n_samples); + }); + auto res = common::cpu_impl::Reduce(ctx, it, it + y.Size(), 0.0); + collective::Allreduce(&res, 1); + return res; +} } // namespace cpu_impl double FitStump(Context const* ctx, HostDeviceVector const& gpair) { diff --git a/src/objective/init_estimation.cu b/src/objective/init_estimation.cu index f3971ea161be..7535f8533bf1 100644 --- a/src/objective/init_estimation.cu +++ b/src/objective/init_estimation.cu @@ -32,6 +32,20 @@ double FitStump(Context const* ctx, HostDeviceVector const& gpair) auto sum = common::cuda_impl::Reduce(ctx, it, it + d_gpair.size(), GradientPairPrecise{}); return -sum.GetGrad() / std::max(sum.GetHess(), 1e-6); } + +double WeightedMean(Context const* ctx, MetaInfo const& info) { + std::uint64_t n_samples = info.num_row_; + rabit::Allreduce(&n_samples, 1); + auto y = info.labels.View(ctx->gpu_id); + auto w = common::OptionalWeights{info.weights_.ConstHostSpan()}; + auto it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) -> double { + auto idx = linalg::UnravelIndex(i, y.Shape()); + std::size_t r{std::get<0>(idx)}, c{std::get<1>(idx)}; + return y(r, c) * w[r] / static_cast(n_samples); + }); + return common::cuda_impl::Reduce(ctx, it, it + y.Size(), 0.0); +} } // namespace cuda_impl } // namespace obj } // namespace xgboost diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h index adc430c3b27f..8d0f74052956 100644 --- a/src/objective/init_estimation.h +++ b/src/objective/init_estimation.h @@ -14,6 +14,11 @@ namespace xgboost { namespace obj { + +namespace cpu_impl { +double WeightedMean(Context const* ctx, MetaInfo const& info); +} // namespace cpu_impl + namespace cuda_impl { double FitStump(Context const* ctx, HostDeviceVector const& gpair); #if !defined(XGBOOST_USE_CUDA) @@ -22,6 +27,14 @@ inline double 
FitStump(Context const*, HostDeviceVector const&) { return 0.0; } #endif // !defined(XGBOOST_USE_CUDA) + +double WeightedMean(Context const* ctx, MetaInfo const& info); +#if !defined(XGBOOST_USE_CUDA) +inline double WeightedMean(Context const*, MetaInfo const&) { + common::AssertGPUSupport(); + return 0.0; +} +#endif // !defined(XGBOOST_USE_CUDA) } // namespace cuda_impl double FitStump(Context const* ctx, HostDeviceVector const& gpair); @@ -30,6 +43,14 @@ double FitStump(Context const* ctx, HostDeviceVector const& gpair) * @brief Normalize allreduced base score by sum of weights. */ void NormalizeBaseScore(double w, linalg::TensorView in_out); + +/** + * \brief Weighted mean for distributed env. Not a general implementation since we have + * 2-dim label with 1-dim weight. + */ +inline double WeightedMean(Context const* ctx, MetaInfo const& info) { + return ctx->IsCPU() ? cpu_impl::WeightedMean(ctx, info) : cuda_impl::WeightedMean(ctx, info); +} } // namespace obj } // namespace xgboost #endif // XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 1b6df8e25812..bacd45d90e71 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -174,33 +174,37 @@ class RegLossObj : public ObjFunction { base_margin->Reshape(1); auto out = base_margin->HostView(); + auto distributed_mean = param_.scale_pos_weight == 1.0f && collective::IsDistributed(); if (this->Targets(info) > 1) { // multi-output not yet supported due to constraint in binary model format. (no // vector in parameter) - out(0) = DefaultBaseScore(); - return; - } - - HostDeviceVector dummy_predt(info.labels.Size(), 0.0f); - HostDeviceVector gpair(info.labels.Size()); + out(0) = ObjFunction::DefaultBaseScore(); + } else if (distributed_mean) { + // When scale pos weight is not specified, we use the exact weighted mean. + auto score = WeightedMean(ctx_, info); + out(0) = score; + } else { + HostDeviceVector dummy_predt(info.labels.Size(), 0.0f); + HostDeviceVector gpair(info.labels.Size()); - std::unique_ptr new_obj{ObjFunction::Create(Loss::Name(), ctx_)}; - Json config{Object{}}; - this->SaveConfig(&config); - new_obj->LoadConfig(config); - new_obj->GetGradient(dummy_predt, info, 0, &gpair); + std::unique_ptr new_obj{ObjFunction::Create(Loss::Name(), ctx_)}; + Json config{Object{}}; + this->SaveConfig(&config); + new_obj->LoadConfig(config); + new_obj->GetGradient(dummy_predt, info, 0, &gpair); - auto score = FitStump(ctx_, gpair); - score = Loss::PredTransform(score); + auto score = FitStump(ctx_, gpair); + score = Loss::PredTransform(score); - double w{0.0}; - if (info.weights_.Empty()) { - w = static_cast(info.num_row_); - } else { - w = common::Reduce(ctx_, info.weights_); + double w{0.0}; + if (info.weights_.Empty()) { + w = static_cast(info.num_row_); + } else { + w = common::Reduce(ctx_, info.weights_); + } + out(0) = w * score; + NormalizeBaseScore(w, out); } - out(0) = w * score; - NormalizeBaseScore(w, out); } void SaveConfig(Json* p_out) const override { From 0d061af12bc3b6dcf0a15497e795021076d86469 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Sun, 9 Oct 2022 12:24:54 +0800 Subject: [PATCH 042/133] Revert "Bring back weighted mean." This reverts commit cd945e7ff2d979d2ece202fa447ff17d9359fe95. 
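The weighted-mean shortcut only applied when scale_pos_weight == 1 and
needed its own distributed branch, so the estimation goes back to fitting a
stump on the gradients, which handles sample weights and loss-specific
scaling uniformly.
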
--- src/objective/init_estimation.cc | 20 ++------------- src/objective/init_estimation.cu | 14 ---------- src/objective/init_estimation.h | 21 --------------- src/objective/regression_obj.cu | 44 +++++++++++++++----------------- 4 files changed, 22 insertions(+), 77 deletions(-) diff --git a/src/objective/init_estimation.cc b/src/objective/init_estimation.cc index d1d5380da14e..d575ff170165 100644 --- a/src/objective/init_estimation.cc +++ b/src/objective/init_estimation.cc @@ -10,15 +10,14 @@ #include "init_estimation.h" #include // std::max -#include // std::uint64_t #include "../collective/communicator-inl.h" #include "../common/linalg_op.h" // cbegin, cend #include "../common/math.h" // CloseTo #include "../common/numeric.h" // cpu_impl::Reduce #include "../common/transform_iterator.h" // MakeIndexTransformIter -#include "xgboost/linalg.h" // TensorView -#include "xgboost/objective.h" // ObjFunction +#include "xgboost/linalg.h" // TensorView +#include "xgboost/objective.h" // ObjFunction namespace xgboost { namespace obj { @@ -32,21 +31,6 @@ double FitStump(Context const* ctx, HostDeviceVector const& gpair) auto sum = common::cpu_impl::Reduce(ctx, it, it + gpair.Size(), GradientPairPrecise{}); return -sum.GetGrad() / std::max(sum.GetHess(), 1e-6); } - -double WeightedMean(Context const* ctx, MetaInfo const& info) { - std::uint64_t n_samples = info.num_row_; - collective::Allreduce(&n_samples, 1); - auto y = info.labels.HostView(); - auto w = common::OptionalWeights{info.weights_.ConstHostSpan()}; - auto it = common::MakeIndexTransformIter([&](size_t i) -> double { - size_t r, c; - std::tie(r, c) = linalg::UnravelIndex(i, y.Shape()); - return y(r, c) * w[r] / static_cast(n_samples); - }); - auto res = common::cpu_impl::Reduce(ctx, it, it + y.Size(), 0.0); - collective::Allreduce(&res, 1); - return res; -} } // namespace cpu_impl double FitStump(Context const* ctx, HostDeviceVector const& gpair) { diff --git a/src/objective/init_estimation.cu b/src/objective/init_estimation.cu index 7535f8533bf1..f3971ea161be 100644 --- a/src/objective/init_estimation.cu +++ b/src/objective/init_estimation.cu @@ -32,20 +32,6 @@ double FitStump(Context const* ctx, HostDeviceVector const& gpair) auto sum = common::cuda_impl::Reduce(ctx, it, it + d_gpair.size(), GradientPairPrecise{}); return -sum.GetGrad() / std::max(sum.GetHess(), 1e-6); } - -double WeightedMean(Context const* ctx, MetaInfo const& info) { - std::uint64_t n_samples = info.num_row_; - rabit::Allreduce(&n_samples, 1); - auto y = info.labels.View(ctx->gpu_id); - auto w = common::OptionalWeights{info.weights_.ConstHostSpan()}; - auto it = dh::MakeTransformIterator( - thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) -> double { - auto idx = linalg::UnravelIndex(i, y.Shape()); - std::size_t r{std::get<0>(idx)}, c{std::get<1>(idx)}; - return y(r, c) * w[r] / static_cast(n_samples); - }); - return common::cuda_impl::Reduce(ctx, it, it + y.Size(), 0.0); -} } // namespace cuda_impl } // namespace obj } // namespace xgboost diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h index 8d0f74052956..adc430c3b27f 100644 --- a/src/objective/init_estimation.h +++ b/src/objective/init_estimation.h @@ -14,11 +14,6 @@ namespace xgboost { namespace obj { - -namespace cpu_impl { -double WeightedMean(Context const* ctx, MetaInfo const& info); -} // namespace cpu_impl - namespace cuda_impl { double FitStump(Context const* ctx, HostDeviceVector const& gpair); #if !defined(XGBOOST_USE_CUDA) @@ -27,14 +22,6 @@ inline double 
FitStump(Context const*, HostDeviceVector const&) { return 0.0; } #endif // !defined(XGBOOST_USE_CUDA) - -double WeightedMean(Context const* ctx, MetaInfo const& info); -#if !defined(XGBOOST_USE_CUDA) -inline double WeightedMean(Context const*, MetaInfo const&) { - common::AssertGPUSupport(); - return 0.0; -} -#endif // !defined(XGBOOST_USE_CUDA) } // namespace cuda_impl double FitStump(Context const* ctx, HostDeviceVector const& gpair); @@ -43,14 +30,6 @@ double FitStump(Context const* ctx, HostDeviceVector const& gpair) * @brief Normalize allreduced base score by sum of weights. */ void NormalizeBaseScore(double w, linalg::TensorView in_out); - -/** - * \brief Weighted mean for distributed env. Not a general implementation since we have - * 2-dim label with 1-dim weight. - */ -inline double WeightedMean(Context const* ctx, MetaInfo const& info) { - return ctx->IsCPU() ? cpu_impl::WeightedMean(ctx, info) : cuda_impl::WeightedMean(ctx, info); -} } // namespace obj } // namespace xgboost #endif // XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index bacd45d90e71..1b6df8e25812 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -174,37 +174,33 @@ class RegLossObj : public ObjFunction { base_margin->Reshape(1); auto out = base_margin->HostView(); - auto distributed_mean = param_.scale_pos_weight == 1.0f && collective::IsDistributed(); if (this->Targets(info) > 1) { // multi-output not yet supported due to constraint in binary model format. (no // vector in parameter) - out(0) = ObjFunction::DefaultBaseScore(); - } else if (distributed_mean) { - // When scale pos weight is not specified, we use the exact weighted mean. - auto score = WeightedMean(ctx_, info); - out(0) = score; - } else { - HostDeviceVector dummy_predt(info.labels.Size(), 0.0f); - HostDeviceVector gpair(info.labels.Size()); + out(0) = DefaultBaseScore(); + return; + } - std::unique_ptr new_obj{ObjFunction::Create(Loss::Name(), ctx_)}; - Json config{Object{}}; - this->SaveConfig(&config); - new_obj->LoadConfig(config); - new_obj->GetGradient(dummy_predt, info, 0, &gpair); + HostDeviceVector dummy_predt(info.labels.Size(), 0.0f); + HostDeviceVector gpair(info.labels.Size()); - auto score = FitStump(ctx_, gpair); - score = Loss::PredTransform(score); + std::unique_ptr new_obj{ObjFunction::Create(Loss::Name(), ctx_)}; + Json config{Object{}}; + this->SaveConfig(&config); + new_obj->LoadConfig(config); + new_obj->GetGradient(dummy_predt, info, 0, &gpair); - double w{0.0}; - if (info.weights_.Empty()) { - w = static_cast(info.num_row_); - } else { - w = common::Reduce(ctx_, info.weights_); - } - out(0) = w * score; - NormalizeBaseScore(w, out); + auto score = FitStump(ctx_, gpair); + score = Loss::PredTransform(score); + + double w{0.0}; + if (info.weights_.Empty()) { + w = static_cast(info.num_row_); + } else { + w = common::Reduce(ctx_, info.weights_); } + out(0) = w * score; + NormalizeBaseScore(w, out); } void SaveConfig(Json* p_out) const override { From 925ceaaaef8715c1e4bb165dbc00d8e874d0c706 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Sun, 9 Oct 2022 12:28:30 +0800 Subject: [PATCH 043/133] hardcode. 
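Pin base_score to 0.5 in the JVM test parameters so that expectations
computed before automatic estimation remain valid.
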
--- .../ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala index 00cc4d5750e2..8831bef84d55 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala @@ -42,6 +42,7 @@ class XGBoostClassifierSuite extends FunSuite with PerTest with TmpFolderPerSuit "max_depth" -> "6", "silent" -> "1", "objective" -> "binary:logistic", + "base_score" -> 0.5, "num_round" -> round, "tree_method" -> treeMethod, "num_workers" -> numWorkers) From 20535767f24c958529388ce91630248fc5a665a1 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Sun, 9 Oct 2022 12:31:01 +0800 Subject: [PATCH 044/133] fix. --- .../dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala index 8831bef84d55..f31207b9fb7e 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala @@ -42,7 +42,6 @@ class XGBoostClassifierSuite extends FunSuite with PerTest with TmpFolderPerSuit "max_depth" -> "6", "silent" -> "1", "objective" -> "binary:logistic", - "base_score" -> 0.5, "num_round" -> round, "tree_method" -> treeMethod, "num_workers" -> numWorkers) @@ -262,10 +261,10 @@ class XGBoostClassifierSuite extends FunSuite with PerTest with TmpFolderPerSuit "eta" -> "1", "max_depth" -> "6", "silent" -> "1", + "base_score" -> 0.5, "objective" -> "binary:logistic", "tree_method" -> treeMethod, "max_bin" -> 16) - val model1 = ScalaXGBoost.train(trainingDM, paramMap, round) val prediction1 = model1.predict(testDM) @@ -454,5 +453,4 @@ class XGBoostClassifierSuite extends FunSuite with PerTest with TmpFolderPerSuit assert(!compareTwoFiles(new File(modelJsonPath, "data/XGBoostClassificationModel").getPath, nativeUbjModelPath)) } - } From e9659b2dc000346491838fdba25d338ee61538ef Mon Sep 17 00:00:00 2001 From: jiamingy Date: Sun, 9 Oct 2022 14:18:50 +0800 Subject: [PATCH 045/133] Fix dask test. 
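With base_score estimated from the data, two training runs agree only up to
floating-point noise, so the prediction comparison switches from exact
equality to a tolerance. Roughly, assuming both results are already
materialised as numpy arrays:

    import numpy as np

    def assert_predictions_match(a: np.ndarray, b: np.ndarray,
                                 atol: float = 1e-5) -> None:
        # Same data, same partitioning: the two models should agree up to
        # accumulated floating-point error, not bit-for-bit.
        np.testing.assert_allclose(a, b, atol=atol)
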
--- .../test_gpu_with_dask/test_gpu_with_dask.py | 2 +- tests/python/test_with_dask.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py b/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py index 026a4f2ce37c..70622a095659 100644 --- a/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py +++ b/tests/python-gpu/test_gpu_with_dask/test_gpu_with_dask.py @@ -45,7 +45,7 @@ import xgboost as xgb from dask.distributed import Client from dask import array as da - from dask_cuda import LocalCUDACluster, utils + from dask_cuda import LocalCUDACluster import cudf except ImportError: pass diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py index 9832ac6b840e..c80bb5b5ad54 100644 --- a/tests/python/test_with_dask.py +++ b/tests/python/test_with_dask.py @@ -1656,13 +1656,17 @@ def sqr( results_custom = reg.evals_result() - reg = xgb.dask.DaskXGBRegressor(n_estimators=rounds, tree_method='hist') + reg = xgb.dask.DaskXGBRegressor( + n_estimators=rounds, tree_method="hist", base_score=0.5 + ) reg.fit(X, y, eval_set=[(X, y)]) results_native = reg.evals_result() - np.testing.assert_allclose(results_custom['validation_0']['rmse'], - results_native['validation_0']['rmse']) - tm.non_increasing(results_native['validation_0']['rmse']) + np.testing.assert_allclose( + results_custom["validation_0"]["rmse"], + results_native["validation_0"]["rmse"], + ) + tm.non_increasing(results_native["validation_0"]["rmse"]) def test_no_duplicated_partition(self) -> None: '''Assert each worker has the correct amount of data, and DMatrix initialization doesn't From 8178844c1174648f46b750f974a780352fd1d8a6 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Sun, 9 Oct 2022 14:56:10 +0800 Subject: [PATCH 046/133] includes. --- src/objective/init_estimation.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h index adc430c3b27f..c286b6769e1d 100644 --- a/src/objective/init_estimation.h +++ b/src/objective/init_estimation.h @@ -8,8 +8,9 @@ #define XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ #include "../common/common.h" // AssertGPUSupport -#include "xgboost/data.h" // MetaInfo +#include "xgboost/base.h" // GradientPair #include "xgboost/generic_parameters.h" // Context +#include "xgboost/host_device_vector.h" // HostDeviceVector #include "xgboost/linalg.h" // TensorView namespace xgboost { From 9807862069237684181d98c4bc4c307fb86f8c76 Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 12 Oct 2022 17:03:02 +0800 Subject: [PATCH 047/133] Use dh reduce. 
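FitStump is one Newton step for a constant prediction: minimising
sum_i(g_i * v + 0.5 * h_i * v^2) over v gives v = -sum(g) / sum(h). The GPU
path now feeds the transform iterator straight into dh::Reduce with a
caching allocator instead of going through common::cuda_impl::Reduce. The
arithmetic, as a numpy sketch:

    import numpy as np

    def fit_stump(grad: np.ndarray, hess: np.ndarray) -> float:
        # Accumulate in float64, matching GradientPairPrecise.
        g = float(np.sum(grad, dtype=np.float64))
        h = float(np.sum(hess, dtype=np.float64))
        return -g / max(h, 1e-6)  # same hessian floor as the C++ code
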
--- src/objective/init_estimation.cu | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/objective/init_estimation.cu b/src/objective/init_estimation.cu index f3971ea161be..a78aee527ca8 100644 --- a/src/objective/init_estimation.cu +++ b/src/objective/init_estimation.cu @@ -23,13 +23,15 @@ namespace obj { namespace cuda_impl { double FitStump(Context const* ctx, HostDeviceVector const& gpair) { gpair.SetDevice(ctx->gpu_id); - auto const& d_gpair = gpair.ConstDeviceSpan(); + auto d_gpair = gpair.ConstDeviceSpan(); auto it = dh::MakeTransformIterator( thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> GradientPairPrecise { return GradientPairPrecise{d_gpair[i]}; }); - auto sum = common::cuda_impl::Reduce(ctx, it, it + d_gpair.size(), GradientPairPrecise{}); + dh::XGBCachingDeviceAllocator alloc; + auto sum = dh::Reduce(thrust::cuda::par(alloc), it, it + d_gpair.size(), GradientPairPrecise{}, + thrust::plus{}); return -sum.GetGrad() / std::max(sum.GetHess(), 1e-6); } } // namespace cuda_impl From 7e07d2865fab00dcfb8338fe46c60fc5c2fa5a80 Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 12 Oct 2022 18:13:52 +0800 Subject: [PATCH 048/133] Fix. --- src/common/numeric.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/common/numeric.cu b/src/common/numeric.cu index 36e24d8ac6e6..af38f7e9be84 100644 --- a/src/common/numeric.cu +++ b/src/common/numeric.cu @@ -5,6 +5,7 @@ #include "numeric.cuh" // Reduce #include "numeric.h" +#include "device_helpers.cuh" #include "xgboost/generic_parameters.h" // Context #include "xgboost/host_device_vector.h" // HostDeviceVector @@ -14,7 +15,7 @@ namespace cuda_impl { double Reduce(Context const* ctx, HostDeviceVector const& values) { values.SetDevice(ctx->gpu_id); auto const d_values = values.ConstDeviceSpan(); - return Reduce(ctx, d_values.cbegin(), d_values.cend(), 0.0); + return Reduce(ctx, dh::tcbegin(d_values), dh::tcend(d_values), 0.0); } } // namespace cuda_impl } // namespace common From ed757c2e18811bad024a82be013db5a0c519c843 Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 12 Oct 2022 18:22:00 +0800 Subject: [PATCH 049/133] Fix. --- tests/python/test_with_dask.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py index c80bb5b5ad54..4105edf56d85 100644 --- a/tests/python/test_with_dask.py +++ b/tests/python/test_with_dask.py @@ -1418,7 +1418,11 @@ def minimum_bin(): else: assert tm.non_increasing(history) # Make sure that it's decreasing - assert history[-1] < history[0] + if is_stump(): + # we might have already got the best score with base_score. + assert history[-1] <= history[0] + else: + assert history[-1] < history[0] @given(params=hist_parameter_strategy, dataset=tm.dataset_strategy) From 707605282b2ef6bb5b3b7357cce25a04b49837ab Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 12 Oct 2022 18:45:12 +0800 Subject: [PATCH 050/133] Fixes. --- python-package/xgboost/sklearn.py | 4 ++-- tests/python/test_with_dask.py | 14 +++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 8e7dcfa5ee0d..b2cd9d26f541 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -1082,7 +1082,7 @@ def predict( validate_features: bool = True, base_margin: Optional[ArrayLike] = None, iteration_range: Optional[Tuple[int, int]] = None, - ) -> np.ndarray: + ) -> ArrayLike: """Predict with `X`. 
If the model is trained with early stopping, then `best_iteration` is used automatically. For tree models, when data is on GPU, like cupy array or cuDF dataframe and `predictor` is not specified, the prediction is run on GPU @@ -1532,7 +1532,7 @@ def predict( validate_features: bool = True, base_margin: Optional[ArrayLike] = None, iteration_range: Optional[Tuple[int, int]] = None, - ) -> np.ndarray: + ) -> ArrayLike: with config_context(verbosity=self.verbosity): class_probs = super().predict( X=X, diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py index 4105edf56d85..74121d3acec0 100644 --- a/tests/python/test_with_dask.py +++ b/tests/python/test_with_dask.py @@ -13,7 +13,7 @@ from math import ceil from operator import attrgetter, getitem from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Type, Union +from typing import Any, Dict, List, Optional, Tuple, Type, Union, TypeVar import hypothesis import numpy as np @@ -158,12 +158,15 @@ def deterministic_persist_per_worker(df, client): return df2 +Margin = TypeVar("Margin", dd.DataFrame, dd.Series, None) + + def deterministic_repartition( client: Client, X: dd.DataFrame, y: dd.Series, - m: Optional[Union[dd.DataFrame, dd.Series]], -) -> Tuple[dd.DataFrame, dd.Series, Optional[Union[dd.DataFrame, dd.Series]]]: + m: Margin, +) -> Tuple[dd.DataFrame, dd.Series, Margin]: # force repartition the data to avoid non-deterministic result if any(X.map_partitions(lambda x: _is_cudf_df(x)).compute()): # dask_cudf seems to be doing fine for now @@ -490,6 +493,10 @@ def run_boost_from_prediction( predt_1 = predictions_1.compute() predt_2 = predictions_2.compute() + if hasattr(predt_1, "to_numpy"): + predt_1 = predt_1.to_numpy() + if hasattr(predt_2, "to_numpy"): + predt_2 = predt_2.to_numpy() np.testing.assert_allclose(predt_1, predt_2, atol=1e-5) margined = xgb.dask.DaskXGBClassifier(n_estimators=4) @@ -715,6 +722,7 @@ def run_dask_classifier( def test_dask_classifier(model: str, client: "Client") -> None: X, y, w = generate_array(with_weights=True) y = (y * 10).astype(np.int32) + assert w run_dask_classifier(X, y, w, model, None, client, 10) y_bin = y.copy() From 0702c58ef13d11b82311d972ad79a712855831b2 Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 12 Oct 2022 19:01:28 +0800 Subject: [PATCH 051/133] Cleanup. 
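numeric.cuh existed only to wrap a single thrust reduction, so the wrapper
is dropped and the reduction is written inline at its remaining call sites.
What the deleted helper amounted to, in numpy terms:

    import numpy as np

    def plus_reduce(values, init: float = 0.0) -> float:
        # A plain plus-reduction with an explicit initial value, standing
        # in for dh::Reduce over the device span.
        return init + float(np.sum(np.asarray(values, dtype=np.float64)))
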
--- src/common/numeric.cc | 2 -- src/common/numeric.cu | 7 ++++--- src/common/numeric.cuh | 24 ------------------------ src/objective/init_estimation.cu | 4 +--- 4 files changed, 5 insertions(+), 32 deletions(-) delete mode 100644 src/common/numeric.cuh diff --git a/src/common/numeric.cc b/src/common/numeric.cc index 7e2bb07425c4..b6e68d8a5089 100644 --- a/src/common/numeric.cc +++ b/src/common/numeric.cc @@ -3,10 +3,8 @@ */ #include "numeric.h" -#include // std::accumulate #include // std::is_same -#include "threading_utils.h" // MemStackAllocator, ParallelFor, DefaultMaxThreads #include "xgboost/generic_parameters.h" // Context #include "xgboost/host_device_vector.h" // HostDeviceVector diff --git a/src/common/numeric.cu b/src/common/numeric.cu index af38f7e9be84..aff7b50bdadc 100644 --- a/src/common/numeric.cu +++ b/src/common/numeric.cu @@ -3,9 +3,8 @@ */ #include -#include "numeric.cuh" // Reduce -#include "numeric.h" #include "device_helpers.cuh" +#include "numeric.h" #include "xgboost/generic_parameters.h" // Context #include "xgboost/host_device_vector.h" // HostDeviceVector @@ -15,7 +14,9 @@ namespace cuda_impl { double Reduce(Context const* ctx, HostDeviceVector const& values) { values.SetDevice(ctx->gpu_id); auto const d_values = values.ConstDeviceSpan(); - return Reduce(ctx, dh::tcbegin(d_values), dh::tcend(d_values), 0.0); + dh::XGBCachingDeviceAllocator alloc; + return dh::Reduce(thrust::cuda::par(alloc), dh::tcbegin(d_values), dh::tcend(d_values), 0.0, + thrust::plus{}); } } // namespace cuda_impl } // namespace common diff --git a/src/common/numeric.cuh b/src/common/numeric.cuh deleted file mode 100644 index b6d6565350c2..000000000000 --- a/src/common/numeric.cuh +++ /dev/null @@ -1,24 +0,0 @@ -/*! - * Copyright 2022 by XGBoost Contributors - */ -#ifndef XGBOOST_COMMON_NUMERIC_CUH_ -#define XGBOOST_COMMON_NUMERIC_CUH_ -#include // thrust:plus -#include // Context - -#include "device_helpers.cuh" // Reduce -#include "numeric.h" - -namespace xgboost { -namespace common { -namespace cuda_impl { -template -V Reduce(Context const* /*ctx unused*/, It first, It second, V const& init) { - dh::XGBCachingDeviceAllocator alloc; - auto res = dh::Reduce(thrust::cuda::par(alloc), first, second, init, thrust::plus{}); - return res; -} -} // namespace cuda_impl -} // namespace common -} // namespace xgboost -#endif // XGBOOST_COMMON_NUMERIC_CUH_ diff --git a/src/objective/init_estimation.cu b/src/objective/init_estimation.cu index a78aee527ca8..bee460d76dda 100644 --- a/src/objective/init_estimation.cu +++ b/src/objective/init_estimation.cu @@ -9,11 +9,9 @@ #include // thrust::make_counting_iterator #include // std::max -#include // std::uint64_t #include // std::size_t -#include "../common/device_helpers.cuh" // dh::MakeTransformIterator -#include "../common/numeric.cuh" // Reduce +#include "../common/device_helpers.cuh" // dh::MakeTransformIterator, dh::Reduce #include "init_estimation.h" #include "rabit/rabit.h" #include "xgboost/generic_parameters.h" // Context From 1b88ef68a876e64311e31138c02f216aa8ed0d05 Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 12 Oct 2022 19:04:07 +0800 Subject: [PATCH 052/133] cleanup. 
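Drop the now-unused GPU Mean kernel from stats.cu; Median is the only
statistic still needed there for the intercept estimation.
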
--- src/common/stats.cu | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/common/stats.cu b/src/common/stats.cu index 1ef6a34f919c..2dd6b33d172d 100644 --- a/src/common/stats.cu +++ b/src/common/stats.cu @@ -42,22 +42,6 @@ float Median(Context const* ctx, linalg::TensorView t, CHECK_EQ(quantile.Size(), 1); return quantile.HostVector().front(); } - -float Mean(Context const* ctx, linalg::TensorView t, - common::OptionalWeights weights) { - dh::safe_cuda(cudaSetDevice(ctx->gpu_id)); - double size = t.Shape(0); - CHECK_NE(size, 0); - auto val_it = dh::MakeTransformIterator( - thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) { - auto idx = linalg::UnravelIndex(i, t.Shape()); - auto ridx = std::get<0>(idx); - return linalg::detail::Apply(t, std::move(idx)) * weights[ridx] / size; - }); - dh::XGBCachingDeviceAllocator alloc; - auto mean = thrust::reduce(thrust::cuda::par(alloc), val_it, val_it + t.Size(), 0.0f); - return mean; -} } // namespace cuda_impl } // namespace common } // namespace xgboost From 8b0445715351b27cd4c11fdfdafb332b78f0fdd2 Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 12 Oct 2022 19:28:11 +0800 Subject: [PATCH 053/133] Reduce. --- src/objective/init_estimation.cc | 22 ++-------------------- src/objective/init_estimation.cu | 9 +++++++-- src/objective/init_estimation.h | 7 ++----- src/objective/regression_obj.cu | 14 ++++++++++++-- 4 files changed, 23 insertions(+), 29 deletions(-) diff --git a/src/objective/init_estimation.cc b/src/objective/init_estimation.cc index d575ff170165..020fcf947612 100644 --- a/src/objective/init_estimation.cc +++ b/src/objective/init_estimation.cc @@ -12,12 +12,8 @@ #include // std::max #include "../collective/communicator-inl.h" -#include "../common/linalg_op.h" // cbegin, cend -#include "../common/math.h" // CloseTo #include "../common/numeric.h" // cpu_impl::Reduce #include "../common/transform_iterator.h" // MakeIndexTransformIter -#include "xgboost/linalg.h" // TensorView -#include "xgboost/objective.h" // ObjFunction namespace xgboost { namespace obj { @@ -29,6 +25,8 @@ double FitStump(Context const* ctx, HostDeviceVector const& gpair) return GradientPairPrecise{g}; }); auto sum = common::cpu_impl::Reduce(ctx, it, it + gpair.Size(), GradientPairPrecise{}); + static_assert(sizeof(sum) == sizeof(double) * 2, ""); + collective::Allreduce(reinterpret_cast(&sum), 2); return -sum.GetGrad() / std::max(sum.GetHess(), 1e-6); } } // namespace cpu_impl @@ -36,21 +34,5 @@ double FitStump(Context const* ctx, HostDeviceVector const& gpair) double FitStump(Context const* ctx, HostDeviceVector const& gpair) { return ctx->IsCPU() ? cpu_impl::FitStump(ctx, gpair) : cuda_impl::FitStump(ctx, gpair); } - -void NormalizeBaseScore(double w, linalg::TensorView in_out) { - // Weighted average base score across all workers - collective::Allreduce(in_out.Values().data(), - in_out.Values().size()); - collective::Allreduce(&w, 1); - - if (common::CloseTo(w, 0.0)) { - // Mostly for handling empty dataset test. 
- LOG(WARNING) << "Sum of weights is close to 0.0, skipping base score estimation."; - in_out(0) = ObjFunction::DefaultBaseScore(); - return; - } - std::transform(linalg::cbegin(in_out), linalg::cend(in_out), linalg::begin(in_out), - [w](float v) { return v / w; }); -} } // namespace obj } // namespace xgboost diff --git a/src/objective/init_estimation.cu b/src/objective/init_estimation.cu index bee460d76dda..55a9fbf14a31 100644 --- a/src/objective/init_estimation.cu +++ b/src/objective/init_estimation.cu @@ -6,15 +6,18 @@ #if !defined(NOMINMAX) && defined(_WIN32) #define NOMINMAX #endif // !defined(NOMINMAX) +#include // cuda::par #include // thrust::make_counting_iterator #include // std::max #include // std::size_t -#include "../common/device_helpers.cuh" // dh::MakeTransformIterator, dh::Reduce +#include "../collective/communicator-inl.h" // Allreduce +#include "../common/device_helpers.cuh" // dh::MakeTransformIterator, dh::Reduce #include "init_estimation.h" -#include "rabit/rabit.h" +#include "xgboost/base.h" // GradientPairPrecise, GradientPair, XGBOOST_DEVICE #include "xgboost/generic_parameters.h" // Context +#include "xgboost/host_device_vector.h" // HostDeviceVector namespace xgboost { namespace obj { @@ -30,6 +33,8 @@ double FitStump(Context const* ctx, HostDeviceVector const& gpair) dh::XGBCachingDeviceAllocator alloc; auto sum = dh::Reduce(thrust::cuda::par(alloc), it, it + d_gpair.size(), GradientPairPrecise{}, thrust::plus{}); + static_assert(sizeof(sum) == sizeof(double) * 2, ""); + collective::Allreduce(reinterpret_cast(&sum), 2); return -sum.GetGrad() / std::max(sum.GetHess(), 1e-6); } } // namespace cuda_impl diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h index c286b6769e1d..5e19c9fae7b0 100644 --- a/src/objective/init_estimation.h +++ b/src/objective/init_estimation.h @@ -24,13 +24,10 @@ inline double FitStump(Context const*, HostDeviceVector const&) { } #endif // !defined(XGBOOST_USE_CUDA) } // namespace cuda_impl - -double FitStump(Context const* ctx, HostDeviceVector const& gpair); - /** - * @brief Normalize allreduced base score by sum of weights. + * @brief Fit a tree stump as an estimation of base_score. */ -void NormalizeBaseScore(double w, linalg::TensorView in_out); +double FitStump(Context const* ctx, HostDeviceVector const& gpair); } // namespace obj } // namespace xgboost #endif // XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 1b6df8e25812..bcc190b82074 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -192,6 +192,7 @@ class RegLossObj : public ObjFunction { auto score = FitStump(ctx_, gpair); score = Loss::PredTransform(score); + out(0) = score; double w{0.0}; if (info.weights_.Empty()) { @@ -200,7 +201,6 @@ class RegLossObj : public ObjFunction { w = common::Reduce(ctx_, info.weights_); } out(0) = w * score; - NormalizeBaseScore(w, out); } void SaveConfig(Json* p_out) const override { @@ -758,7 +758,17 @@ class MeanAbsoluteError : public ObjFunction { out(0) = common::Median(ctx_, info.labels, info.weights_) * w; } - NormalizeBaseScore(w, out); + collective::Allreduce(out.Values().data(), out.Values().size()); + collective::Allreduce(&w, 1); + + if (common::CloseTo(w, 0.0)) { + // Mostly for handling empty dataset test. 
+ LOG(WARNING) << "Sum of weights is close to 0.0, skipping base score estimation."; + out(0) = ObjFunction::DefaultBaseScore(); + return; + } + std::transform(linalg::cbegin(out), linalg::cend(out), linalg::begin(out), + [w](float v) { return v / w; }); } void UpdateTreeLeaf(HostDeviceVector const& position, MetaInfo const& info, From c18f035a48bfd051763ecb970f7262a32ca90d2c Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 12 Oct 2022 20:43:48 +0800 Subject: [PATCH 054/133] Fix. --- src/objective/regression_obj.cu | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index bcc190b82074..834d1932c15d 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -193,14 +193,6 @@ class RegLossObj : public ObjFunction { auto score = FitStump(ctx_, gpair); score = Loss::PredTransform(score); out(0) = score; - - double w{0.0}; - if (info.weights_.Empty()) { - w = static_cast(info.num_row_); - } else { - w = common::Reduce(ctx_, info.weights_); - } - out(0) = w * score; } void SaveConfig(Json* p_out) const override { From 4251f358b8f2026eecf68ee577807d8a265c2f3a Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 12 Oct 2022 21:35:16 +0800 Subject: [PATCH 055/133] Cleanup. --- src/objective/regression_obj.cu | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 834d1932c15d..4b5ac46ad05e 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -190,9 +190,7 @@ class RegLossObj : public ObjFunction { new_obj->LoadConfig(config); new_obj->GetGradient(dummy_predt, info, 0, &gpair); - auto score = FitStump(ctx_, gpair); - score = Loss::PredTransform(score); - out(0) = score; + out(0) = Loss::PredTransform(FitStump(ctx_, gpair)); } void SaveConfig(Json* p_out) const override { From a08fce4fbd22139757a5146ce70bb7d24b51ce04 Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 12 Oct 2022 21:55:42 +0800 Subject: [PATCH 056/133] Fix. --- tests/python/test_with_dask.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py index 74121d3acec0..40484eeef4d9 100644 --- a/tests/python/test_with_dask.py +++ b/tests/python/test_with_dask.py @@ -1402,16 +1402,22 @@ def run_updater_test( else: w = None - m = xgb.dask.DaskDMatrix( - client, data=X, label=y, weight=w) - history = xgb.dask.train(client, params=params, dtrain=m, - num_boost_round=num_rounds, - evals=[(m, 'train')])['history'] + m = xgb.dask.DaskDMatrix(client, data=X, label=y, weight=w) + history = xgb.dask.train( + client, + params=params, + dtrain=m, + num_boost_round=num_rounds, + evals=[(m, "train")], + )["history"] note(history) - history = history['train'][dataset.metric] + history = history["train"][dataset.metric] def is_stump(): - return params["max_depth"] == 1 or params["max_leaves"] == 1 + return ( + params.get("max_depth", None) == 1 + or params.get("max_leaves", None) == 1 + ) def minimum_bin(): return "max_bin" in params and params["max_bin"] == 2 From 154c9c4bcd1d6ec9e80865d5dd275773c0ee9ee7 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 01:38:12 +0800 Subject: [PATCH 057/133] Fix spark test. 
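The hard-coded fixtures assumed base_score=0.5; with the estimated intercept
the raw scores shift, so expected predictions are now derived at run time
from a reference single-node model. The pattern, assuming an environment
with xgboost and its sklearn wrapper available:

    import numpy as np
    from xgboost import XGBClassifier

    X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5]])
    y = np.array([0, 1])

    clf = XGBClassifier(max_depth=5, n_estimators=10, scale_pos_weight=4)
    clf.fit(X, y)
    # The Spark assertions compare against these instead of literals.
    expected_pred = clf.predict(X)
    expected_proba = clf.predict_proba(X)
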
--- tests/python/test_spark/test_spark_local.py | 45 ++++++++++----------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py index 3894bed4b5f0..4ae5f622a9c2 100644 --- a/tests/python/test_spark/test_spark_local.py +++ b/tests/python/test_spark/test_spark_local.py @@ -92,22 +92,18 @@ def setUp(self): ], ) - # >>> X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5]]) - # >>> y = np.array([0, 1]) - # >>> cl1 = xgboost.XGBClassifier() - # >>> cl1.fit(X, y) - # >>> cl1.predict(X) - # array([0, 0]) - # >>> cl1.predict_proba(X) - # array([[0.5, 0.5], - # [0.5, 0.5]], dtype=float32) - # >>> cl2 = xgboost.XGBClassifier(max_depth=5, n_estimators=10, scale_pos_weight=4) - # >>> cl2.fit(X, y) - # >>> cl2.predict(X) - # array([1, 1]) - # >>> cl2.predict_proba(X) - # array([[0.27574146, 0.72425854 ], - # [0.27574146, 0.72425854 ]], dtype=float32) + X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5]]) + y = np.array([0, 1]) + cl1 = XGBClassifier() + cl1.fit(X, y) + p1 = cl1.predict(X) + proba1 = cl1.predict_proba(X) + + cl2 = XGBClassifier(max_depth=5, n_estimators=10, scale_pos_weight=4) + cl2.fit(X, y) + p2 = cl2.predict(X) + proba2 = cl2.predict_proba(X) + self.cls_params = {"max_depth": 5, "n_estimators": 10, "scale_pos_weight": 4} cls_df_train_data = [ @@ -120,21 +116,22 @@ def setUp(self): self.cls_df_train_large = self.session.createDataFrame( cls_df_train_data * 100, ["features", "label"] ) + print("list(proba1[1, :]):", list(proba1[1, :]), p1.shape, type(p1[0])) self.cls_df_test = self.session.createDataFrame( [ ( Vectors.dense(1.0, 2.0, 3.0), - 0, - [0.5, 0.5], - 1, - [0.27574146, 0.72425854], + int(p1[0]), + [float(p) for p in list(proba1[0, :])], + int(p2[0]), + [float(p) for p in list(proba2[0, :])], ), ( Vectors.sparse(3, {1: 1.0, 2: 5.5}), - 0, - [0.5, 0.5], - 1, - [0.27574146, 0.72425854], + int(p1[1]), + [float(p) for p in list(proba1[1, :])], + int(p2[1]), + [float(p) for p in list(proba2[1, :])], ), ], [ From 44b8cd480fea330e91a7f7746924c6fa6d57a162 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 01:49:43 +0800 Subject: [PATCH 058/133] hard coded tests. --- tests/python/test_spark/test_spark_local.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py index 4ae5f622a9c2..b77f95176b92 100644 --- a/tests/python/test_spark/test_spark_local.py +++ b/tests/python/test_spark/test_spark_local.py @@ -116,7 +116,7 @@ def setUp(self): self.cls_df_train_large = self.session.createDataFrame( cls_df_train_data * 100, ["features", "label"] ) - print("list(proba1[1, :]):", list(proba1[1, :]), p1.shape, type(p1[0])) + self.cls_df_test = self.session.createDataFrame( [ ( @@ -293,7 +293,7 @@ def setUp(self): ], ) self.cls_with_eval_best_score = 0.6931 - self.cls_with_eval_and_weight_best_score = 0.6378 + self.cls_with_eval_and_weight_best_score = 0.63765 # Test classifier with both base margin and without # >>> import numpy as np @@ -916,10 +916,8 @@ def test_classifier_with_weight_eval(self): np.allclose(row.probability, row.expected_prob_with_eval, atol=1e-3) ) # with weight and eval - # Added scale_pos_weight because in 1.4.2, the original answer returns 0.5 which - # doesn't really indicate this working correctly. 
classifier_with_weight_eval = SparkXGBClassifier( - weight_col="weight", scale_pos_weight=4, **self.cls_params_with_eval + weight_col="weight", **self.cls_params_with_eval ) model_with_weight_eval = classifier_with_weight_eval.fit( self.cls_df_train_with_eval_weight @@ -927,13 +925,12 @@ def test_classifier_with_weight_eval(self): pred_result_with_weight_eval = model_with_weight_eval.transform( self.cls_df_test_with_eval_weight ).collect() - self.assertTrue( - np.isclose( - model_with_weight_eval._xgb_sklearn_model.best_score, - self.cls_with_eval_and_weight_best_score, - atol=1e-3, - ) + np.testing.assert_allclose( + model_with_weight_eval._xgb_sklearn_model.best_score, + self.cls_with_eval_and_weight_best_score, + atol=1e-3, ) + for row in pred_result_with_weight_eval: self.assertTrue( np.allclose( From 12063447ac04a5fa0086432c2581bb5e93b3918c Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 14:42:17 +0800 Subject: [PATCH 059/133] Run tests on github action. --- .github/workflows/python_tests.yml | 51 ++++++++++++++++++++- CITATION | 1 - tests/ci_build/conda_env/cpu_test.yml | 3 ++ tests/ci_build/conda_env/macos_cpu_test.yml | 13 +++--- 4 files changed, 59 insertions(+), 9 deletions(-) diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 7ad742045223..4f18c043079b 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -134,7 +134,7 @@ jobs: strategy: matrix: config: - - {os: macos-11, python-version "3.8" } + - {os: macos-11, python-version: "3.8" } steps: - uses: actions/checkout@v2 @@ -178,3 +178,52 @@ jobs: shell: bash -l {0} run: | pytest -s -v ./tests/python + + python-tests-on-ubuntu: + name: Test XGBoost Python package on ${{ matrix.config.os }} + runs-on: ${{ matrix.config.os }} + timeout-minutes: 90 + strategy: + matrix: + config: + - {os: ubuntu-latest, python-version: "3.8"} + + steps: + - uses: actions/checkout@v2 + with: + submodules: 'true' + + - uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: ${{ matrix.config.python-version }} + activate-environment: macos_test + environment-file: tests/ci_build/conda_env/cpu_test.yml + + - name: Display Conda env + shell: bash -l {0} + run: | + conda info + conda list + + - name: Build XGBoost on Ubuntu + shell: bash -l {0} + run: | + sudo apt install ninja -y + + mkdir build + cd build + cmake .. 
-GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_PREFIX_PATH=$CONDA_PREFIX + ninja + + - name: Install Python package + shell: bash -l {0} + run: | + cd python-package + python --version + python setup.py install + + - name: Test Python package + shell: bash -l {0} + run: | + pytest -s -v ./tests/python diff --git a/CITATION b/CITATION index 189062510236..b2acce7c13fe 100644 --- a/CITATION +++ b/CITATION @@ -15,4 +15,3 @@ address = {New York, NY, USA}, keywords = {large-scale machine learning}, } - diff --git a/tests/ci_build/conda_env/cpu_test.yml b/tests/ci_build/conda_env/cpu_test.yml index e1e70d812886..284e168d565a 100644 --- a/tests/ci_build/conda_env/cpu_test.yml +++ b/tests/ci_build/conda_env/cpu_test.yml @@ -20,7 +20,10 @@ dependencies: - hypothesis>=6.46 - astroid - sh +- sphinx +- recommonmark - mock +- breathe - pytest - pytest-cov - python-kubernetes diff --git a/tests/ci_build/conda_env/macos_cpu_test.yml b/tests/ci_build/conda_env/macos_cpu_test.yml index 11d82ff7b605..c03a93e74c15 100644 --- a/tests/ci_build/conda_env/macos_cpu_test.yml +++ b/tests/ci_build/conda_env/macos_cpu_test.yml @@ -1,5 +1,6 @@ name: macos_test channels: +- defaults - conda-forge dependencies: - python=3.8 @@ -14,14 +15,13 @@ dependencies: - scikit-learn - pandas - matplotlib -- dask -- distributed -- graphviz +- dask>=2022.6 +- distributed>=2022.6 - python-graphviz -- hypothesis +- hypothesis>=6.46 - astroid -- sphinx - sh +- sphinx - recommonmark - mock - breathe @@ -35,8 +35,7 @@ dependencies: - py-ubjson - cffi - pyarrow -- pyspark -- cloudpickle +- shap - pip: - sphinx_rtd_theme - datatable From 14e7bbedf3781b7d4a6a52b331ff1cebfa479f34 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 14:44:37 +0800 Subject: [PATCH 060/133] Hard coded. --- tests/python/test_spark/test_spark_local.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py index b77f95176b92..ccc6dd83278a 100644 --- a/tests/python/test_spark/test_spark_local.py +++ b/tests/python/test_spark/test_spark_local.py @@ -293,7 +293,7 @@ def setUp(self): ], ) self.cls_with_eval_best_score = 0.6931 - self.cls_with_eval_and_weight_best_score = 0.63765 + self.cls_with_eval_and_weight_best_score = 0.636592 # Test classifier with both base margin and without # >>> import numpy as np @@ -932,10 +932,8 @@ def test_classifier_with_weight_eval(self): ) for row in pred_result_with_weight_eval: - self.assertTrue( - np.allclose( - row.probability, row.expected_prob_with_weight_and_eval, atol=1e-3 - ) + np.testing.assert_allclose( + row.probability, row.expected_prob_with_weight_and_eval, atol=1e-3 ) def test_num_workers_param(self): From 701acdf9d0997cf18056977816403d69c20cdba9 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 15:29:59 +0800 Subject: [PATCH 061/133] Fix mgpu test. 
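
`HostDeviceVector::SetDevice` now fails loudly when an initialized
vector is asked to move to a different ordinal, and the learner
validates the `MetaInfo` against its own `gpu_id` before estimating
`base_score`. The updated cupy test pins down the user-visible
behavior; roughly, assuming `dtrain` was built on device 0:

    import pytest
    import xgboost as xgb

    # Training with a mismatched ordinal should raise instead of
    # silently copying the data onto another device.
    with pytest.raises(xgb.core.XGBoostError):
        xgb.train({"tree_method": "gpu_hist", "gpu_id": 1}, dtrain)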
---
 src/common/host_device_vector.cu   | 4 ++++
 src/data/data.cc                   | 2 +-
 src/learner.cc                     | 4 +++-
 tests/python-gpu/test_from_cupy.py | 5 +++--
 4 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu
index 456c60a67071..00f19230dc7e 100644
--- a/src/common/host_device_vector.cu
+++ b/src/common/host_device_vector.cu
@@ -162,6 +162,10 @@ class HostDeviceVectorImpl {
     if (device_ >= 0) {
       LazySyncHost(GPUAccess::kNone);
     }
+
+    if (device_ >= 0 && device >= 0) {
+      CHECK_EQ(device_, device) << "New device ordinal is different from previous one.";
+    }
     device_ = device;
     if (device_ >= 0) {
       LazyResizeDevice(data_h_.size());
diff --git a/src/data/data.cc b/src/data/data.cc
index 3559ea00fc00..75720fabfc40 100644
--- a/src/data/data.cc
+++ b/src/data/data.cc
@@ -678,7 +678,7 @@ void MetaInfo::Extend(MetaInfo const& that, bool accumulate_rows, bool check_col
   }
 }
 
-void MetaInfo::Validate(int32_t device) const {
+void MetaInfo::Validate(std::int32_t device) const {
   if (group_ptr_.size() != 0 && weights_.Size() != 0) {
     CHECK_EQ(group_ptr_.size(), weights_.Size() + 1)
         << "Size of weights must equal to number of groups when ranking "
diff --git a/src/learner.cc b/src/learner.cc
index 5fe9a522f19d..7ce624060124 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -412,9 +412,11 @@ class LearnerConfiguration : public Learner {
     if (!mparam_.base_score_estimated) {
       std::lock_guard<std::mutex> guard(config_lock_);
       if (p_fmat) {
+        auto const& info = p_fmat->Info();
+        info.Validate(Ctx()->gpu_id);
         // We estimate it from input data.
         linalg::Tensor<float, 1> base_score;
-        obj_->InitEstimation(p_fmat->Info(), &base_score);
+        obj_->InitEstimation(info, &base_score);
         mparam_.base_score = base_score(0);
         CHECK(!std::isnan(mparam_.base_score));
       } else {
diff --git a/tests/python-gpu/test_from_cupy.py b/tests/python-gpu/test_from_cupy.py
index 77fa694e58fa..f3a563bab818 100644
--- a/tests/python-gpu/test_from_cupy.py
+++ b/tests/python-gpu/test_from_cupy.py
@@ -219,5 +219,6 @@ def test_specified_device(self):
         dtrain = dmatrix_from_cupy(
             np.float32, xgb.DeviceQuantileDMatrix, np.nan)
         with pytest.raises(xgb.core.XGBoostError):
-            xgb.train({'tree_method': 'gpu_hist', 'gpu_id': 1},
-                      dtrain, num_boost_round=10)
+            xgb.train(
+                {'tree_method': 'gpu_hist', 'gpu_id': 1}, dtrain, num_boost_round=10
+            )

From 397bbc3a464ed76118477f5d3029c286d7e7f404 Mon Sep 17 00:00:00 2001
From: fis
Date: Thu, 13 Oct 2022 15:58:43 +0800
Subject: [PATCH 062/133] Get devices.

---
 .../python-gpu/test_gpu_spark/discover_gpu.sh | 15 +++++++++++++-
 .../test_gpu_spark/test_gpu_spark.py          | 20 ++++++++++++++++---
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/tests/python-gpu/test_gpu_spark/discover_gpu.sh b/tests/python-gpu/test_gpu_spark/discover_gpu.sh
index 42dd0551784d..a40bbb7c4617 100755
--- a/tests/python-gpu/test_gpu_spark/discover_gpu.sh
+++ b/tests/python-gpu/test_gpu_spark/discover_gpu.sh
@@ -1,3 +1,16 @@
 #!/bin/bash

-echo "{\"name\":\"gpu\",\"addresses\":[\"0\",\"1\",\"2\",\"3\"]}"
+# This script is only meant for running XGBoost tests on the official CI, where we have
+# access to a 4-GPU cluster; the discovery command below is for running the tests on a
+# local machine where the driver and the GPU worker might be the same machine.
+
+if ! 
command -v nvidia-smi &> /dev/null
+then
+    # default to 4 GPUs
+    echo "{\"name\":\"gpu\",\"addresses\":[\"0\",\"1\",\"2\",\"3\"]}"
+    exit
+else
+    # https://github.com/apache/spark/blob/master/examples/src/main/scripts/getGpusResources.sh
+    ADDRS=`nvidia-smi --query-gpu=index --format=csv,noheader | sed -e ':a' -e 'N' -e'$!ba' -e 's/\n/","/g'`
+    echo {\"name\": \"gpu\", \"addresses\":[\"$ADDRS\"]}
+fi
diff --git a/tests/python-gpu/test_gpu_spark/test_gpu_spark.py b/tests/python-gpu/test_gpu_spark/test_gpu_spark.py
index ce5b9d8c8d42..8c45e44e3732 100644
--- a/tests/python-gpu/test_gpu_spark/test_gpu_spark.py
+++ b/tests/python-gpu/test_gpu_spark/test_gpu_spark.py
@@ -1,4 +1,6 @@
+import json
 import logging
+import subprocess
 import sys
 
 import pytest
@@ -7,7 +9,7 @@ sys.path.append("tests/python")
 import testing as tm
 
-if tm.no_dask()["condition"]:
+if tm.no_spark()["condition"]:
     pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
 if sys.platform.startswith("win"):
     pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True)
@@ -18,8 +20,20 @@
 from pyspark.sql import SparkSession
 from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor
 
 gpu_discovery_script_path = "tests/python-gpu/test_gpu_spark/discover_gpu.sh"
-executor_gpu_amount = 4
-executor_cores = 4
+
+
+def get_devices():
+    """This works only if the driver is on the same machine as the worker."""
+    completed = subprocess.run(gpu_discovery_script_path, stdout=subprocess.PIPE)
+    assert completed.returncode == 0, "Failed to execute discovery script."
+    msg = completed.stdout.decode("utf-8")
+    result = json.loads(msg)
+    addresses = result["addresses"]
+    return addresses
+
+
+executor_gpu_amount = len(get_devices())
+executor_cores = len(get_devices())
 num_workers = executor_gpu_amount
 

From 27934b3788c358cf6269f99416ac7faca142b569 Mon Sep 17 00:00:00 2001
From: fis
Date: Thu, 13 Oct 2022 16:25:06 +0800
Subject: [PATCH 063/133] Fix path.
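
The discovery script path was relative to the working directory, so the
GPU Spark tests broke whenever pytest was launched outside the
repository root. Anchor the path at a `PROJECT_ROOT` derived from the
package location instead. The resolution amounts to the following,
assuming the in-tree layout where `testing.py` sits two levels below
the root:

    import os

    CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
    PROJECT_ROOT = os.path.normpath(
        os.path.join(CURDIR, os.path.pardir, os.path.pardir)
    )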
--- python-package/xgboost/testing.py | 5 +++++ tests/python-gpu/test_gpu_spark/test_gpu_spark.py | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/python-package/xgboost/testing.py b/python-package/xgboost/testing.py index 9e1b54276037..488a62e40dfb 100644 --- a/python-package/xgboost/testing.py +++ b/python-package/xgboost/testing.py @@ -1,9 +1,14 @@ """Utilities for defining Python tests.""" +import os import socket from platform import system from typing import TypedDict +CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) +PROJECT_ROOT = os.path.normpath(os.path.join(CURDIR, os.path.pardir, os.path.pardir)) + + PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str}) diff --git a/tests/python-gpu/test_gpu_spark/test_gpu_spark.py b/tests/python-gpu/test_gpu_spark/test_gpu_spark.py index 8c45e44e3732..ea7e475e9fd4 100644 --- a/tests/python-gpu/test_gpu_spark/test_gpu_spark.py +++ b/tests/python-gpu/test_gpu_spark/test_gpu_spark.py @@ -1,11 +1,14 @@ import json import logging +import os import subprocess import sys import pytest import sklearn +from xgboost import testing + sys.path.append("tests/python") import testing as tm @@ -19,7 +22,9 @@ from pyspark.sql import SparkSession from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor -gpu_discovery_script_path = "tests/python-gpu/test_gpu_spark/discover_gpu.sh" +gpu_discovery_script_path = os.path.join( + testing.PROJECT_ROOT, "tests/python-gpu/test_gpu_spark/discover_gpu.sh" +) def get_devices(): From edc1e92dfe43cbd5cfa43b466f62f80e7344cd25 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 16:33:48 +0800 Subject: [PATCH 064/133] Unnecessary duplication. --- python-package/xgboost/testing.py | 13 +++++++ tests/python-gpu/test_gpu_spark/test_data.py | 10 ++--- .../test_gpu_spark/test_gpu_spark.py | 10 +---- tests/python/test_spark/test_data.py | 9 ++--- tests/python/test_spark/test_spark_local.py | 37 ++++++------------- .../test_spark/test_spark_local_cluster.py | 10 ++--- tests/python/test_spark/utils.py | 13 +++---- tests/python/testing.py | 9 ----- 8 files changed, 44 insertions(+), 67 deletions(-) diff --git a/python-package/xgboost/testing.py b/python-package/xgboost/testing.py index 488a62e40dfb..75b0eff8948e 100644 --- a/python-package/xgboost/testing.py +++ b/python-package/xgboost/testing.py @@ -44,3 +44,16 @@ def has_ipv6() -> bool: def skip_ipv6() -> PytestSkip: """PyTest skip mark for IPv6.""" return {"condition": not has_ipv6(), "reason": "IPv6 is required to be enabled."} + + +def skip_spark() -> PytestSkip: + """Pytest skip mark for PySpark tests.""" + if system() != "Linux": + return {"condition": True, "reason": "Unsupported platform."} + + try: + import pyspark # noqa + SPARK_INSTALLED = True + except ImportError: + SPARK_INSTALLED = False + return {"condition": not SPARK_INSTALLED, "reason": "Spark is not installed"} diff --git a/tests/python-gpu/test_gpu_spark/test_data.py b/tests/python-gpu/test_gpu_spark/test_data.py index 64028b91383b..04b640f5774d 100644 --- a/tests/python-gpu/test_gpu_spark/test_data.py +++ b/tests/python-gpu/test_gpu_spark/test_data.py @@ -1,18 +1,16 @@ import sys -from typing import List -import numpy as np import pandas as pd import pytest +from xgboost import testing + sys.path.append("tests/python") import testing as tm -if tm.no_spark()["condition"]: - pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True) -if sys.platform.startswith("win") or sys.platform.startswith("darwin"): - pytest.skip("Skipping 
PySpark tests on Windows", allow_module_level=True) +if testing.skip_spark()["condition"]: + pytest.skip(msg=testing.skip_spark()["reason"], allow_module_level=True) from test_spark.test_data import run_dmatrix_ctor diff --git a/tests/python-gpu/test_gpu_spark/test_gpu_spark.py b/tests/python-gpu/test_gpu_spark/test_gpu_spark.py index ea7e475e9fd4..6ad327ec90f9 100644 --- a/tests/python-gpu/test_gpu_spark/test_gpu_spark.py +++ b/tests/python-gpu/test_gpu_spark/test_gpu_spark.py @@ -2,20 +2,14 @@ import logging import os import subprocess -import sys import pytest import sklearn from xgboost import testing -sys.path.append("tests/python") -import testing as tm - -if tm.no_spark()["condition"]: - pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True) -if sys.platform.startswith("win"): - pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True) +if testing.skip_spark()["condition"]: + pytest.skip(msg=testing.skip_spark()["reason"], allow_module_level=True) from pyspark.ml.linalg import Vectors from pyspark.ml.tuning import CrossValidator, ParamGridBuilder diff --git a/tests/python/test_spark/test_data.py b/tests/python/test_spark/test_data.py index 9c8bd5018772..9db1e316c460 100644 --- a/tests/python/test_spark/test_data.py +++ b/tests/python/test_spark/test_data.py @@ -4,12 +4,11 @@ import numpy as np import pandas as pd import pytest -import testing as tm -if tm.no_spark()["condition"]: - pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True) -if sys.platform.startswith("win") or sys.platform.startswith("darwin"): - pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True) +from xgboost import testing + +if testing.skip_spark()["condition"]: + pytest.skip(msg=testing.skip_spark()["reason"], allow_module_level=True) from xgboost.spark.data import ( _read_csr_matrix_from_unwrapped_spark_vec, diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py index ccc6dd83278a..9a59849378b7 100644 --- a/tests/python/test_spark/test_spark_local.py +++ b/tests/python/test_spark/test_spark_local.py @@ -1,19 +1,16 @@ import glob import logging import random -import sys import uuid import numpy as np import pytest -import testing as tm import xgboost as xgb +from xgboost import testing -if tm.no_spark()["condition"]: - pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True) -if sys.platform.startswith("win") or sys.platform.startswith("darwin"): - pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True) +if testing.skip_spark()["condition"]: + pytest.skip(msg=testing.skip_spark()["reason"], allow_module_level=True) from pyspark.ml import Pipeline, PipelineModel from pyspark.ml.evaluation import ( @@ -165,9 +162,7 @@ def setUp(self): multi_cls_df_train_data * 100, ["features", "label"] ) self.multi_cls_df_test = self.session.createDataFrame( - [ - (Vectors.dense(1.0, 2.0, 3.0), [0.5374, 0.2312, 0.2312]), - ], + [(Vectors.dense(1.0, 2.0, 3.0), [0.5374, 0.2312, 0.2312]),], ["features", "expected_probability"], ) @@ -336,9 +331,7 @@ def setUp(self): ["features", "label", "weight"], ) self.cls_df_test_without_base_margin = self.session.createDataFrame( - [ - (Vectors.dense(1.0, 2.0, 3.0), [0.3333, 0.6666], 1), - ], + [(Vectors.dense(1.0, 2.0, 3.0), [0.3333, 0.6666], 1),], [ "features", "expected_prob_without_base_margin", @@ -356,9 +349,7 @@ def setUp(self): ["features", "label", "weight", "base_margin"], ) self.cls_df_test_with_same_base_margin = self.session.createDataFrame( - [ - 
(Vectors.dense(1.0, 2.0, 3.0), 0, [0.4415, 0.5585], 1), - ], + [(Vectors.dense(1.0, 2.0, 3.0), 0, [0.4415, 0.5585], 1),], [ "features", "base_margin", @@ -377,9 +368,7 @@ def setUp(self): ["features", "label", "weight", "base_margin"], ) self.cls_df_test_with_different_base_margin = self.session.createDataFrame( - [ - (Vectors.dense(1.0, 2.0, 3.0), 1, [0.2252, 0.7747], 1), - ], + [(Vectors.dense(1.0, 2.0, 3.0), 1, [0.2252, 0.7747], 1),], [ "features", "base_margin", @@ -807,11 +796,9 @@ def test_classifier_with_base_margin(self): model_with_different_base_margin = cls_with_different_base_margin.fit( self.cls_df_train_with_different_base_margin ) - pred_result_with_different_base_margin = ( - model_with_different_base_margin.transform( - self.cls_df_test_with_different_base_margin - ).collect() - ) + pred_result_with_different_base_margin = model_with_different_base_margin.transform( + self.cls_df_test_with_different_base_margin + ).collect() for row in pred_result_with_different_base_margin: self.assertTrue( np.isclose( @@ -1133,9 +1120,7 @@ def test_empty_partition(self): data_trans = vector_assembler.setHandleInvalid("keep").transform(raw_df) data_trans.show(100) - classifier = SparkXGBClassifier( - num_workers=4, - ) + classifier = SparkXGBClassifier(num_workers=4,) classifier.fit(data_trans) def test_early_stop_param_validation(self): diff --git a/tests/python/test_spark/test_spark_local_cluster.py b/tests/python/test_spark/test_spark_local_cluster.py index 9276e08f36bc..f7017d4a67ce 100644 --- a/tests/python/test_spark/test_spark_local_cluster.py +++ b/tests/python/test_spark/test_spark_local_cluster.py @@ -1,17 +1,15 @@ import json import os import random -import sys import uuid import numpy as np import pytest -import testing as tm -if tm.no_spark()["condition"]: - pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True) -if sys.platform.startswith("win") or sys.platform.startswith("darwin"): - pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True) +from xgboost import testing + +if testing.skip_spark()["condition"]: + pytest.skip(msg=testing.skip_spark()["reason"], allow_module_level=True) from pyspark.ml.linalg import Vectors from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor diff --git a/tests/python/test_spark/utils.py b/tests/python/test_spark/utils.py index 23968fbcc86f..56aceaf6c7ab 100644 --- a/tests/python/test_spark/utils.py +++ b/tests/python/test_spark/utils.py @@ -4,17 +4,16 @@ import sys import tempfile import unittest +from io import StringIO import pytest -import testing as tm -from six import StringIO -if tm.no_spark()["condition"]: - pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True) -if sys.platform.startswith("win") or sys.platform.startswith("darwin"): - pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True) +from xgboost import testing -from pyspark.sql import SparkSession, SQLContext +if testing.skip_spark()["condition"]: + pytest.skip(msg=testing.skip_spark()["reason"], allow_module_level=True) + +from pyspark.sql import SparkSession from xgboost.spark.utils import _get_default_params_from_func diff --git a/tests/python/testing.py b/tests/python/testing.py index 63d33de97e5a..7dd60aeaf17f 100644 --- a/tests/python/testing.py +++ b/tests/python/testing.py @@ -57,15 +57,6 @@ def no_dask(): return {"condition": not DASK_INSTALLED, "reason": "Dask is not installed"} -def no_spark(): - try: - import pyspark # noqa - SPARK_INSTALLED = True - except ImportError: - SPARK_INSTALLED = False - 
return {"condition": not SPARK_INSTALLED, "reason": "Spark is not installed"} - - def no_pandas(): return {'condition': not PANDAS_INSTALLED, 'reason': 'Pandas is not installed.'} From 46d316538a53668adff79cc90cfcf11603fc3dec Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 17:52:36 +0800 Subject: [PATCH 065/133] Fix eval test. --- tests/python/test_spark/test_spark_local.py | 335 ++++++++++++-------- 1 file changed, 204 insertions(+), 131 deletions(-) diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py index 9a59849378b7..b5a0320a65e0 100644 --- a/tests/python/test_spark/test_spark_local.py +++ b/tests/python/test_spark/test_spark_local.py @@ -2,6 +2,8 @@ import logging import random import uuid +from collections import namedtuple +from typing import Generator import numpy as np import pytest @@ -21,6 +23,7 @@ from pyspark.ml.functions import vector_to_array from pyspark.ml.linalg import Vectors from pyspark.ml.tuning import CrossValidator, ParamGridBuilder +from pyspark.sql import SparkSession from pyspark.sql import functions as spark_sql_func from xgboost.spark import ( SparkXGBClassifier, @@ -38,6 +41,187 @@ logging.getLogger("py4j").setLevel(logging.INFO) +@pytest.fixture +def pyspark_test_session(): + config = { + "spark.master": "local[4]", + "spark.python.worker.reuse": "false", + "spark.driver.host": "127.0.0.1", + "spark.task.maxFailures": "1", + "spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled": "false", + "spark.sql.pyspark.jvmStacktrace.enabled": "true", + } + + builder = SparkSession.builder.appName("XGBoost PySpark Python API Tests") + for k, v in config.items(): + builder.config(k, v) + spark = builder.getOrCreate() + logging.getLogger("pyspark").setLevel(logging.INFO) + + return spark + + +RegWithWeight = namedtuple( + "RegWithWeight", + ( + "reg_params_with_eval", + "reg_df_train_with_eval_weight", + "reg_df_test_with_eval_weight", + "reg_with_eval_best_score", + "reg_with_eval_and_weight_best_score", + ), +) + + +@pytest.fixture +def reg_with_weight( + pyspark_test_session: SparkSession, +) -> Generator[RegWithWeight, SparkSession, None]: + reg_params_with_eval = { + "validation_indicator_col": "isVal", + "early_stopping_rounds": 1, + "eval_metric": "rmse", + } + + X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5], [4.0, 5.0, 6.0], [0.0, 6.0, 7.5]]) + w = np.array([1.0, 2.0, 1.0, 2.0]) + y = np.array([0, 1, 2, 3]) + + reg1 = XGBRegressor() + reg1.fit(X, y, sample_weight=w) + predt1 = reg1.predict(X) + + X_train = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5]]) + X_val = np.array([[4.0, 5.0, 6.0], [0.0, 6.0, 7.5]]) + y_train = np.array([0, 1]) + y_val = np.array([2, 3]) + w_train = np.array([1.0, 2.0]) + w_val = np.array([1.0, 2.0]) + + reg2 = XGBRegressor() + reg2.fit( + X_train, + y_train, + eval_set=[(X_val, y_val)], + early_stopping_rounds=1, + eval_metric="rmse", + ) + predt2 = reg2.predict(X) + best_score2 = reg2.best_score + + reg3 = XGBRegressor() + reg3.fit( + X_train, + y_train, + sample_weight=w_train, + eval_set=[(X_val, y_val)], + sample_weight_eval_set=[w_val], + early_stopping_rounds=1, + eval_metric="rmse", + ) + predt3 = reg3.predict(X) + best_score3 = reg3.best_score + + reg_df_train_with_eval_weight = pyspark_test_session.createDataFrame( + [ + (Vectors.dense(1.0, 2.0, 3.0), 0, False, 1.0), + (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1, False, 2.0), + (Vectors.dense(4.0, 5.0, 6.0), 2, True, 1.0), + (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 3, True, 2.0), + ], + ["features", "label", "isVal", 
"weight"], + ) + + reg_df_test_with_eval_weight = pyspark_test_session.createDataFrame( + [ + ( + Vectors.dense(1.0, 2.0, 3.0), + float(predt1[0]), + float(predt2[0]), + float(predt3[0]), + ), + ( + Vectors.sparse(3, {1: 1.0, 2: 5.5}), + float(predt1[1]), + float(predt2[1]), + float(predt3[1]), + ), + ], + [ + "features", + "expected_prediction_with_weight", + "expected_prediction_with_eval", + "expected_prediction_with_weight_and_eval", + ], + ) + yield RegWithWeight( + reg_params_with_eval, + reg_df_train_with_eval_weight, + reg_df_test_with_eval_weight, + best_score2, + best_score3, + ) + + +class TestPySparkLocal: + def test_regressor_with_weight_eval(self, reg_with_weight: RegWithWeight) -> None: + # with weight + regressor_with_weight = SparkXGBRegressor(weight_col="weight") + model_with_weight = regressor_with_weight.fit( + reg_with_weight.reg_df_train_with_eval_weight + ) + pred_result_with_weight = model_with_weight.transform( + reg_with_weight.reg_df_test_with_eval_weight + ).collect() + for row in pred_result_with_weight: + assert np.isclose( + row.prediction, row.expected_prediction_with_weight, atol=1e-3 + ) + + # with eval + regressor_with_eval = SparkXGBRegressor(**reg_with_weight.reg_params_with_eval) + model_with_eval = regressor_with_eval.fit( + reg_with_weight.reg_df_train_with_eval_weight + ) + assert np.isclose( + model_with_eval._xgb_sklearn_model.best_score, + reg_with_weight.reg_with_eval_best_score, + atol=1e-3, + ), ( + f"Expected best score: {reg_with_weight.reg_with_eval_best_score}, but ", + f"get {model_with_eval._xgb_sklearn_model.best_score}", + ) + + pred_result_with_eval = model_with_eval.transform( + reg_with_weight.reg_df_test_with_eval_weight + ).collect() + for row in pred_result_with_eval: + np.testing.assert_allclose( + row.prediction, row.expected_prediction_with_eval, atol=1e-3 + ) + # with weight and eval + regressor_with_weight_eval = SparkXGBRegressor( + weight_col="weight", **reg_with_weight.reg_params_with_eval + ) + model_with_weight_eval = regressor_with_weight_eval.fit( + reg_with_weight.reg_df_train_with_eval_weight + ) + pred_result_with_weight_eval = model_with_weight_eval.transform( + reg_with_weight.reg_df_test_with_eval_weight + ).collect() + np.testing.assert_allclose( + model_with_weight_eval._xgb_sklearn_model.best_score, + reg_with_weight.reg_with_eval_and_weight_best_score, + atol=1e-3, + ) + for row in pred_result_with_weight_eval: + np.testing.assert_allclose( + row.prediction, + row.expected_prediction_with_weight_and_eval, + atol=1e-3, + ) + + class XgboostLocalTest(SparkTestCase): def setUp(self): logging.getLogger().setLevel("INFO") @@ -162,69 +346,11 @@ def setUp(self): multi_cls_df_train_data * 100, ["features", "label"] ) self.multi_cls_df_test = self.session.createDataFrame( - [(Vectors.dense(1.0, 2.0, 3.0), [0.5374, 0.2312, 0.2312]),], - ["features", "expected_probability"], - ) - - # Test regressor with weight and eval set - # >>> import numpy as np - # >>> import xgboost - # >>> X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5], [4.0, 5.0, 6.0], [0.0, 6.0, 7.5]]) - # >>> w = np.array([1.0, 2.0, 1.0, 2.0]) - # >>> y = np.array([0, 1, 2, 3]) - # >>> reg1 = xgboost.XGBRegressor() - # >>> reg1.fit(X, y, sample_weight=w) - # >>> reg1.predict(X) - # >>> array([1.0679445e-03, 1.0000550e+00, ... 
- # >>> X_train = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5]]) - # >>> X_val = np.array([[4.0, 5.0, 6.0], [0.0, 6.0, 7.5]]) - # >>> y_train = np.array([0, 1]) - # >>> y_val = np.array([2, 3]) - # >>> w_train = np.array([1.0, 2.0]) - # >>> w_val = np.array([1.0, 2.0]) - # >>> reg2 = xgboost.XGBRegressor() - # >>> reg2.fit(X_train, y_train, eval_set=[(X_val, y_val)], - # >>> early_stopping_rounds=1, eval_metric='rmse') - # >>> reg2.predict(X) - # >>> array([8.8370638e-04, 9.9911624e-01, ... - # >>> reg2.best_score - # 2.0000002682208837 - # >>> reg3 = xgboost.XGBRegressor() - # >>> reg3.fit(X_train, y_train, sample_weight=w_train, eval_set=[(X_val, y_val)], - # >>> sample_weight_eval_set=[w_val], - # >>> early_stopping_rounds=1, eval_metric='rmse') - # >>> reg3.predict(X) - # >>> array([0.03155671, 0.98874104,... - # >>> reg3.best_score - # 1.9970891552124017 - self.reg_df_train_with_eval_weight = self.session.createDataFrame( [ - (Vectors.dense(1.0, 2.0, 3.0), 0, False, 1.0), - (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1, False, 2.0), - (Vectors.dense(4.0, 5.0, 6.0), 2, True, 1.0), - (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 3, True, 2.0), - ], - ["features", "label", "isVal", "weight"], - ) - self.reg_params_with_eval = { - "validation_indicator_col": "isVal", - "early_stopping_rounds": 1, - "eval_metric": "rmse", - } - self.reg_df_test_with_eval_weight = self.session.createDataFrame( - [ - (Vectors.dense(1.0, 2.0, 3.0), 0.001068, 0.00088, 0.03155), - (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1.000055, 0.9991, 0.9887), - ], - [ - "features", - "expected_prediction_with_weight", - "expected_prediction_with_eval", - "expected_prediction_with_weight_and_eval", + (Vectors.dense(1.0, 2.0, 3.0), [0.5374, 0.2312, 0.2312]), ], + ["features", "expected_probability"], ) - self.reg_with_eval_best_score = 2.0 - self.reg_with_eval_and_weight_best_score = 1.997 # Test classifier with weight and eval set # >>> import numpy as np @@ -331,7 +457,9 @@ def setUp(self): ["features", "label", "weight"], ) self.cls_df_test_without_base_margin = self.session.createDataFrame( - [(Vectors.dense(1.0, 2.0, 3.0), [0.3333, 0.6666], 1),], + [ + (Vectors.dense(1.0, 2.0, 3.0), [0.3333, 0.6666], 1), + ], [ "features", "expected_prob_without_base_margin", @@ -349,7 +477,9 @@ def setUp(self): ["features", "label", "weight", "base_margin"], ) self.cls_df_test_with_same_base_margin = self.session.createDataFrame( - [(Vectors.dense(1.0, 2.0, 3.0), 0, [0.4415, 0.5585], 1),], + [ + (Vectors.dense(1.0, 2.0, 3.0), 0, [0.4415, 0.5585], 1), + ], [ "features", "base_margin", @@ -368,7 +498,9 @@ def setUp(self): ["features", "label", "weight", "base_margin"], ) self.cls_df_test_with_different_base_margin = self.session.createDataFrame( - [(Vectors.dense(1.0, 2.0, 3.0), 1, [0.2252, 0.7747], 1),], + [ + (Vectors.dense(1.0, 2.0, 3.0), 1, [0.2252, 0.7747], 1), + ], [ "features", "base_margin", @@ -796,9 +928,11 @@ def test_classifier_with_base_margin(self): model_with_different_base_margin = cls_with_different_base_margin.fit( self.cls_df_train_with_different_base_margin ) - pred_result_with_different_base_margin = model_with_different_base_margin.transform( - self.cls_df_test_with_different_base_margin - ).collect() + pred_result_with_different_base_margin = ( + model_with_different_base_margin.transform( + self.cls_df_test_with_different_base_margin + ).collect() + ) for row in pred_result_with_different_base_margin: self.assertTrue( np.isclose( @@ -809,69 +943,6 @@ def test_classifier_with_base_margin(self): row.probability, 
row.expected_prob_with_base_margin, atol=1e-3 ) - def test_regressor_with_weight_eval(self): - # with weight - regressor_with_weight = SparkXGBRegressor(weight_col="weight") - model_with_weight = regressor_with_weight.fit( - self.reg_df_train_with_eval_weight - ) - pred_result_with_weight = model_with_weight.transform( - self.reg_df_test_with_eval_weight - ).collect() - for row in pred_result_with_weight: - assert np.isclose( - row.prediction, row.expected_prediction_with_weight, atol=1e-3 - ) - - # with eval - regressor_with_eval = SparkXGBRegressor(**self.reg_params_with_eval) - model_with_eval = regressor_with_eval.fit(self.reg_df_train_with_eval_weight) - assert np.isclose( - model_with_eval._xgb_sklearn_model.best_score, - self.reg_with_eval_best_score, - atol=1e-3, - ), ( - f"Expected best score: {self.reg_with_eval_best_score}, but ", - f"get {model_with_eval._xgb_sklearn_model.best_score}", - ) - - pred_result_with_eval = model_with_eval.transform( - self.reg_df_test_with_eval_weight - ).collect() - for row in pred_result_with_eval: - self.assertTrue( - np.isclose( - row.prediction, row.expected_prediction_with_eval, atol=1e-3 - ), - f"Expect prediction is {row.expected_prediction_with_eval}," - f"but get {row.prediction}", - ) - # with weight and eval - regressor_with_weight_eval = SparkXGBRegressor( - weight_col="weight", **self.reg_params_with_eval - ) - model_with_weight_eval = regressor_with_weight_eval.fit( - self.reg_df_train_with_eval_weight - ) - pred_result_with_weight_eval = model_with_weight_eval.transform( - self.reg_df_test_with_eval_weight - ).collect() - self.assertTrue( - np.isclose( - model_with_weight_eval._xgb_sklearn_model.best_score, - self.reg_with_eval_and_weight_best_score, - atol=1e-3, - ) - ) - for row in pred_result_with_weight_eval: - self.assertTrue( - np.isclose( - row.prediction, - row.expected_prediction_with_weight_and_eval, - atol=1e-3, - ) - ) - def test_classifier_with_weight_eval(self): # with weight classifier_with_weight = SparkXGBClassifier(weight_col="weight") @@ -919,7 +990,7 @@ def test_classifier_with_weight_eval(self): ) for row in pred_result_with_weight_eval: - np.testing.assert_allclose( + np.testing.assert_allclose( # failed row.probability, row.expected_prob_with_weight_and_eval, atol=1e-3 ) @@ -1120,7 +1191,9 @@ def test_empty_partition(self): data_trans = vector_assembler.setHandleInvalid("keep").transform(raw_df) data_trans.show(100) - classifier = SparkXGBClassifier(num_workers=4,) + classifier = SparkXGBClassifier( + num_workers=4, + ) classifier.fit(data_trans) def test_early_stop_param_validation(self): From 543185ecb5a349bcd5b09f0b164c85036cf58b1a Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 18:15:38 +0800 Subject: [PATCH 066/133] Fix local test. 
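
Port the weight/eval Spark tests from the unittest `setUp` to pytest
fixtures: `reg_with_weight` and `clf_with_weight` train single-node
`XGBRegressor`/`XGBClassifier` models to produce the expected
predictions and best scores instead of relying on hard-coded arrays.
Also harden `skip_spark` by importing `pyspark.ml.Pipeline`, guarding
against stub `pyspark` modules created by other libraries. A
stripped-down sketch of the session fixture (most configs elided):

    import pytest
    from pyspark.sql import SparkSession

    @pytest.fixture
    def pyspark_test_session():
        builder = SparkSession.builder.appName(
            "XGBoost PySpark Python API Tests"
        ).config("spark.master", "local[4]")
        yield builder.getOrCreate()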
--- python-package/xgboost/testing.py | 6 +- tests/python/test_spark/test_spark_local.py | 265 +++++++++++--------- 2 files changed, 150 insertions(+), 121 deletions(-) diff --git a/python-package/xgboost/testing.py b/python-package/xgboost/testing.py index 75b0eff8948e..34ae3da74c68 100644 --- a/python-package/xgboost/testing.py +++ b/python-package/xgboost/testing.py @@ -52,7 +52,11 @@ def skip_spark() -> PytestSkip: return {"condition": True, "reason": "Unsupported platform."} try: - import pyspark # noqa + import pyspark # noqa + + # just in case there's a pyspark stub created by some other libraries + from pyspark.ml import Pipeline # noqa + SPARK_INSTALLED = True except ImportError: SPARK_INSTALLED = False diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py index b5a0320a65e0..4ad525db1fe4 100644 --- a/tests/python/test_spark/test_spark_local.py +++ b/tests/python/test_spark/test_spark_local.py @@ -42,7 +42,7 @@ @pytest.fixture -def pyspark_test_session(): +def pyspark_test_session() -> Generator[SparkSession, None, None]: config = { "spark.master": "local[4]", "spark.python.worker.reuse": "false", @@ -58,7 +58,7 @@ def pyspark_test_session(): spark = builder.getOrCreate() logging.getLogger("pyspark").setLevel(logging.INFO) - return spark + yield spark RegWithWeight = namedtuple( @@ -163,6 +163,98 @@ def reg_with_weight( ) +ClfWithWeight = namedtuple( + "ClfWithWeight", + ( + "cls_params_with_eval", + "cls_df_train_with_eval_weight", + "cls_df_test_with_eval_weight", + "cls_with_eval_best_score", + "cls_with_eval_and_weight_best_score", + ), +) + + +@pytest.fixture +def clf_with_weight( + pyspark_test_session: SparkSession, +) -> Generator[ClfWithWeight, SparkSession, None]: + """Test classifier with weight and eval set.""" + + X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5], [4.0, 5.0, 6.0], [0.0, 6.0, 7.5]]) + w = np.array([1.0, 2.0, 1.0, 2.0]) + y = np.array([0, 1, 0, 1]) + cls1 = XGBClassifier() + cls1.fit(X, y, sample_weight=w) + + X_train = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5]]) + X_val = np.array([[4.0, 5.0, 6.0], [0.0, 6.0, 7.5]]) + y_train = np.array([0, 1]) + y_val = np.array([0, 1]) + w_train = np.array([1.0, 2.0]) + w_val = np.array([1.0, 2.0]) + cls2 = XGBClassifier() + cls2.fit( + X_train, + y_train, + eval_set=[(X_val, y_val)], + early_stopping_rounds=1, + eval_metric="logloss", + ) + + cls3 = XGBClassifier() + cls3.fit( + X_train, + y_train, + sample_weight=w_train, + eval_set=[(X_val, y_val)], + sample_weight_eval_set=[w_val], + early_stopping_rounds=1, + eval_metric="logloss", + ) + + cls_df_train_with_eval_weight = pyspark_test_session.createDataFrame( + [ + (Vectors.dense(1.0, 2.0, 3.0), 0, False, 1.0), + (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1, False, 2.0), + (Vectors.dense(4.0, 5.0, 6.0), 0, True, 1.0), + (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, True, 2.0), + ], + ["features", "label", "isVal", "weight"], + ) + cls_params_with_eval = { + "validation_indicator_col": "isVal", + "early_stopping_rounds": 1, + "eval_metric": "logloss", + } + print("cls1.predict_proba(X)", cls1.predict_proba(X).shape, cls1.predict_proba(X)) + cls_df_test_with_eval_weight = pyspark_test_session.createDataFrame( + [ + ( + Vectors.dense(1.0, 2.0, 3.0), + [float(p) for p in cls1.predict_proba(X)[0, :]], + [float(p) for p in cls2.predict_proba(X)[0, :]], + [float(p) for p in cls3.predict_proba(X)[0, :]], + ), + ], + [ + "features", + "expected_prob_with_weight", + "expected_prob_with_eval", + "expected_prob_with_weight_and_eval", + ], 
+ ) + cls_with_eval_best_score = cls2.best_score + cls_with_eval_and_weight_best_score = cls3.best_score + yield ClfWithWeight( + cls_params_with_eval, + cls_df_train_with_eval_weight, + cls_df_test_with_eval_weight, + cls_with_eval_best_score, + cls_with_eval_and_weight_best_score, + ) + + class TestPySparkLocal: def test_regressor_with_weight_eval(self, reg_with_weight: RegWithWeight) -> None: # with weight @@ -187,9 +279,6 @@ def test_regressor_with_weight_eval(self, reg_with_weight: RegWithWeight) -> Non model_with_eval._xgb_sklearn_model.best_score, reg_with_weight.reg_with_eval_best_score, atol=1e-3, - ), ( - f"Expected best score: {reg_with_weight.reg_with_eval_best_score}, but ", - f"get {model_with_eval._xgb_sklearn_model.best_score}", ) pred_result_with_eval = model_with_eval.transform( @@ -221,6 +310,57 @@ def test_regressor_with_weight_eval(self, reg_with_weight: RegWithWeight) -> Non atol=1e-3, ) + def test_classifier_with_weight_eval(self, clf_with_weight: ClfWithWeight) -> None: + # with weight + classifier_with_weight = SparkXGBClassifier(weight_col="weight") + model_with_weight = classifier_with_weight.fit( + clf_with_weight.cls_df_train_with_eval_weight + ) + pred_result_with_weight = model_with_weight.transform( + clf_with_weight.cls_df_test_with_eval_weight + ).collect() + for row in pred_result_with_weight: + assert np.allclose( + row.probability, row.expected_prob_with_weight, atol=1e-3 + ) + # with eval + classifier_with_eval = SparkXGBClassifier( + **clf_with_weight.cls_params_with_eval + ) + model_with_eval = classifier_with_eval.fit( + clf_with_weight.cls_df_train_with_eval_weight + ) + assert np.isclose( + model_with_eval._xgb_sklearn_model.best_score, + clf_with_weight.cls_with_eval_best_score, + atol=1e-3, + ) + pred_result_with_eval = model_with_eval.transform( + clf_with_weight.cls_df_test_with_eval_weight + ).collect() + for row in pred_result_with_eval: + assert np.allclose(row.probability, row.expected_prob_with_eval, atol=1e-3) + # with weight and eval + classifier_with_weight_eval = SparkXGBClassifier( + weight_col="weight", **clf_with_weight.cls_params_with_eval + ) + model_with_weight_eval = classifier_with_weight_eval.fit( + clf_with_weight.cls_df_train_with_eval_weight + ) + pred_result_with_weight_eval = model_with_weight_eval.transform( + clf_with_weight.cls_df_test_with_eval_weight + ).collect() + np.testing.assert_allclose( + model_with_weight_eval._xgb_sklearn_model.best_score, + clf_with_weight.cls_with_eval_and_weight_best_score, + atol=1e-3, + ) + + for row in pred_result_with_weight_eval: + np.testing.assert_allclose( # failed + row.probability, row.expected_prob_with_weight_and_eval, atol=1e-3 + ) + class XgboostLocalTest(SparkTestCase): def setUp(self): @@ -352,70 +492,6 @@ def setUp(self): ["features", "expected_probability"], ) - # Test classifier with weight and eval set - # >>> import numpy as np - # >>> import xgboost - # >>> X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5], [4.0, 5.0, 6.0], [0.0, 6.0, 7.5]]) - # >>> w = np.array([1.0, 2.0, 1.0, 2.0]) - # >>> y = np.array([0, 1, 0, 1]) - # >>> cls1 = xgboost.XGBClassifier() - # >>> cls1.fit(X, y, sample_weight=w) - # >>> cls1.predict_proba(X) - # array([[0.3333333, 0.6666667],... 
- # >>> X_train = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5]]) - # >>> X_val = np.array([[4.0, 5.0, 6.0], [0.0, 6.0, 7.5]]) - # >>> y_train = np.array([0, 1]) - # >>> y_val = np.array([0, 1]) - # >>> w_train = np.array([1.0, 2.0]) - # >>> w_val = np.array([1.0, 2.0]) - # >>> cls2 = xgboost.XGBClassifier() - # >>> cls2.fit(X_train, y_train, eval_set=[(X_val, y_val)], - # >>> early_stopping_rounds=1, eval_metric='logloss') - # >>> cls2.predict_proba(X) - # array([[0.5, 0.5],... - # >>> cls2.best_score - # 0.6931 - # >>> cls3 = xgboost.XGBClassifier() - # >>> cls3.fit(X_train, y_train, sample_weight=w_train, eval_set=[(X_val, y_val)], - # >>> sample_weight_eval_set=[w_val], - # >>> early_stopping_rounds=1, eval_metric='logloss') - # >>> cls3.predict_proba(X) - # array([[0.3344962, 0.6655038],... - # >>> cls3.best_score - # 0.6365 - self.cls_df_train_with_eval_weight = self.session.createDataFrame( - [ - (Vectors.dense(1.0, 2.0, 3.0), 0, False, 1.0), - (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1, False, 2.0), - (Vectors.dense(4.0, 5.0, 6.0), 0, True, 1.0), - (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, True, 2.0), - ], - ["features", "label", "isVal", "weight"], - ) - self.cls_params_with_eval = { - "validation_indicator_col": "isVal", - "early_stopping_rounds": 1, - "eval_metric": "logloss", - } - self.cls_df_test_with_eval_weight = self.session.createDataFrame( - [ - ( - Vectors.dense(1.0, 2.0, 3.0), - [0.3333, 0.6666], - [0.5, 0.5], - [0.3097, 0.6903], - ), - ], - [ - "features", - "expected_prob_with_weight", - "expected_prob_with_eval", - "expected_prob_with_weight_and_eval", - ], - ) - self.cls_with_eval_best_score = 0.6931 - self.cls_with_eval_and_weight_best_score = 0.636592 - # Test classifier with both base margin and without # >>> import numpy as np # >>> import xgboost @@ -943,57 +1019,6 @@ def test_classifier_with_base_margin(self): row.probability, row.expected_prob_with_base_margin, atol=1e-3 ) - def test_classifier_with_weight_eval(self): - # with weight - classifier_with_weight = SparkXGBClassifier(weight_col="weight") - model_with_weight = classifier_with_weight.fit( - self.cls_df_train_with_eval_weight - ) - pred_result_with_weight = model_with_weight.transform( - self.cls_df_test_with_eval_weight - ).collect() - for row in pred_result_with_weight: - self.assertTrue( - np.allclose(row.probability, row.expected_prob_with_weight, atol=1e-3) - ) - # with eval - classifier_with_eval = SparkXGBClassifier(**self.cls_params_with_eval) - model_with_eval = classifier_with_eval.fit(self.cls_df_train_with_eval_weight) - self.assertTrue( - np.isclose( - model_with_eval._xgb_sklearn_model.best_score, - self.cls_with_eval_best_score, - atol=1e-3, - ) - ) - pred_result_with_eval = model_with_eval.transform( - self.cls_df_test_with_eval_weight - ).collect() - for row in pred_result_with_eval: - self.assertTrue( - np.allclose(row.probability, row.expected_prob_with_eval, atol=1e-3) - ) - # with weight and eval - classifier_with_weight_eval = SparkXGBClassifier( - weight_col="weight", **self.cls_params_with_eval - ) - model_with_weight_eval = classifier_with_weight_eval.fit( - self.cls_df_train_with_eval_weight - ) - pred_result_with_weight_eval = model_with_weight_eval.transform( - self.cls_df_test_with_eval_weight - ).collect() - np.testing.assert_allclose( - model_with_weight_eval._xgb_sklearn_model.best_score, - self.cls_with_eval_and_weight_best_score, - atol=1e-3, - ) - - for row in pred_result_with_weight_eval: - np.testing.assert_allclose( # failed - row.probability, 
row.expected_prob_with_weight_and_eval, atol=1e-3 - ) - def test_num_workers_param(self): regressor = SparkXGBRegressor(num_workers=-1) self.assertRaises(ValueError, regressor._validate_params) From a16e02365b09e3d8f10a4cc7edab544284635fec Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 21:15:57 +0800 Subject: [PATCH 067/133] Fixes. --- tests/python/test_spark/test_spark_local.py | 29 ++++++++------------- 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py index 4ad525db1fe4..28ded3f02413 100644 --- a/tests/python/test_spark/test_spark_local.py +++ b/tests/python/test_spark/test_spark_local.py @@ -36,13 +36,11 @@ from xgboost import XGBClassifier, XGBModel, XGBRegressor -from .utils import SparkTestCase - logging.getLogger("py4j").setLevel(logging.INFO) @pytest.fixture -def pyspark_test_session() -> Generator[SparkSession, None, None]: +def spark() -> Generator[SparkSession, None, None]: config = { "spark.master": "local[4]", "spark.python.worker.reuse": "false", @@ -55,10 +53,9 @@ def pyspark_test_session() -> Generator[SparkSession, None, None]: builder = SparkSession.builder.appName("XGBoost PySpark Python API Tests") for k, v in config.items(): builder.config(k, v) - spark = builder.getOrCreate() logging.getLogger("pyspark").setLevel(logging.INFO) - - yield spark + sess = builder.getOrCreate() + yield sess RegWithWeight = namedtuple( @@ -75,7 +72,7 @@ def pyspark_test_session() -> Generator[SparkSession, None, None]: @pytest.fixture def reg_with_weight( - pyspark_test_session: SparkSession, + spark: SparkSession, ) -> Generator[RegWithWeight, SparkSession, None]: reg_params_with_eval = { "validation_indicator_col": "isVal", @@ -98,31 +95,27 @@ def reg_with_weight( w_train = np.array([1.0, 2.0]) w_val = np.array([1.0, 2.0]) - reg2 = XGBRegressor() + reg2 = XGBRegressor(early_stopping_rounds=1, eval_metric="rmse") reg2.fit( X_train, y_train, eval_set=[(X_val, y_val)], - early_stopping_rounds=1, - eval_metric="rmse", ) predt2 = reg2.predict(X) best_score2 = reg2.best_score - reg3 = XGBRegressor() + reg3 = XGBRegressor(early_stopping_rounds=1, eval_metric="rmse") reg3.fit( X_train, y_train, sample_weight=w_train, eval_set=[(X_val, y_val)], sample_weight_eval_set=[w_val], - early_stopping_rounds=1, - eval_metric="rmse", ) predt3 = reg3.predict(X) best_score3 = reg3.best_score - reg_df_train_with_eval_weight = pyspark_test_session.createDataFrame( + reg_df_train_with_eval_weight = spark.createDataFrame( [ (Vectors.dense(1.0, 2.0, 3.0), 0, False, 1.0), (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1, False, 2.0), @@ -132,7 +125,7 @@ def reg_with_weight( ["features", "label", "isVal", "weight"], ) - reg_df_test_with_eval_weight = pyspark_test_session.createDataFrame( + reg_df_test_with_eval_weight = spark.createDataFrame( [ ( Vectors.dense(1.0, 2.0, 3.0), @@ -177,7 +170,7 @@ def reg_with_weight( @pytest.fixture def clf_with_weight( - pyspark_test_session: SparkSession, + spark: SparkSession, ) -> Generator[ClfWithWeight, SparkSession, None]: """Test classifier with weight and eval set.""" @@ -213,7 +206,7 @@ def clf_with_weight( eval_metric="logloss", ) - cls_df_train_with_eval_weight = pyspark_test_session.createDataFrame( + cls_df_train_with_eval_weight = spark.createDataFrame( [ (Vectors.dense(1.0, 2.0, 3.0), 0, False, 1.0), (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1, False, 2.0), @@ -228,7 +221,7 @@ def clf_with_weight( "eval_metric": "logloss", } print("cls1.predict_proba(X)", 
cls1.predict_proba(X).shape, cls1.predict_proba(X)) - cls_df_test_with_eval_weight = pyspark_test_session.createDataFrame( + cls_df_test_with_eval_weight = spark.createDataFrame( [ ( Vectors.dense(1.0, 2.0, 3.0), From e0927694dec31d2b7f9d78149f4263636f86ed22 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 21:17:16 +0800 Subject: [PATCH 068/133] Fix. --- tests/python/test_spark/test_spark_local_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/test_spark/test_spark_local_cluster.py b/tests/python/test_spark/test_spark_local_cluster.py index f7017d4a67ce..6bfa75abeb83 100644 --- a/tests/python/test_spark/test_spark_local_cluster.py +++ b/tests/python/test_spark/test_spark_local_cluster.py @@ -216,7 +216,7 @@ def setUp(self): ], ) self.reg_best_score_eval = 5.239e-05 - self.reg_best_score_weight_and_eval = 4.810e-05 + self.reg_best_score_weight_and_eval = 4.850e-05 def test_regressor_basic_with_params(self): regressor = SparkXGBRegressor(**self.reg_params) From f383ee9188bcb41e175c58eb48e40243cfd5b41e Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 21:21:26 +0800 Subject: [PATCH 069/133] format. --- python-package/xgboost/dask.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py index 7d21ff9fcb48..7e75c01c9ff4 100644 --- a/python-package/xgboost/dask.py +++ b/python-package/xgboost/dask.py @@ -103,13 +103,7 @@ _DaskCollection = Union["da.Array", "dd.DataFrame", "dd.Series"] _DataT = Union["da.Array", "dd.DataFrame"] # do not use series as predictor -TrainReturnT = TypedDict( - "TrainReturnT", - { - "booster": Booster, - "history": Dict, - }, -) +TrainReturnT = TypedDict("TrainReturnT", {"booster": Booster, "history": Dict,},) __all__ = [ "CommunicatorContext", @@ -150,8 +144,7 @@ def _try_start_tracker( - n_workers: int, - addrs: List[Union[Optional[str], Optional[Tuple[str, int]]]], + n_workers: int, addrs: List[Union[Optional[str], Optional[Tuple[str, int]]]], ) -> Dict[str, Union[int, str]]: env: Dict[str, Union[int, str]] = {"DMLC_NUM_WORKER": n_workers} try: @@ -1649,16 +1642,11 @@ def predict( ) async def _apply_async( - self, - X: _DataT, - iteration_range: Optional[Tuple[int, int]] = None, + self, X: _DataT, iteration_range: Optional[Tuple[int, int]] = None, ) -> Any: iteration_range = self._get_iteration_range(iteration_range) test_dmatrix = await DaskDMatrix( - self.client, - data=X, - missing=self.missing, - feature_types=self.feature_types, + self.client, data=X, missing=self.missing, feature_types=self.feature_types, ) predts = await predict( self.client, From 6f86ca614ffbe9efd4718ef33809265af1256398 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 22:24:15 +0800 Subject: [PATCH 070/133] Fix. 
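
Remove a block duplicated by a bad merge: the leftover `<<<<<<< HEAD` /
`>>>>>>> master` conflict markers together with the superseded copies
of `test_regressor_with_weight_eval` and
`test_classifier_with_weight_eval`, which now live in
`TestPySparkLocal`.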
--- tests/python/test_spark/test_spark_local.py | 93 --------------------- 1 file changed, 93 deletions(-) diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py index a3bc0b5ec693..120188570d2e 100644 --- a/tests/python/test_spark/test_spark_local.py +++ b/tests/python/test_spark/test_spark_local.py @@ -1004,99 +1004,6 @@ def test_classifier_with_base_margin(self): row.probability, row.expected_prob_with_base_margin, atol=1e-3 ) -<<<<<<< HEAD -======= - def test_regressor_with_weight_eval(self): - # with weight - regressor_with_weight = SparkXGBRegressor(weight_col="weight") - model_with_weight = regressor_with_weight.fit( - self.reg_df_train_with_eval_weight - ) - pred_result_with_weight = model_with_weight.transform( - self.reg_df_test_with_eval_weight - ).collect() - for row in pred_result_with_weight: - assert np.isclose( - row.prediction, row.expected_prediction_with_weight, atol=1e-3 - ) - - # with eval - regressor_with_eval = SparkXGBRegressor(**self.reg_params_with_eval) - model_with_eval = regressor_with_eval.fit(self.reg_df_train_with_eval_weight) - assert np.isclose( - model_with_eval._xgb_sklearn_model.best_score, - self.reg_with_eval_best_score, - atol=1e-3, - ), ( - f"Expected best score: {self.reg_with_eval_best_score}, but ", - f"get {model_with_eval._xgb_sklearn_model.best_score}", - ) - - pred_result_with_eval = model_with_eval.transform( - self.reg_df_test_with_eval_weight - ).collect() - for row in pred_result_with_eval: - self.assertTrue( - np.isclose( - row.prediction, row.expected_prediction_with_eval, atol=1e-3 - ), - f"Expect prediction is {row.expected_prediction_with_eval}," - f"but get {row.prediction}", - ) - # with weight and eval - regressor_with_weight_eval = SparkXGBRegressor( - weight_col="weight", **self.reg_params_with_eval - ) - model_with_weight_eval = regressor_with_weight_eval.fit( - self.reg_df_train_with_eval_weight - ) - pred_result_with_weight_eval = model_with_weight_eval.transform( - self.reg_df_test_with_eval_weight - ).collect() - self.assertTrue( - np.isclose( - model_with_weight_eval._xgb_sklearn_model.best_score, - self.reg_with_eval_and_weight_best_score, - atol=1e-3, - ) - ) - for row in pred_result_with_weight_eval: - self.assertTrue( - np.isclose( - row.prediction, - row.expected_prediction_with_weight_and_eval, - atol=1e-3, - ) - ) - - def test_classifier_with_weight_eval(self): - # with weight and eval - # Added scale_pos_weight because in 1.4.2, the original answer returns 0.5 which - # doesn't really indicate this working correctly. 
- classifier_with_weight_eval = SparkXGBClassifier( - weight_col="weight", scale_pos_weight=4, **self.cls_params_with_eval - ) - model_with_weight_eval = classifier_with_weight_eval.fit( - self.cls_df_train_with_eval_weight - ) - pred_result_with_weight_eval = model_with_weight_eval.transform( - self.cls_df_test_with_eval_weight - ).collect() - self.assertTrue( - np.isclose( - model_with_weight_eval._xgb_sklearn_model.best_score, - self.cls_with_eval_and_weight_best_score, - atol=1e-3, - ) - ) - for row in pred_result_with_weight_eval: - self.assertTrue( - np.allclose( - row.probability, row.expected_prob_with_weight_and_eval, atol=1e-3 - ) - ) - ->>>>>>> master def test_num_workers_param(self): regressor = SparkXGBRegressor(num_workers=-1) self.assertRaises(ValueError, regressor._validate_params) From 38c07b59fa4547e76637568bab737a81358144d5 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 22:26:21 +0800 Subject: [PATCH 071/133] Remove google test. --- .github/workflows/python_tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 4f18c043079b..0ece0f859dee 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -164,7 +164,7 @@ jobs: # Set prefix, to use OpenMP library from Conda env # See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228 # to learn why we don't use libomp from Homebrew. - cmake .. -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_PREFIX_PATH=$CONDA_PREFIX + cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX ninja - name: Install Python package @@ -209,11 +209,11 @@ jobs: - name: Build XGBoost on Ubuntu shell: bash -l {0} run: | - sudo apt install ninja -y + sudo apt install ninja-build -y mkdir build cd build - cmake .. -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_PREFIX_PATH=$CONDA_PREFIX + cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX ninja - name: Install Python package From cb48bdaed4b1b0354ce1c470b153713d4f995691 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 22:41:10 +0800 Subject: [PATCH 072/133] black version. 
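Restore black's expanded formatting in dask.py, which the previous cleanup collapsed;
the difference is most likely a black version mismatch, since releases that honor the
magic trailing comma keep a call exploded one argument per line whenever the last
argument already ends with a comma. Also re-add the `SparkTestCase` import in
test_spark_local.py. A minimal sketch of the trailing-comma behavior (imports added
only to make the snippet self-contained):

    from typing import Dict, TypedDict

    from xgboost import Booster

    # Because of the comma after `"history": Dict,` black keeps the call expanded
    # instead of joining it back onto a single line.
    TrainReturnT = TypedDict(
        "TrainReturnT",
        {
            "booster": Booster,
            "history": Dict,
        },
    )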
--- python-package/xgboost/dask.py | 20 ++++++++++++++++---- tests/python/test_spark/test_spark_local.py | 2 ++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py index 7e75c01c9ff4..7d21ff9fcb48 100644 --- a/python-package/xgboost/dask.py +++ b/python-package/xgboost/dask.py @@ -103,7 +103,13 @@ _DaskCollection = Union["da.Array", "dd.DataFrame", "dd.Series"] _DataT = Union["da.Array", "dd.DataFrame"] # do not use series as predictor -TrainReturnT = TypedDict("TrainReturnT", {"booster": Booster, "history": Dict,},) +TrainReturnT = TypedDict( + "TrainReturnT", + { + "booster": Booster, + "history": Dict, + }, +) __all__ = [ "CommunicatorContext", @@ -144,7 +150,8 @@ def _try_start_tracker( - n_workers: int, addrs: List[Union[Optional[str], Optional[Tuple[str, int]]]], + n_workers: int, + addrs: List[Union[Optional[str], Optional[Tuple[str, int]]]], ) -> Dict[str, Union[int, str]]: env: Dict[str, Union[int, str]] = {"DMLC_NUM_WORKER": n_workers} try: @@ -1642,11 +1649,16 @@ def predict( ) async def _apply_async( - self, X: _DataT, iteration_range: Optional[Tuple[int, int]] = None, + self, + X: _DataT, + iteration_range: Optional[Tuple[int, int]] = None, ) -> Any: iteration_range = self._get_iteration_range(iteration_range) test_dmatrix = await DaskDMatrix( - self.client, data=X, missing=self.missing, feature_types=self.feature_types, + self.client, + data=X, + missing=self.missing, + feature_types=self.feature_types, ) predts = await predict( self.client, diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py index 120188570d2e..633806c2b177 100644 --- a/tests/python/test_spark/test_spark_local.py +++ b/tests/python/test_spark/test_spark_local.py @@ -33,6 +33,8 @@ from xgboost import XGBClassifier, XGBModel, XGBRegressor +from .utils import SparkTestCase + logging.getLogger("py4j").setLevel(logging.INFO) pytestmark = pytest.mark.timeout(60) From a192fd087e25a73ddf7c80b3808a1eb06ba8e46a Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 22:48:09 +0800 Subject: [PATCH 073/133] Enable type hint for users. --- python-package/xgboost/spark/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python-package/xgboost/spark/__init__.py b/python-package/xgboost/spark/__init__.py index c0b347eefb30..8e755753ad6c 100644 --- a/python-package/xgboost/spark/__init__.py +++ b/python-package/xgboost/spark/__init__.py @@ -1,4 +1,3 @@ -# type: ignore """PySpark XGBoost integration interface """ @@ -7,7 +6,7 @@ except ImportError as e: raise ImportError("pyspark package needs to be installed to use this module") from e -from .estimator import ( +from .estimator import ( # type: ignore SparkXGBClassifier, SparkXGBClassifierModel, SparkXGBRanker, From 8965500f86c0dec317edc319148b344f2d5d7c0a Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 22:56:13 +0800 Subject: [PATCH 074/133] env name. 
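The Ubuntu job activated its Conda environment under the name `macos_test`, while the
environment file it loads is tests/ci_build/conda_env/cpu_test.yml; rename the
activation to `cpu_test` so it matches the environment actually created. While at it,
run pytest with `-rxXs --durations=0` in all three jobs so the CI log reports why
tests were skipped, xfailed, or unexpectedly passed, along with per-test timings.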
--- .github/workflows/python_tests.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 0ece0f859dee..be53b387c80b 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -125,7 +125,7 @@ jobs: - name: Test Python package shell: bash -l {0} run: | - pytest -s -v ./tests/python + pytest -s -v -rxXs --durations=0 ./tests/python python-tests-on-macos: name: Test XGBoost Python package on ${{ matrix.config.os }} @@ -177,7 +177,7 @@ jobs: - name: Test Python package shell: bash -l {0} run: | - pytest -s -v ./tests/python + pytest -s -v -rxXs --durations=0 ./tests/python python-tests-on-ubuntu: name: Test XGBoost Python package on ${{ matrix.config.os }} @@ -197,7 +197,7 @@ jobs: with: auto-update-conda: true python-version: ${{ matrix.config.python-version }} - activate-environment: macos_test + activate-environment: cpu_test environment-file: tests/ci_build/conda_env/cpu_test.yml - name: Display Conda env @@ -226,4 +226,4 @@ jobs: - name: Test Python package shell: bash -l {0} run: | - pytest -s -v ./tests/python + pytest -s -v -rxXs --durations=0 ./tests/python From d7cd66faef1db58dae1a3f447a98a57da40310d0 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 23:14:28 +0800 Subject: [PATCH 075/133] black. --- python-package/xgboost/spark/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/xgboost/spark/__init__.py b/python-package/xgboost/spark/__init__.py index 8e755753ad6c..7d99896b3b85 100644 --- a/python-package/xgboost/spark/__init__.py +++ b/python-package/xgboost/spark/__init__.py @@ -6,7 +6,7 @@ except ImportError as e: raise ImportError("pyspark package needs to be installed to use this module") from e -from .estimator import ( # type: ignore +from .estimator import ( # type: ignore SparkXGBClassifier, SparkXGBClassifierModel, SparkXGBRanker, From 4c5ab95a9713b06393d549871d1ab438ec9252cb Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 13 Oct 2022 23:28:07 +0800 Subject: [PATCH 076/133] Fix. --- tests/python/test_with_dask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py index 211c320eb54d..60a60b877f77 100644 --- a/tests/python/test_with_dask.py +++ b/tests/python/test_with_dask.py @@ -724,7 +724,7 @@ def run_dask_classifier( def test_dask_classifier(model: str, client: "Client") -> None: X, y, w = generate_array(with_weights=True) y = (y * 10).astype(np.int32) - assert w + assert w is not None run_dask_classifier(X, y, w, model, None, client, 10) y_bin = y.copy() From 144640723177eda6b6dcb7f1520f058f00ebfbec Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 14 Oct 2022 00:01:55 +0800 Subject: [PATCH 077/133] lint. pylint. 
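pylint fixes for python-package/xgboost/testing.py: `SPARK_INSTALLED` is a local
variable inside `skip_spark`, so invalid-name wants snake_case rather than the
UPPER_CASE spelling reserved for module-level constants, and the module-level
`# pylint: disable=unused-import` covers imports whose only purpose is to probe for
an optional dependency. The pattern, in minimal form:

    try:
        # probe import: it only checks that PySpark is importable, hence "unused"
        from pyspark.ml import Pipeline  # noqa
        spark_installed = True
    except ImportError:
        spark_installed = False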
--- python-package/xgboost/testing.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python-package/xgboost/testing.py b/python-package/xgboost/testing.py index 34ae3da74c68..965e0c8c134e 100644 --- a/python-package/xgboost/testing.py +++ b/python-package/xgboost/testing.py @@ -1,5 +1,5 @@ """Utilities for defining Python tests.""" - +# pylint: disable=unused-import import os import socket from platform import system @@ -57,7 +57,7 @@ def skip_spark() -> PytestSkip: # just in case there's a pyspark stub created by some other libraries from pyspark.ml import Pipeline # noqa - SPARK_INSTALLED = True + spark_installed = True except ImportError: - SPARK_INSTALLED = False - return {"condition": not SPARK_INSTALLED, "reason": "Spark is not installed"} + spark_installed = False + return {"condition": not spark_installed, "reason": "Spark is not installed"} From 9336ab9db1bdd2c10e5b9aec5b25c224006a648e Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 14 Oct 2022 00:13:58 +0800 Subject: [PATCH 078/133] fix threads. --- tests/python/test_with_dask.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py index 60a60b877f77..31dd8bbb9693 100644 --- a/tests/python/test_with_dask.py +++ b/tests/python/test_with_dask.py @@ -54,8 +54,10 @@ @pytest.fixture(scope="module") def cluster(): + n_workers = 2 + n_threads = len(os.sched_getaffinity(0)) // n_workers with LocalCluster( - n_workers=2, threads_per_worker=2, dashboard_address=":0" + n_workers=n_workers, threads_per_worker=n_threads, dashboard_address=":0" ) as dask_cluster: yield dask_cluster From 63b7c8aa1fafdc16e3388dca6d7055d8c31f46db Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 14 Oct 2022 17:33:02 +0800 Subject: [PATCH 079/133] Merge. --- include/xgboost/generic_parameters.h | 1 + include/xgboost/learner.h | 8 +- src/learner.cc | 210 +++++++++++++++++---------- tests/cpp/test_learner.cc | 92 +++++++++++- 4 files changed, 225 insertions(+), 86 deletions(-) diff --git a/include/xgboost/generic_parameters.h b/include/xgboost/generic_parameters.h index 0375ecfafdc2..14f82912d51c 100644 --- a/include/xgboost/generic_parameters.h +++ b/include/xgboost/generic_parameters.h @@ -36,6 +36,7 @@ struct GenericParameter : public XGBoostParameter { int gpu_id{kCpuId}; // fail when gpu_id is invalid bool fail_on_invalid_gpu_id {false}; + // FIXME(jiamingy): Move this to model training parameter instead. bool validate_parameters {false}; /*! diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h index 34ae5a4d53bb..739801b735af 100644 --- a/include/xgboost/learner.h +++ b/include/xgboost/learner.h @@ -82,8 +82,10 @@ class Learner : public Model, public Configurable, public dmlc::Serializable { ~Learner() override; /*! * \brief Configure Learner based on set parameters. + * + * \param p_fmat The optional training DMatrix. */ - virtual void Configure() = 0; + virtual void Configure(DMatrix const* p_fmat = nullptr) = 0; /*! * \brief update the model for one iteration * With the specified objective function. @@ -161,7 +163,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable { * \brief Get number of boosted rounds from gradient booster. 
*/ virtual int32_t BoostedRounds() const = 0; - virtual uint32_t Groups() const = 0; + virtual bst_group_t Groups() const = 0; void LoadModel(Json const& in) override = 0; void SaveModel(Json* out) const override = 0; @@ -328,7 +330,7 @@ struct LearnerModelParam { void Copy(LearnerModelParam const& that); /* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */ - bool Initialized() const { return num_feature != 0; } + bool Initialized() const { return num_feature != 0 && num_output_group != 0; } }; } // namespace xgboost diff --git a/src/learner.cc b/src/learner.cc index 7ce624060124..4fd3142fe6d7 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -63,6 +63,15 @@ DECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode); namespace xgboost { Learner::~Learner() = default; +namespace { +StringView ModelNotFitted() { return "Model is not yet initialized (not fitted)."; } + +template +T& UsePtr(T& ptr) { // NOLINT + CHECK(ptr); + return ptr; +} +} // anonymous namespace /*! \brief training parameter for regression * @@ -74,20 +83,28 @@ struct LearnerModelParamLegacy : public dmlc::Parameter /* \brief global bias */ bst_float base_score; /* \brief number of features */ - uint32_t num_feature; + bst_feature_t num_feature; /* \brief number of classes, if it is multi-class classification */ - int32_t num_class; + std::int32_t num_class; /*! \brief Model contain additional properties */ int32_t contain_extra_attrs; /*! \brief Model contain eval metrics */ int32_t contain_eval_metrics; /*! \brief the version of XGBoost. */ - uint32_t major_version; - uint32_t minor_version; + std::uint32_t major_version; + std::uint32_t minor_version; uint32_t num_target{1}; - - std::int32_t base_score_estimated{0}; + /** + * \brief Whether we should calculate the base score from training data. + * + * This is a private parameter as we can't expose it as boolean due to binary model + * format. Exposing it as integer creates inconsistency with other parameters. + * + * Automatically disabled when base_score is specifed by user. int32 is used instead + * of bool for the ease of serialization. + */ + std::int32_t boost_from_average{true}; /*! \brief reserved field */ int reserved[25]; /*! 
\brief constructor */ @@ -97,7 +114,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter num_target = 1; major_version = std::get<0>(Version::Self()); minor_version = std::get<1>(Version::Self()); - base_score_estimated = 0; + boost_from_average = true; static_assert(sizeof(LearnerModelParamLegacy) == 136, "Do not change the size of this struct, as it will break binary IO."); } @@ -127,8 +144,8 @@ struct LearnerModelParamLegacy : public dmlc::Parameter std::string{integers, static_cast(std::distance(integers, ret.ptr))}; ret = to_chars(integers, integers + NumericLimits::kToCharsSize, - static_cast(base_score_estimated)); - obj["base_score_estimated"] = + static_cast(boost_from_average)); + obj["boost_from_average"] = std::string{integers, static_cast(std::distance(integers, ret.ptr))}; return obj; @@ -142,9 +159,9 @@ struct LearnerModelParamLegacy : public dmlc::Parameter if (n_targets_it != j_param.cend()) { m["num_target"] = get(n_targets_it->second); } - auto bse_it = j_param.find("base_score_estimated"); + auto bse_it = j_param.find("boost_from_average"); if (bse_it != j_param.cend()) { - m["base_score_estimated"] = get(bse_it->second); + m["boost_from_average"] = get(bse_it->second); } this->Init(m); @@ -163,7 +180,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter dmlc::ByteSwap(&x.major_version, sizeof(x.major_version), 1); dmlc::ByteSwap(&x.minor_version, sizeof(x.minor_version), 1); dmlc::ByteSwap(&x.num_target, sizeof(x.num_target), 1); - dmlc::ByteSwap(&x.base_score_estimated, sizeof(x.base_score_estimated), 1); + dmlc::ByteSwap(&x.boost_from_average, sizeof(x.boost_from_average), 1); dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0])); return x; } @@ -171,17 +188,43 @@ struct LearnerModelParamLegacy : public dmlc::Parameter template Args UpdateAllowUnknown(Container const& kwargs) { // Detect whether user has made their own base score. 
- if (std::find_if(kwargs.cbegin(), kwargs.cend(), - [](auto const& kv) { return kv.first == "base_score"; }) != kwargs.cend()) { - base_score_estimated = true; - } - if (std::find_if(kwargs.cbegin(), kwargs.cend(), [](auto const& kv) { - return kv.first == "base_score_estimated"; - }) != kwargs.cend()) { - LOG(FATAL) << "`base_score_estimated` cannot be specified as hyper-parameter."; + auto find_key = [&kwargs](char const* key) { + return std::find_if(kwargs.cbegin(), kwargs.cend(), + [key](auto const& kv) { return kv.first == key; }); + }; + auto it = find_key("base_score"); + if (it != kwargs.cend()) { + boost_from_average = false; } return dmlc::Parameter::UpdateAllowUnknown(kwargs); } + // sanity check + void Validate() { + if (!collective::IsDistributed()) { + return; + } + + std::array data; + std::size_t pos{0}; + std::memcpy(data.data() + pos, &base_score, sizeof(base_score)); + pos += 1; + std::memcpy(data.data() + pos, &num_feature, sizeof(num_feature)); + pos += 1; + std::memcpy(data.data() + pos, &num_class, sizeof(num_class)); + pos += 1; + std::memcpy(data.data() + pos, &num_target, sizeof(num_target)); + pos += 1; + std::memcpy(data.data() + pos, &major_version, sizeof(major_version)); + pos += 1; + std::memcpy(data.data() + pos, &minor_version, sizeof(minor_version)); + pos += 1; + + std::array sync; + std::copy(data.cbegin(), data.cend(), sync.begin()); + collective::Broadcast(sync.data(), sync.size(), 0); + CHECK(std::equal(data.cbegin(), data.cend(), sync.cbegin())) + << "Different model parameter across workers."; + } // declare parameters DMLC_DECLARE_PARAMETER(LearnerModelParamLegacy) { @@ -200,7 +243,9 @@ struct LearnerModelParamLegacy : public dmlc::Parameter .set_default(1) .set_lower_bound(1) .describe("Number of target for multi-target regression."); - DMLC_DECLARE_FIELD(base_score_estimated).set_default(0); + DMLC_DECLARE_FIELD(boost_from_average) + .set_default(true) + .describe("Whether we should calculate the base score from training data."); } }; @@ -229,7 +274,7 @@ LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy linalg::TensorView LearnerModelParam::BaseScore(int32_t device) const { // multi-class is not yet supported. - CHECK_EQ(base_score_.Size(), 1); + CHECK_EQ(base_score_.Size(), 1) << ModelNotFitted(); if (device == Context::kCpuId) { // Make sure that we won't run into race condition. CHECK(base_score_.Data()->HostCanRead()); @@ -375,6 +420,8 @@ class LearnerConfiguration : public Learner { protected: std::atomic need_configuration_; + // std::atomic need_init_model_; + std::map cfg_; // Stores information like best-iteration for early stopping. std::map attributes_; @@ -390,6 +437,21 @@ class LearnerConfiguration : public Learner { // Initial prediction. std::vector metric_names_; + void ConfigureModelParamWithoutBaseScore() { + // Convert mparam to learner_model_param + this->ConfigureTargets(); + + auto task = UsePtr(obj_)->Task(); + linalg::Tensor base_score({1}, Ctx()->gpu_id); + auto h_base_score = base_score.HostView(); + + // transform to margin + h_base_score(0) = obj_->ProbToMargin(mparam_.base_score); + // move it to model param, which is shared with all other components. + learner_model_param_ = LearnerModelParam(Ctx(), mparam_, std::move(base_score), task); + CHECK(learner_model_param_.Initialized()); + CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0); + } /** * \brief Calculate the `base_score` based on input data. 
* @@ -408,48 +470,40 @@ class LearnerConfiguration : public Learner { // - model loaded from new binary or JSON. // - model is created from scratch. // - model is configured second time due to change of parameter - CHECK(obj_); - if (!mparam_.base_score_estimated) { - std::lock_guard guard(config_lock_); - if (p_fmat) { - auto const& info = p_fmat->Info(); - info.Validate(Ctx()->gpu_id); - // We estimate it from input data. - linalg::Tensor base_score; - obj_->InitEstimation(info, &base_score); - mparam_.base_score = base_score(0); - CHECK(!std::isnan(mparam_.base_score)); - } else { - mparam_.base_score = ObjFunction::DefaultBaseScore(); - } - mparam_.base_score_estimated = true; - // Update the shared model parameter - this->ConfigureModelParam(); - auto sync_score = mparam_.base_score; - rabit::Broadcast(&sync_score, sizeof(sync_score), 0); - CHECK_EQ(sync_score, mparam_.base_score); + + // if (!need_init_model_) { + // return; + // } + // std::lock_guard guard(config_lock_); + // if (!need_init_model_) { + // return; + // } + + if (!learner_model_param_.Initialized()) { + // CHECK(!p_fmat); // only using custom objective can reach here. + // First call to have some basic model info for boosted rounds to work. + this->ConfigureModelParamWithoutBaseScore(); + } + + if (p_fmat) { + auto const& info = p_fmat->Info(); + info.Validate(Ctx()->gpu_id); + // We estimate it from input data. + linalg::Tensor base_score; + UsePtr(obj_)->InitEstimation(info, &base_score); + mparam_.base_score = base_score(0); + CHECK(!std::isnan(mparam_.base_score)); + } else { + mparam_.base_score = ObjFunction::DefaultBaseScore(); } + // Update the shared model parameter + this->ConfigureModelParamWithoutBaseScore(); + mparam_.Validate(); + CHECK(!std::isnan(mparam_.base_score)); CHECK(!std::isinf(mparam_.base_score)); } - // Convert mparam to learner_model_param - void ConfigureModelParam() { - this->ConfigureTargets(); - - CHECK(obj_); - auto task = obj_->Task(); - linalg::Tensor base_score({1}, Ctx()->gpu_id); - auto h_base_score = base_score.HostView(); - - // transform to margin - h_base_score(0) = obj_->ProbToMargin(mparam_.base_score); - // move it to model param, which is shared with all other components. - learner_model_param_ = LearnerModelParam(Ctx(), mparam_, std::move(base_score), task); - CHECK(learner_model_param_.Initialized()); - CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0); - } - public: explicit LearnerConfiguration(std::vector > cache) : need_configuration_{true} { @@ -467,7 +521,7 @@ class LearnerConfiguration : public Learner { } // Configuration before data is known. - void Configure() override { + void Configure(DMatrix const* p_fmat = nullptr) override { // Varient of double checked lock if (!this->need_configuration_) { return; @@ -509,11 +563,15 @@ class LearnerConfiguration : public Learner { learner_model_param_.task = obj_->Task(); // required by gbm configuration. 
this->ConfigureGBM(old_tparam, args); ctx_.ConfigureGpuId(this->gbm_->UseGPU()); - this->ConfigureModelParam(); + + // Configure model param without base score + this->InitBaseScore(p_fmat); + // this->ConfigureModelParamWithoutBaseScore(); this->ConfigureMetrics(args); - this->need_configuration_ = false; + this->need_configuration_ = + UsePtr(this->gbm_)->BoostedRounds() == 0 && mparam_.boost_from_average; if (ctx_.validate_parameters) { this->ValidateParameters(); } @@ -523,8 +581,8 @@ class LearnerConfiguration : public Learner { } void CheckModelInitialized() const { - CHECK(learner_model_param_.Initialized()) << "Model not yet initialized."; - CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0); + CHECK(learner_model_param_.Initialized()) << ModelNotFitted(); + // CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0) << ModelNotFitted(); } virtual PredictionContainer* GetPredictionCache() const { @@ -1300,8 +1358,8 @@ class LearnerImpl : public LearnerIO { void UpdateOneIter(int iter, std::shared_ptr train) override { monitor_.Start("UpdateOneIter"); TrainingObserver::Instance().Update(iter); - this->Configure(); - this->InitBaseScore(train.get()); + this->Configure(train.get()); + // this->InitBaseScore(train.get()); if (ctx_.seed_per_iteration) { common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter); @@ -1330,9 +1388,9 @@ class LearnerImpl : public LearnerIO { void BoostOneIter(int iter, std::shared_ptr train, HostDeviceVector* in_gpair) override { monitor_.Start("BoostOneIter"); - this->Configure(); - // Should have been set to default in the first prediction. - CHECK(mparam_.base_score_estimated); + this->Configure(nullptr); + // Custom objective, use default instead. + // this->InitBaseScore(nullptr); if (ctx_.seed_per_iteration) { common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter); @@ -1352,7 +1410,8 @@ class LearnerImpl : public LearnerIO { const std::vector>& data_sets, const std::vector& data_names) override { monitor_.Start("EvalOneIter"); - this->Configure(); + this->Configure(nullptr); + // this->InitBaseScore(nullptr); this->CheckModelInitialized(); std::ostringstream os; @@ -1392,8 +1451,8 @@ class LearnerImpl : public LearnerIO { int multiple_predictions = static_cast(pred_leaf) + static_cast(pred_interactions) + static_cast(pred_contribs); - this->Configure(); - this->InitBaseScore(nullptr); + this->Configure(nullptr); + // this->InitBaseScore(nullptr); this->CheckModelInitialized(); CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time."; @@ -1418,13 +1477,13 @@ class LearnerImpl : public LearnerIO { } } - int32_t BoostedRounds() const override { + std::int32_t BoostedRounds() const override { if (!this->gbm_) { return 0; } // haven't call train or LoadModel. 
CHECK(!this->need_configuration_); return this->gbm_->BoostedRounds(); } - uint32_t Groups() const override { + bst_group_t Groups() const override { CHECK(!this->need_configuration_); this->CheckModelInitialized(); return this->learner_model_param_.num_output_group; @@ -1437,8 +1496,7 @@ class LearnerImpl : public LearnerIO { void InplacePredict(std::shared_ptr p_m, PredictionType type, float missing, HostDeviceVector** out_preds, uint32_t iteration_begin, uint32_t iteration_end) override { - this->Configure(); - this->InitBaseScore(nullptr); + this->Configure(nullptr); this->CheckModelInitialized(); auto& out_predictions = this->GetThreadLocal().prediction_entry; diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index 49c1d9537426..fa36cdcc8584 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -493,13 +493,6 @@ TEST(Learner, InitEstimation) { auto base_score = std::stof(get(config["learner"]["learner_model_param"]["base_score"])); ASSERT_NE(base_score, ObjFunction::DefaultBaseScore()); - - ASSERT_THROW( - { - learner->SetParam("base_score_estimated", "1"); - learner->Configure(); - }, - dmlc::Error); } { @@ -522,4 +515,89 @@ TEST(Learner, InitEstimation) { ASSERT_FLOAT_EQ(base_score, 1.3); } } + +class InitBaseScore : public ::testing::Test { + protected: + std::size_t static constexpr Cols() { return 10; } + std::shared_ptr Xy_; + + void SetUp() override { Xy_ = RandomDataGenerator{10, Cols(), 0}.GenerateDMatrix(true); } + + public: + void TestBoostFromAvg() { + std::unique_ptr learner{Learner::Create({Xy_})}; + learner->SetParam("objective", "reg:absoluteerror"); + learner->SetParam("base_score", "1.3"); + Json config(Object{}); + learner->Configure(); + learner->SaveConfig(&config); + + auto base_score = + std::stof(get(config["learner"]["learner_model_param"]["base_score"])); + // no change + ASSERT_FLOAT_EQ(base_score, 1.3); + auto from_avg = std::stoi( + get(config["learner"]["learner_model_param"]["boost_from_average"])); + // from_avg is disabled when base score is set + ASSERT_EQ(from_avg, 0); + // in the future when we can deprecate the binary model, user can set the parameter directly. 
+ learner->SetParam("boost_from_average", "1"); + learner->Configure(); + learner->SaveConfig(&config); + from_avg = std::stoi( + get(config["learner"]["learner_model_param"]["boost_from_average"])); + ASSERT_EQ(from_avg, 1); + } + + void TestInitAfterLoad() { + std::unique_ptr learner{Learner::Create({Xy_})}; + learner->SetParam("objective", "reg:absoluteerror"); + learner->Configure(); + + Json model{Object{}}; + learner->SaveModel(&model); + auto base_score = + std::stof(get(model["learner"]["learner_model_param"]["base_score"])); + ASSERT_FLOAT_EQ(base_score, ObjFunction::DefaultBaseScore()); + + learner.reset(Learner::Create({Xy_})); + learner->LoadModel(model); + Json config(Object{}); + learner->Configure(); + learner->SaveConfig(&config); + base_score = + std::stof(get(config["learner"]["learner_model_param"]["base_score"])); + ASSERT_FLOAT_EQ(base_score, ObjFunction::DefaultBaseScore()); + + learner->UpdateOneIter(0, Xy_); + learner->SaveConfig(&config); + base_score = + std::stof(get(config["learner"]["learner_model_param"]["base_score"])); + ASSERT_NE(base_score, ObjFunction::DefaultBaseScore()); + } + + void TestInitWithPredt() { + std::unique_ptr learner{Learner::Create({Xy_})}; + learner->SetParam("objective", "reg:absoluteerror"); + HostDeviceVector predt; + learner->Predict(Xy_, false, &predt, 0, 0); + Json config(Object{}); + learner->SaveConfig(&config); + auto base_score = + std::stof(get(config["learner"]["learner_model_param"]["base_score"])); + ASSERT_FLOAT_EQ(base_score, ObjFunction::DefaultBaseScore()); + + learner->UpdateOneIter(0, Xy_); + learner->SaveConfig(&config); + base_score = + std::stof(get(config["learner"]["learner_model_param"]["base_score"])); + ASSERT_NE(base_score, ObjFunction::DefaultBaseScore()); + } +}; + +TEST_F(InitBaseScore, FromAvgParam) { this->TestBoostFromAvg(); } + +TEST_F(InitBaseScore, InitAfterLoad) { this->TestInitAfterLoad(); } + +TEST_F(InitBaseScore, InitWithPredict) { this->TestInitWithPredt(); } } // namespace xgboost From 27bd7ee3654abe26354e26169de7e3bc173c9b8c Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 14 Oct 2022 18:07:09 +0800 Subject: [PATCH 080/133] Revert "Merge." This reverts commit 63b7c8aa1fafdc16e3388dca6d7055d8c31f46db. --- include/xgboost/generic_parameters.h | 1 - include/xgboost/learner.h | 8 +- src/learner.cc | 210 ++++++++++----------------- tests/cpp/test_learner.cc | 92 +----------- 4 files changed, 86 insertions(+), 225 deletions(-) diff --git a/include/xgboost/generic_parameters.h b/include/xgboost/generic_parameters.h index 14f82912d51c..0375ecfafdc2 100644 --- a/include/xgboost/generic_parameters.h +++ b/include/xgboost/generic_parameters.h @@ -36,7 +36,6 @@ struct GenericParameter : public XGBoostParameter { int gpu_id{kCpuId}; // fail when gpu_id is invalid bool fail_on_invalid_gpu_id {false}; - // FIXME(jiamingy): Move this to model training parameter instead. bool validate_parameters {false}; /*! diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h index 739801b735af..34ae5a4d53bb 100644 --- a/include/xgboost/learner.h +++ b/include/xgboost/learner.h @@ -82,10 +82,8 @@ class Learner : public Model, public Configurable, public dmlc::Serializable { ~Learner() override; /*! * \brief Configure Learner based on set parameters. - * - * \param p_fmat The optional training DMatrix. */ - virtual void Configure(DMatrix const* p_fmat = nullptr) = 0; + virtual void Configure() = 0; /*! * \brief update the model for one iteration * With the specified objective function. 
@@ -163,7 +161,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable { * \brief Get number of boosted rounds from gradient booster. */ virtual int32_t BoostedRounds() const = 0; - virtual bst_group_t Groups() const = 0; + virtual uint32_t Groups() const = 0; void LoadModel(Json const& in) override = 0; void SaveModel(Json* out) const override = 0; @@ -330,7 +328,7 @@ struct LearnerModelParam { void Copy(LearnerModelParam const& that); /* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */ - bool Initialized() const { return num_feature != 0 && num_output_group != 0; } + bool Initialized() const { return num_feature != 0; } }; } // namespace xgboost diff --git a/src/learner.cc b/src/learner.cc index 4fd3142fe6d7..7ce624060124 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -63,15 +63,6 @@ DECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode); namespace xgboost { Learner::~Learner() = default; -namespace { -StringView ModelNotFitted() { return "Model is not yet initialized (not fitted)."; } - -template -T& UsePtr(T& ptr) { // NOLINT - CHECK(ptr); - return ptr; -} -} // anonymous namespace /*! \brief training parameter for regression * @@ -83,28 +74,20 @@ struct LearnerModelParamLegacy : public dmlc::Parameter /* \brief global bias */ bst_float base_score; /* \brief number of features */ - bst_feature_t num_feature; + uint32_t num_feature; /* \brief number of classes, if it is multi-class classification */ - std::int32_t num_class; + int32_t num_class; /*! \brief Model contain additional properties */ int32_t contain_extra_attrs; /*! \brief Model contain eval metrics */ int32_t contain_eval_metrics; /*! \brief the version of XGBoost. */ - std::uint32_t major_version; - std::uint32_t minor_version; + uint32_t major_version; + uint32_t minor_version; uint32_t num_target{1}; - /** - * \brief Whether we should calculate the base score from training data. - * - * This is a private parameter as we can't expose it as boolean due to binary model - * format. Exposing it as integer creates inconsistency with other parameters. - * - * Automatically disabled when base_score is specifed by user. int32 is used instead - * of bool for the ease of serialization. - */ - std::int32_t boost_from_average{true}; + + std::int32_t base_score_estimated{0}; /*! \brief reserved field */ int reserved[25]; /*! 
\brief constructor */ @@ -114,7 +97,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter num_target = 1; major_version = std::get<0>(Version::Self()); minor_version = std::get<1>(Version::Self()); - boost_from_average = true; + base_score_estimated = 0; static_assert(sizeof(LearnerModelParamLegacy) == 136, "Do not change the size of this struct, as it will break binary IO."); } @@ -144,8 +127,8 @@ struct LearnerModelParamLegacy : public dmlc::Parameter std::string{integers, static_cast(std::distance(integers, ret.ptr))}; ret = to_chars(integers, integers + NumericLimits::kToCharsSize, - static_cast(boost_from_average)); - obj["boost_from_average"] = + static_cast(base_score_estimated)); + obj["base_score_estimated"] = std::string{integers, static_cast(std::distance(integers, ret.ptr))}; return obj; @@ -159,9 +142,9 @@ struct LearnerModelParamLegacy : public dmlc::Parameter if (n_targets_it != j_param.cend()) { m["num_target"] = get(n_targets_it->second); } - auto bse_it = j_param.find("boost_from_average"); + auto bse_it = j_param.find("base_score_estimated"); if (bse_it != j_param.cend()) { - m["boost_from_average"] = get(bse_it->second); + m["base_score_estimated"] = get(bse_it->second); } this->Init(m); @@ -180,7 +163,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter dmlc::ByteSwap(&x.major_version, sizeof(x.major_version), 1); dmlc::ByteSwap(&x.minor_version, sizeof(x.minor_version), 1); dmlc::ByteSwap(&x.num_target, sizeof(x.num_target), 1); - dmlc::ByteSwap(&x.boost_from_average, sizeof(x.boost_from_average), 1); + dmlc::ByteSwap(&x.base_score_estimated, sizeof(x.base_score_estimated), 1); dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0])); return x; } @@ -188,42 +171,16 @@ struct LearnerModelParamLegacy : public dmlc::Parameter template Args UpdateAllowUnknown(Container const& kwargs) { // Detect whether user has made their own base score. 
- auto find_key = [&kwargs](char const* key) { - return std::find_if(kwargs.cbegin(), kwargs.cend(), - [key](auto const& kv) { return kv.first == key; }); - }; - auto it = find_key("base_score"); - if (it != kwargs.cend()) { - boost_from_average = false; + if (std::find_if(kwargs.cbegin(), kwargs.cend(), + [](auto const& kv) { return kv.first == "base_score"; }) != kwargs.cend()) { + base_score_estimated = true; } - return dmlc::Parameter::UpdateAllowUnknown(kwargs); - } - // sanity check - void Validate() { - if (!collective::IsDistributed()) { - return; + if (std::find_if(kwargs.cbegin(), kwargs.cend(), [](auto const& kv) { + return kv.first == "base_score_estimated"; + }) != kwargs.cend()) { + LOG(FATAL) << "`base_score_estimated` cannot be specified as hyper-parameter."; } - - std::array data; - std::size_t pos{0}; - std::memcpy(data.data() + pos, &base_score, sizeof(base_score)); - pos += 1; - std::memcpy(data.data() + pos, &num_feature, sizeof(num_feature)); - pos += 1; - std::memcpy(data.data() + pos, &num_class, sizeof(num_class)); - pos += 1; - std::memcpy(data.data() + pos, &num_target, sizeof(num_target)); - pos += 1; - std::memcpy(data.data() + pos, &major_version, sizeof(major_version)); - pos += 1; - std::memcpy(data.data() + pos, &minor_version, sizeof(minor_version)); - pos += 1; - - std::array sync; - std::copy(data.cbegin(), data.cend(), sync.begin()); - collective::Broadcast(sync.data(), sync.size(), 0); - CHECK(std::equal(data.cbegin(), data.cend(), sync.cbegin())) - << "Different model parameter across workers."; + return dmlc::Parameter::UpdateAllowUnknown(kwargs); } // declare parameters @@ -243,9 +200,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter .set_default(1) .set_lower_bound(1) .describe("Number of target for multi-target regression."); - DMLC_DECLARE_FIELD(boost_from_average) - .set_default(true) - .describe("Whether we should calculate the base score from training data."); + DMLC_DECLARE_FIELD(base_score_estimated).set_default(0); } }; @@ -274,7 +229,7 @@ LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy linalg::TensorView LearnerModelParam::BaseScore(int32_t device) const { // multi-class is not yet supported. - CHECK_EQ(base_score_.Size(), 1) << ModelNotFitted(); + CHECK_EQ(base_score_.Size(), 1); if (device == Context::kCpuId) { // Make sure that we won't run into race condition. CHECK(base_score_.Data()->HostCanRead()); @@ -420,8 +375,6 @@ class LearnerConfiguration : public Learner { protected: std::atomic need_configuration_; - // std::atomic need_init_model_; - std::map cfg_; // Stores information like best-iteration for early stopping. std::map attributes_; @@ -437,21 +390,6 @@ class LearnerConfiguration : public Learner { // Initial prediction. std::vector metric_names_; - void ConfigureModelParamWithoutBaseScore() { - // Convert mparam to learner_model_param - this->ConfigureTargets(); - - auto task = UsePtr(obj_)->Task(); - linalg::Tensor base_score({1}, Ctx()->gpu_id); - auto h_base_score = base_score.HostView(); - - // transform to margin - h_base_score(0) = obj_->ProbToMargin(mparam_.base_score); - // move it to model param, which is shared with all other components. - learner_model_param_ = LearnerModelParam(Ctx(), mparam_, std::move(base_score), task); - CHECK(learner_model_param_.Initialized()); - CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0); - } /** * \brief Calculate the `base_score` based on input data. 
* @@ -470,40 +408,48 @@ class LearnerConfiguration : public Learner { // - model loaded from new binary or JSON. // - model is created from scratch. // - model is configured second time due to change of parameter - - // if (!need_init_model_) { - // return; - // } - // std::lock_guard guard(config_lock_); - // if (!need_init_model_) { - // return; - // } - - if (!learner_model_param_.Initialized()) { - // CHECK(!p_fmat); // only using custom objective can reach here. - // First call to have some basic model info for boosted rounds to work. - this->ConfigureModelParamWithoutBaseScore(); - } - - if (p_fmat) { - auto const& info = p_fmat->Info(); - info.Validate(Ctx()->gpu_id); - // We estimate it from input data. - linalg::Tensor base_score; - UsePtr(obj_)->InitEstimation(info, &base_score); - mparam_.base_score = base_score(0); - CHECK(!std::isnan(mparam_.base_score)); - } else { - mparam_.base_score = ObjFunction::DefaultBaseScore(); + CHECK(obj_); + if (!mparam_.base_score_estimated) { + std::lock_guard guard(config_lock_); + if (p_fmat) { + auto const& info = p_fmat->Info(); + info.Validate(Ctx()->gpu_id); + // We estimate it from input data. + linalg::Tensor base_score; + obj_->InitEstimation(info, &base_score); + mparam_.base_score = base_score(0); + CHECK(!std::isnan(mparam_.base_score)); + } else { + mparam_.base_score = ObjFunction::DefaultBaseScore(); + } + mparam_.base_score_estimated = true; + // Update the shared model parameter + this->ConfigureModelParam(); + auto sync_score = mparam_.base_score; + rabit::Broadcast(&sync_score, sizeof(sync_score), 0); + CHECK_EQ(sync_score, mparam_.base_score); } - // Update the shared model parameter - this->ConfigureModelParamWithoutBaseScore(); - mparam_.Validate(); - CHECK(!std::isnan(mparam_.base_score)); CHECK(!std::isinf(mparam_.base_score)); } + // Convert mparam to learner_model_param + void ConfigureModelParam() { + this->ConfigureTargets(); + + CHECK(obj_); + auto task = obj_->Task(); + linalg::Tensor base_score({1}, Ctx()->gpu_id); + auto h_base_score = base_score.HostView(); + + // transform to margin + h_base_score(0) = obj_->ProbToMargin(mparam_.base_score); + // move it to model param, which is shared with all other components. + learner_model_param_ = LearnerModelParam(Ctx(), mparam_, std::move(base_score), task); + CHECK(learner_model_param_.Initialized()); + CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0); + } + public: explicit LearnerConfiguration(std::vector > cache) : need_configuration_{true} { @@ -521,7 +467,7 @@ class LearnerConfiguration : public Learner { } // Configuration before data is known. - void Configure(DMatrix const* p_fmat = nullptr) override { + void Configure() override { // Varient of double checked lock if (!this->need_configuration_) { return; @@ -563,15 +509,11 @@ class LearnerConfiguration : public Learner { learner_model_param_.task = obj_->Task(); // required by gbm configuration. 
this->ConfigureGBM(old_tparam, args); ctx_.ConfigureGpuId(this->gbm_->UseGPU()); - - // Configure model param without base score - this->InitBaseScore(p_fmat); - // this->ConfigureModelParamWithoutBaseScore(); + this->ConfigureModelParam(); this->ConfigureMetrics(args); - this->need_configuration_ = - UsePtr(this->gbm_)->BoostedRounds() == 0 && mparam_.boost_from_average; + this->need_configuration_ = false; if (ctx_.validate_parameters) { this->ValidateParameters(); } @@ -581,8 +523,8 @@ class LearnerConfiguration : public Learner { } void CheckModelInitialized() const { - CHECK(learner_model_param_.Initialized()) << ModelNotFitted(); - // CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0) << ModelNotFitted(); + CHECK(learner_model_param_.Initialized()) << "Model not yet initialized."; + CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0); } virtual PredictionContainer* GetPredictionCache() const { @@ -1358,8 +1300,8 @@ class LearnerImpl : public LearnerIO { void UpdateOneIter(int iter, std::shared_ptr train) override { monitor_.Start("UpdateOneIter"); TrainingObserver::Instance().Update(iter); - this->Configure(train.get()); - // this->InitBaseScore(train.get()); + this->Configure(); + this->InitBaseScore(train.get()); if (ctx_.seed_per_iteration) { common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter); @@ -1388,9 +1330,9 @@ class LearnerImpl : public LearnerIO { void BoostOneIter(int iter, std::shared_ptr train, HostDeviceVector* in_gpair) override { monitor_.Start("BoostOneIter"); - this->Configure(nullptr); - // Custom objective, use default instead. - // this->InitBaseScore(nullptr); + this->Configure(); + // Should have been set to default in the first prediction. + CHECK(mparam_.base_score_estimated); if (ctx_.seed_per_iteration) { common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter); @@ -1410,8 +1352,7 @@ class LearnerImpl : public LearnerIO { const std::vector>& data_sets, const std::vector& data_names) override { monitor_.Start("EvalOneIter"); - this->Configure(nullptr); - // this->InitBaseScore(nullptr); + this->Configure(); this->CheckModelInitialized(); std::ostringstream os; @@ -1451,8 +1392,8 @@ class LearnerImpl : public LearnerIO { int multiple_predictions = static_cast(pred_leaf) + static_cast(pred_interactions) + static_cast(pred_contribs); - this->Configure(nullptr); - // this->InitBaseScore(nullptr); + this->Configure(); + this->InitBaseScore(nullptr); this->CheckModelInitialized(); CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time."; @@ -1477,13 +1418,13 @@ class LearnerImpl : public LearnerIO { } } - std::int32_t BoostedRounds() const override { + int32_t BoostedRounds() const override { if (!this->gbm_) { return 0; } // haven't call train or LoadModel. 
CHECK(!this->need_configuration_); return this->gbm_->BoostedRounds(); } - bst_group_t Groups() const override { + uint32_t Groups() const override { CHECK(!this->need_configuration_); this->CheckModelInitialized(); return this->learner_model_param_.num_output_group; @@ -1496,7 +1437,8 @@ class LearnerImpl : public LearnerIO { void InplacePredict(std::shared_ptr p_m, PredictionType type, float missing, HostDeviceVector** out_preds, uint32_t iteration_begin, uint32_t iteration_end) override { - this->Configure(nullptr); + this->Configure(); + this->InitBaseScore(nullptr); this->CheckModelInitialized(); auto& out_predictions = this->GetThreadLocal().prediction_entry; diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index fa36cdcc8584..49c1d9537426 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -493,6 +493,13 @@ TEST(Learner, InitEstimation) { auto base_score = std::stof(get(config["learner"]["learner_model_param"]["base_score"])); ASSERT_NE(base_score, ObjFunction::DefaultBaseScore()); + + ASSERT_THROW( + { + learner->SetParam("base_score_estimated", "1"); + learner->Configure(); + }, + dmlc::Error); } { @@ -515,89 +522,4 @@ TEST(Learner, InitEstimation) { ASSERT_FLOAT_EQ(base_score, 1.3); } } - -class InitBaseScore : public ::testing::Test { - protected: - std::size_t static constexpr Cols() { return 10; } - std::shared_ptr Xy_; - - void SetUp() override { Xy_ = RandomDataGenerator{10, Cols(), 0}.GenerateDMatrix(true); } - - public: - void TestBoostFromAvg() { - std::unique_ptr learner{Learner::Create({Xy_})}; - learner->SetParam("objective", "reg:absoluteerror"); - learner->SetParam("base_score", "1.3"); - Json config(Object{}); - learner->Configure(); - learner->SaveConfig(&config); - - auto base_score = - std::stof(get(config["learner"]["learner_model_param"]["base_score"])); - // no change - ASSERT_FLOAT_EQ(base_score, 1.3); - auto from_avg = std::stoi( - get(config["learner"]["learner_model_param"]["boost_from_average"])); - // from_avg is disabled when base score is set - ASSERT_EQ(from_avg, 0); - // in the future when we can deprecate the binary model, user can set the parameter directly. 
- learner->SetParam("boost_from_average", "1"); - learner->Configure(); - learner->SaveConfig(&config); - from_avg = std::stoi( - get(config["learner"]["learner_model_param"]["boost_from_average"])); - ASSERT_EQ(from_avg, 1); - } - - void TestInitAfterLoad() { - std::unique_ptr learner{Learner::Create({Xy_})}; - learner->SetParam("objective", "reg:absoluteerror"); - learner->Configure(); - - Json model{Object{}}; - learner->SaveModel(&model); - auto base_score = - std::stof(get(model["learner"]["learner_model_param"]["base_score"])); - ASSERT_FLOAT_EQ(base_score, ObjFunction::DefaultBaseScore()); - - learner.reset(Learner::Create({Xy_})); - learner->LoadModel(model); - Json config(Object{}); - learner->Configure(); - learner->SaveConfig(&config); - base_score = - std::stof(get(config["learner"]["learner_model_param"]["base_score"])); - ASSERT_FLOAT_EQ(base_score, ObjFunction::DefaultBaseScore()); - - learner->UpdateOneIter(0, Xy_); - learner->SaveConfig(&config); - base_score = - std::stof(get(config["learner"]["learner_model_param"]["base_score"])); - ASSERT_NE(base_score, ObjFunction::DefaultBaseScore()); - } - - void TestInitWithPredt() { - std::unique_ptr learner{Learner::Create({Xy_})}; - learner->SetParam("objective", "reg:absoluteerror"); - HostDeviceVector predt; - learner->Predict(Xy_, false, &predt, 0, 0); - Json config(Object{}); - learner->SaveConfig(&config); - auto base_score = - std::stof(get(config["learner"]["learner_model_param"]["base_score"])); - ASSERT_FLOAT_EQ(base_score, ObjFunction::DefaultBaseScore()); - - learner->UpdateOneIter(0, Xy_); - learner->SaveConfig(&config); - base_score = - std::stof(get(config["learner"]["learner_model_param"]["base_score"])); - ASSERT_NE(base_score, ObjFunction::DefaultBaseScore()); - } -}; - -TEST_F(InitBaseScore, FromAvgParam) { this->TestBoostFromAvg(); } - -TEST_F(InitBaseScore, InitAfterLoad) { this->TestInitAfterLoad(); } - -TEST_F(InitBaseScore, InitWithPredict) { this->TestInitWithPredt(); } } // namespace xgboost From 56ac100aeeb1ba0a787d5c0144a135d1f6123867 Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 14 Oct 2022 18:55:00 +0800 Subject: [PATCH 081/133] boost from avg. --- include/xgboost/learner.h | 2 +- src/learner.cc | 153 ++++++++++++++++++++++++-------------- tests/cpp/test_learner.cc | 152 ++++++++++++++++++++++++++----------- 3 files changed, 204 insertions(+), 103 deletions(-) diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h index 34ae5a4d53bb..6969c7d7dd48 100644 --- a/include/xgboost/learner.h +++ b/include/xgboost/learner.h @@ -328,7 +328,7 @@ struct LearnerModelParam { void Copy(LearnerModelParam const& that); /* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */ - bool Initialized() const { return num_feature != 0; } + bool Initialized() const { return num_feature != 0 && num_output_group != 0; } }; } // namespace xgboost diff --git a/src/learner.cc b/src/learner.cc index 7ce624060124..f836ebb2e5e0 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -63,6 +63,15 @@ DECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode); namespace xgboost { Learner::~Learner() = default; +namespace { +StringView ModelNotFitted() { return "Model is not yet initialized (not fitted)."; } + +template +T& UsePtr(T& ptr) { // NOLINT + CHECK(ptr); + return ptr; +} +} // anonymous namespace /*! 
\brief training parameter for regression * @@ -74,20 +83,28 @@ struct LearnerModelParamLegacy : public dmlc::Parameter /* \brief global bias */ bst_float base_score; /* \brief number of features */ - uint32_t num_feature; + bst_feature_t num_feature; /* \brief number of classes, if it is multi-class classification */ - int32_t num_class; + std::int32_t num_class; /*! \brief Model contain additional properties */ int32_t contain_extra_attrs; /*! \brief Model contain eval metrics */ int32_t contain_eval_metrics; /*! \brief the version of XGBoost. */ - uint32_t major_version; - uint32_t minor_version; + std::uint32_t major_version; + std::uint32_t minor_version; uint32_t num_target{1}; - - std::int32_t base_score_estimated{0}; + /** + * \brief Whether we should calculate the base score from training data. + * + * This is a private parameter as we can't expose it as boolean due to binary model + * format. Exposing it as integer creates inconsistency with other parameters. + * + * Automatically disabled when base_score is specifed by user. int32 is used instead + * of bool for the ease of serialization. + */ + std::int32_t boost_from_average{true}; /*! \brief reserved field */ int reserved[25]; /*! \brief constructor */ @@ -97,7 +114,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter num_target = 1; major_version = std::get<0>(Version::Self()); minor_version = std::get<1>(Version::Self()); - base_score_estimated = 0; + boost_from_average = true; static_assert(sizeof(LearnerModelParamLegacy) == 136, "Do not change the size of this struct, as it will break binary IO."); } @@ -127,8 +144,8 @@ struct LearnerModelParamLegacy : public dmlc::Parameter std::string{integers, static_cast(std::distance(integers, ret.ptr))}; ret = to_chars(integers, integers + NumericLimits::kToCharsSize, - static_cast(base_score_estimated)); - obj["base_score_estimated"] = + static_cast(boost_from_average)); + obj["boost_from_average"] = std::string{integers, static_cast(std::distance(integers, ret.ptr))}; return obj; @@ -142,9 +159,9 @@ struct LearnerModelParamLegacy : public dmlc::Parameter if (n_targets_it != j_param.cend()) { m["num_target"] = get(n_targets_it->second); } - auto bse_it = j_param.find("base_score_estimated"); + auto bse_it = j_param.find("boost_from_average"); if (bse_it != j_param.cend()) { - m["base_score_estimated"] = get(bse_it->second); + m["boost_from_average"] = get(bse_it->second); } this->Init(m); @@ -163,7 +180,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter dmlc::ByteSwap(&x.major_version, sizeof(x.major_version), 1); dmlc::ByteSwap(&x.minor_version, sizeof(x.minor_version), 1); dmlc::ByteSwap(&x.num_target, sizeof(x.num_target), 1); - dmlc::ByteSwap(&x.base_score_estimated, sizeof(x.base_score_estimated), 1); + dmlc::ByteSwap(&x.boost_from_average, sizeof(x.boost_from_average), 1); dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0])); return x; } @@ -171,17 +188,43 @@ struct LearnerModelParamLegacy : public dmlc::Parameter template Args UpdateAllowUnknown(Container const& kwargs) { // Detect whether user has made their own base score. 
- if (std::find_if(kwargs.cbegin(), kwargs.cend(), - [](auto const& kv) { return kv.first == "base_score"; }) != kwargs.cend()) { - base_score_estimated = true; - } - if (std::find_if(kwargs.cbegin(), kwargs.cend(), [](auto const& kv) { - return kv.first == "base_score_estimated"; - }) != kwargs.cend()) { - LOG(FATAL) << "`base_score_estimated` cannot be specified as hyper-parameter."; + auto find_key = [&kwargs](char const* key) { + return std::find_if(kwargs.cbegin(), kwargs.cend(), + [key](auto const& kv) { return kv.first == key; }); + }; + auto it = find_key("base_score"); + if (it != kwargs.cend()) { + boost_from_average = false; } return dmlc::Parameter::UpdateAllowUnknown(kwargs); } + // sanity check + void Validate() { + if (!collective::IsDistributed()) { + return; + } + + std::array data; + std::size_t pos{0}; + std::memcpy(data.data() + pos, &base_score, sizeof(base_score)); + pos += 1; + std::memcpy(data.data() + pos, &num_feature, sizeof(num_feature)); + pos += 1; + std::memcpy(data.data() + pos, &num_class, sizeof(num_class)); + pos += 1; + std::memcpy(data.data() + pos, &num_target, sizeof(num_target)); + pos += 1; + std::memcpy(data.data() + pos, &major_version, sizeof(major_version)); + pos += 1; + std::memcpy(data.data() + pos, &minor_version, sizeof(minor_version)); + pos += 1; + + std::array sync; + std::copy(data.cbegin(), data.cend(), sync.begin()); + collective::Broadcast(sync.data(), sync.size(), 0); + CHECK(std::equal(data.cbegin(), data.cend(), sync.cbegin())) + << "Different model parameter across workers."; + } // declare parameters DMLC_DECLARE_PARAMETER(LearnerModelParamLegacy) { @@ -200,7 +243,9 @@ struct LearnerModelParamLegacy : public dmlc::Parameter .set_default(1) .set_lower_bound(1) .describe("Number of target for multi-target regression."); - DMLC_DECLARE_FIELD(base_score_estimated).set_default(0); + DMLC_DECLARE_FIELD(boost_from_average) + .set_default(true) + .describe("Whether we should calculate the base score from training data."); } }; @@ -229,7 +274,7 @@ LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy linalg::TensorView LearnerModelParam::BaseScore(int32_t device) const { // multi-class is not yet supported. - CHECK_EQ(base_score_.Size(), 1); + CHECK_EQ(base_score_.Size(), 1) << ModelNotFitted(); if (device == Context::kCpuId) { // Make sure that we won't run into race condition. CHECK(base_score_.Data()->HostCanRead()); @@ -390,6 +435,21 @@ class LearnerConfiguration : public Learner { // Initial prediction. std::vector metric_names_; + void ConfigureModelParamWithoutBaseScore() { + // Convert mparam to learner_model_param + this->ConfigureTargets(); + + auto task = UsePtr(obj_)->Task(); + linalg::Tensor base_score({1}, Ctx()->gpu_id); + auto h_base_score = base_score.HostView(); + + // transform to margin + h_base_score(0) = obj_->ProbToMargin(mparam_.base_score); + // move it to model param, which is shared with all other components. + learner_model_param_ = LearnerModelParam(Ctx(), mparam_, std::move(base_score), task); + CHECK(learner_model_param_.Initialized()); + CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0); + } /** * \brief Calculate the `base_score` based on input data. * @@ -408,48 +468,27 @@ class LearnerConfiguration : public Learner { // - model loaded from new binary or JSON. // - model is created from scratch. 
// - model is configured second time due to change of parameter - CHECK(obj_); - if (!mparam_.base_score_estimated) { - std::lock_guard guard(config_lock_); + if (!learner_model_param_.Initialized()) { + this->ConfigureModelParamWithoutBaseScore(); + } + if (mparam_.boost_from_average && UsePtr(gbm_)->BoostedRounds() == 0) { if (p_fmat) { auto const& info = p_fmat->Info(); info.Validate(Ctx()->gpu_id); // We estimate it from input data. linalg::Tensor base_score; - obj_->InitEstimation(info, &base_score); + UsePtr(obj_)->InitEstimation(info, &base_score); mparam_.base_score = base_score(0); CHECK(!std::isnan(mparam_.base_score)); - } else { - mparam_.base_score = ObjFunction::DefaultBaseScore(); } - mparam_.base_score_estimated = true; // Update the shared model parameter - this->ConfigureModelParam(); - auto sync_score = mparam_.base_score; - rabit::Broadcast(&sync_score, sizeof(sync_score), 0); - CHECK_EQ(sync_score, mparam_.base_score); + this->ConfigureModelParamWithoutBaseScore(); + mparam_.Validate(); } CHECK(!std::isnan(mparam_.base_score)); CHECK(!std::isinf(mparam_.base_score)); } - // Convert mparam to learner_model_param - void ConfigureModelParam() { - this->ConfigureTargets(); - - CHECK(obj_); - auto task = obj_->Task(); - linalg::Tensor base_score({1}, Ctx()->gpu_id); - auto h_base_score = base_score.HostView(); - - // transform to margin - h_base_score(0) = obj_->ProbToMargin(mparam_.base_score); - // move it to model param, which is shared with all other components. - learner_model_param_ = LearnerModelParam(Ctx(), mparam_, std::move(base_score), task); - CHECK(learner_model_param_.Initialized()); - CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0); - } - public: explicit LearnerConfiguration(std::vector > cache) : need_configuration_{true} { @@ -509,7 +548,8 @@ class LearnerConfiguration : public Learner { learner_model_param_.task = obj_->Task(); // required by gbm configuration. this->ConfigureGBM(old_tparam, args); ctx_.ConfigureGpuId(this->gbm_->UseGPU()); - this->ConfigureModelParam(); + + this->ConfigureModelParamWithoutBaseScore(); this->ConfigureMetrics(args); @@ -523,8 +563,8 @@ class LearnerConfiguration : public Learner { } void CheckModelInitialized() const { - CHECK(learner_model_param_.Initialized()) << "Model not yet initialized."; - CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0); + CHECK(learner_model_param_.Initialized()) << ModelNotFitted(); + CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0) << ModelNotFitted(); } virtual PredictionContainer* GetPredictionCache() const { @@ -1331,8 +1371,6 @@ class LearnerImpl : public LearnerIO { HostDeviceVector* in_gpair) override { monitor_.Start("BoostOneIter"); this->Configure(); - // Should have been set to default in the first prediction. 
- CHECK(mparam_.base_score_estimated); if (ctx_.seed_per_iteration) { common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter); @@ -1393,7 +1431,9 @@ class LearnerImpl : public LearnerIO { static_cast(pred_interactions) + static_cast(pred_contribs); this->Configure(); - this->InitBaseScore(nullptr); + if (training) { + this->InitBaseScore(nullptr); + } this->CheckModelInitialized(); CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time."; @@ -1438,7 +1478,6 @@ class LearnerImpl : public LearnerIO { HostDeviceVector** out_preds, uint32_t iteration_begin, uint32_t iteration_end) override { this->Configure(); - this->InitBaseScore(nullptr); this->CheckModelInitialized(); auto& out_predictions = this->GetThreadLocal().prediction_entry; diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index 49c1d9537426..6f9f6e1e2209 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -453,73 +453,135 @@ TEST(Learner, MultiTarget) { /** * Test the model initialization sequence is correctly performed. */ -TEST(Learner, InitEstimation) { - size_t constexpr kCols = 10; - auto Xy = RandomDataGenerator{10, kCols, 0}.GenerateDMatrix(true); +class InitBaseScore : public ::testing::Test { + protected: + std::size_t static constexpr Cols() { return 10; } + std::shared_ptr Xy_; - { - std::unique_ptr learner{Learner::Create({Xy})}; - learner->SetParam("objective", "reg:absoluteerror"); - learner->Configure(); - HostDeviceVector predt; - learner->Predict(Xy, false, &predt, 0, 0); + void SetUp() override { Xy_ = RandomDataGenerator{10, Cols(), 0}.GenerateDMatrix(true); } - auto h_predt = predt.ConstHostSpan(); - for (auto v : h_predt) { - ASSERT_EQ(v, ObjFunction::DefaultBaseScore()); - } + static float GetBaseScore(Json const &config) { + return std::stof(get(config["learner"]["learner_model_param"]["base_score"])); + } + + public: + void TestUpdateConfig() { + std::unique_ptr learner{Learner::Create({Xy_})}; + learner->SetParam("objective", "reg:absoluteerror"); + learner->UpdateOneIter(0, Xy_); Json config{Object{}}; learner->SaveConfig(&config); - auto base_score = - std::stof(get(config["learner"]["learner_model_param"]["base_score"])); - // No base score is estimated yet. 
- ASSERT_EQ(base_score, ObjFunction::DefaultBaseScore()); + auto base_score = GetBaseScore(config); + ASSERT_NE(base_score, ObjFunction::DefaultBaseScore()); + + // already initialized + auto Xy1 = RandomDataGenerator{100, Cols(), 0}.Seed(321).GenerateDMatrix(true); + learner->UpdateOneIter(1, Xy1); + learner->SaveConfig(&config); + auto base_score1 = GetBaseScore(config); + ASSERT_EQ(base_score, base_score1); + + Json model{Object{}}; + learner->SaveModel(&model); + learner.reset(Learner::Create({})); + learner->LoadModel(model); + learner->Configure(); + learner->UpdateOneIter(2, Xy1); + learner->SaveConfig(&config); + auto base_score2 = GetBaseScore(config); + ASSERT_EQ(base_score, base_score2); } - { - std::unique_ptr learner{Learner::Create({Xy})}; + void TestBoostFromAvgParam() { + std::unique_ptr learner{Learner::Create({Xy_})}; learner->SetParam("objective", "reg:absoluteerror"); - learner->UpdateOneIter(0, Xy); + learner->SetParam("base_score", "1.3"); + Json config(Object{}); + learner->Configure(); + learner->SaveConfig(&config); + + auto base_score = GetBaseScore(config); + // no change + ASSERT_FLOAT_EQ(base_score, 1.3); HostDeviceVector predt; - learner->Predict(Xy, false, &predt, 0, 0); + learner->Predict(Xy_, false, &predt, 0, 0); auto h_predt = predt.ConstHostSpan(); for (auto v : h_predt) { - ASSERT_NE(v, ObjFunction::DefaultBaseScore()); + ASSERT_FLOAT_EQ(v, 1.3); } - - Json config{Object{}}; + learner->UpdateOneIter(0, Xy_); learner->SaveConfig(&config); - auto base_score = - std::stof(get(config["learner"]["learner_model_param"]["base_score"])); - ASSERT_NE(base_score, ObjFunction::DefaultBaseScore()); + base_score = GetBaseScore(config); + // no change + ASSERT_FLOAT_EQ(base_score, 1.3); - ASSERT_THROW( - { - learner->SetParam("base_score_estimated", "1"); - learner->Configure(); - }, - dmlc::Error); + auto from_avg = std::stoi( + get(config["learner"]["learner_model_param"]["boost_from_average"])); + // from_avg is disabled when base score is set + ASSERT_EQ(from_avg, 0); + // in the future when we can deprecate the binary model, user can set the parameter directly. 
+ learner->SetParam("boost_from_average", "1"); + learner->Configure(); + learner->SaveConfig(&config); + from_avg = std::stoi( + get(config["learner"]["learner_model_param"]["boost_from_average"])); + ASSERT_EQ(from_avg, 1); } - { - std::unique_ptr learner{Learner::Create({Xy})}; + void TestInitAfterLoad() { + std::unique_ptr learner{Learner::Create({Xy_})}; learner->SetParam("objective", "reg:absoluteerror"); - learner->SetParam("base_score", "1.3"); learner->Configure(); + + Json model{Object{}}; + learner->SaveModel(&model); + auto base_score = GetBaseScore(model); + ASSERT_EQ(base_score, ObjFunction::DefaultBaseScore()); + + learner.reset(Learner::Create({Xy_})); + learner->LoadModel(model); + Json config(Object{}); + learner->Configure(); + learner->SaveConfig(&config); + base_score = GetBaseScore(config); + ASSERT_EQ(base_score, ObjFunction::DefaultBaseScore()); + + learner->UpdateOneIter(0, Xy_); + learner->SaveConfig(&config); + base_score = GetBaseScore(config); + ASSERT_NE(base_score, ObjFunction::DefaultBaseScore()); + } + + void TestInitWithPredt() { + std::unique_ptr learner{Learner::Create({Xy_})}; + learner->SetParam("objective", "reg:absoluteerror"); HostDeviceVector predt; - learner->Predict(Xy, false, &predt, 0, 0); + learner->Predict(Xy_, false, &predt, 0, 0); + auto h_predt = predt.ConstHostSpan(); for (auto v : h_predt) { - ASSERT_FLOAT_EQ(v, 1.3); + ASSERT_EQ(v, ObjFunction::DefaultBaseScore()); } - learner->UpdateOneIter(0, Xy); - Json config{Object{}}; + + Json config(Object{}); learner->SaveConfig(&config); - auto base_score = - std::stof(get(config["learner"]["learner_model_param"]["base_score"])); - // no change - ASSERT_FLOAT_EQ(base_score, 1.3); + auto base_score = GetBaseScore(config); + ASSERT_EQ(base_score, ObjFunction::DefaultBaseScore()); + + // since prediction is not used for trianing, the train procedure still runs estimation + learner->UpdateOneIter(0, Xy_); + learner->SaveConfig(&config); + base_score = GetBaseScore(config); + ASSERT_NE(base_score, ObjFunction::DefaultBaseScore()); } -} +}; + +TEST_F(InitBaseScore, TestUpdateConfig) { this->TestUpdateConfig(); } + +TEST_F(InitBaseScore, FromAvgParam) { this->TestBoostFromAvgParam(); } + +TEST_F(InitBaseScore, InitAfterLoad) { this->TestInitAfterLoad(); } + +TEST_F(InitBaseScore, InitWithPredict) { this->TestInitWithPredt(); } } // namespace xgboost From a2c72d55a9af2532ca556fa7b957cc174ebfa2f2 Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 14 Oct 2022 19:24:33 +0800 Subject: [PATCH 082/133] Fix build. --- src/learner.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/learner.cc b/src/learner.cc index f836ebb2e5e0..967ebdb0f2e6 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -12,6 +12,7 @@ #include #include +#include #include #include #include // std::numeric_limits From cc9b4ff87ea4d7024ed10eac04d895c842d3d8c1 Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 14 Oct 2022 20:15:13 +0800 Subject: [PATCH 083/133] Fix macos. 
--- tests/python/test_with_dask.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py index 31dd8bbb9693..61ebf5d469be 100644 --- a/tests/python/test_with_dask.py +++ b/tests/python/test_with_dask.py @@ -55,7 +55,11 @@ @pytest.fixture(scope="module") def cluster(): n_workers = 2 - n_threads = len(os.sched_getaffinity(0)) // n_workers + if hasattr(os, "sched_getaffinity"): + n_threads = len(os.sched_getaffinity(0)) // n_workers + else: + n_threads = os.cpu_count() // n_workers + with LocalCluster( n_workers=n_workers, threads_per_worker=n_threads, dashboard_address=":0" ) as dask_cluster: From 49460214f25e32a536c447f7551221b71fa0c1f1 Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 14 Oct 2022 21:30:33 +0800 Subject: [PATCH 084/133] Model is fitted. --- include/xgboost/gbm.h | 5 +++++ src/gbm/gblinear.cc | 2 ++ src/gbm/gbtree.h | 4 ++++ src/learner.cc | 2 +- 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/include/xgboost/gbm.h b/include/xgboost/gbm.h index 9951f7778ca2..07dd823716a1 100644 --- a/include/xgboost/gbm.h +++ b/include/xgboost/gbm.h @@ -75,6 +75,11 @@ class GradientBooster : public Model, public Configurable { /*! \brief Return number of boosted rounds. */ virtual int32_t BoostedRounds() const = 0; + /** + * \brief Whether the model has already been trained. When tree booster is chosen, then + * returns true when there are existing trees. + */ + virtual bool ModelFitted() const = 0; /*! * \brief perform update to the model(boosting) * \param p_fmat feature matrix that provide access to features diff --git a/src/gbm/gblinear.cc b/src/gbm/gblinear.cc index c8cdfeb476b1..2498865e9110 100644 --- a/src/gbm/gblinear.cc +++ b/src/gbm/gblinear.cc @@ -95,6 +95,8 @@ class GBLinear : public GradientBooster { return model_.num_boosted_rounds; } + bool ModelFitted() const override { return BoostedRounds() != 0; } + void Load(dmlc::Stream* fi) override { model_.Load(fi); } diff --git a/src/gbm/gbtree.h b/src/gbm/gbtree.h index ce82cb29629f..78224aba4408 100644 --- a/src/gbm/gbtree.h +++ b/src/gbm/gbtree.h @@ -252,6 +252,10 @@ class GBTree : public GradientBooster { return model_.trees.size() / this->LayerTrees(); } + bool ModelFitted() const override { + return !model_.trees.empty() || !model_.trees_to_update.empty(); + } + void PredictBatch(DMatrix *p_fmat, PredictionCacheEntry *out_preds, bool training, unsigned layer_begin, unsigned layer_end) override; diff --git a/src/learner.cc b/src/learner.cc index 967ebdb0f2e6..184ef3c53b36 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -472,7 +472,7 @@ class LearnerConfiguration : public Learner { if (!learner_model_param_.Initialized()) { this->ConfigureModelParamWithoutBaseScore(); } - if (mparam_.boost_from_average && UsePtr(gbm_)->BoostedRounds() == 0) { + if (mparam_.boost_from_average && !UsePtr(gbm_)->ModelFitted()) { if (p_fmat) { auto const& info = p_fmat->Info(); info.Validate(Ctx()->gpu_id); From b3ac21bf6d27d3b58cd998939b6ee8e1ec0c616e Mon Sep 17 00:00:00 2001 From: jiamingy Date: Mon, 5 Dec 2022 23:53:50 +0800 Subject: [PATCH 085/133] Revert local changes. 
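A note on the `ModelFitted()` hook introduced in the patch above: it lets the learner distinguish a freshly created booster from one that already carries trees (or boosted rounds, for gblinear), so the `base_score` estimation runs at most once. A minimal sketch of the guard it enables; the types here are simplified stand-ins, not the real learner classes:

    struct GradientBooster {
      virtual bool ModelFitted() const = 0;
      virtual ~GradientBooster() = default;
    };

    // Estimate base_score only for a fresh model that opted into boost_from_average;
    // continued training and loaded models keep their existing value.
    bool ShouldEstimateBaseScore(bool boost_from_average, GradientBooster const& gbm) {
      return boost_from_average && !gbm.ModelFitted();
    }
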
--- tests/python/test_spark/test_spark_local.py | 586 ++++++++------------ 1 file changed, 244 insertions(+), 342 deletions(-) diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py index e4f7dd0d3de7..b7505b4a89e3 100644 --- a/tests/python/test_spark/test_spark_local.py +++ b/tests/python/test_spark/test_spark_local.py @@ -1,18 +1,20 @@ import glob import logging import random +import sys import uuid -from collections import namedtuple -from typing import Generator import numpy as np import pytest +import testing as tm import xgboost as xgb from xgboost import testing -if testing.skip_spark()["condition"]: - pytest.skip(msg=testing.skip_spark()["reason"], allow_module_level=True) +if tm.no_spark()["condition"]: + pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True) +if sys.platform.startswith("win") or sys.platform.startswith("darwin"): + pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True) from pyspark.ml import Pipeline, PipelineModel from pyspark.ml.evaluation import BinaryClassificationEvaluator @@ -20,7 +22,6 @@ from pyspark.ml.functions import vector_to_array from pyspark.ml.linalg import Vectors from pyspark.ml.tuning import CrossValidator, ParamGridBuilder -from pyspark.sql import SparkSession from pyspark.sql import functions as spark_sql_func from xgboost.spark import ( SparkXGBClassifier, @@ -40,322 +41,6 @@ pytestmark = testing.timeout(60) -@pytest.fixture -def spark() -> Generator[SparkSession, None, None]: - config = { - "spark.master": "local[4]", - "spark.python.worker.reuse": "false", - "spark.driver.host": "127.0.0.1", - "spark.task.maxFailures": "1", - "spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled": "false", - "spark.sql.pyspark.jvmStacktrace.enabled": "true", - } - - builder = SparkSession.builder.appName("XGBoost PySpark Python API Tests") - for k, v in config.items(): - builder.config(k, v) - logging.getLogger("pyspark").setLevel(logging.INFO) - sess = builder.getOrCreate() - yield sess - - -RegWithWeight = namedtuple( - "RegWithWeight", - ( - "reg_params_with_eval", - "reg_df_train_with_eval_weight", - "reg_df_test_with_eval_weight", - "reg_with_eval_best_score", - "reg_with_eval_and_weight_best_score", - ), -) - - -@pytest.fixture -def reg_with_weight( - spark: SparkSession, -) -> Generator[RegWithWeight, SparkSession, None]: - reg_params_with_eval = { - "validation_indicator_col": "isVal", - "early_stopping_rounds": 1, - "eval_metric": "rmse", - } - - X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5], [4.0, 5.0, 6.0], [0.0, 6.0, 7.5]]) - w = np.array([1.0, 2.0, 1.0, 2.0]) - y = np.array([0, 1, 2, 3]) - - reg1 = XGBRegressor() - reg1.fit(X, y, sample_weight=w) - predt1 = reg1.predict(X) - - X_train = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5]]) - X_val = np.array([[4.0, 5.0, 6.0], [0.0, 6.0, 7.5]]) - y_train = np.array([0, 1]) - y_val = np.array([2, 3]) - w_train = np.array([1.0, 2.0]) - w_val = np.array([1.0, 2.0]) - - reg2 = XGBRegressor(early_stopping_rounds=1, eval_metric="rmse") - reg2.fit( - X_train, - y_train, - eval_set=[(X_val, y_val)], - ) - predt2 = reg2.predict(X) - best_score2 = reg2.best_score - - reg3 = XGBRegressor(early_stopping_rounds=1, eval_metric="rmse") - reg3.fit( - X_train, - y_train, - sample_weight=w_train, - eval_set=[(X_val, y_val)], - sample_weight_eval_set=[w_val], - ) - predt3 = reg3.predict(X) - best_score3 = reg3.best_score - - reg_df_train_with_eval_weight = spark.createDataFrame( - [ - (Vectors.dense(1.0, 2.0, 3.0), 0, False, 1.0), - 
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1, False, 2.0), - (Vectors.dense(4.0, 5.0, 6.0), 2, True, 1.0), - (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 3, True, 2.0), - ], - ["features", "label", "isVal", "weight"], - ) - - reg_df_test_with_eval_weight = spark.createDataFrame( - [ - ( - Vectors.dense(1.0, 2.0, 3.0), - float(predt1[0]), - float(predt2[0]), - float(predt3[0]), - ), - ( - Vectors.sparse(3, {1: 1.0, 2: 5.5}), - float(predt1[1]), - float(predt2[1]), - float(predt3[1]), - ), - ], - [ - "features", - "expected_prediction_with_weight", - "expected_prediction_with_eval", - "expected_prediction_with_weight_and_eval", - ], - ) - yield RegWithWeight( - reg_params_with_eval, - reg_df_train_with_eval_weight, - reg_df_test_with_eval_weight, - best_score2, - best_score3, - ) - - -ClfWithWeight = namedtuple( - "ClfWithWeight", - ( - "cls_params_with_eval", - "cls_df_train_with_eval_weight", - "cls_df_test_with_eval_weight", - "cls_with_eval_best_score", - "cls_with_eval_and_weight_best_score", - ), -) - - -@pytest.fixture -def clf_with_weight( - spark: SparkSession, -) -> Generator[ClfWithWeight, SparkSession, None]: - """Test classifier with weight and eval set.""" - - X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5], [4.0, 5.0, 6.0], [0.0, 6.0, 7.5]]) - w = np.array([1.0, 2.0, 1.0, 2.0]) - y = np.array([0, 1, 0, 1]) - cls1 = XGBClassifier() - cls1.fit(X, y, sample_weight=w) - - X_train = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5]]) - X_val = np.array([[4.0, 5.0, 6.0], [0.0, 6.0, 7.5]]) - y_train = np.array([0, 1]) - y_val = np.array([0, 1]) - w_train = np.array([1.0, 2.0]) - w_val = np.array([1.0, 2.0]) - cls2 = XGBClassifier() - cls2.fit( - X_train, - y_train, - eval_set=[(X_val, y_val)], - early_stopping_rounds=1, - eval_metric="logloss", - ) - - cls3 = XGBClassifier() - cls3.fit( - X_train, - y_train, - sample_weight=w_train, - eval_set=[(X_val, y_val)], - sample_weight_eval_set=[w_val], - early_stopping_rounds=1, - eval_metric="logloss", - ) - - cls_df_train_with_eval_weight = spark.createDataFrame( - [ - (Vectors.dense(1.0, 2.0, 3.0), 0, False, 1.0), - (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1, False, 2.0), - (Vectors.dense(4.0, 5.0, 6.0), 0, True, 1.0), - (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, True, 2.0), - ], - ["features", "label", "isVal", "weight"], - ) - cls_params_with_eval = { - "validation_indicator_col": "isVal", - "early_stopping_rounds": 1, - "eval_metric": "logloss", - } - print("cls1.predict_proba(X)", cls1.predict_proba(X).shape, cls1.predict_proba(X)) - cls_df_test_with_eval_weight = spark.createDataFrame( - [ - ( - Vectors.dense(1.0, 2.0, 3.0), - [float(p) for p in cls1.predict_proba(X)[0, :]], - [float(p) for p in cls2.predict_proba(X)[0, :]], - [float(p) for p in cls3.predict_proba(X)[0, :]], - ), - ], - [ - "features", - "expected_prob_with_weight", - "expected_prob_with_eval", - "expected_prob_with_weight_and_eval", - ], - ) - cls_with_eval_best_score = cls2.best_score - cls_with_eval_and_weight_best_score = cls3.best_score - yield ClfWithWeight( - cls_params_with_eval, - cls_df_train_with_eval_weight, - cls_df_test_with_eval_weight, - cls_with_eval_best_score, - cls_with_eval_and_weight_best_score, - ) - - -class TestPySparkLocal: - def test_regressor_with_weight_eval(self, reg_with_weight: RegWithWeight) -> None: - # with weight - regressor_with_weight = SparkXGBRegressor(weight_col="weight") - model_with_weight = regressor_with_weight.fit( - reg_with_weight.reg_df_train_with_eval_weight - ) - pred_result_with_weight = model_with_weight.transform( - 
reg_with_weight.reg_df_test_with_eval_weight - ).collect() - for row in pred_result_with_weight: - assert np.isclose( - row.prediction, row.expected_prediction_with_weight, atol=1e-3 - ) - - # with eval - regressor_with_eval = SparkXGBRegressor(**reg_with_weight.reg_params_with_eval) - model_with_eval = regressor_with_eval.fit( - reg_with_weight.reg_df_train_with_eval_weight - ) - assert np.isclose( - model_with_eval._xgb_sklearn_model.best_score, - reg_with_weight.reg_with_eval_best_score, - atol=1e-3, - ) - - pred_result_with_eval = model_with_eval.transform( - reg_with_weight.reg_df_test_with_eval_weight - ).collect() - for row in pred_result_with_eval: - np.testing.assert_allclose( - row.prediction, row.expected_prediction_with_eval, atol=1e-3 - ) - # with weight and eval - regressor_with_weight_eval = SparkXGBRegressor( - weight_col="weight", **reg_with_weight.reg_params_with_eval - ) - model_with_weight_eval = regressor_with_weight_eval.fit( - reg_with_weight.reg_df_train_with_eval_weight - ) - pred_result_with_weight_eval = model_with_weight_eval.transform( - reg_with_weight.reg_df_test_with_eval_weight - ).collect() - np.testing.assert_allclose( - model_with_weight_eval._xgb_sklearn_model.best_score, - reg_with_weight.reg_with_eval_and_weight_best_score, - atol=1e-3, - ) - for row in pred_result_with_weight_eval: - np.testing.assert_allclose( - row.prediction, - row.expected_prediction_with_weight_and_eval, - atol=1e-3, - ) - - def test_classifier_with_weight_eval(self, clf_with_weight: ClfWithWeight) -> None: - # with weight - classifier_with_weight = SparkXGBClassifier(weight_col="weight") - model_with_weight = classifier_with_weight.fit( - clf_with_weight.cls_df_train_with_eval_weight - ) - pred_result_with_weight = model_with_weight.transform( - clf_with_weight.cls_df_test_with_eval_weight - ).collect() - for row in pred_result_with_weight: - assert np.allclose( - row.probability, row.expected_prob_with_weight, atol=1e-3 - ) - # with eval - classifier_with_eval = SparkXGBClassifier( - **clf_with_weight.cls_params_with_eval - ) - model_with_eval = classifier_with_eval.fit( - clf_with_weight.cls_df_train_with_eval_weight - ) - assert np.isclose( - model_with_eval._xgb_sklearn_model.best_score, - clf_with_weight.cls_with_eval_best_score, - atol=1e-3, - ) - pred_result_with_eval = model_with_eval.transform( - clf_with_weight.cls_df_test_with_eval_weight - ).collect() - for row in pred_result_with_eval: - assert np.allclose(row.probability, row.expected_prob_with_eval, atol=1e-3) - # with weight and eval - classifier_with_weight_eval = SparkXGBClassifier( - weight_col="weight", **clf_with_weight.cls_params_with_eval - ) - model_with_weight_eval = classifier_with_weight_eval.fit( - clf_with_weight.cls_df_train_with_eval_weight - ) - pred_result_with_weight_eval = model_with_weight_eval.transform( - clf_with_weight.cls_df_test_with_eval_weight - ).collect() - np.testing.assert_allclose( - model_with_weight_eval._xgb_sklearn_model.best_score, - clf_with_weight.cls_with_eval_and_weight_best_score, - atol=1e-3, - ) - - for row in pred_result_with_weight_eval: - np.testing.assert_allclose( # failed - row.probability, row.expected_prob_with_weight_and_eval, atol=1e-3 - ) - - class XgboostLocalTest(SparkTestCase): def setUp(self): logging.getLogger().setLevel("INFO") @@ -407,18 +92,22 @@ def setUp(self): ], ) - X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5]]) - y = np.array([0, 1]) - cl1 = XGBClassifier() - cl1.fit(X, y) - p1 = cl1.predict(X) - proba1 = cl1.predict_proba(X) - - cl2 = 
XGBClassifier(max_depth=5, n_estimators=10, scale_pos_weight=4) - cl2.fit(X, y) - p2 = cl2.predict(X) - proba2 = cl2.predict_proba(X) - + # >>> X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5]]) + # >>> y = np.array([0, 1]) + # >>> cl1 = xgboost.XGBClassifier() + # >>> cl1.fit(X, y) + # >>> cl1.predict(X) + # array([0, 0]) + # >>> cl1.predict_proba(X) + # array([[0.5, 0.5], + # [0.5, 0.5]], dtype=float32) + # >>> cl2 = xgboost.XGBClassifier(max_depth=5, n_estimators=10, scale_pos_weight=4) + # >>> cl2.fit(X, y) + # >>> cl2.predict(X) + # array([1, 1]) + # >>> cl2.predict_proba(X) + # array([[0.27574146, 0.72425854 ], + # [0.27574146, 0.72425854 ]], dtype=float32) self.cls_params = {"max_depth": 5, "n_estimators": 10, "scale_pos_weight": 4} cls_df_train_data = [ @@ -431,22 +120,21 @@ def setUp(self): self.cls_df_train_large = self.session.createDataFrame( cls_df_train_data * 100, ["features", "label"] ) - self.cls_df_test = self.session.createDataFrame( [ ( Vectors.dense(1.0, 2.0, 3.0), - int(p1[0]), - [float(p) for p in list(proba1[0, :])], - int(p2[0]), - [float(p) for p in list(proba2[0, :])], + 0, + [0.5, 0.5], + 1, + [0.27574146, 0.72425854], ), ( Vectors.sparse(3, {1: 1.0, 2: 5.5}), - int(p1[1]), - [float(p) for p in list(proba1[1, :])], - int(p2[1]), - [float(p) for p in list(proba2[1, :])], + 0, + [0.5, 0.5], + 1, + [0.27574146, 0.72425854], ), ], [ @@ -486,6 +174,130 @@ def setUp(self): ["features", "expected_probability"], ) + # Test regressor with weight and eval set + # >>> import numpy as np + # >>> import xgboost + # >>> X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5], [4.0, 5.0, 6.0], [0.0, 6.0, 7.5]]) + # >>> w = np.array([1.0, 2.0, 1.0, 2.0]) + # >>> y = np.array([0, 1, 2, 3]) + # >>> reg1 = xgboost.XGBRegressor() + # >>> reg1.fit(X, y, sample_weight=w) + # >>> reg1.predict(X) + # >>> array([1.0679445e-03, 1.0000550e+00, ... + # >>> X_train = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5]]) + # >>> X_val = np.array([[4.0, 5.0, 6.0], [0.0, 6.0, 7.5]]) + # >>> y_train = np.array([0, 1]) + # >>> y_val = np.array([2, 3]) + # >>> w_train = np.array([1.0, 2.0]) + # >>> w_val = np.array([1.0, 2.0]) + # >>> reg2 = xgboost.XGBRegressor() + # >>> reg2.fit(X_train, y_train, eval_set=[(X_val, y_val)], + # >>> early_stopping_rounds=1, eval_metric='rmse') + # >>> reg2.predict(X) + # >>> array([8.8370638e-04, 9.9911624e-01, ... + # >>> reg2.best_score + # 2.0000002682208837 + # >>> reg3 = xgboost.XGBRegressor() + # >>> reg3.fit(X_train, y_train, sample_weight=w_train, eval_set=[(X_val, y_val)], + # >>> sample_weight_eval_set=[w_val], + # >>> early_stopping_rounds=1, eval_metric='rmse') + # >>> reg3.predict(X) + # >>> array([0.03155671, 0.98874104,... 
+ # >>> reg3.best_score + # 1.9970891552124017 + self.reg_df_train_with_eval_weight = self.session.createDataFrame( + [ + (Vectors.dense(1.0, 2.0, 3.0), 0, False, 1.0), + (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1, False, 2.0), + (Vectors.dense(4.0, 5.0, 6.0), 2, True, 1.0), + (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 3, True, 2.0), + ], + ["features", "label", "isVal", "weight"], + ) + self.reg_params_with_eval = { + "validation_indicator_col": "isVal", + "early_stopping_rounds": 1, + "eval_metric": "rmse", + } + self.reg_df_test_with_eval_weight = self.session.createDataFrame( + [ + (Vectors.dense(1.0, 2.0, 3.0), 0.001068, 0.00088, 0.03155), + (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1.000055, 0.9991, 0.9887), + ], + [ + "features", + "expected_prediction_with_weight", + "expected_prediction_with_eval", + "expected_prediction_with_weight_and_eval", + ], + ) + self.reg_with_eval_best_score = 2.0 + self.reg_with_eval_and_weight_best_score = 1.997 + + # Test classifier with weight and eval set + # >>> import numpy as np + # >>> import xgboost + # >>> X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5], [4.0, 5.0, 6.0], [0.0, 6.0, 7.5]]) + # >>> w = np.array([1.0, 2.0, 1.0, 2.0]) + # >>> y = np.array([0, 1, 0, 1]) + # >>> cls1 = xgboost.XGBClassifier() + # >>> cls1.fit(X, y, sample_weight=w) + # >>> cls1.predict_proba(X) + # array([[0.3333333, 0.6666667],... + # >>> X_train = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5]]) + # >>> X_val = np.array([[4.0, 5.0, 6.0], [0.0, 6.0, 7.5]]) + # >>> y_train = np.array([0, 1]) + # >>> y_val = np.array([0, 1]) + # >>> w_train = np.array([1.0, 2.0]) + # >>> w_val = np.array([1.0, 2.0]) + # >>> cls2 = xgboost.XGBClassifier() + # >>> cls2.fit(X_train, y_train, eval_set=[(X_val, y_val)], + # >>> early_stopping_rounds=1, eval_metric='logloss') + # >>> cls2.predict_proba(X) + # array([[0.5, 0.5],... + # >>> cls2.best_score + # 0.6931 + # >>> cls3 = xgboost.XGBClassifier() + # >>> cls3.fit(X_train, y_train, sample_weight=w_train, eval_set=[(X_val, y_val)], + # >>> sample_weight_eval_set=[w_val], + # >>> early_stopping_rounds=1, eval_metric='logloss') + # >>> cls3.predict_proba(X) + # array([[0.3344962, 0.6655038],... 
+ # >>> cls3.best_score + # 0.6365 + self.cls_df_train_with_eval_weight = self.session.createDataFrame( + [ + (Vectors.dense(1.0, 2.0, 3.0), 0, False, 1.0), + (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1, False, 2.0), + (Vectors.dense(4.0, 5.0, 6.0), 0, True, 1.0), + (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, True, 2.0), + ], + ["features", "label", "isVal", "weight"], + ) + self.cls_params_with_eval = { + "validation_indicator_col": "isVal", + "early_stopping_rounds": 1, + "eval_metric": "logloss", + } + self.cls_df_test_with_eval_weight = self.session.createDataFrame( + [ + ( + Vectors.dense(1.0, 2.0, 3.0), + [0.3333, 0.6666], + [0.5, 0.5], + [0.3097, 0.6903], + ), + ], + [ + "features", + "expected_prob_with_weight", + "expected_prob_with_eval", + "expected_prob_with_weight_and_eval", + ], + ) + self.cls_with_eval_best_score = 0.6931 + self.cls_with_eval_and_weight_best_score = 0.6378 + # Test classifier with both base margin and without # >>> import numpy as np # >>> import xgboost @@ -1006,6 +818,96 @@ def test_classifier_with_base_margin(self): row.probability, row.expected_prob_with_base_margin, atol=1e-3 ) + def test_regressor_with_weight_eval(self): + # with weight + regressor_with_weight = SparkXGBRegressor(weight_col="weight") + model_with_weight = regressor_with_weight.fit( + self.reg_df_train_with_eval_weight + ) + pred_result_with_weight = model_with_weight.transform( + self.reg_df_test_with_eval_weight + ).collect() + for row in pred_result_with_weight: + assert np.isclose( + row.prediction, row.expected_prediction_with_weight, atol=1e-3 + ) + + # with eval + regressor_with_eval = SparkXGBRegressor(**self.reg_params_with_eval) + model_with_eval = regressor_with_eval.fit(self.reg_df_train_with_eval_weight) + assert np.isclose( + model_with_eval._xgb_sklearn_model.best_score, + self.reg_with_eval_best_score, + atol=1e-3, + ), ( + f"Expected best score: {self.reg_with_eval_best_score}, but ", + f"get {model_with_eval._xgb_sklearn_model.best_score}", + ) + + pred_result_with_eval = model_with_eval.transform( + self.reg_df_test_with_eval_weight + ).collect() + for row in pred_result_with_eval: + self.assertTrue( + np.isclose( + row.prediction, row.expected_prediction_with_eval, atol=1e-3 + ), + f"Expect prediction is {row.expected_prediction_with_eval}," + f"but get {row.prediction}", + ) + # with weight and eval + regressor_with_weight_eval = SparkXGBRegressor( + weight_col="weight", **self.reg_params_with_eval + ) + model_with_weight_eval = regressor_with_weight_eval.fit( + self.reg_df_train_with_eval_weight + ) + pred_result_with_weight_eval = model_with_weight_eval.transform( + self.reg_df_test_with_eval_weight + ).collect() + self.assertTrue( + np.isclose( + model_with_weight_eval._xgb_sklearn_model.best_score, + self.reg_with_eval_and_weight_best_score, + atol=1e-3, + ) + ) + for row in pred_result_with_weight_eval: + self.assertTrue( + np.isclose( + row.prediction, + row.expected_prediction_with_weight_and_eval, + atol=1e-3, + ) + ) + + def test_classifier_with_weight_eval(self): + # with weight and eval + # Added scale_pos_weight because in 1.4.2, the original answer returns 0.5 which + # doesn't really indicate this working correctly. 
+ classifier_with_weight_eval = SparkXGBClassifier( + weight_col="weight", scale_pos_weight=4, **self.cls_params_with_eval + ) + model_with_weight_eval = classifier_with_weight_eval.fit( + self.cls_df_train_with_eval_weight + ) + pred_result_with_weight_eval = model_with_weight_eval.transform( + self.cls_df_test_with_eval_weight + ).collect() + self.assertTrue( + np.isclose( + model_with_weight_eval._xgb_sklearn_model.best_score, + self.cls_with_eval_and_weight_best_score, + atol=1e-3, + ) + ) + for row in pred_result_with_weight_eval: + self.assertTrue( + np.allclose( + row.probability, row.expected_prob_with_weight_and_eval, atol=1e-3 + ) + ) + def test_num_workers_param(self): regressor = SparkXGBRegressor(num_workers=-1) self.assertRaises(ValueError, regressor._validate_params) From ba08063eb5fdeafb93a81e56cfb1c919571068fd Mon Sep 17 00:00:00 2001 From: jiamingy Date: Wed, 7 Dec 2022 01:07:50 +0800 Subject: [PATCH 086/133] isort. --- tests/test_distributed/test_with_dask/test_with_dask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_distributed/test_with_dask/test_with_dask.py b/tests/test_distributed/test_with_dask/test_with_dask.py index 6074354bb7a1..c454eea0acd0 100644 --- a/tests/test_distributed/test_with_dask/test_with_dask.py +++ b/tests/test_distributed/test_with_dask/test_with_dask.py @@ -12,7 +12,7 @@ from math import ceil from operator import attrgetter, getitem from pathlib import Path -from typing import Any, Dict, Generator, Optional, Tuple, Type, Union, TypeVar +from typing import Any, Dict, Generator, Optional, Tuple, Type, TypeVar, Union import hypothesis import numpy as np From b52d72ed1da0689aeda59973cec95697276919cf Mon Sep 17 00:00:00 2001 From: jiamingy Date: Wed, 7 Dec 2022 01:09:50 +0800 Subject: [PATCH 087/133] Revert conda changes. --- tests/ci_build/conda_env/cpu_test.yml | 3 --- tests/ci_build/conda_env/macos_cpu_test.yml | 13 +++++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/ci_build/conda_env/cpu_test.yml b/tests/ci_build/conda_env/cpu_test.yml index 2c7bc0dadd35..98c7a5928892 100644 --- a/tests/ci_build/conda_env/cpu_test.yml +++ b/tests/ci_build/conda_env/cpu_test.yml @@ -20,10 +20,7 @@ dependencies: - hypothesis>=6.46 - astroid - sh -- sphinx -- recommonmark - mock -- breathe - pytest - pytest-timeout - pytest-cov diff --git a/tests/ci_build/conda_env/macos_cpu_test.yml b/tests/ci_build/conda_env/macos_cpu_test.yml index c03a93e74c15..11d82ff7b605 100644 --- a/tests/ci_build/conda_env/macos_cpu_test.yml +++ b/tests/ci_build/conda_env/macos_cpu_test.yml @@ -1,6 +1,5 @@ name: macos_test channels: -- defaults - conda-forge dependencies: - python=3.8 @@ -15,13 +14,14 @@ dependencies: - scikit-learn - pandas - matplotlib -- dask>=2022.6 -- distributed>=2022.6 +- dask +- distributed +- graphviz - python-graphviz -- hypothesis>=6.46 +- hypothesis - astroid -- sh - sphinx +- sh - recommonmark - mock - breathe @@ -35,7 +35,8 @@ dependencies: - py-ubjson - cffi - pyarrow -- shap +- pyspark +- cloudpickle - pip: - sphinx_rtd_theme - datatable From 9c943e9db2527ec04753c69b5af06eb1796b58d5 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 14 Dec 2022 19:01:38 +0800 Subject: [PATCH 088/133] Fix merge. 
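The merge fix below routes the last few files from `xgboost/generic_parameters.h` to the renamed `xgboost/context.h`. During the transition the old spelling keeps compiling because the new header carries a compatibility alias; roughly, as a sketch of the pattern rather than the full header:

    namespace xgboost {
    struct Context;                    // the renamed parameter bundle
    using GenericParameter = Context;  // legacy name kept while call sites migrate
    }  // namespace xgboost
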
---
 src/common/stats.cc              |  2 +-
 src/objective/init_estimation.cu | 14 +++++++-------
 src/objective/init_estimation.h  |  2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/common/stats.cc b/src/common/stats.cc
index b74e4e1cf0bf..bf0c8d621770 100644
--- a/src/common/stats.cc
+++ b/src/common/stats.cc
@@ -8,7 +8,7 @@
 #include "common.h" // OptionalWeights
 #include "threading_utils.h" // ParallelFor, MemStackAllocator
 #include "transform_iterator.h" // MakeIndexTransformIter
-#include "xgboost/generic_parameters.h" // Context
+#include "xgboost/context.h" // Context
 #include "xgboost/host_device_vector.h" // HostDeviceVector
 #include "xgboost/linalg.h" // Tensor, UnravelIndex, Apply
 #include "xgboost/logging.h" // CHECK_EQ
diff --git a/src/objective/init_estimation.cu b/src/objective/init_estimation.cu
index 55a9fbf14a31..8f95873333b5 100644
--- a/src/objective/init_estimation.cu
+++ b/src/objective/init_estimation.cu
@@ -9,15 +9,15 @@
 #include <thrust/execution_policy.h> // cuda::par
 #include <thrust/iterator/counting_iterator.h> // thrust::make_counting_iterator

-#include <algorithm> // std::max
-#include <cstddef> // std::size_t
+#include <algorithm>  // std::max
+#include <cstddef>  // std::size_t

-#include "../collective/communicator-inl.h" // Allreduce
-#include "../common/device_helpers.cuh" // dh::MakeTransformIterator, dh::Reduce
+#include "../collective/communicator-inl.h"  // Allreduce
+#include "../common/device_helpers.cuh"  // dh::MakeTransformIterator, dh::Reduce
 #include "init_estimation.h"
-#include "xgboost/base.h" // GradientPairPrecise, GradientPair, XGBOOST_DEVICE
-#include "xgboost/generic_parameters.h" // Context
-#include "xgboost/host_device_vector.h" // HostDeviceVector
+#include "xgboost/base.h"  // GradientPairPrecise, GradientPair, XGBOOST_DEVICE
+#include "xgboost/context.h"  // Context
+#include "xgboost/host_device_vector.h"  // HostDeviceVector

 namespace xgboost {
 namespace obj {
diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h
index 5e19c9fae7b0..4b41a348b62b 100644
--- a/src/objective/init_estimation.h
+++ b/src/objective/init_estimation.h
@@ -9,7 +9,7 @@

 #include "../common/common.h" // AssertGPUSupport
 #include "xgboost/base.h" // GradientPair
-#include "xgboost/generic_parameters.h" // Context
+#include "xgboost/context.h" // Context
 #include "xgboost/host_device_vector.h" // HostDeviceVector
 #include "xgboost/linalg.h" // TensorView

From cdb741ab20edcb1a513dcaca98c48df37ce27de6 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Wed, 14 Dec 2022 22:06:23 +0800
Subject: [PATCH 089/133] Add cpp dependencies.
--- .github/workflows/python_tests.yml | 4 ++-- .../ci_build/conda_env/{cpu_test.yml => linux_cpu_test.yml} | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) rename tests/ci_build/conda_env/{cpu_test.yml => linux_cpu_test.yml} (89%) diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index ae683e949baa..f429023a77c4 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -232,8 +232,8 @@ jobs: with: cache-downloads: true cache-env: true - environment-name: cpu_test - environment-file: tests/ci_build/conda_env/cpu_test.yml + environment-name: linux_cpu_test + environment-file: tests/ci_build/conda_env/linux_cpu_test.yml - name: Display Conda env shell: bash -l {0} diff --git a/tests/ci_build/conda_env/cpu_test.yml b/tests/ci_build/conda_env/linux_cpu_test.yml similarity index 89% rename from tests/ci_build/conda_env/cpu_test.yml rename to tests/ci_build/conda_env/linux_cpu_test.yml index eff76cd8c17d..0c426eb356da 100644 --- a/tests/ci_build/conda_env/cpu_test.yml +++ b/tests/ci_build/conda_env/linux_cpu_test.yml @@ -1,8 +1,12 @@ -name: cpu_test +name: linux_cpu_test channels: - conda-forge dependencies: - python=3.8 +- cmake +- c-compiler +- cxx-compiler +- ninja - pip - wheel - pyyaml From cdb741ab20edcb1a513dcaca98c48df37ce27de6 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 02:19:01 +0800 Subject: [PATCH 090/133] Multi target. --- include/xgboost/base.h | 2 + include/xgboost/linalg.h | 26 +++++++- include/xgboost/objective.h | 2 +- src/objective/init_estimation.cc | 67 +++++++++++++++++---- src/objective/init_estimation.h | 16 +++-- src/objective/regression_obj.cu | 22 +++---- tests/cpp/objective/test_init_estimation.cc | 50 +++++++++++++++ 7 files changed, 148 insertions(+), 37 deletions(-) create mode 100644 tests/cpp/objective/test_init_estimation.cc diff --git a/include/xgboost/base.h b/include/xgboost/base.h index 05c13bed0363..34312223c0cb 100644 --- a/include/xgboost/base.h +++ b/include/xgboost/base.h @@ -134,6 +134,8 @@ using bst_row_t = std::size_t; // NOLINT using bst_node_t = int32_t; // NOLINT /*! \brief Type for ranking group index. */ using bst_group_t = uint32_t; // NOLINT +/*! \brief Type for indexing target variables. */ +using bst_target_t = std::size_t; // NOLINT namespace detail { /*! \brief Implementation of gradient statistics pair. Template specialisation diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index d5b255b82b27..ca816bcdb7a4 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -15,6 +15,7 @@ #include #include +#include // std::int32_t #include #include #include @@ -388,9 +389,9 @@ class TensorView { * \brief Create a tensor with data, shape and strides. Don't use this constructor if * stride can be calculated from shape. */ - template + template LINALG_HD TensorView(common::Span data, I const (&shape)[D], I const (&stride)[D], - int32_t device) + std::int32_t device) : data_{data}, ptr_{data_.data()}, device_{device} { static_assert(D == kDim, "Invalid shape & stride."); detail::UnrollLoop([&](auto i) { @@ -833,6 +834,27 @@ class Tensor { int32_t DeviceIdx() const { return data_.DeviceIdx(); } }; +template +using Vector = Tensor; + +template +auto Constant(Context const *ctx, T v, Index &&...index) { + Tensor t; + t.SetDevice(ctx->gpu_id); + t.Reshape(index...); + t.Data()->Fill(std::move(v)); + return t; +} + + +/** + * \brief Like `np.zeros`, return a new array of given shape and type, filled with zeros. 
+ */ +template +auto Zeros(Context const *ctx, Index &&...index) { + return Constant(ctx, static_cast(0), index...); +} + // Only first axis is supported for now. template void Stack(Tensor *l, Tensor const &r) { diff --git a/include/xgboost/objective.h b/include/xgboost/objective.h index 9186ef710631..e5da2878bf91 100644 --- a/include/xgboost/objective.h +++ b/include/xgboost/objective.h @@ -93,7 +93,7 @@ class ObjFunction : public Configurable { * \brief Return number of targets for input matrix. Right now XGBoost supports only * multi-target regression. */ - virtual uint32_t Targets(MetaInfo const& info) const { + virtual bst_target_t Targets(MetaInfo const& info) const { if (info.labels.Shape(1) > 1) { LOG(FATAL) << "multioutput is not supported by current objective function"; } diff --git a/src/objective/init_estimation.cc b/src/objective/init_estimation.cc index 020fcf947612..8c86e4aa2cc8 100644 --- a/src/objective/init_estimation.cc +++ b/src/objective/init_estimation.cc @@ -10,29 +10,74 @@ #include "init_estimation.h" #include // std::max +#include // std::size_t #include "../collective/communicator-inl.h" #include "../common/numeric.h" // cpu_impl::Reduce #include "../common/transform_iterator.h" // MakeIndexTransformIter +#include "xgboost/linalg.h" // TensorView namespace xgboost { namespace obj { namespace cpu_impl { -double FitStump(Context const* ctx, HostDeviceVector const& gpair) { - auto const& h_gpair = gpair.ConstHostVector(); - auto it = common::MakeIndexTransformIter([&](auto i) { - auto const& g = h_gpair[i]; - return GradientPairPrecise{g}; +void FitStump(Context const* ctx, linalg::TensorView gpair, + linalg::VectorView out) { + // 2 rows, first one is gradient, sencond one is hessian. Number of columns equal to + // number of targets. + auto n_targets = out.Size(); + CHECK_EQ(n_targets, gpair.Shape(1)); + linalg::Tensor sum = linalg::Zeros(ctx, 2, n_targets); + CHECK(sum.HostView().CContiguous()); + auto sum_grad = sum.HostView().Slice(0, linalg::All()); + auto sum_hess = sum.HostView().Slice(1, linalg::All()); + + // first dim for gpair is samples, second dim is target. + // Reduce by column + common::ParallelFor(gpair.Shape(1), 1, [&](auto j) { + for (std::size_t i = 0; i < gpair.Shape(0); ++i) { + sum_grad(j) += gpair(i, j).GetGrad(); + sum_hess(j) += gpair(i, j).GetHess(); + } }); - auto sum = common::cpu_impl::Reduce(ctx, it, it + gpair.Size(), GradientPairPrecise{}); - static_assert(sizeof(sum) == sizeof(double) * 2, ""); - collective::Allreduce(reinterpret_cast(&sum), 2); - return -sum.GetGrad() / std::max(sum.GetHess(), 1e-6); + CHECK(sum_grad.CContiguous()); + collective::Allreduce(sum_grad.Values().data(), sum_grad.Size()); + CHECK(sum_hess.CContiguous()); + collective::Allreduce(sum_hess.Values().data(), sum_hess.Size()); + + for (std::size_t i = 0; i < sum_hess.Size(); ++i) { + out(i) = static_cast(CalcUnregulatedWeight(sum_grad(i), sum_hess(i))); + } } } // namespace cpu_impl -double FitStump(Context const* ctx, HostDeviceVector const& gpair) { - return ctx->IsCPU() ? 
cpu_impl::FitStump(ctx, gpair) : cuda_impl::FitStump(ctx, gpair);
+namespace cuda_impl {
+void FitStump(Context const* ctx, linalg::TensorView<GradientPair const, 2> gpair,
+ linalg::VectorView<float> out);
+
+#if !defined(XGBOOST_USE_CUDA)
+inline void FitStump(Context const*, linalg::TensorView<GradientPair const, 2>,
+ linalg::VectorView<float>) {
+ common::AssertGPUSupport();
+}
+#endif // !defined(XGBOOST_USE_CUDA)
+} // namespace cuda_impl
+
+void FitStump(Context const* ctx, HostDeviceVector<GradientPair> const& gpair,
+ bst_target_t n_targets, linalg::Vector<float>* out) {
+ out->SetDevice(ctx->gpu_id);
+ out->Reshape(n_targets);
+
+ auto n_samples = gpair.Size() / n_targets;
+ std::size_t shape[2]{n_samples, n_targets};
+ std::size_t strides[2];
+ linalg::detail::CalcStride<2, true>(shape, strides);
+
+ gpair.SetDevice(ctx->gpu_id);
+ linalg::TensorView<GradientPair const, 2> gpair_t{
+ ctx->IsCPU() ? gpair.ConstHostSpan() : gpair.ConstDeviceSpan(), shape, strides, ctx->gpu_id};
+ ctx->IsCPU() ? cpu_impl::FitStump(ctx, gpair_t, out->HostView())
+ : cuda_impl::FitStump(ctx, gpair_t, out->View(ctx->gpu_id));
 }
 } // namespace obj
 } // namespace xgboost
diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h
index 4b41a348b62b..3cc9e7d9fde6 100644
--- a/src/objective/init_estimation.h
+++ b/src/objective/init_estimation.h
@@ -15,19 +15,17 @@
 namespace xgboost {
 namespace obj {
-namespace cuda_impl {
-double FitStump(Context const* ctx, HostDeviceVector<GradientPair> const& gpair);
-#if !defined(XGBOOST_USE_CUDA)
-inline double FitStump(Context const*, HostDeviceVector<GradientPair> const&) {
- common::AssertGPUSupport();
- return 0.0;
+
+template <typename T>
+XGBOOST_DEVICE inline double CalcUnregulatedWeight(T sum_grad, T sum_hess) {
+ return -sum_grad / std::max(sum_hess, static_cast<T>(kRtEps));
 }
-#endif // !defined(XGBOOST_USE_CUDA)
-} // namespace cuda_impl
+
 /**
 * @brief Fit a tree stump as an estimation of base_score.
 */
-double FitStump(Context const* ctx, HostDeviceVector<GradientPair> const& gpair);
+void FitStump(Context const* ctx, HostDeviceVector<GradientPair> const& gpair,
+ bst_target_t n_targets, linalg::Vector<float>* out);
 } // namespace obj
 } // namespace xgboost
 #endif // XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_
diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index 8773febcf8c5..740d0c829731 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -82,7 +82,7 @@ class RegLossObj : public ObjFunction {

 ObjInfo Task() const override { return Loss::Info(); }

- uint32_t Targets(MetaInfo const& info) const override {
+ bst_target_t Targets(MetaInfo const& info) const override {
 // Multi-target regression.
 return std::max(static_cast<size_t>(1), info.labels.Shape(1));
 }
@@ -171,16 +171,6 @@ class RegLossObj : public ObjFunction {

 void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_margin) const override {
 CheckInitInputs(info);
- base_margin->Reshape(1);
- auto out = base_margin->HostView();
-
- if (this->Targets(info) > 1) {
- // multi-output not yet supported due to constraint in binary model format.
(no - // vector in parameter) - out(0) = DefaultBaseScore(); - return; - } - HostDeviceVector dummy_predt(info.labels.Size(), 0.0f); HostDeviceVector gpair(info.labels.Size()); @@ -189,8 +179,12 @@ class RegLossObj : public ObjFunction { this->SaveConfig(&config); new_obj->LoadConfig(config); new_obj->GetGradient(dummy_predt, info, 0, &gpair); - - out(0) = Loss::PredTransform(FitStump(ctx_, gpair)); + bst_target_t n_targets = this->Targets(info); + FitStump(ctx_, gpair, n_targets, base_margin); + auto h_base_margin = base_margin->HostView(); + for (bst_target_t t = 0; t < n_targets; ++t) { + h_base_margin(t) = Loss::PredTransform(h_base_margin(t)); + } } void SaveConfig(Json* p_out) const override { @@ -245,7 +239,7 @@ class PseudoHuberRegression : public ObjFunction { public: void Configure(Args const& args) override { param_.UpdateAllowUnknown(args); } ObjInfo Task() const override { return ObjInfo::kRegression; } - uint32_t Targets(MetaInfo const& info) const override { + bst_target_t Targets(MetaInfo const& info) const override { return std::max(static_cast(1), info.labels.Shape(1)); } diff --git a/tests/cpp/objective/test_init_estimation.cc b/tests/cpp/objective/test_init_estimation.cc new file mode 100644 index 000000000000..392d9b25677a --- /dev/null +++ b/tests/cpp/objective/test_init_estimation.cc @@ -0,0 +1,50 @@ +/** + * Copyright 2022 by XGBoost Contributors + */ + +#include +#include + +#include "../../src/common/linalg_op.h" +#include "../../src/objective/init_estimation.h" + +namespace xgboost { +namespace obj { + +void TestFitStump(Context const *ctx) { + std::size_t constexpr kRows = 16, kTargets = 2; + HostDeviceVector gpair; + auto &h_gpair = gpair.HostVector(); + h_gpair.resize(kRows * kTargets); + for (std::size_t t = 0; t < kTargets; ++t) { + for (std::size_t i = 0; i < kRows; ++i) { + h_gpair.at(t * kRows + i) = GradientPair{static_cast(i), 1}; + } + } + linalg::Vector out; + FitStump(ctx, gpair, kTargets, &out); + auto h_out = out.HostView(); + for (auto it = linalg::cbegin(h_out); it != linalg::cend(h_out); ++it) { + // sum_hess == kRows + auto n = static_cast(kRows); + auto sum_grad = n * (n - 1) / 2; + // ASSERT_EQ(static_cast(-sum_grad / n), *it); + std::cout << *it << std::endl; + } + std::cout << std::endl; +} + +TEST(InitEstimation, FitStump) { + Context ctx; + TestFitStump(&ctx); +} + +#if defined(XGBOOST_USE_CUDA) +TEST(InitEstimation, GPUFitStump) { + Context ctx; + ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}}); + TestFitStump(&ctx); +} +#endif // defined(XGBOOST_USE_CUDA) +} // namespace obj +} // namespace xgboost From 98d7f09793946cfc3d82f5c3e8da016f94bf8a98 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 03:14:14 +0800 Subject: [PATCH 091/133] GPU. 
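The CUDA implementation below computes the same per-target sums as the CPU path, but in a single pass: each flat index is keyed by its target (`i / n_samples` under the column-major layout) and the pairs are combined with `thrust::reduce_by_key`. For reference, the reduction it performs is equivalent to the following plain C++; the names (`GradHess`, `FitStumpReference`) are illustrative only, not part of the code base:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    struct GradHess {
      double grad{0.0}, hess{0.0};
    };

    // One gradient/hessian sum per target, then a single Newton step
    // -G / max(H, eps) for each target's leaf weight.
    std::vector<double> FitStumpReference(std::vector<GradHess> const& gpair,
                                          std::size_t n_samples, std::size_t n_targets) {
      std::vector<GradHess> sum(n_targets);
      for (std::size_t i = 0; i < gpair.size(); ++i) {
        auto target = i / n_samples;  // column-major: a target's samples are contiguous
        sum[target].grad += gpair[i].grad;
        sum[target].hess += gpair[i].hess;
      }
      std::vector<double> weight(n_targets);
      for (std::size_t t = 0; t < n_targets; ++t) {
        weight[t] = -sum[t].grad / std::max(sum[t].hess, 1e-6);
      }
      return weight;
    }
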
---
 src/objective/init_estimation.cu            | 84 +++++++++++++++------
 tests/cpp/objective/test_init_estimation.cc |  4 +-
 2 files changed, 63 insertions(+), 25 deletions(-)

diff --git a/src/objective/init_estimation.cu b/src/objective/init_estimation.cu
index 8f95873333b5..ae8520ae5934 100644
--- a/src/objective/init_estimation.cu
+++ b/src/objective/init_estimation.cu
@@ -5,37 +5,77 @@
 */
 #if !defined(NOMINMAX) && defined(_WIN32)
 #define NOMINMAX
-#endif // !defined(NOMINMAX)
-#include <thrust/execution_policy.h> // cuda::par
-#include <thrust/iterator/counting_iterator.h> // thrust::make_counting_iterator
+#endif  // !defined(NOMINMAX)
+#include <thrust/execution_policy.h>  // cuda::par
+#include <thrust/functional.h>  // thrust::equal_to
+#include <thrust/iterator/counting_iterator.h>  // thrust::make_counting_iterator
+#include <thrust/iterator/zip_iterator.h>  // thrust::make_zip_iterator

-#include <algorithm> // std::max
-#include <cstddef> // std::size_t
+#include <algorithm>  // std::max
+#include <cstddef>  // std::size_t

-#include "../collective/communicator-inl.h" // Allreduce
-#include "../common/device_helpers.cuh" // dh::MakeTransformIterator, dh::Reduce
+#include "../collective/device_communicator.cuh"  // DeviceCommunicator
+#include "../common/device_helpers.cuh"  // dh::MakeTransformIterator, dh::Reduce, dh::TypedDiscard
 #include "init_estimation.h"
-#include "xgboost/base.h" // GradientPairPrecise, GradientPair, XGBOOST_DEVICE
-#include "xgboost/context.h" // Context
-#include "xgboost/host_device_vector.h" // HostDeviceVector
+#include "xgboost/base.h"  // GradientPairPrecise, GradientPair, XGBOOST_DEVICE
+#include "xgboost/context.h"  // Context
+#include "xgboost/span.h"  // Span

 namespace xgboost {
 namespace obj {
 namespace cuda_impl {
-double FitStump(Context const* ctx, HostDeviceVector<GradientPair> const& gpair) {
- gpair.SetDevice(ctx->gpu_id);
- auto d_gpair = gpair.ConstDeviceSpan();
- auto it = dh::MakeTransformIterator<GradientPairPrecise>(
- thrust::make_counting_iterator(0ul),
- [=] XGBOOST_DEVICE(std::size_t i) -> GradientPairPrecise {
- return GradientPairPrecise{d_gpair[i]};
+void FitStump(Context const* ctx, linalg::TensorView<GradientPair const, 2> gpair,
+ linalg::VectorView<float> out) {
+ // 2 rows, first one is gradient, second one is hessian. Number of columns equal to
+ // number of targets.
+ auto n_targets = out.Size(); + CHECK_EQ(n_targets, gpair.Shape(1)); + linalg::Tensor sum = linalg::Zeros(ctx, 2, n_targets); + CHECK(out.Contiguous()); + auto sum_grad = sum.View(ctx->gpu_id).Slice(0, linalg::All()); + auto sum_hess = sum.View(ctx->gpu_id).Slice(1, linalg::All()); + + // Reduce by column + auto key_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> bst_target_t { + return i / gpair.Shape(0); + return std::get<1>(linalg::UnravelIndex(i, gpair.Shape())); }); + auto grad_it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) -> double { + auto target = i / gpair.Shape(0); + auto sample = i % gpair.Shape(0); + return gpair(sample, target).GetGrad(); + }); + auto hess_it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) -> double { + auto target = i / gpair.Shape(0); + auto sample = i % gpair.Shape(0); + return gpair(sample, target).GetHess(); + }); + auto val_it = thrust::make_zip_iterator(grad_it, hess_it); + CHECK(sum_grad.CContiguous()); + CHECK(sum_hess.CContiguous()); + auto out_it = + thrust::make_zip_iterator(dh::tbegin(sum_grad.Values()), dh::tbegin(sum_hess.Values())); + dh::XGBCachingDeviceAllocator alloc; - auto sum = dh::Reduce(thrust::cuda::par(alloc), it, it + d_gpair.size(), GradientPairPrecise{}, - thrust::plus{}); - static_assert(sizeof(sum) == sizeof(double) * 2, ""); - collective::Allreduce(reinterpret_cast(&sum), 2); - return -sum.GetGrad() / std::max(sum.GetHess(), 1e-6); + auto policy = thrust::cuda::par(alloc); + thrust::reduce_by_key(policy, key_it, key_it + gpair.Size(), val_it, + dh::TypedDiscard{}, out_it, thrust::equal_to{}, + [=] __device__(auto lhs, auto rhs) { + return thrust::make_tuple(thrust::get<0>(lhs) + thrust::get<0>(rhs), + thrust::get<1>(lhs) + thrust::get<1>(rhs)); + }); + + collective::DeviceCommunicator* communicator = collective::Communicator::GetDevice(ctx->gpu_id); + communicator->AllReduceSum(sum_grad.Values().data(), sum_grad.Size()); + communicator->AllReduceSum(sum_hess.Values().data(), sum_hess.Size()); + + thrust::for_each_n(policy, thrust::make_counting_iterator(0ul), n_targets, + [=] XGBOOST_DEVICE(std::size_t i) mutable { + out(i) = static_cast(CalcUnregulatedWeight(sum_grad(i), sum_hess(i))); + }); } } // namespace cuda_impl } // namespace obj diff --git a/tests/cpp/objective/test_init_estimation.cc b/tests/cpp/objective/test_init_estimation.cc index 392d9b25677a..715bdedf32b7 100644 --- a/tests/cpp/objective/test_init_estimation.cc +++ b/tests/cpp/objective/test_init_estimation.cc @@ -28,10 +28,8 @@ void TestFitStump(Context const *ctx) { // sum_hess == kRows auto n = static_cast(kRows); auto sum_grad = n * (n - 1) / 2; - // ASSERT_EQ(static_cast(-sum_grad / n), *it); - std::cout << *it << std::endl; + ASSERT_EQ(static_cast(-sum_grad / n), *it); } - std::cout << std::endl; } TEST(InitEstimation, FitStump) { From a2efb891b909b21d1f487a03cc60988d59306ff9 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 03:19:41 +0800 Subject: [PATCH 092/133] rename. 
--- src/objective/regression_obj.cu | 2 +- src/{objective/init_estimation.cc => tree/fit_stump.cc} | 2 +- src/{objective/init_estimation.cu => tree/fit_stump.cu} | 2 +- src/{objective/init_estimation.h => tree/fit_stump.h} | 0 .../test_init_estimation.cc => tree/test_fit_stump.cc} | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) rename src/{objective/init_estimation.cc => tree/fit_stump.cc} (99%) rename src/{objective/init_estimation.cu => tree/fit_stump.cu} (99%) rename src/{objective/init_estimation.h => tree/fit_stump.h} (100%) rename tests/cpp/{objective/test_init_estimation.cc => tree/test_fit_stump.cc} (96%) diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 740d0c829731..6316b8c189a6 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -20,9 +20,9 @@ #include "../common/stats.h" #include "../common/threading_utils.h" #include "../common/transform.h" +#include "../tree/fit_stump.h" #include "./regression_loss.h" #include "adaptive.h" -#include "init_estimation.h" // FitStump #include "xgboost/base.h" #include "xgboost/context.h" #include "xgboost/data.h" diff --git a/src/objective/init_estimation.cc b/src/tree/fit_stump.cc similarity index 99% rename from src/objective/init_estimation.cc rename to src/tree/fit_stump.cc index 8c86e4aa2cc8..fb2dab6b9452 100644 --- a/src/objective/init_estimation.cc +++ b/src/tree/fit_stump.cc @@ -7,7 +7,7 @@ #if !defined(NOMINMAX) && defined(_WIN32) #define NOMINMAX #endif // !defined(NOMINMAX) -#include "init_estimation.h" +#include "fit_stump.h" #include // std::max #include // std::size_t diff --git a/src/objective/init_estimation.cu b/src/tree/fit_stump.cu similarity index 99% rename from src/objective/init_estimation.cu rename to src/tree/fit_stump.cu index ae8520ae5934..2b9ecab8e64d 100644 --- a/src/objective/init_estimation.cu +++ b/src/tree/fit_stump.cu @@ -16,7 +16,7 @@ #include "../collective/device_communicator.cuh" // DeviceCommunicator #include "../common/device_helpers.cuh" // dh::MakeTransformIterator::Reduce,TypedDiscard -#include "init_estimation.h" +#include "fit_stump.h" #include "xgboost/base.h" // GradientPairPrecise, GradientPair, XGBOOST_DEVICE #include "xgboost/context.h" // Context #include "xgboost/span.h" // span diff --git a/src/objective/init_estimation.h b/src/tree/fit_stump.h similarity index 100% rename from src/objective/init_estimation.h rename to src/tree/fit_stump.h diff --git a/tests/cpp/objective/test_init_estimation.cc b/tests/cpp/tree/test_fit_stump.cc similarity index 96% rename from tests/cpp/objective/test_init_estimation.cc rename to tests/cpp/tree/test_fit_stump.cc index 715bdedf32b7..03f11e0ace24 100644 --- a/tests/cpp/objective/test_init_estimation.cc +++ b/tests/cpp/tree/test_fit_stump.cc @@ -1,12 +1,12 @@ /** * Copyright 2022 by XGBoost Contributors */ +#include "../../src/tree/fit_stump.h" #include #include #include "../../src/common/linalg_op.h" -#include "../../src/objective/init_estimation.h" namespace xgboost { namespace obj { From c923d3088dee87af6abc6ea64f095a4b69d9e329 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 03:32:20 +0800 Subject: [PATCH 093/133] Cleanup. 
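The cleanup below folds the separate gradient and hessian buffers into a single vector of `GradientPairPrecise` and issues one allreduce over it. That works because the pair is exactly two packed doubles, so N pairs can be summed across workers as a flat buffer of 2 * N doubles. A minimal sketch of the layout assumption, with `AllReduceSum` standing in for the real collective call:

    #include <cstddef>

    struct GradientPairPrecise {
      double grad_, hess_;
    };
    static_assert(sizeof(GradientPairPrecise) == 2 * sizeof(double),
                  "pairs must reduce as a flat buffer of doubles");

    void AllReduceSum(double* data, std::size_t n);  // provided by the collective layer

    void ReducePairSums(GradientPairPrecise* sums, std::size_t n_targets) {
      // Element-wise summation commutes with the reinterpretation: summing the
      // buffer as doubles sums each pair's gradient and hessian independently.
      AllReduceSum(reinterpret_cast<double*>(sums), n_targets * 2);
    }
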
--- src/common/host_device_vector.cc | 1 + src/common/host_device_vector.cu | 1 + src/tree/fit_stump.cc | 22 +++++--------- src/tree/fit_stump.cu | 51 +++++++++++--------------------- 4 files changed, 26 insertions(+), 49 deletions(-) diff --git a/src/common/host_device_vector.cc b/src/common/host_device_vector.cc index fc33317bd7a8..030070d9aecd 100644 --- a/src/common/host_device_vector.cc +++ b/src/common/host_device_vector.cc @@ -172,6 +172,7 @@ void HostDeviceVector::SetDevice(int) const {} template class HostDeviceVector; template class HostDeviceVector; template class HostDeviceVector; +template class HostDeviceVector; template class HostDeviceVector; // bst_node_t template class HostDeviceVector; template class HostDeviceVector; diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu index 00f19230dc7e..a5c5dbf8fa1b 100644 --- a/src/common/host_device_vector.cu +++ b/src/common/host_device_vector.cu @@ -404,6 +404,7 @@ void HostDeviceVector::Resize(size_t new_size, T v) { template class HostDeviceVector; template class HostDeviceVector; template class HostDeviceVector; +template class HostDeviceVector; template class HostDeviceVector; // bst_node_t template class HostDeviceVector; template class HostDeviceVector; diff --git a/src/tree/fit_stump.cc b/src/tree/fit_stump.cc index fb2dab6b9452..0859bc5c5047 100644 --- a/src/tree/fit_stump.cc +++ b/src/tree/fit_stump.cc @@ -22,30 +22,22 @@ namespace obj { namespace cpu_impl { void FitStump(Context const* ctx, linalg::TensorView gpair, linalg::VectorView out) { - // 2 rows, first one is gradient, sencond one is hessian. Number of columns equal to - // number of targets. auto n_targets = out.Size(); CHECK_EQ(n_targets, gpair.Shape(1)); - linalg::Tensor sum = linalg::Zeros(ctx, 2, n_targets); - CHECK(sum.HostView().CContiguous()); - auto sum_grad = sum.HostView().Slice(0, linalg::All()); - auto sum_hess = sum.HostView().Slice(1, linalg::All()); - + linalg::Vector sum = linalg::Constant(ctx, GradientPairPrecise{}, n_targets); + auto h_sum = sum.HostView(); // first dim for gpair is samples, second dim is target. // Reduce by column common::ParallelFor(gpair.Shape(1), 1, [&](auto j) { for (std::size_t i = 0; i < gpair.Shape(0); ++i) { - sum_grad(j) += gpair(i, j).GetGrad(); - sum_hess(j) += gpair(i, j).GetHess(); + h_sum(j) += GradientPairPrecise{gpair(i, j)}; } }); - CHECK(sum_grad.CContiguous()); - collective::Allreduce(sum_grad.Values().data(), sum_grad.Size()); - CHECK(sum_hess.CContiguous()); - collective::Allreduce(sum_hess.Values().data(), sum_hess.Size()); + collective::Allreduce( + reinterpret_cast(h_sum.Values().data()), h_sum.Size() * 2); - for (std::size_t i = 0; i < sum_hess.Size(); ++i) { - out(i) = static_cast(CalcUnregulatedWeight(sum_grad(i), sum_hess(i))); + for (std::size_t i = 0; i < h_sum.Size(); ++i) { + out(i) = static_cast(CalcUnregulatedWeight(h_sum(i).GetGrad(), h_sum(i).GetHess())); } } } // namespace cpu_impl diff --git a/src/tree/fit_stump.cu b/src/tree/fit_stump.cu index 2b9ecab8e64d..c185b4e6f769 100644 --- a/src/tree/fit_stump.cu +++ b/src/tree/fit_stump.cu @@ -26,14 +26,10 @@ namespace obj { namespace cuda_impl { void FitStump(Context const* ctx, linalg::TensorView gpair, linalg::VectorView out) { - // 2 rows, first one is gradient, sencond one is hessian. Number of columns equal to - // number of targets. 
auto n_targets = out.Size(); CHECK_EQ(n_targets, gpair.Shape(1)); - linalg::Tensor sum = linalg::Zeros(ctx, 2, n_targets); + linalg::Vector sum = linalg::Constant(ctx, GradientPairPrecise{}, n_targets); CHECK(out.Contiguous()); - auto sum_grad = sum.View(ctx->gpu_id).Slice(0, linalg::All()); - auto sum_hess = sum.View(ctx->gpu_id).Slice(1, linalg::All()); // Reduce by column auto key_it = dh::MakeTransformIterator( @@ -41,41 +37,28 @@ void FitStump(Context const* ctx, linalg::TensorView gpai return i / gpair.Shape(0); return std::get<1>(linalg::UnravelIndex(i, gpair.Shape())); }); - auto grad_it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), - [=] XGBOOST_DEVICE(std::size_t i) -> double { - auto target = i / gpair.Shape(0); - auto sample = i % gpair.Shape(0); - return gpair(sample, target).GetGrad(); - }); - auto hess_it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), - [=] XGBOOST_DEVICE(std::size_t i) -> double { - auto target = i / gpair.Shape(0); - auto sample = i % gpair.Shape(0); - return gpair(sample, target).GetHess(); - }); - auto val_it = thrust::make_zip_iterator(grad_it, hess_it); - CHECK(sum_grad.CContiguous()); - CHECK(sum_hess.CContiguous()); - auto out_it = - thrust::make_zip_iterator(dh::tbegin(sum_grad.Values()), dh::tbegin(sum_hess.Values())); + auto grad_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) -> GradientPairPrecise { + auto target = i / gpair.Shape(0); + auto sample = i % gpair.Shape(0); + return GradientPairPrecise{gpair(sample, target)}; + }); + auto d_sum = sum.View(ctx->gpu_id); dh::XGBCachingDeviceAllocator alloc; auto policy = thrust::cuda::par(alloc); - thrust::reduce_by_key(policy, key_it, key_it + gpair.Size(), val_it, - dh::TypedDiscard{}, out_it, thrust::equal_to{}, - [=] __device__(auto lhs, auto rhs) { - return thrust::make_tuple(thrust::get<0>(lhs) + thrust::get<0>(rhs), - thrust::get<1>(lhs) + thrust::get<1>(rhs)); - }); + thrust::reduce_by_key(policy, key_it, key_it + gpair.Size(), grad_it, + dh::TypedDiscard{}, dh::tbegin(d_sum.Values())); collective::DeviceCommunicator* communicator = collective::Communicator::GetDevice(ctx->gpu_id); - communicator->AllReduceSum(sum_grad.Values().data(), sum_grad.Size()); - communicator->AllReduceSum(sum_hess.Values().data(), sum_hess.Size()); + communicator->AllReduceSum(reinterpret_cast(d_sum.Values().data()), d_sum.Size() * 2); - thrust::for_each_n(policy, thrust::make_counting_iterator(0ul), n_targets, - [=] XGBOOST_DEVICE(std::size_t i) mutable { - out(i) = static_cast(CalcUnregulatedWeight(sum_grad(i), sum_hess(i))); - }); + thrust::for_each_n( + policy, thrust::make_counting_iterator(0ul), n_targets, + [=] XGBOOST_DEVICE(std::size_t i) mutable { + out(i) = static_cast(CalcUnregulatedWeight(d_sum(i).GetGrad(), d_sum(i).GetHess())); + }); } } // namespace cuda_impl } // namespace obj From c182094d5b8c7be853371ef63189f2bb1e3a931a Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 03:33:55 +0800 Subject: [PATCH 094/133] comment. 
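The new comment documents the layout assumption behind the CUDA
reduction: the (n_samples, n_targets) gradient tensor is traversed
column-major, so elements belonging to the same target form contiguous
segments and a single reduce_by_key yields the per-target sums. A numpy
emulation of the key/value iterators (illustrative, not the thrust API):

    import numpy as np

    def segmented_column_sums(gpair):
        n_samples, n_targets = gpair.shape
        i = np.arange(gpair.size)
        keys = i // n_samples                        # key_it: target id
        vals = gpair[i % n_samples, i // n_samples]  # grad_it: column-major walk
        sums = np.zeros(n_targets, dtype=gpair.dtype)
        np.add.at(sums, keys, vals)                  # reduce_by_key analogue
        return sums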
--- src/tree/fit_stump.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tree/fit_stump.cc b/src/tree/fit_stump.cc index 0859bc5c5047..9045484032b4 100644 --- a/src/tree/fit_stump.cc +++ b/src/tree/fit_stump.cc @@ -59,7 +59,7 @@ void FitStump(Context const* ctx, HostDeviceVector const& gpair, bst_target_t n_targets, linalg::Vector* out) { out->SetDevice(ctx->gpu_id); out->Reshape(n_targets); - + // column-major auto n_samples = gpair.Size() / n_targets; std::size_t shape[2]{n_samples, n_targets}; std::size_t strides[2]; From ce59f2a00792454f6a4ced96102fb021fef8c868 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 03:36:51 +0800 Subject: [PATCH 095/133] Remove. --- src/objective/regression_obj.cu | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 6316b8c189a6..732facd4036c 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -182,9 +182,14 @@ class RegLossObj : public ObjFunction { bst_target_t n_targets = this->Targets(info); FitStump(ctx_, gpair, n_targets, base_margin); auto h_base_margin = base_margin->HostView(); + // workaround, we don't support multi-target due to binary model serialization for + // base margin. + float v = 0; for (bst_target_t t = 0; t < n_targets; ++t) { - h_base_margin(t) = Loss::PredTransform(h_base_margin(t)); + v += Loss::PredTransform(h_base_margin(t)) / static_cast(n_targets); } + base_margin->Reshape(1); + base_margin->HostView()(0) = v; } void SaveConfig(Json* p_out) const override { From fcb855496f9bed7f2841c73cc1c93709b8b3e583 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 03:40:24 +0800 Subject: [PATCH 096/133] Cleanup. --- python-package/xgboost/spark/__init__.py | 2 +- src/common/numeric.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python-package/xgboost/spark/__init__.py b/python-package/xgboost/spark/__init__.py index 7d99896b3b85..7c18eeba46b5 100644 --- a/python-package/xgboost/spark/__init__.py +++ b/python-package/xgboost/spark/__init__.py @@ -6,7 +6,7 @@ except ImportError as e: raise ImportError("pyspark package needs to be installed to use this module") from e -from .estimator import ( # type: ignore +from .estimator import ( SparkXGBClassifier, SparkXGBClassifierModel, SparkXGBRanker, diff --git a/src/common/numeric.cu b/src/common/numeric.cu index 270cddd5edaa..b292edf1aa7f 100644 --- a/src/common/numeric.cu +++ b/src/common/numeric.cu @@ -3,7 +3,7 @@ */ #include -#include "device_helpers.cuh" +#include "device_helpers.cuh" // dh::Reduce, dh::XGBCachingDeviceAllocator #include "numeric.h" #include "xgboost/context.h" // Context #include "xgboost/host_device_vector.h" // HostDeviceVector From 70284689cafac973ea49845252d5cc6ef8ac085e Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 03:43:57 +0800 Subject: [PATCH 097/133] cpu build & cleanup. 
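Context for the base_score collapse kept in regression_obj.cu: because
the legacy binary format stores a single scalar base_score, the
per-target stump weights are pushed through the loss's PredTransform and
then averaged. A sketch assuming logistic loss, where PredTransform is
the sigmoid (illustrative only):

    import math

    def collapse_base_score(weights):
        # Map each per-target weight into probability space, then average
        # into the one scalar the binary serialization format supports.
        sigmoid = lambda x: 1.0 / (1.0 + math.exp(-x))
        return sum(sigmoid(w) for w in weights) / len(weights)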
--- src/objective/regression_obj.cu | 2 +- src/tree/fit_stump.cc | 5 ++--- src/tree/fit_stump.cu | 5 ++--- src/tree/fit_stump.h | 10 +++++----- tests/cpp/tree/test_fit_stump.cc | 4 ++-- 5 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 732facd4036c..d52d44dad4c0 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -180,7 +180,7 @@ class RegLossObj : public ObjFunction { new_obj->LoadConfig(config); new_obj->GetGradient(dummy_predt, info, 0, &gpair); bst_target_t n_targets = this->Targets(info); - FitStump(ctx_, gpair, n_targets, base_margin); + tree::FitStump(ctx_, gpair, n_targets, base_margin); auto h_base_margin = base_margin->HostView(); // workaround, we don't support multi-target due to binary model serialization for // base margin. diff --git a/src/tree/fit_stump.cc b/src/tree/fit_stump.cc index 9045484032b4..425233603ce3 100644 --- a/src/tree/fit_stump.cc +++ b/src/tree/fit_stump.cc @@ -18,7 +18,7 @@ #include "xgboost/linalg.h" // TensorView namespace xgboost { -namespace obj { +namespace tree { namespace cpu_impl { void FitStump(Context const* ctx, linalg::TensorView gpair, linalg::VectorView out) { @@ -50,7 +50,6 @@ void FitStump(Context const* ctx, linalg::TensorView gpai inline void FitStump(Context const*, linalg::TensorView, linalg::VectorView) { common::AssertGPUSupport(); - return 0.0; } #endif // !defined(XGBOOST_USE_CUDA) } // namespace cuda_impl @@ -71,5 +70,5 @@ void FitStump(Context const* ctx, HostDeviceVector const& gpair, ctx->IsCPU() ? cpu_impl::FitStump(ctx, gpair_t, out->HostView()) : cuda_impl::FitStump(ctx, gpair_t, out->View(ctx->gpu_id)); } -} // namespace obj +} // namespace tree } // namespace xgboost diff --git a/src/tree/fit_stump.cu b/src/tree/fit_stump.cu index c185b4e6f769..b7bbcdb87b95 100644 --- a/src/tree/fit_stump.cu +++ b/src/tree/fit_stump.cu @@ -22,7 +22,7 @@ #include "xgboost/span.h" // span namespace xgboost { -namespace obj { +namespace tree { namespace cuda_impl { void FitStump(Context const* ctx, linalg::TensorView gpair, linalg::VectorView out) { @@ -35,7 +35,6 @@ void FitStump(Context const* ctx, linalg::TensorView gpai auto key_it = dh::MakeTransformIterator( thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> bst_target_t { return i / gpair.Shape(0); - return std::get<1>(linalg::UnravelIndex(i, gpair.Shape())); }); auto grad_it = dh::MakeTransformIterator( thrust::make_counting_iterator(0ul), @@ -61,5 +60,5 @@ void FitStump(Context const* ctx, linalg::TensorView gpai }); } } // namespace cuda_impl -} // namespace obj +} // namespace tree } // namespace xgboost diff --git a/src/tree/fit_stump.h b/src/tree/fit_stump.h index 3cc9e7d9fde6..96af3f6c0de5 100644 --- a/src/tree/fit_stump.h +++ b/src/tree/fit_stump.h @@ -4,8 +4,8 @@ * \brief Utilities for estimating initial score. 
*/ -#ifndef XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ -#define XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ +#ifndef XGBOOST_TREE_FIT_STUMP_H_ +#define XGBOOST_TREE_FIT_STUMP_H_ #include "../common/common.h" // AssertGPUSupport #include "xgboost/base.h" // GradientPair @@ -14,7 +14,7 @@ #include "xgboost/linalg.h" // TensorView namespace xgboost { -namespace obj { +namespace tree { template XGBOOST_DEVICE inline double CalcUnregulatedWeight(T sum_grad, T sum_hess) { @@ -26,6 +26,6 @@ XGBOOST_DEVICE inline double CalcUnregulatedWeight(T sum_grad, T sum_hess) { */ void FitStump(Context const* ctx, HostDeviceVector const& gpair, bst_target_t n_targets, linalg::Vector* out); -} // namespace obj +} // namespace tree } // namespace xgboost -#endif // XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ +#endif // XGBOOST_TREE_FIT_STUMP_H_ diff --git a/tests/cpp/tree/test_fit_stump.cc b/tests/cpp/tree/test_fit_stump.cc index 03f11e0ace24..2910faa2f7c8 100644 --- a/tests/cpp/tree/test_fit_stump.cc +++ b/tests/cpp/tree/test_fit_stump.cc @@ -9,7 +9,7 @@ #include "../../src/common/linalg_op.h" namespace xgboost { -namespace obj { +namespace tree { void TestFitStump(Context const *ctx) { std::size_t constexpr kRows = 16, kTargets = 2; @@ -44,5 +44,5 @@ TEST(InitEstimation, GPUFitStump) { TestFitStump(&ctx); } #endif // defined(XGBOOST_USE_CUDA) -} // namespace obj +} // namespace tree } // namespace xgboost From c08a2e108d65c2eb8c7f075d1d41647d5b2a0d78 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 04:14:05 +0800 Subject: [PATCH 098/133] remove. --- src/objective/regression_obj.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index d52d44dad4c0..bd24dd584955 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -186,7 +186,7 @@ class RegLossObj : public ObjFunction { // base margin. float v = 0; for (bst_target_t t = 0; t < n_targets; ++t) { - v += Loss::PredTransform(h_base_margin(t)) / static_cast(n_targets); + v += h_base_margin(t) / static_cast(n_targets); } base_margin->Reshape(1); base_margin->HostView()(0) = v; From 93b223ef53ffe3e95725fd538cdbe8d5b55dc7e6 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 04:22:18 +0800 Subject: [PATCH 099/133] revert. --- src/objective/regression_obj.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index bd24dd584955..d52d44dad4c0 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -186,7 +186,7 @@ class RegLossObj : public ObjFunction { // base margin. 
float v = 0; for (bst_target_t t = 0; t < n_targets; ++t) { - v += h_base_margin(t) / static_cast(n_targets); + v += Loss::PredTransform(h_base_margin(t)) / static_cast(n_targets); } base_margin->Reshape(1); base_margin->HostView()(0) = v; From 19d6f235db752adab0074a6ee94c141b74cf9004 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 04:41:03 +0800 Subject: [PATCH 100/133] exclude multi --- src/objective/multiclass_obj.cu | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/objective/multiclass_obj.cu b/src/objective/multiclass_obj.cu index 312992ec59f2..5fc216b3976b 100644 --- a/src/objective/multiclass_obj.cu +++ b/src/objective/multiclass_obj.cu @@ -6,20 +6,19 @@ */ #include -#include #include #include #include - -#include "xgboost/parameter.h" -#include "xgboost/data.h" -#include "xgboost/logging.h" -#include "xgboost/objective.h" -#include "xgboost/json.h" +#include #include "../common/common.h" #include "../common/math.h" #include "../common/transform.h" +#include "xgboost/data.h" +#include "xgboost/json.h" +#include "xgboost/logging.h" +#include "xgboost/objective.h" +#include "xgboost/parameter.h" namespace xgboost { namespace obj { @@ -183,6 +182,13 @@ class SoftmaxMultiClassObj : public ObjFunction { FromJson(in["softmax_multiclass_param"], ¶m_); } + void InitEstimation(MetaInfo const& info, linalg::Tensor* base_margin) const override { + // Not yet supported. + base_margin->SetDevice(Context::kCpuId); + base_margin->Reshape(1); + base_margin->HostView()(0) = DefaultBaseScore(); + } + private: // output probability bool output_prob_; From eb4d9b888b7472bf83497011d7b9835efdb9bfb4 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 05:20:58 +0800 Subject: [PATCH 101/133] Extend to other objectives. 
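For the simplest case this generalization has a closed form worth
keeping in mind when reviewing the tests: with reg:squarederror and a
zero initial prediction, grad = -y and hess = 1, so the fitted stump
(and therefore base_score) is just the label mean. Quick numpy check:

    import numpy as np

    y = np.asarray([0.5, 1.5, 4.0])
    grad = 0.0 - y           # d/dp 0.5 * (p - y)^2 at p = 0
    hess = np.ones_like(y)   # second derivative is a constant 1
    base_score = -grad.sum() / hess.sum()
    assert np.isclose(base_score, y.mean())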
--- src/common/stats.cc | 20 ++++++++++++++++++- src/common/stats.cu | 11 +++++++++++ src/common/stats.h | 8 ++++++++ src/objective/multiclass_obj.cu | 8 ++++---- src/objective/objective.cc | 32 ++++++++++++++++++++++++++----- src/objective/objective.cu | 0 src/objective/regression_obj.cu | 33 +------------------------------- src/objective/validation.h | 16 ++++++++++++++++ tests/cpp/common/test_stats.cc | 29 ++++++++++++++++++++++++++++ tests/cpp/tree/test_fit_stump.cc | 6 +++--- 10 files changed, 118 insertions(+), 45 deletions(-) create mode 100644 src/objective/objective.cu create mode 100644 src/objective/validation.h diff --git a/src/common/stats.cc b/src/common/stats.cc index bf0c8d621770..8d1b54f0ecd3 100644 --- a/src/common/stats.cc +++ b/src/common/stats.cc @@ -3,7 +3,7 @@ */ #include "stats.h" -#include // std::accumulate +#include // std::accumulate #include "common.h" // OptionalWeights #include "threading_utils.h" // ParallelFor, MemStackAllocator @@ -42,5 +42,23 @@ float Median(Context const* ctx, linalg::Tensor const& t, } return q; } + +void Mean(Context const* ctx, linalg::Vector const& v, linalg::Vector* out) { + v.SetDevice(ctx->gpu_id); + out->SetDevice(ctx->gpu_id); + out->Reshape(1); + + if (ctx->IsCPU()) { + auto h_v = v.HostView(); + float n = v.Size(); + MemStackAllocator tloc(ctx->Threads(), 0.0f); + ParallelFor(v.Size(), ctx->Threads(), + [&](auto i) { tloc[omp_get_thread_num()] += h_v(i) / n; }); + auto ret = std::accumulate(tloc.cbegin(), tloc.cend(), .0f); + out->HostView()(0) = ret; + } else { + cuda_impl::Mean(ctx, v.View(ctx->gpu_id), out->View(ctx->gpu_id)); + } +} } // namespace common } // namespace xgboost diff --git a/src/common/stats.cu b/src/common/stats.cu index 414b43bb601c..2e728a8bc333 100644 --- a/src/common/stats.cu +++ b/src/common/stats.cu @@ -42,6 +42,17 @@ float Median(Context const* ctx, linalg::TensorView t, CHECK_EQ(quantile.Size(), 1); return quantile.HostVector().front(); } + +void Mean(Context const* ctx, linalg::VectorView v, linalg::VectorView out) { + float n = v.Size(); + auto it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { return v(i) / n; }); + std::size_t bytes; + CHECK_EQ(out.Size(), 1); + cub::DeviceReduce::Sum(nullptr, bytes, it, out.Values().data(), v.Size()); + dh::TemporaryArray temp{bytes}; + cub::DeviceReduce::Sum(temp.data().get(), bytes, it, out.Values().data(), v.Size()); +} } // namespace cuda_impl } // namespace common } // namespace xgboost diff --git a/src/common/stats.h b/src/common/stats.h index 33522cbfde60..fdee63593dba 100644 --- a/src/common/stats.h +++ b/src/common/stats.h @@ -96,16 +96,24 @@ float WeightedQuantile(double alpha, Iter begin, Iter end, WeightIter weights) { namespace cuda_impl { float Median(Context const* ctx, linalg::TensorView t, OptionalWeights weights); +void Mean(Context const* ctx, linalg::VectorView v, linalg::VectorView out); + #if !defined(XGBOOST_USE_CUDA) inline float Median(Context const*, linalg::TensorView, OptionalWeights) { common::AssertGPUSupport(); return 0; } +inline void Mean(Context const*, linalg::VectorView, linalg::VectorView) { + common::AssertGPUSupport(); + return 0; +} #endif // !defined(XGBOOST_USE_CUDA) } // namespace cuda_impl float Median(Context const* ctx, linalg::Tensor const& t, HostDeviceVector const& weights); + +void Mean(Context const* ctx, linalg::Vector const& v, linalg::Vector* out); } // namespace common } // namespace xgboost #endif // XGBOOST_COMMON_STATS_H_ diff --git 
a/src/objective/multiclass_obj.cu b/src/objective/multiclass_obj.cu index 5fc216b3976b..0ac8de7239f3 100644 --- a/src/objective/multiclass_obj.cu +++ b/src/objective/multiclass_obj.cu @@ -182,11 +182,11 @@ class SoftmaxMultiClassObj : public ObjFunction { FromJson(in["softmax_multiclass_param"], ¶m_); } - void InitEstimation(MetaInfo const& info, linalg::Tensor* base_margin) const override { + void InitEstimation(MetaInfo const& info, linalg::Tensor* base_score) const override { // Not yet supported. - base_margin->SetDevice(Context::kCpuId); - base_margin->Reshape(1); - base_margin->HostView()(0) = DefaultBaseScore(); + base_score->SetDevice(Context::kCpuId); + base_score->Reshape(1); + base_score->HostView()(0) = DefaultBaseScore(); } private: diff --git a/src/objective/objective.cc b/src/objective/objective.cc index 9512233dc71a..637a877411bf 100644 --- a/src/objective/objective.cc +++ b/src/objective/objective.cc @@ -4,11 +4,15 @@ * \brief Registry of all objective functions. */ #include -#include +#include // Context #include #include +#include "../common/stats.h" // Mean +#include "../tree/fit_stump.h" // FitStump +#include "validation.h" // CheckInitInputs +#include "xgboost/data.h" // MetaInfo #include "xgboost/host_device_vector.h" namespace dmlc { @@ -32,10 +36,28 @@ ObjFunction* ObjFunction::Create(const std::string& name, Context const* ctx) { return pobj; } -void ObjFunction::InitEstimation(MetaInfo const&, linalg::Tensor* base_score) const { - CHECK(base_score); - base_score->Reshape(1); - (*base_score)(0) = DefaultBaseScore(); +void ObjFunction::InitEstimation(MetaInfo const& info, linalg::Tensor* base_score) const { + obj::CheckInitInputs(info); + // Avoid altering any state in child objective. + HostDeviceVector dummy_predt(info.labels.Size(), 0.0f); + HostDeviceVector gpair(info.labels.Size()); + + Json config{Object{}}; + this->SaveConfig(&config); + + std::unique_ptr new_obj{ + ObjFunction::Create(get(config["name"]), ctx_)}; + new_obj->LoadConfig(config); + new_obj->GetGradient(dummy_predt, info, 0, &gpair); + bst_target_t n_targets = this->Targets(info); + linalg::Vector leaf_weight; + tree::FitStump(ctx_, gpair, n_targets, &leaf_weight); + + // workaround, we don't support multi-target due to binary model serialization for + // base margin. 
+ common::Mean(ctx_, leaf_weight, base_score); + auto h_base_margin = base_score->HostView(); + this->PredTransform(base_score->Data()); } } // namespace xgboost diff --git a/src/objective/objective.cu b/src/objective/objective.cu new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index d52d44dad4c0..36ae3fb97470 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -20,9 +20,9 @@ #include "../common/stats.h" #include "../common/threading_utils.h" #include "../common/transform.h" -#include "../tree/fit_stump.h" #include "./regression_loss.h" #include "adaptive.h" +#include "validation.h" // CheckInitInputs #include "xgboost/base.h" #include "xgboost/context.h" #include "xgboost/data.h" @@ -40,14 +40,6 @@ namespace xgboost { namespace obj { namespace { -void CheckInitInputs(MetaInfo const& info) { - CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels."; - if (!info.weights_.Empty()) { - CHECK_EQ(info.weights_.Size(), info.num_row_) - << "Number of weights should be equal to number of data points."; - } -} - void CheckRegInputs(MetaInfo const& info, HostDeviceVector const& preds) { CheckInitInputs(info); CHECK_EQ(info.labels.Size(), preds.Size()) << "Invalid shape of labels."; @@ -169,29 +161,6 @@ class RegLossObj : public ObjFunction { return Loss::ProbToMargin(base_score); } - void InitEstimation(MetaInfo const& info, linalg::Tensor* base_margin) const override { - CheckInitInputs(info); - HostDeviceVector dummy_predt(info.labels.Size(), 0.0f); - HostDeviceVector gpair(info.labels.Size()); - - std::unique_ptr new_obj{ObjFunction::Create(Loss::Name(), ctx_)}; - Json config{Object{}}; - this->SaveConfig(&config); - new_obj->LoadConfig(config); - new_obj->GetGradient(dummy_predt, info, 0, &gpair); - bst_target_t n_targets = this->Targets(info); - tree::FitStump(ctx_, gpair, n_targets, base_margin); - auto h_base_margin = base_margin->HostView(); - // workaround, we don't support multi-target due to binary model serialization for - // base margin. 
- float v = 0; - for (bst_target_t t = 0; t < n_targets; ++t) { - v += Loss::PredTransform(h_base_margin(t)) / static_cast(n_targets); - } - base_margin->Reshape(1); - base_margin->HostView()(0) = v; - } - void SaveConfig(Json* p_out) const override { auto& out = *p_out; out["name"] = String(Loss::Name()); diff --git a/src/objective/validation.h b/src/objective/validation.h new file mode 100644 index 000000000000..d2cce79eccc9 --- /dev/null +++ b/src/objective/validation.h @@ -0,0 +1,16 @@ +/** + * Copyright 2022 by XGBoost Contributors + */ +#include //MetaInfo + +namespace xgboost { +namespace obj { +inline void CheckInitInputs(MetaInfo const& info) { + CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels."; + if (!info.weights_.Empty()) { + CHECK_EQ(info.weights_.Size(), info.num_row_) + << "Number of weights should be equal to number of data points."; + } +} +} // namespace obj +} // namespace xgboost diff --git a/tests/cpp/common/test_stats.cc b/tests/cpp/common/test_stats.cc index 55e999ad091e..40a784b639af 100644 --- a/tests/cpp/common/test_stats.cc +++ b/tests/cpp/common/test_stats.cc @@ -3,6 +3,7 @@ */ #include #include +#include // Tensor,Vector #include "../../../src/common/stats.h" #include "../../../src/common/transform_iterator.h" // common::MakeIndexTransformIter @@ -70,5 +71,33 @@ TEST(Stats, Median) { ASSERT_EQ(m, .5f); #endif // defined(XGBOOST_USE_CUDA) } +namespace { +void TestMean(Context const* ctx) { + std::size_t n{128}; + linalg::Vector data({n}, ctx->gpu_id); + auto h_v = data.HostView().Values(); + std::iota(h_v.begin(), h_v.end(), .0f); + + auto nf = static_cast(n); + float mean = nf * (nf - 1) / 2 / n; + + linalg::Vector res{{1}, ctx->gpu_id}; + Mean(ctx, data, &res); + auto h_res = res.HostView(); + ASSERT_EQ(h_res.Size(), 1); + ASSERT_EQ(mean, h_res(0)); +} +} // anonymous namespace + +TEST(Stats, Mean) { + Context ctx; + TestMean(&ctx); +} + +TEST(Stats, GPUMean) { + Context ctx; + ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}}); + TestMean(&ctx); +} } // namespace common } // namespace xgboost diff --git a/tests/cpp/tree/test_fit_stump.cc b/tests/cpp/tree/test_fit_stump.cc index 2910faa2f7c8..aaa8bd4e2612 100644 --- a/tests/cpp/tree/test_fit_stump.cc +++ b/tests/cpp/tree/test_fit_stump.cc @@ -1,16 +1,15 @@ /** * Copyright 2022 by XGBoost Contributors */ -#include "../../src/tree/fit_stump.h" - #include #include #include "../../src/common/linalg_op.h" +#include "../../src/tree/fit_stump.h" namespace xgboost { namespace tree { - +namespace { void TestFitStump(Context const *ctx) { std::size_t constexpr kRows = 16, kTargets = 2; HostDeviceVector gpair; @@ -31,6 +30,7 @@ void TestFitStump(Context const *ctx) { ASSERT_EQ(static_cast(-sum_grad / n), *it); } } +} // anonymous namespace TEST(InitEstimation, FitStump) { Context ctx; From f9310a9fd0a7ddb4fce1fef1ada9a660920041fc Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 05:33:59 +0800 Subject: [PATCH 102/133] cpu build. 
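Note on the Mean kernels being stubbed here: both implementations divide
each element by n before summing rather than summing first, so the
partial sums stay on the order of the mean itself, presumably to limit
float overflow and precision loss on large inputs. A rough Python
rendering of the threaded CPU path (the chunking here is illustrative,
not the ParallelFor API):

    import numpy as np

    def mean_like_cpu_impl(v, n_threads=4):
        n = float(len(v))
        parts = np.array_split(np.asarray(v, dtype=np.float32), n_threads)
        tloc = [np.sum(p / n) for p in parts]  # one accumulator per thread
        return float(np.sum(tloc))             # std::accumulate over tloc

    v = np.arange(128, dtype=np.float32)
    assert np.isclose(mean_like_cpu_impl(v), v.mean())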
--- src/common/stats.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/common/stats.h b/src/common/stats.h index fdee63593dba..3572d2f0a02b 100644 --- a/src/common/stats.h +++ b/src/common/stats.h @@ -103,9 +103,8 @@ inline float Median(Context const*, linalg::TensorView, Optional common::AssertGPUSupport(); return 0; } -inline void Mean(Context const*, linalg::VectorView, linalg::VectorView) { +inline void Mean(Context const*, linalg::VectorView, linalg::VectorView) { common::AssertGPUSupport(); - return 0; } #endif // !defined(XGBOOST_USE_CUDA) } // namespace cuda_impl From b2c65d9c06d6697fdf31d0083b314da0b9c5b052 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 06:01:17 +0800 Subject: [PATCH 103/133] cpu test. --- src/objective/validation.h | 2 +- src/tree/fit_stump.cc | 2 +- tests/cpp/common/test_stats.cc | 2 ++ tests/cpp/test_learner.cc | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/objective/validation.h b/src/objective/validation.h index d2cce79eccc9..2dc025700272 100644 --- a/src/objective/validation.h +++ b/src/objective/validation.h @@ -7,7 +7,7 @@ namespace xgboost { namespace obj { inline void CheckInitInputs(MetaInfo const& info) { CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels."; - if (!info.weights_.Empty()) { + if (!info.weights_.Empty() && info.group_ptr_.empty()) { CHECK_EQ(info.weights_.Size(), info.num_row_) << "Number of weights should be equal to number of data points."; } diff --git a/src/tree/fit_stump.cc b/src/tree/fit_stump.cc index 425233603ce3..fb442009f7fe 100644 --- a/src/tree/fit_stump.cc +++ b/src/tree/fit_stump.cc @@ -28,7 +28,7 @@ void FitStump(Context const* ctx, linalg::TensorView gpai auto h_sum = sum.HostView(); // first dim for gpair is samples, second dim is target. // Reduce by column - common::ParallelFor(gpair.Shape(1), 1, [&](auto j) { + common::ParallelFor(gpair.Shape(1), ctx->Threads(), [&](auto j) { for (std::size_t i = 0; i < gpair.Shape(0); ++i) { h_sum(j) += GradientPairPrecise{gpair(i, j)}; } diff --git a/tests/cpp/common/test_stats.cc b/tests/cpp/common/test_stats.cc index 40a784b639af..03e50a9846e9 100644 --- a/tests/cpp/common/test_stats.cc +++ b/tests/cpp/common/test_stats.cc @@ -94,10 +94,12 @@ TEST(Stats, Mean) { TestMean(&ctx); } +#if defined(XGBOOST_USE_CUDA) TEST(Stats, GPUMean) { Context ctx; ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}}); TestMean(&ctx); } +#endif // defined(XGBOOST_USE_CUDA) } // namespace common } // namespace xgboost diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index fc3fa17ac7a2..8a847500accd 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -66,7 +66,7 @@ TEST(Learner, CheckGroup) { std::shared_ptr p_mat{ RandomDataGenerator{kNumRows, kNumCols, 0.0f}.GenerateDMatrix()}; - std::vector weight(kNumGroups); + std::vector weight(kNumGroups, 1); std::vector group(kNumGroups); group[0] = 2; group[1] = 3; From 1be9b5d3b507fe02b6261a5d31367de49543c377 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 20:50:12 +0800 Subject: [PATCH 104/133] Fix. 
--- src/common/linalg_op.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/common/linalg_op.h b/src/common/linalg_op.h index 9ae3533110a7..f55927402d31 100644 --- a/src/common/linalg_op.h +++ b/src/common/linalg_op.h @@ -63,7 +63,7 @@ void ElementWiseKernel(Context const* ctx, linalg::TensorView t, Fn&& fn) #endif // !defined(XGBOOST_USE_CUDA) template -auto cbegin(TensorView v) { // NOLINT +auto cbegin(TensorView const& v) { // NOLINT auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t const& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); }); @@ -71,19 +71,19 @@ auto cbegin(TensorView v) { // NOLINT } template -auto cend(TensorView v) { // NOLINT +auto cend(TensorView const& v) { // NOLINT return cbegin(v) + v.Size(); } template -auto begin(TensorView v) { // NOLINT +auto begin(TensorView& v) { // NOLINT auto it = common::MakeIndexTransformIter( [&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); }); return it; } template -auto end(TensorView v) { // NOLINT +auto end(TensorView& v) { // NOLINT return begin(v) + v.Size(); } } // namespace linalg From d89c6093f5b8aa0cc8f5461d233f5d766b0bf9d6 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 20:51:36 +0800 Subject: [PATCH 105/133] R build. --- R-package/src/Makevars.in | 2 ++ R-package/src/Makevars.win | 2 ++ 2 files changed, 4 insertions(+) diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index 5c6c624e0ec0..77c9967ea13c 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -56,6 +56,7 @@ OBJECTS= \ $(PKGROOT)/src/predictor/cpu_predictor.o \ $(PKGROOT)/src/tree/constraints.o \ $(PKGROOT)/src/tree/param.o \ + $(PKGROOT)/src/tree/fit_stump.o \ $(PKGROOT)/src/tree/tree_model.o \ $(PKGROOT)/src/tree/tree_updater.o \ $(PKGROOT)/src/tree/updater_approx.o \ @@ -86,6 +87,7 @@ OBJECTS= \ $(PKGROOT)/src/common/pseudo_huber.o \ $(PKGROOT)/src/common/quantile.o \ $(PKGROOT)/src/common/random.o \ + $(PKGROOT)/src/common/stats.o \ $(PKGROOT)/src/common/survival_util.o \ $(PKGROOT)/src/common/threading_utils.o \ $(PKGROOT)/src/common/timer.o \ diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index b12fca7c8830..4c2ba9597f8f 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -56,6 +56,7 @@ OBJECTS= \ $(PKGROOT)/src/predictor/cpu_predictor.o \ $(PKGROOT)/src/tree/constraints.o \ $(PKGROOT)/src/tree/param.o \ + $(PKGROOT)/src/tree/fit_stump.o \ $(PKGROOT)/src/tree/tree_model.o \ $(PKGROOT)/src/tree/tree_updater.o \ $(PKGROOT)/src/tree/updater_approx.o \ @@ -86,6 +87,7 @@ OBJECTS= \ $(PKGROOT)/src/common/pseudo_huber.o \ $(PKGROOT)/src/common/quantile.o \ $(PKGROOT)/src/common/random.o \ + $(PKGROOT)/src/common/stats.o \ $(PKGROOT)/src/common/survival_util.o \ $(PKGROOT)/src/common/threading_utils.o \ $(PKGROOT)/src/common/timer.o \ From d0f40818687cac30d96dfbb1b86b730ba7662190 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 22:58:35 +0800 Subject: [PATCH 106/133] move back to reg. 
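The InitEstimation override moving back here needs gradients from a
pristine objective, so rather than calling GetGradient on itself it
round-trips its own configuration: SaveConfig, then ObjFunction::Create
by name, then LoadConfig. A Python analogy of that clone-from-config
pattern (the registry and class here are hypothetical):

    REGISTRY = {}

    def register(cls):
        REGISTRY[cls.name] = cls
        return cls

    @register
    class SquaredError:
        name = "reg:squarederror"
        def save_config(self):
            return {"name": self.name}
        def load_config(self, cfg):
            assert cfg["name"] == self.name

    def fresh_clone(obj):
        cfg = obj.save_config()          # SaveConfig
        clone = REGISTRY[cfg["name"]]()  # ObjFunction::Create(name, ctx)
        clone.load_config(cfg)           # LoadConfig
        return clone                     # safe to call GetGradient on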
--- src/objective/regression_obj.cu | 39 ++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 36ae3fb97470..fa970b89f0dd 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -20,6 +20,7 @@ #include "../common/stats.h" #include "../common/threading_utils.h" #include "../common/transform.h" +#include "../tree/fit_stump.h" // FitStump #include "./regression_loss.h" #include "adaptive.h" #include "validation.h" // CheckInitInputs @@ -44,6 +45,32 @@ void CheckRegInputs(MetaInfo const& info, HostDeviceVector const& pre CheckInitInputs(info); CHECK_EQ(info.labels.Size(), preds.Size()) << "Invalid shape of labels."; } + +class RegInitEstimation : public ObjFunction { + void InitEstimation(MetaInfo const& info, linalg::Tensor* base_score) const override { + obj::CheckInitInputs(info); + // Avoid altering any state in child objective. + HostDeviceVector dummy_predt(info.labels.Size(), 0.0f); + HostDeviceVector gpair(info.labels.Size()); + + Json config{Object{}}; + this->SaveConfig(&config); + + std::unique_ptr new_obj{ + ObjFunction::Create(get(config["name"]), this->ctx_)}; + new_obj->LoadConfig(config); + new_obj->GetGradient(dummy_predt, info, 0, &gpair); + bst_target_t n_targets = this->Targets(info); + linalg::Vector leaf_weight; + tree::FitStump(this->ctx_, gpair, n_targets, &leaf_weight); + + // workaround, we don't support multi-target due to binary model serialization for + // base margin. + common::Mean(this->ctx_, leaf_weight, base_score); + auto h_base_margin = base_score->HostView(); + this->PredTransform(base_score->Data()); + } +}; } // anonymous namespace #if defined(XGBOOST_USE_CUDA) @@ -60,7 +87,7 @@ struct RegLossParam : public XGBoostParameter { }; template -class RegLossObj : public ObjFunction { +class RegLossObj : public RegInitEstimation { protected: HostDeviceVector additional_input_; @@ -207,7 +234,7 @@ XGBOOST_REGISTER_OBJECTIVE(LinearRegression, "reg:linear") return new RegLossObj(); }); // End deprecated -class PseudoHuberRegression : public ObjFunction { +class PseudoHuberRegression : public RegInitEstimation { PesudoHuberParam param_; public: @@ -282,7 +309,7 @@ struct PoissonRegressionParam : public XGBoostParameter }; // poisson regression for count -class PoissonRegression : public ObjFunction { +class PoissonRegression : public RegInitEstimation { public: // declare functions void Configure(const std::vector >& args) override { @@ -377,7 +404,7 @@ XGBOOST_REGISTER_OBJECTIVE(PoissonRegression, "count:poisson") // cox regression for survival data (negative values mean they are censored) -class CoxRegression : public ObjFunction { +class CoxRegression : public RegInitEstimation { public: void Configure(Args const&) override {} ObjInfo Task() const override { return ObjInfo::kRegression; } @@ -474,7 +501,7 @@ XGBOOST_REGISTER_OBJECTIVE(CoxRegression, "survival:cox") .set_body([]() { return new CoxRegression(); }); // gamma regression -class GammaRegression : public ObjFunction { +class GammaRegression : public RegInitEstimation { public: void Configure(Args const&) override {} ObjInfo Task() const override { return ObjInfo::kRegression; } @@ -565,7 +592,7 @@ struct TweedieRegressionParam : public XGBoostParameter }; // tweedie regression -class TweedieRegression : public ObjFunction { +class TweedieRegression : public RegInitEstimation { public: // declare functions void Configure(const std::vector >& args) override { From 
3cb7500aa0f90c0360e381c178c2a12a3c5553ca Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 15 Dec 2022 23:00:49 +0800 Subject: [PATCH 107/133] Revert changes. --- src/objective/multiclass_obj.cu | 20 +++++++------------- src/objective/objective.cc | 32 +++++--------------------------- src/objective/objective.cu | 0 src/objective/regression_obj.cu | 9 ++++++++- src/objective/validation.h | 16 ---------------- 5 files changed, 20 insertions(+), 57 deletions(-) delete mode 100644 src/objective/objective.cu delete mode 100644 src/objective/validation.h diff --git a/src/objective/multiclass_obj.cu b/src/objective/multiclass_obj.cu index 0ac8de7239f3..312992ec59f2 100644 --- a/src/objective/multiclass_obj.cu +++ b/src/objective/multiclass_obj.cu @@ -6,19 +6,20 @@ */ #include +#include #include #include #include -#include -#include "../common/common.h" -#include "../common/math.h" -#include "../common/transform.h" +#include "xgboost/parameter.h" #include "xgboost/data.h" -#include "xgboost/json.h" #include "xgboost/logging.h" #include "xgboost/objective.h" -#include "xgboost/parameter.h" +#include "xgboost/json.h" + +#include "../common/common.h" +#include "../common/math.h" +#include "../common/transform.h" namespace xgboost { namespace obj { @@ -182,13 +183,6 @@ class SoftmaxMultiClassObj : public ObjFunction { FromJson(in["softmax_multiclass_param"], ¶m_); } - void InitEstimation(MetaInfo const& info, linalg::Tensor* base_score) const override { - // Not yet supported. - base_score->SetDevice(Context::kCpuId); - base_score->Reshape(1); - base_score->HostView()(0) = DefaultBaseScore(); - } - private: // output probability bool output_prob_; diff --git a/src/objective/objective.cc b/src/objective/objective.cc index 637a877411bf..9512233dc71a 100644 --- a/src/objective/objective.cc +++ b/src/objective/objective.cc @@ -4,15 +4,11 @@ * \brief Registry of all objective functions. */ #include -#include // Context +#include #include #include -#include "../common/stats.h" // Mean -#include "../tree/fit_stump.h" // FitStump -#include "validation.h" // CheckInitInputs -#include "xgboost/data.h" // MetaInfo #include "xgboost/host_device_vector.h" namespace dmlc { @@ -36,28 +32,10 @@ ObjFunction* ObjFunction::Create(const std::string& name, Context const* ctx) { return pobj; } -void ObjFunction::InitEstimation(MetaInfo const& info, linalg::Tensor* base_score) const { - obj::CheckInitInputs(info); - // Avoid altering any state in child objective. - HostDeviceVector dummy_predt(info.labels.Size(), 0.0f); - HostDeviceVector gpair(info.labels.Size()); - - Json config{Object{}}; - this->SaveConfig(&config); - - std::unique_ptr new_obj{ - ObjFunction::Create(get(config["name"]), ctx_)}; - new_obj->LoadConfig(config); - new_obj->GetGradient(dummy_predt, info, 0, &gpair); - bst_target_t n_targets = this->Targets(info); - linalg::Vector leaf_weight; - tree::FitStump(ctx_, gpair, n_targets, &leaf_weight); - - // workaround, we don't support multi-target due to binary model serialization for - // base margin. 
- common::Mean(ctx_, leaf_weight, base_score); - auto h_base_margin = base_score->HostView(); - this->PredTransform(base_score->Data()); +void ObjFunction::InitEstimation(MetaInfo const&, linalg::Tensor* base_score) const { + CHECK(base_score); + base_score->Reshape(1); + (*base_score)(0) = DefaultBaseScore(); } } // namespace xgboost diff --git a/src/objective/objective.cu b/src/objective/objective.cu deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index fa970b89f0dd..cfc7cd348cac 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -23,7 +23,6 @@ #include "../tree/fit_stump.h" // FitStump #include "./regression_loss.h" #include "adaptive.h" -#include "validation.h" // CheckInitInputs #include "xgboost/base.h" #include "xgboost/context.h" #include "xgboost/data.h" @@ -41,6 +40,14 @@ namespace xgboost { namespace obj { namespace { +void CheckInitInputs(MetaInfo const& info) { + CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels."; + if (!info.weights_.Empty() && info.group_ptr_.empty()) { + CHECK_EQ(info.weights_.Size(), info.num_row_) + << "Number of weights should be equal to number of data points."; + } +} + void CheckRegInputs(MetaInfo const& info, HostDeviceVector const& preds) { CheckInitInputs(info); CHECK_EQ(info.labels.Size(), preds.Size()) << "Invalid shape of labels."; diff --git a/src/objective/validation.h b/src/objective/validation.h deleted file mode 100644 index 2dc025700272..000000000000 --- a/src/objective/validation.h +++ /dev/null @@ -1,16 +0,0 @@ -/** - * Copyright 2022 by XGBoost Contributors - */ -#include //MetaInfo - -namespace xgboost { -namespace obj { -inline void CheckInitInputs(MetaInfo const& info) { - CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels."; - if (!info.weights_.Empty() && info.group_ptr_.empty()) { - CHECK_EQ(info.weights_.Size(), info.num_row_) - << "Number of weights should be equal to number of data points."; - } -} -} // namespace obj -} // namespace xgboost From 3409ea27eee5db15342dff2e7170133175346b93 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 16 Dec 2022 00:05:25 +0800 Subject: [PATCH 108/133] Timeout. 
--- demo/guide-python/sklearn_parallel.py | 13 +++++++++---- tests/ci_build/lint_python.py | 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/demo/guide-python/sklearn_parallel.py b/demo/guide-python/sklearn_parallel.py index b0fc49d81e5e..2ebefffc767f 100644 --- a/demo/guide-python/sklearn_parallel.py +++ b/demo/guide-python/sklearn_parallel.py @@ -12,10 +12,15 @@ if __name__ == "__main__": print("Parallel Parameter optimization") X, y = fetch_california_housing(return_X_y=True) - xgb_model = xgb.XGBRegressor(n_jobs=multiprocessing.cpu_count() // 2) - clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6], - 'n_estimators': [50, 100, 200]}, verbose=1, - n_jobs=2) + xgb_model = xgb.XGBRegressor( + n_jobs=multiprocessing.cpu_count() // 2, tree_method="hist" + ) + clf = GridSearchCV( + xgb_model, + {"max_depth": [2, 4, 6], "n_estimators": [50, 100, 200]}, + verbose=1, + n_jobs=2, + ) clf.fit(X, y) print(clf.best_score_) print(clf.best_params_) diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index 88412c511552..894c48bdfba7 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ -156,6 +156,7 @@ def main(args: argparse.Namespace) -> None: "demo/guide-python/cat_in_the_dat.py", "demo/guide-python/categorical.py", "demo/guide-python/feature_weights.py", + "demo/guide-python/sklearn_parallel.py", "demo/guide-python/spark_estimator_examples.py", # CI "tests/ci_build/lint_python.py", From 6efabc2fa9230cfc7ec3bc89bd43257e04ad2cbd Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 16 Dec 2022 00:06:54 +0800 Subject: [PATCH 109/133] cleanup. --- src/objective/regression_obj.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index cfc7cd348cac..04eb040d0678 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -55,7 +55,7 @@ void CheckRegInputs(MetaInfo const& info, HostDeviceVector const& pre class RegInitEstimation : public ObjFunction { void InitEstimation(MetaInfo const& info, linalg::Tensor* base_score) const override { - obj::CheckInitInputs(info); + CheckInitInputs(info); // Avoid altering any state in child objective. HostDeviceVector dummy_predt(info.labels.Size(), 0.0f); HostDeviceVector gpair(info.labels.Size()); From cac6ce12be6f1e9231ff622d4f788580de3ca2df Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 16 Dec 2022 00:09:11 +0800 Subject: [PATCH 110/133] cleanup. --- src/objective/regression_obj.cu | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 04eb040d0678..ed1c98b3628d 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -57,8 +57,8 @@ class RegInitEstimation : public ObjFunction { void InitEstimation(MetaInfo const& info, linalg::Tensor* base_score) const override { CheckInitInputs(info); // Avoid altering any state in child objective. - HostDeviceVector dummy_predt(info.labels.Size(), 0.0f); - HostDeviceVector gpair(info.labels.Size()); + HostDeviceVector dummy_predt(info.labels.Size(), 0.0f, this->ctx_->gpu_id); + HostDeviceVector gpair(info.labels.Size(), GradientPair{}, this->ctx_->gpu_id); Json config{Object{}}; this->SaveConfig(&config); @@ -74,7 +74,6 @@ class RegInitEstimation : public ObjFunction { // workaround, we don't support multi-target due to binary model serialization for // base margin. 
common::Mean(this->ctx_, leaf_weight, base_score); - auto h_base_margin = base_score->HostView(); this->PredTransform(base_score->Data()); } }; From 028612556758da1ed3b04cb012b3efd196080897 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 16 Dec 2022 00:31:20 +0800 Subject: [PATCH 111/133] Add dask tests. --- python-package/xgboost/testing/dask.py | 24 +++++++++++++++++++ .../test_gpu_with_dask/test_gpu_with_dask.py | 4 ++++ .../test_with_dask/test_with_dask.py | 5 ++++ 3 files changed, 33 insertions(+) create mode 100644 python-package/xgboost/testing/dask.py diff --git a/python-package/xgboost/testing/dask.py b/python-package/xgboost/testing/dask.py new file mode 100644 index 000000000000..e30c0894e419 --- /dev/null +++ b/python-package/xgboost/testing/dask.py @@ -0,0 +1,24 @@ +import numpy as np +from dask import array as da +from distributed import Client + +import xgboost as xgb + + +def check_init_estimation(tree_method: str, client: Client) -> None: + from sklearn.datasets import make_regression + + X, y = make_regression(n_samples=4096, n_features=32) + reg = xgb.XGBRegressor(n_estimators=1, max_depth=1, tree_method=tree_method) + reg.fit(X, y) + base_score = reg.get_params()["base_score"] + + dX = da.from_array(X).rechunk(chunks=(32, None)) + dy = da.from_array(y).rechunk(chunks=(32,)) + dreg = xgb.dask.DaskXGBRegressor( + n_estimators=1, max_depth=1, tree_method=tree_method + ) + dreg.client = client + dreg.fit(dX, dy) + dbase_score = dreg.get_params()["base_score"] + np.testing.assert_allclose(base_score, dbase_score) diff --git a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py index d295a3fc3308..18ddcc1ed287 100644 --- a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py +++ b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py @@ -42,6 +42,7 @@ from dask import array as da from dask.distributed import Client from dask_cuda import LocalCUDACluster + from xgboost.testing.dask import check_init_estimation from xgboost import dask as dxgb except ImportError: @@ -220,6 +221,9 @@ def test_boost_from_prediction(self, local_cuda_client: Client) -> None: y = dd.from_array(y_, chunksize=50).map_partitions(cudf.from_pandas) run_boost_from_prediction_multi_class(X, y, "gpu_hist", local_cuda_client) + def test_init_estimation(self, local_cuda_client: Client) -> None: + check_init_estimation("gpu_hist", local_cuda_client) + @pytest.mark.skipif(**tm.no_dask_cudf()) def test_dask_dataframe(self, local_cuda_client: Client) -> None: run_with_dask_dataframe(dxgb.DaskDMatrix, local_cuda_client) diff --git a/tests/test_distributed/test_with_dask/test_with_dask.py b/tests/test_distributed/test_with_dask/test_with_dask.py index c454eea0acd0..fc5823e76577 100644 --- a/tests/test_distributed/test_with_dask/test_with_dask.py +++ b/tests/test_distributed/test_with_dask/test_with_dask.py @@ -40,6 +40,7 @@ from distributed import Client, LocalCluster from toolz import sliding_window # dependency of dask from xgboost.dask import DaskDMatrix +from xgboost.testing.dask import check_init_estimation dask.config.set({"distributed.scheduler.allowed-failures": False}) @@ -2018,6 +2019,10 @@ def _() -> xgb.dask.DaskXGBClassifier: assert f.result().get_booster().num_boosted_rounds() == i + 1 +def test_init_estimation(client: Client) -> None: + check_init_estimation("hist", client) + + class TestDaskCallbacks: @pytest.mark.skipif(**tm.no_sklearn()) def test_early_stopping(self, client: "Client") -> None: 
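A minimal driver for the helper added above, for reproducing the check
outside the test suite (assumes a local dask cluster and a working
xgboost install):

    from distributed import Client, LocalCluster

    from xgboost.testing.dask import check_init_estimation

    if __name__ == "__main__":
        with LocalCluster(n_workers=2, threads_per_worker=2) as cluster:
            with Client(cluster) as client:
                # Distributed base_score should match single-node training.
                check_init_estimation("hist", client)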
From d75028e85370606a5e267b158ff7e6d4957b7bd0 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 16 Dec 2022 00:36:39 +0800 Subject: [PATCH 112/133] add dask clf test. --- python-package/xgboost/testing/dask.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/python-package/xgboost/testing/dask.py b/python-package/xgboost/testing/dask.py index e30c0894e419..634359378c29 100644 --- a/python-package/xgboost/testing/dask.py +++ b/python-package/xgboost/testing/dask.py @@ -5,7 +5,26 @@ import xgboost as xgb -def check_init_estimation(tree_method: str, client: Client) -> None: +def check_init_estimation_clf(tree_method: str, client: Client) -> None: + from sklearn.datasets import make_classification + + X, y = make_classification(n_samples=4096, n_features=32) + clf = xgb.XGBRegressor(n_estimators=1, max_depth=1, tree_method=tree_method) + clf.fit(X, y) + base_score = clf.get_params()["base_score"] + + dX = da.from_array(X).rechunk(chunks=(32, None)) + dy = da.from_array(y).rechunk(chunks=(32,)) + dclf = xgb.dask.DaskXGBRegressor( + n_estimators=1, max_depth=1, tree_method=tree_method + ) + dclf.client = client + dclf.fit(dX, dy) + dbase_score = dclf.get_params()["base_score"] + np.testing.assert_allclose(base_score, dbase_score) + + +def check_init_estimation_reg(tree_method: str, client: Client) -> None: from sklearn.datasets import make_regression X, y = make_regression(n_samples=4096, n_features=32) @@ -22,3 +41,8 @@ def check_init_estimation(tree_method: str, client: Client) -> None: dreg.fit(dX, dy) dbase_score = dreg.get_params()["base_score"] np.testing.assert_allclose(base_score, dbase_score) + + +def check_init_estimation(tree_method: str, client: Client) -> None: + check_init_estimation_reg(tree_method, client) + check_init_estimation_clf(tree_method, client) From 007b494463c64bb8829d49d26a04e3cd1fd679a7 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 16 Dec 2022 00:38:54 +0800 Subject: [PATCH 113/133] rng. 
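Why pin the seed (and compare with assert_allclose rather than exact
equality): the distributed base_score comes from summing float gradients
in per-worker partials, and float summation is not associative, so the
last bits can differ from the single-node result. A quick demonstration:

    import numpy as np

    v = np.random.default_rng(1994).normal(size=100_000).astype(np.float32)
    serial = v.sum()
    chunked = v.reshape(100, -1).sum(axis=1).sum()  # per-worker partials
    print(abs(serial - chunked))   # small but usually nonzero
    assert abs(serial - chunked) < 1.0  # close, not necessarily identical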
--- python-package/xgboost/testing/dask.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python-package/xgboost/testing/dask.py b/python-package/xgboost/testing/dask.py index 634359378c29..3527400ded61 100644 --- a/python-package/xgboost/testing/dask.py +++ b/python-package/xgboost/testing/dask.py @@ -8,26 +8,27 @@ def check_init_estimation_clf(tree_method: str, client: Client) -> None: from sklearn.datasets import make_classification - X, y = make_classification(n_samples=4096, n_features=32) - clf = xgb.XGBRegressor(n_estimators=1, max_depth=1, tree_method=tree_method) + X, y = make_classification(n_samples=4096 * 2, n_features=32, random_state=1994) + clf = xgb.XGBClassifier(n_estimators=1, max_depth=1, tree_method=tree_method) clf.fit(X, y) base_score = clf.get_params()["base_score"] dX = da.from_array(X).rechunk(chunks=(32, None)) dy = da.from_array(y).rechunk(chunks=(32,)) - dclf = xgb.dask.DaskXGBRegressor( + dclf = xgb.dask.DaskXGBClassifier( n_estimators=1, max_depth=1, tree_method=tree_method ) dclf.client = client dclf.fit(dX, dy) dbase_score = dclf.get_params()["base_score"] np.testing.assert_allclose(base_score, dbase_score) + print(base_score, dbase_score) def check_init_estimation_reg(tree_method: str, client: Client) -> None: from sklearn.datasets import make_regression - X, y = make_regression(n_samples=4096, n_features=32) + X, y = make_regression(n_samples=4096 * 2, n_features=32, random_state=1994) reg = xgb.XGBRegressor(n_estimators=1, max_depth=1, tree_method=tree_method) reg.fit(X, y) base_score = reg.get_params()["base_score"] @@ -41,6 +42,7 @@ def check_init_estimation_reg(tree_method: str, client: Client) -> None: dreg.fit(dX, dy) dbase_score = dreg.get_params()["base_score"] np.testing.assert_allclose(base_score, dbase_score) + print(base_score, dbase_score) def check_init_estimation(tree_method: str, client: Client) -> None: From 7db21c2f9494f5e8f4f0a94283c9b563dbaebeb5 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Fri, 16 Dec 2022 03:43:10 +0800 Subject: [PATCH 114/133] lint. 
--- python-package/xgboost/testing/dask.py | 12 ++++++++---- .../test_with_dask/test_with_dask.py | 4 +++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/python-package/xgboost/testing/dask.py b/python-package/xgboost/testing/dask.py index 3527400ded61..d925630909b5 100644 --- a/python-package/xgboost/testing/dask.py +++ b/python-package/xgboost/testing/dask.py @@ -1,3 +1,4 @@ +"""Tests for dask shared by different test modules.""" import numpy as np from dask import array as da from distributed import Client @@ -6,6 +7,7 @@ def check_init_estimation_clf(tree_method: str, client: Client) -> None: + """Test init estimation for classsifier.""" from sklearn.datasets import make_classification X, y = make_classification(n_samples=4096 * 2, n_features=32, random_state=1994) @@ -13,19 +15,20 @@ def check_init_estimation_clf(tree_method: str, client: Client) -> None: clf.fit(X, y) base_score = clf.get_params()["base_score"] - dX = da.from_array(X).rechunk(chunks=(32, None)) + dx = da.from_array(X).rechunk(chunks=(32, None)) dy = da.from_array(y).rechunk(chunks=(32,)) dclf = xgb.dask.DaskXGBClassifier( n_estimators=1, max_depth=1, tree_method=tree_method ) dclf.client = client - dclf.fit(dX, dy) + dclf.fit(dx, dy) dbase_score = dclf.get_params()["base_score"] np.testing.assert_allclose(base_score, dbase_score) print(base_score, dbase_score) def check_init_estimation_reg(tree_method: str, client: Client) -> None: + """Test init estimation for regressor.""" from sklearn.datasets import make_regression X, y = make_regression(n_samples=4096 * 2, n_features=32, random_state=1994) @@ -33,18 +36,19 @@ def check_init_estimation_reg(tree_method: str, client: Client) -> None: reg.fit(X, y) base_score = reg.get_params()["base_score"] - dX = da.from_array(X).rechunk(chunks=(32, None)) + dx = da.from_array(X).rechunk(chunks=(32, None)) dy = da.from_array(y).rechunk(chunks=(32,)) dreg = xgb.dask.DaskXGBRegressor( n_estimators=1, max_depth=1, tree_method=tree_method ) dreg.client = client - dreg.fit(dX, dy) + dreg.fit(dx, dy) dbase_score = dreg.get_params()["base_score"] np.testing.assert_allclose(base_score, dbase_score) print(base_score, dbase_score) def check_init_estimation(tree_method: str, client: Client) -> None: + """Test init estimation.""" check_init_estimation_reg(tree_method, client) check_init_estimation_clf(tree_method, client) diff --git a/tests/test_distributed/test_with_dask/test_with_dask.py b/tests/test_distributed/test_with_dask/test_with_dask.py index fc5823e76577..b5f8e87267da 100644 --- a/tests/test_distributed/test_with_dask/test_with_dask.py +++ b/tests/test_distributed/test_with_dask/test_with_dask.py @@ -53,8 +53,10 @@ @pytest.fixture(scope="module") def cluster() -> Generator: + n_threads = os.cpu_count() + assert n_threads is not None with LocalCluster( - n_workers=2, threads_per_worker=2, dashboard_address=":0" + n_workers=2, threads_per_worker=n_threads // 2, dashboard_address=":0" ) as dask_cluster: yield dask_cluster From c695509e88283471a80e38f013b73fae86e8e114 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Fri, 16 Dec 2022 05:20:09 +0800 Subject: [PATCH 115/133] subobject. 
--- src/objective/regression_obj.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index ed1c98b3628d..2ef06551e235 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -52,6 +52,7 @@ void CheckRegInputs(MetaInfo const& info, HostDeviceVector const& pre CheckInitInputs(info); CHECK_EQ(info.labels.Size(), preds.Size()) << "Invalid shape of labels."; } +} // anonymous namespace class RegInitEstimation : public ObjFunction { void InitEstimation(MetaInfo const& info, linalg::Tensor* base_score) const override { @@ -77,7 +78,6 @@ class RegInitEstimation : public ObjFunction { this->PredTransform(base_score->Data()); } }; -} // anonymous namespace #if defined(XGBOOST_USE_CUDA) DMLC_REGISTRY_FILE_TAG(regression_obj_gpu); From 469a39d19ca581da09d9f8b2e2bd27f532621e09 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Fri, 16 Dec 2022 05:54:20 +0800 Subject: [PATCH 116/133] test. --- python-package/xgboost/testing/updater.py | 53 +++++++++++++++++++++++ tests/python-gpu/test_gpu_updaters.py | 4 ++ tests/python/test_updaters.py | 4 ++ 3 files changed, 61 insertions(+) create mode 100644 python-package/xgboost/testing/updater.py diff --git a/python-package/xgboost/testing/updater.py b/python-package/xgboost/testing/updater.py new file mode 100644 index 000000000000..0891cce5877c --- /dev/null +++ b/python-package/xgboost/testing/updater.py @@ -0,0 +1,53 @@ +"""Tests for updaters.""" +import numpy as np + +import xgboost as xgb + + +def check_init_estimation(tree_method: str) -> None: + """Test for init estimation.""" + from sklearn.datasets import ( + make_regression, + make_classification, + make_multilabel_classification, + ) + + def run_reg(X: np.ndarray, y: np.ndarray) -> None: + reg = xgb.XGBRegressor(tree_method=tree_method, max_depth=1, n_estimators=1) + reg.fit(X, y, eval_set=[(X, y)]) + base_score_0 = reg.get_params()["base_score"] + score_0 = reg.evals_result()["validation_0"]["rmse"][0] + + reg = xgb.XGBRegressor( + tree_method=tree_method, max_depth=1, n_estimators=1, boost_from_average=0 + ) + reg.fit(X, y, eval_set=[(X, y)]) + base_score_1 = reg.get_params()["base_score"] + score_1 = reg.evals_result()["validation_0"]["rmse"][0] + assert not np.isclose(base_score_0, base_score_1) + assert score_0 < score_1 # must be better + + X, y = make_regression(n_samples=4096) + run_reg(X, y) + X, y = make_regression(n_samples=4096, n_targets=3) + run_reg(X, y) + + def run_clf(X: np.ndarray, y: np.ndarray) -> None: + clf = xgb.XGBClassifier(tree_method=tree_method, max_depth=1, n_estimators=1) + clf.fit(X, y, eval_set=[(X, y)]) + base_score_0 = clf.get_params()["base_score"] + score_0 = clf.evals_result()["validation_0"]["logloss"][0] + + clf = xgb.XGBClassifier( + tree_method=tree_method, max_depth=1, n_estimators=1, boost_from_average=0 + ) + clf.fit(X, y, eval_set=[(X, y)]) + base_score_1 = clf.get_params()["base_score"] + score_1 = clf.evals_result()["validation_0"]["logloss"][0] + assert not np.isclose(base_score_0, base_score_1) + assert score_0 < score_1 # must be better + + X, y = make_classification(n_samples=4096) + run_clf(X, y) + X, y = make_multilabel_classification(n_samples=4096, n_labels=3, n_classes=5) + run_clf(X, y) diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index 8a2501eb8ba5..08aad6402144 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -5,6 +5,7 @@ import pytest from 
hypothesis import assume, given, note, settings, strategies from xgboost.testing.params import cat_parameter_strategy, hist_parameter_strategy +from xgboost.testing.updater import check_init_estimation import xgboost as xgb from xgboost import testing as tm @@ -204,3 +205,6 @@ def test_specified_gpu_id_gpu_update(self, dataset, gpu_id): @pytest.mark.parametrize("weighted", [True, False]) def test_adaptive(self, weighted) -> None: self.cputest.run_adaptive("gpu_hist", weighted) + + def test_init_estimation(self) -> None: + check_init_estimation("gpu_hist") diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py index 1682d90a1281..d716bd130227 100644 --- a/tests/python/test_updaters.py +++ b/tests/python/test_updaters.py @@ -10,6 +10,7 @@ exact_parameter_strategy, hist_parameter_strategy, ) +from xgboost.testing.updater import check_init_estimation import xgboost as xgb from xgboost import testing as tm @@ -451,3 +452,6 @@ def get_score(config: Dict) -> float: ) def test_adaptive(self, tree_method, weighted) -> None: self.run_adaptive(tree_method, weighted) + + def test_init_estimation(self) -> None: + check_init_estimation("hist") From 5ded8f58fc9aa6fb12be88e365b0949435e3ab37 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Fri, 16 Dec 2022 05:55:32 +0800 Subject: [PATCH 117/133] isort. --- python-package/xgboost/testing/updater.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/xgboost/testing/updater.py b/python-package/xgboost/testing/updater.py index 0891cce5877c..8b8cb61c98fa 100644 --- a/python-package/xgboost/testing/updater.py +++ b/python-package/xgboost/testing/updater.py @@ -7,9 +7,9 @@ def check_init_estimation(tree_method: str) -> None: """Test for init estimation.""" from sklearn.datasets import ( - make_regression, make_classification, make_multilabel_classification, + make_regression, ) def run_reg(X: np.ndarray, y: np.ndarray) -> None: From 6db5b6c51f681e3e98f2956d238a29f9799221bb Mon Sep 17 00:00:00 2001 From: jiamingy Date: Fri, 16 Dec 2022 23:34:38 +0800 Subject: [PATCH 118/133] Refactor PySpark tests. - Convert classifier tests to pytest tests. - Replace hardcoded tests. 
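The conversion follows the pattern sketched below. This is an illustrative, single-process reduction of the idea rather than the actual test code: the real fixtures in the diff build pyspark DataFrames, and the expected values come from a single-node XGBoost reference model computed at fixture time instead of hardcoded arrays.

    from collections import namedtuple

    import numpy as np
    import pytest

    import xgboost as xgb

    ClfData = namedtuple("ClfData", ("params", "X", "y", "expected_proba"))


    @pytest.fixture
    def clf_data() -> ClfData:
        # Expected probabilities are produced by a reference model rather
        # than being hardcoded next to the assertions.
        params = {"max_depth": 5, "n_estimators": 10, "scale_pos_weight": 4}
        X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5]])
        y = np.array([0, 1])
        ref = xgb.XGBClassifier(**params).fit(X, y)
        return ClfData(params, X, y, ref.predict_proba(X))


    def test_classifier_with_params(clf_data: ClfData) -> None:
        clf = xgb.XGBClassifier(**clf_data.params).fit(clf_data.X, clf_data.y)
        np.testing.assert_allclose(
            clf.predict_proba(clf_data.X), clf_data.expected_proba, rtol=1e-3
        )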
--- .../test_with_spark/test_spark_local.py | 492 +++++++++--------- 1 file changed, 254 insertions(+), 238 deletions(-) diff --git a/tests/test_distributed/test_with_spark/test_spark_local.py b/tests/test_distributed/test_with_spark/test_spark_local.py index 6754bacc6a99..fa7bdd94fa78 100644 --- a/tests/test_distributed/test_with_spark/test_spark_local.py +++ b/tests/test_distributed/test_with_spark/test_spark_local.py @@ -1,9 +1,10 @@ import glob import logging import random +import tempfile import uuid from collections import namedtuple -from typing import Generator +from typing import Generator, Sequence, Type import numpy as np import pytest @@ -248,6 +249,87 @@ def clf_with_weight( ) +ClfData = namedtuple( + "ClfData", ("cls_params", "cls_df_train", "cls_df_train_large", "cls_df_test") +) + + +@pytest.fixture +def clf_data(spark: SparkSession) -> Generator[ClfData, None, None]: + cls_params = {"max_depth": 5, "n_estimators": 10, "scale_pos_weight": 4} + + X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5]]) + y = np.array([0, 1]) + cl1 = xgb.XGBClassifier() + cl1.fit(X, y) + predt0 = cl1.predict(X) + proba0: np.ndarray = cl1.predict_proba(X) + cl2 = xgb.XGBClassifier(max_depth=5, n_estimators=10, scale_pos_weight=4) + cl2.fit(X, y) + predt1 = cl2.predict(X) + proba1: np.ndarray = cl2.predict_proba(X) + + # convert np array to pyspark dataframe + cls_df_train_data = [ + (Vectors.dense(X[0, :]), int(y[0])), + (Vectors.sparse(3, {1: float(X[1, 1]), 2: float(X[1, 2])}), int(y[1])), + ] + cls_df_train = spark.createDataFrame(cls_df_train_data, ["features", "label"]) + + cls_df_train_large = spark.createDataFrame( + cls_df_train_data * 100, ["features", "label"] + ) + + cls_df_test = spark.createDataFrame( + [ + ( + Vectors.dense(X[0, :]), + int(predt0[0]), + proba0[0, :].tolist(), + int(predt1[0]), + proba1[0, :].tolist(), + ), + ( + Vectors.sparse(3, {1: 1.0, 2: 5.5}), + int(predt0[1]), + proba0[1, :].tolist(), + int(predt1[1]), + proba1[1, :].tolist(), + ), + ], + [ + "features", + "expected_prediction", + "expected_probability", + "expected_prediction_with_params", + "expected_probability_with_params", + ], + ) + yield ClfData(cls_params, cls_df_train, cls_df_train_large, cls_df_test) + + +def assert_model_compatible(model: XGBModel, model_path: str) -> None: + bst = xgb.Booster() + path = glob.glob(f"{model_path}/**/model/part-00000", recursive=True)[0] + bst.load_model(path) + np.testing.assert_equal( + np.array(model.get_booster().save_raw("json")), np.array(bst.save_raw("json")) + ) + + +def check_sub_dict_match( + sub_dist: dict, whole_dict: dict, excluding_keys: Sequence[str] +) -> None: + for k in sub_dist: + if k not in excluding_keys: + assert k in whole_dict, f"check on {k} failed" + assert sub_dist[k] == whole_dict[k], f"check on {k} failed" + + +def get_params_map(params_kv: dict, estimator: Type) -> dict: + return {getattr(estimator, k): v for k, v in params_kv.items()} + + class TestPySparkLocal: def test_regressor_with_weight_eval(self, reg_with_weight: RegWithWeight) -> None: # with weight @@ -350,10 +432,161 @@ def test_classifier_with_weight_eval(self, clf_with_weight: ClfWithWeight) -> No ) for row in pred_result_with_weight_eval: - np.testing.assert_allclose( # failed + np.testing.assert_allclose( row.probability, row.expected_prob_with_weight_and_eval, atol=1e-3 ) + def test_classifier_model_save_load(self, clf_data: ClfData) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + path = "file:" + tmpdir + clf = SparkXGBClassifier(**clf_data.cls_params) + model = 
clf.fit(clf_data.cls_df_train) + model.save(path) + loaded_model = SparkXGBClassifierModel.load(path) + assert model.uid == loaded_model.uid + for k, v in clf_data.cls_params.items(): + assert loaded_model.getOrDefault(k) == v + + pred_result = loaded_model.transform(clf_data.cls_df_test).collect() + for row in pred_result: + np.testing.assert_allclose( + row.probability, row.expected_probability_with_params, atol=1e-3 + ) + + with pytest.raises(AssertionError, match="Expected class name"): + SparkXGBRegressorModel.load(path) + + assert_model_compatible(model, tmpdir) + + def test_classifier_basic(self, clf_data: ClfData) -> None: + classifier = SparkXGBClassifier() + model = classifier.fit(clf_data.cls_df_train) + pred_result = model.transform(clf_data.cls_df_test).collect() + for row in pred_result: + np.testing.assert_equal(row.prediction, row.expected_prediction) + np.testing.assert_allclose( + row.probability, row.expected_probability, rtol=1e-3 + ) + + def test_classifier_with_params(self, clf_data: ClfData) -> None: + classifier = SparkXGBClassifier(**clf_data.cls_params) + all_params = dict( + **(classifier._gen_xgb_params_dict()), + **(classifier._gen_fit_params_dict()), + **(classifier._gen_predict_params_dict()), + ) + check_sub_dict_match( + clf_data.cls_params, all_params, excluding_keys=_non_booster_params + ) + + model = classifier.fit(clf_data.cls_df_train) + all_params = dict( + **(model._gen_xgb_params_dict()), + **(model._gen_fit_params_dict()), + **(model._gen_predict_params_dict()), + ) + check_sub_dict_match( + clf_data.cls_params, all_params, excluding_keys=_non_booster_params + ) + pred_result = model.transform(clf_data.cls_df_test).collect() + for row in pred_result: + np.testing.assert_equal(row.prediction, row.expected_prediction_with_params) + np.testing.assert_allclose( + row.probability, row.expected_probability_with_params, rtol=1e-3 + ) + + def test_classifier_model_pipeline_save_load(self, clf_data: ClfData) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + path = "file:" + tmpdir + classifier = SparkXGBClassifier() + pipeline = Pipeline(stages=[classifier]) + pipeline = pipeline.copy( + extra=get_params_map(clf_data.cls_params, classifier) + ) + model = pipeline.fit(clf_data.cls_df_train) + model.save(path) + + loaded_model = PipelineModel.load(path) + for k, v in clf_data.cls_params.items(): + assert loaded_model.stages[0].getOrDefault(k) == v + + pred_result = loaded_model.transform(clf_data.cls_df_test).collect() + for row in pred_result: + np.testing.assert_allclose( + row.probability, row.expected_probability_with_params, atol=1e-3 + ) + assert_model_compatible(model.stages[0], tmpdir) + + def test_classifier_with_cross_validator(self, clf_data: ClfData) -> None: + xgb_classifer = SparkXGBClassifier(n_estimators=1) + paramMaps = ParamGridBuilder().addGrid(xgb_classifer.max_depth, [1, 2]).build() + cvBin = CrossValidator( + estimator=xgb_classifer, + estimatorParamMaps=paramMaps, + evaluator=BinaryClassificationEvaluator(), + seed=1, + parallelism=4, + numFolds=2, + ) + cvBinModel = cvBin.fit(clf_data.cls_df_train_large) + cvBinModel.transform(clf_data.cls_df_test) + + def test_convert_to_sklearn_model_clf(self, clf_data: ClfData) -> None: + classifier = SparkXGBClassifier( + n_estimators=200, missing=2.0, max_depth=3, sketch_eps=0.5 + ) + clf_model = classifier.fit(clf_data.cls_df_train) + + # Check that regardless of what booster, _convert_to_model converts to the + # correct class type + sklearn_classifier = 
classifier._convert_to_sklearn_model( + clf_model.get_booster().save_raw("json"), + clf_model.get_booster().save_config(), + ) + assert isinstance(sklearn_classifier, XGBClassifier) + assert sklearn_classifier.n_estimators == 200 + assert sklearn_classifier.missing == 2.0 + assert sklearn_classifier.max_depth == 3 + assert sklearn_classifier.get_params()["sketch_eps"] == 0.5 + + def test_classifier_array_col_as_feature(self, clf_data: ClfData) -> None: + train_dataset = clf_data.cls_df_train.withColumn( + "features", vector_to_array(spark_sql_func.col("features")) + ) + test_dataset = clf_data.cls_df_test.withColumn( + "features", vector_to_array(spark_sql_func.col("features")) + ) + classifier = SparkXGBClassifier() + model = classifier.fit(train_dataset) + + pred_result = model.transform(test_dataset).collect() + for row in pred_result: + np.testing.assert_equal(row.prediction, row.expected_prediction) + np.testing.assert_allclose( + row.probability, row.expected_probability, rtol=1e-3 + ) + + def test_classifier_with_feature_names_types_weights( + self, clf_data: ClfData + ) -> None: + classifier = SparkXGBClassifier( + feature_names=["a1", "a2", "a3"], + feature_types=["i", "int", "float"], + feature_weights=[2.0, 5.0, 3.0], + ) + model = classifier.fit(clf_data.cls_df_train) + model.transform(clf_data.cls_df_test).collect() + + def test_early_stop_param_validation(self, clf_data: ClfData) -> None: + classifier = SparkXGBClassifier(early_stopping_rounds=1) + with pytest.raises(ValueError, match="early_stopping_rounds"): + classifier.fit(clf_data.cls_df_train) + + def test_gpu_param_setting(self, clf_data: ClfData) -> None: + py_cls = SparkXGBClassifier(use_gpu=True) + train_params = py_cls._get_distributed_train_params(clf_data.cls_df_train) + assert train_params["tree_method"] == "gpu_hist" + class XgboostLocalTest(SparkTestCase): def setUp(self): @@ -406,60 +639,6 @@ def setUp(self): ], ) - # >>> X = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.5]]) - # >>> y = np.array([0, 1]) - # >>> cl1 = xgboost.XGBClassifier() - # >>> cl1.fit(X, y) - # >>> cl1.predict(X) - # array([0, 0]) - # >>> cl1.predict_proba(X) - # array([[0.5, 0.5], - # [0.5, 0.5]], dtype=float32) - # >>> cl2 = xgboost.XGBClassifier(max_depth=5, n_estimators=10, scale_pos_weight=4) - # >>> cl2.fit(X, y) - # >>> cl2.predict(X) - # array([1, 1]) - # >>> cl2.predict_proba(X) - # array([[0.27574146, 0.72425854 ], - # [0.27574146, 0.72425854 ]], dtype=float32) - self.cls_params = {"max_depth": 5, "n_estimators": 10, "scale_pos_weight": 4} - - cls_df_train_data = [ - (Vectors.dense(1.0, 2.0, 3.0), 0), - (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1), - ] - self.cls_df_train = self.session.createDataFrame( - cls_df_train_data, ["features", "label"] - ) - self.cls_df_train_large = self.session.createDataFrame( - cls_df_train_data * 100, ["features", "label"] - ) - self.cls_df_test = self.session.createDataFrame( - [ - ( - Vectors.dense(1.0, 2.0, 3.0), - 0, - [0.5, 0.5], - 1, - [0.27574146, 0.72425854], - ), - ( - Vectors.sparse(3, {1: 1.0, 2: 5.5}), - 0, - [0.5, 0.5], - 1, - [0.27574146, 0.72425854], - ), - ], - [ - "features", - "expected_prediction", - "expected_probability", - "expected_prediction_with_params", - "expected_probability_with_params", - ], - ) - # kwargs test (using the above data, train, we get the same results) self.cls_params_kwargs = {"tree_method": "approx", "sketch_eps": 0.03} @@ -610,6 +789,22 @@ def assert_model_compatible(self, model: XGBModel, model_path: str): bst.load_model(path) 
self.assertEqual(model.get_booster().save_raw("json"), bst.save_raw("json")) + def test_convert_to_sklearn_model_reg(self) -> None: + regressor = SparkXGBRegressor( + n_estimators=200, missing=2.0, max_depth=3, sketch_eps=0.5 + ) + reg_model = regressor.fit(self.reg_df_train) + + sklearn_regressor = regressor._convert_to_sklearn_model( + reg_model.get_booster().save_raw("json"), + reg_model.get_booster().save_config(), + ) + assert isinstance(sklearn_regressor, XGBRegressor) + assert sklearn_regressor.n_estimators == 200 + assert sklearn_regressor.missing == 2.0 + assert sklearn_regressor.max_depth == 3 + assert sklearn_regressor.get_params()["sketch_eps"] == 0.5 + def test_regressor_params_basic(self): py_reg = SparkXGBRegressor() self.assertTrue(hasattr(py_reg, "n_estimators")) @@ -665,11 +860,6 @@ def test_param_alias(self): ): SparkXGBClassifier(featuresCol="f1") - def test_gpu_param_setting(self): - py_cls = SparkXGBClassifier(use_gpu=True) - train_params = py_cls._get_distributed_train_params(self.cls_df_train) - assert train_params["tree_method"] == "gpu_hist" - @staticmethod def test_param_value_converter(): py_cls = SparkXGBClassifier(missing=np.float64(1.0), sketch_eps=np.float64(0.3)) @@ -691,16 +881,6 @@ def test_regressor_basic(self): np.isclose(row.prediction, row.expected_prediction, atol=1e-3) ) - def test_classifier_basic(self): - classifier = SparkXGBClassifier() - model = classifier.fit(self.cls_df_train) - pred_result = model.transform(self.cls_df_test).collect() - for row in pred_result: - self.assertEqual(row.prediction, row.expected_prediction) - self.assertTrue( - np.allclose(row.probability, row.expected_probability, rtol=1e-3) - ) - def test_multi_classifier(self): classifier = SparkXGBClassifier() model = classifier.fit(self.multi_cls_df_train) @@ -710,12 +890,6 @@ def test_multi_classifier(self): np.allclose(row.probability, row.expected_probability, rtol=1e-3) ) - def _check_sub_dict_match(self, sub_dist, whole_dict, excluding_keys): - for k in sub_dist: - if k not in excluding_keys: - self.assertTrue(k in whole_dict, f"check on {k} failed") - self.assertEqual(sub_dist[k], whole_dict[k], f"check on {k} failed") - def test_regressor_with_params(self): regressor = SparkXGBRegressor(**self.reg_params) all_params = dict( @@ -723,7 +897,7 @@ def test_regressor_with_params(self): **(regressor._gen_fit_params_dict()), **(regressor._gen_predict_params_dict()), ) - self._check_sub_dict_match( + check_sub_dict_match( self.reg_params, all_params, excluding_keys=_non_booster_params ) @@ -733,7 +907,7 @@ def test_regressor_with_params(self): **(model._gen_fit_params_dict()), **(model._gen_predict_params_dict()), ) - self._check_sub_dict_match( + check_sub_dict_match( self.reg_params, all_params, excluding_keys=_non_booster_params ) pred_result = model.transform(self.reg_df_test).collect() @@ -744,35 +918,6 @@ def test_regressor_with_params(self): ) ) - def test_classifier_with_params(self): - classifier = SparkXGBClassifier(**self.cls_params) - all_params = dict( - **(classifier._gen_xgb_params_dict()), - **(classifier._gen_fit_params_dict()), - **(classifier._gen_predict_params_dict()), - ) - self._check_sub_dict_match( - self.cls_params, all_params, excluding_keys=_non_booster_params - ) - - model = classifier.fit(self.cls_df_train) - all_params = dict( - **(model._gen_xgb_params_dict()), - **(model._gen_fit_params_dict()), - **(model._gen_predict_params_dict()), - ) - self._check_sub_dict_match( - self.cls_params, all_params, excluding_keys=_non_booster_params - ) - 
pred_result = model.transform(self.cls_df_test).collect() - for row in pred_result: - self.assertEqual(row.prediction, row.expected_prediction_with_params) - self.assertTrue( - np.allclose( - row.probability, row.expected_probability_with_params, rtol=1e-3 - ) - ) - def test_regressor_model_save_load(self): tmp_dir = self.get_local_tmp_dir() path = "file:" + tmp_dir @@ -797,40 +942,12 @@ def test_regressor_model_save_load(self): self.assert_model_compatible(model, tmp_dir) - def test_classifier_model_save_load(self): - tmp_dir = self.get_local_tmp_dir() - path = "file:" + tmp_dir - regressor = SparkXGBClassifier(**self.cls_params) - model = regressor.fit(self.cls_df_train) - model.save(path) - loaded_model = SparkXGBClassifierModel.load(path) - self.assertEqual(model.uid, loaded_model.uid) - for k, v in self.cls_params.items(): - self.assertEqual(loaded_model.getOrDefault(k), v) - - pred_result = loaded_model.transform(self.cls_df_test).collect() - for row in pred_result: - self.assertTrue( - np.allclose( - row.probability, row.expected_probability_with_params, atol=1e-3 - ) - ) - - with self.assertRaisesRegex(AssertionError, "Expected class name"): - SparkXGBRegressorModel.load(path) - - self.assert_model_compatible(model, tmp_dir) - - @staticmethod - def _get_params_map(params_kv, estimator): - return {getattr(estimator, k): v for k, v in params_kv.items()} - def test_regressor_model_pipeline_save_load(self): tmp_dir = self.get_local_tmp_dir() path = "file:" + tmp_dir regressor = SparkXGBRegressor() pipeline = Pipeline(stages=[regressor]) - pipeline = pipeline.copy(extra=self._get_params_map(self.reg_params, regressor)) + pipeline = pipeline.copy(extra=get_params_map(self.reg_params, regressor)) model = pipeline.fit(self.reg_df_train) model.save(path) @@ -847,44 +964,6 @@ def test_regressor_model_pipeline_save_load(self): ) self.assert_model_compatible(model.stages[0], tmp_dir) - def test_classifier_model_pipeline_save_load(self): - tmp_dir = self.get_local_tmp_dir() - path = "file:" + tmp_dir - classifier = SparkXGBClassifier() - pipeline = Pipeline(stages=[classifier]) - pipeline = pipeline.copy( - extra=self._get_params_map(self.cls_params, classifier) - ) - model = pipeline.fit(self.cls_df_train) - model.save(path) - - loaded_model = PipelineModel.load(path) - for k, v in self.cls_params.items(): - self.assertEqual(loaded_model.stages[0].getOrDefault(k), v) - - pred_result = loaded_model.transform(self.cls_df_test).collect() - for row in pred_result: - self.assertTrue( - np.allclose( - row.probability, row.expected_probability_with_params, atol=1e-3 - ) - ) - self.assert_model_compatible(model.stages[0], tmp_dir) - - def test_classifier_with_cross_validator(self): - xgb_classifer = SparkXGBClassifier(n_estimators=1) - paramMaps = ParamGridBuilder().addGrid(xgb_classifer.max_depth, [1, 2]).build() - cvBin = CrossValidator( - estimator=xgb_classifer, - estimatorParamMaps=paramMaps, - evaluator=BinaryClassificationEvaluator(), - seed=1, - parallelism=4, - numFolds=2, - ) - cvBinModel = cvBin.fit(self.cls_df_train_large) - cvBinModel.transform(self.cls_df_test) - def test_callbacks(self): from xgboost.callback import LearningRateScheduler @@ -1003,38 +1082,6 @@ def test_use_gpu_param(self): classifier = SparkXGBClassifier(use_gpu=True, tree_method="gpu_hist") classifier = SparkXGBClassifier(use_gpu=True) - def test_convert_to_sklearn_model(self): - classifier = SparkXGBClassifier( - n_estimators=200, missing=2.0, max_depth=3, sketch_eps=0.5 - ) - clf_model = 
classifier.fit(self.cls_df_train) - - regressor = SparkXGBRegressor( - n_estimators=200, missing=2.0, max_depth=3, sketch_eps=0.5 - ) - reg_model = regressor.fit(self.reg_df_train) - - # Check that regardless of what booster, _convert_to_model converts to the correct class type - sklearn_classifier = classifier._convert_to_sklearn_model( - clf_model.get_booster().save_raw("json"), - clf_model.get_booster().save_config(), - ) - assert isinstance(sklearn_classifier, XGBClassifier) - assert sklearn_classifier.n_estimators == 200 - assert sklearn_classifier.missing == 2.0 - assert sklearn_classifier.max_depth == 3 - assert sklearn_classifier.get_params()["sketch_eps"] == 0.5 - - sklearn_regressor = regressor._convert_to_sklearn_model( - reg_model.get_booster().save_raw("json"), - reg_model.get_booster().save_config(), - ) - assert isinstance(sklearn_regressor, XGBRegressor) - assert sklearn_regressor.n_estimators == 200 - assert sklearn_regressor.missing == 2.0 - assert sklearn_regressor.max_depth == 3 - assert sklearn_classifier.get_params()["sketch_eps"] == 0.5 - def test_feature_importances(self): reg1 = SparkXGBRegressor(**self.reg_params) model = reg1.fit(self.reg_df_train) @@ -1060,32 +1107,6 @@ def test_regressor_array_col_as_feature(self): np.isclose(row.prediction, row.expected_prediction, atol=1e-3) ) - def test_classifier_array_col_as_feature(self): - train_dataset = self.cls_df_train.withColumn( - "features", vector_to_array(spark_sql_func.col("features")) - ) - test_dataset = self.cls_df_test.withColumn( - "features", vector_to_array(spark_sql_func.col("features")) - ) - classifier = SparkXGBClassifier() - model = classifier.fit(train_dataset) - - pred_result = model.transform(test_dataset).collect() - for row in pred_result: - self.assertEqual(row.prediction, row.expected_prediction) - self.assertTrue( - np.allclose(row.probability, row.expected_probability, rtol=1e-3) - ) - - def test_classifier_with_feature_names_types_weights(self): - classifier = SparkXGBClassifier( - feature_names=["a1", "a2", "a3"], - feature_types=["i", "int", "float"], - feature_weights=[2.0, 5.0, 3.0], - ) - model = classifier.fit(self.cls_df_train) - model.transform(self.cls_df_test).collect() - def test_regressor_with_sparse_optim(self): regressor = SparkXGBRegressor(missing=0.0) model = regressor.fit(self.reg_df_sparse_train) @@ -1192,11 +1213,6 @@ def test_empty_partition(self): classifier = SparkXGBClassifier(num_workers=4, tree_method=tree_method) classifier.fit(data_trans) - def test_early_stop_param_validation(self): - classifier = SparkXGBClassifier(early_stopping_rounds=1) - with pytest.raises(ValueError, match="early_stopping_rounds"): - classifier.fit(self.cls_df_train) - def test_unsupported_params(self): with pytest.raises(ValueError, match="evals_result"): SparkXGBClassifier(evals_result={}) From 47778a7f65273f6ae37de690829a05cf400de0d3 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Sat, 17 Dec 2022 00:13:55 +0800 Subject: [PATCH 119/133] lint. 
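Add pylint disables for the `unbalanced-tuple-unpacking` false positives triggered by sklearn's `make_regression` and for the upper-case `X` argument names.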
--- python-package/xgboost/testing/dask.py | 1 + python-package/xgboost/testing/updater.py | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/python-package/xgboost/testing/dask.py b/python-package/xgboost/testing/dask.py index d925630909b5..fc57b2199feb 100644 --- a/python-package/xgboost/testing/dask.py +++ b/python-package/xgboost/testing/dask.py @@ -31,6 +31,7 @@ def check_init_estimation_reg(tree_method: str, client: Client) -> None: """Test init estimation for regressor.""" from sklearn.datasets import make_regression + # pylint: disable=unbalanced-tuple-unpacking X, y = make_regression(n_samples=4096 * 2, n_features=32, random_state=1994) reg = xgb.XGBRegressor(n_estimators=1, max_depth=1, tree_method=tree_method) reg.fit(X, y) diff --git a/python-package/xgboost/testing/updater.py b/python-package/xgboost/testing/updater.py index 8b8cb61c98fa..fa141dd7db96 100644 --- a/python-package/xgboost/testing/updater.py +++ b/python-package/xgboost/testing/updater.py @@ -12,7 +12,7 @@ def check_init_estimation(tree_method: str) -> None: make_regression, ) - def run_reg(X: np.ndarray, y: np.ndarray) -> None: + def run_reg(X: np.ndarray, y: np.ndarray) -> None: # pylint: disable=invalid-name reg = xgb.XGBRegressor(tree_method=tree_method, max_depth=1, n_estimators=1) reg.fit(X, y, eval_set=[(X, y)]) base_score_0 = reg.get_params()["base_score"] @@ -27,12 +27,13 @@ def run_reg(X: np.ndarray, y: np.ndarray) -> None: assert not np.isclose(base_score_0, base_score_1) assert score_0 < score_1 # must be better - X, y = make_regression(n_samples=4096) + X, y = make_regression(n_samples=4096) # pylint: disable=unbalanced-tuple-unpacking run_reg(X, y) + # pylint: disable=unbalanced-tuple-unpacking X, y = make_regression(n_samples=4096, n_targets=3) run_reg(X, y) - def run_clf(X: np.ndarray, y: np.ndarray) -> None: + def run_clf(X: np.ndarray, y: np.ndarray) -> None: # pylint: disable=invalid-name clf = xgb.XGBClassifier(tree_method=tree_method, max_depth=1, n_estimators=1) clf.fit(X, y, eval_set=[(X, y)]) base_score_0 = clf.get_params()["base_score"] @@ -47,6 +48,7 @@ def run_clf(X: np.ndarray, y: np.ndarray) -> None: assert not np.isclose(base_score_0, base_score_1) assert score_0 < score_1 # must be better + # pylint: disable=unbalanced-tuple-unpacking X, y = make_classification(n_samples=4096) run_clf(X, y) X, y = make_multilabel_classification(n_samples=4096, n_labels=3, n_classes=5) From 8cc70d346a36e8910ddea399f568c715e67b37ff Mon Sep 17 00:00:00 2001 From: jiamingy Date: Sat, 17 Dec 2022 00:44:33 +0800 Subject: [PATCH 120/133] Cleanup. --- src/objective/regression_obj.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 2ef06551e235..8e5b4fb54662 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -42,7 +42,7 @@ namespace obj { namespace { void CheckInitInputs(MetaInfo const& info) { CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels."; - if (!info.weights_.Empty() && info.group_ptr_.empty()) { + if (!info.weights_.Empty()) { CHECK_EQ(info.weights_.Size(), info.num_row_) << "Number of weights should be equal to number of data points."; } From 6b1f8bfd4bf8fbe1f417a961055db5b2f719491b Mon Sep 17 00:00:00 2001 From: jiamingy Date: Sat, 17 Dec 2022 18:18:22 +0800 Subject: [PATCH 121/133] Fix ci. 
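Rename the `cpu_test` conda environment to `linux_cpu_test` so that it matches the renamed environment file used by the CPU CI image.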
--- tests/ci_build/Dockerfile.cpu | 6 +++--- tests/ci_build/test_python.sh | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/ci_build/Dockerfile.cpu b/tests/ci_build/Dockerfile.cpu index 54c3c3af4ec7..d09250b045c3 100644 --- a/tests/ci_build/Dockerfile.cpu +++ b/tests/ci_build/Dockerfile.cpu @@ -36,10 +36,10 @@ RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \ rm -rf grpc # Create new Conda environment -COPY conda_env/cpu_test.yml /scripts/ -RUN mamba env create -n cpu_test --file=/scripts/cpu_test.yml && \ +COPY conda_env/linux_cpu_test.yml /scripts/ +RUN mamba env create -n linux_cpu_test --file=/scripts/linux_cpu_test.yml && \ mamba clean --all && \ - conda run --no-capture-output -n cpu_test pip install buildkite-test-collector + conda run --no-capture-output -n linux_cpu_test pip install buildkite-test-collector # Install lightweight sudo (not bound to TTY) RUN set -ex; \ diff --git a/tests/ci_build/test_python.sh b/tests/ci_build/test_python.sh index 98b73d789b84..7375b4c9f872 100755 --- a/tests/ci_build/test_python.sh +++ b/tests/ci_build/test_python.sh @@ -76,7 +76,7 @@ case "$suite" in ;; cpu) - source activate cpu_test + source activate linux_cpu_test set -x install_xgboost export RAY_OBJECT_STORE_ALLOW_SLOW_STORAGE=1 From 78f44f56e81f67f01c264d9e627604a8df2c335b Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 17 Dec 2022 18:48:29 +0800 Subject: [PATCH 122/133] Merge changes. --- src/tree/fit_stump.cc | 36 +++++++++++++++++++++++------------- src/tree/fit_stump.cu | 25 ++++++++++++------------- src/tree/fit_stump.h | 8 +++++++- 3 files changed, 42 insertions(+), 27 deletions(-) diff --git a/src/tree/fit_stump.cc b/src/tree/fit_stump.cc index fb442009f7fe..927144d36b1f 100644 --- a/src/tree/fit_stump.cc +++ b/src/tree/fit_stump.cc @@ -3,19 +3,20 @@ * * \brief Utilities for estimating initial score. */ - -#if !defined(NOMINMAX) && defined(_WIN32) -#define NOMINMAX -#endif // !defined(NOMINMAX) #include "fit_stump.h" -#include <algorithm> // std::max +#include <cstdint> // std::int32_t #include <cstddef> // std::size_t #include "../collective/communicator-inl.h" +#include "../common/common.h" // AssertGPUSupport #include "../common/numeric.h" // cpu_impl::Reduce +#include "../common/threading_utils.h" // ParallelFor #include "../common/transform_iterator.h" // MakeIndexTransformIter -#include "xgboost/linalg.h" // TensorView +#include "xgboost/base.h" // bst_target_t, GradientPairPrecise +#include "xgboost/context.h" // Context +#include "xgboost/linalg.h" // TensorView, Tensor, Constant +#include "xgboost/logging.h" // CHECK_EQ namespace xgboost { namespace tree { @@ -24,20 +25,29 @@ void FitStump(Context const* ctx, linalg::TensorView<GradientPair const, 2> gpai linalg::VectorView<float> out) { auto n_targets = out.Size(); CHECK_EQ(n_targets, gpair.Shape(1)); - linalg::Vector<GradientPairPrecise> sum = linalg::Constant(ctx, GradientPairPrecise{}, n_targets); - auto h_sum = sum.HostView(); + linalg::Tensor<GradientPairPrecise, 2> sum_tloc = + linalg::Constant(ctx, GradientPairPrecise{}, ctx->Threads(), n_targets); + auto h_sum_tloc = sum_tloc.HostView(); // first dim for gpair is samples, second dim is target.
- // Reduce by column - common::ParallelFor(gpair.Shape(1), ctx->Threads(), [&](auto j) { - for (std::size_t i = 0; i < gpair.Shape(0); ++i) { - h_sum(j) += GradientPairPrecise{gpair(i, j)}; + // Reduce by column, parallel by samples + common::ParallelFor(gpair.Shape(0), ctx->Threads(), [&](auto i) { + for (bst_target_t t = 0; t < n_targets; ++t) { + h_sum_tloc(omp_get_thread_num(), t) += GradientPairPrecise{gpair(i, t)}; } }); + // Aggregate to the first row. + auto h_sum = h_sum_tloc.Slice(0, linalg::All()); + for (std::int32_t i = 1; i < ctx->Threads(); ++i) { + for (bst_target_t j = 0; j < n_targets; ++j) { + h_sum(j) += h_sum_tloc(i, j); + } + } + CHECK(h_sum.CContiguous()); collective::Allreduce<collective::Operation::kSum>( reinterpret_cast<double*>(h_sum.Values().data()), h_sum.Size() * 2); for (std::size_t i = 0; i < h_sum.Size(); ++i) { - out(i) = static_cast<float>(CalcUnregulatedWeight(h_sum(i).GetGrad(), h_sum(i).GetHess())); + out(i) = static_cast<float>(CalcUnregularizedWeight(h_sum(i).GetGrad(), h_sum(i).GetHess())); } } } // namespace cpu_impl diff --git a/src/tree/fit_stump.cu b/src/tree/fit_stump.cu index b7bbcdb87b95..58a1fae82987 100644 --- a/src/tree/fit_stump.cu +++ b/src/tree/fit_stump.cu @@ -7,18 +7,17 @@ #define NOMINMAX #endif // !defined(NOMINMAX) #include <thrust/execution_policy.h> // cuda::par -#include <thrust/functional.h> // thrust::equal_to #include <thrust/iterator/counting_iterator.h> // thrust::make_counting_iterator -#include <thrust/iterator/zip_iterator.h> // thrust::make_zip_iterator -#include <algorithm> // std::max #include <cstddef> // std::size_t #include "../collective/device_communicator.cuh" // DeviceCommunicator -#include "../common/device_helpers.cuh" // dh::MakeTransformIterator::Reduce,TypedDiscard +#include "../common/device_helpers.cuh" // dh::MakeTransformIterator #include "fit_stump.h" #include "xgboost/base.h" // GradientPairPrecise, GradientPair, XGBOOST_DEVICE #include "xgboost/context.h" // Context +#include "xgboost/linalg.h" // TensorView, Tensor, Constant +#include "xgboost/logging.h" // CHECK_EQ #include "xgboost/span.h" // span namespace xgboost { @@ -33,9 +32,8 @@ void FitStump(Context const* ctx, linalg::TensorView<GradientPair const, 2> gpai // Reduce by column auto key_it = dh::MakeTransformIterator<bst_target_t>( - thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> bst_target_t { - return i / gpair.Shape(0); - }); + thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) -> bst_target_t { return i / gpair.Shape(0); }); auto grad_it = dh::MakeTransformIterator<GradientPairPrecise>( thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> GradientPairPrecise { @@ -44,20 +42,21 @@ void FitStump(Context const* ctx, linalg::TensorView<GradientPair const, 2> gpai return GradientPairPrecise{gpair(sample, target)}; }); auto d_sum = sum.View(ctx->gpu_id); + CHECK(d_sum.CContiguous()); dh::XGBCachingDeviceAllocator<char> alloc; auto policy = thrust::cuda::par(alloc); thrust::reduce_by_key(policy, key_it, key_it + gpair.Size(), grad_it, - dh::TypedDiscard<bst_target_t>{}, dh::tbegin(d_sum.Values())); + thrust::make_discard_iterator(), dh::tbegin(d_sum.Values())); collective::DeviceCommunicator* communicator = collective::Communicator::GetDevice(ctx->gpu_id); communicator->AllReduceSum(reinterpret_cast<double*>(d_sum.Values().data()), d_sum.Size() * 2); - thrust::for_each_n( - policy, thrust::make_counting_iterator(0ul), n_targets, - [=] XGBOOST_DEVICE(std::size_t i) mutable { - out(i) = static_cast<float>(CalcUnregulatedWeight(d_sum(i).GetGrad(), d_sum(i).GetHess())); - }); + thrust::for_each_n(policy, thrust::make_counting_iterator(0ul), n_targets, + [=] XGBOOST_DEVICE(std::size_t i) mutable { + out(i) = static_cast<float>( + CalcUnregularizedWeight(d_sum(i).GetGrad(),
d_sum(i).GetHess())); + }); } } // namespace cuda_impl } // namespace tree diff --git a/src/tree/fit_stump.h b/src/tree/fit_stump.h index 96af3f6c0de5..1f5cd60b4928 100644 --- a/src/tree/fit_stump.h +++ b/src/tree/fit_stump.h @@ -7,6 +7,12 @@ #ifndef XGBOOST_TREE_FIT_STUMP_H_ #define XGBOOST_TREE_FIT_STUMP_H_ +#if !defined(NOMINMAX) && defined(_WIN32) +#define NOMINMAX +#endif // !defined(NOMINMAX) + +#include <algorithm> // std::max + #include "../common/common.h" // AssertGPUSupport #include "xgboost/base.h" // GradientPair #include "xgboost/context.h" // Context @@ -17,7 +23,7 @@ namespace xgboost { namespace tree { template <typename T> -XGBOOST_DEVICE inline double CalcUnregulatedWeight(T sum_grad, T sum_hess) { +XGBOOST_DEVICE inline double CalcUnregularizedWeight(T sum_grad, T sum_hess) { return -sum_grad / std::max(sum_hess, static_cast<T>(kRtEps)); } From 66e010baa940e8fc61289fd1c4f02b01984a30e4 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 17 Dec 2022 18:49:10 +0800 Subject: [PATCH 123/133] tidy. --- src/learner.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/learner.cc b/src/learner.cc index 445be632a387..74b17daa8af2 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -209,7 +209,6 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy> std::memcpy(data.data() + pos, &major_version, sizeof(major_version)); pos += 1; std::memcpy(data.data() + pos, &minor_version, sizeof(minor_version)); - pos += 1; std::array sync; std::copy(data.cbegin(), data.cend(), sync.begin()); From 1134b1b26111478b51333f5e05ccee5363438db5 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Sun, 18 Dec 2022 03:34:37 +0800 Subject: [PATCH 124/133] Row major. --- src/tree/fit_stump.cc | 8 +++----- tests/cpp/tree/test_fit_stump.cc | 6 +++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/tree/fit_stump.cc b/src/tree/fit_stump.cc index fb442009f7fe..37ed3722f584 100644 --- a/src/tree/fit_stump.cc +++ b/src/tree/fit_stump.cc @@ -58,15 +58,13 @@ void FitStump(Context const* ctx, HostDeviceVector<GradientPair> const& gpair, bst_target_t n_targets, linalg::Vector<float>* out) { out->SetDevice(ctx->gpu_id); out->Reshape(n_targets); - // column-major auto n_samples = gpair.Size() / n_targets; - std::size_t shape[2]{n_samples, n_targets}; - std::size_t strides[2]; - linalg::detail::CalcStride<2, true>(shape, strides); gpair.SetDevice(ctx->gpu_id); linalg::TensorView<GradientPair const, 2> gpair_t{ - ctx->IsCPU() ? gpair.ConstHostSpan() : gpair.ConstDeviceSpan(), shape, strides, ctx->gpu_id}; + ctx->IsCPU() ? gpair.ConstHostSpan() : gpair.ConstDeviceSpan(), + {n_samples, n_targets}, + ctx->gpu_id}; ctx->IsCPU() ?
cpu_impl::FitStump(ctx, gpair_t, out->HostView()) : cuda_impl::FitStump(ctx, gpair_t, out->View(ctx->gpu_id)); } diff --git a/tests/cpp/tree/test_fit_stump.cc b/tests/cpp/tree/test_fit_stump.cc index aaa8bd4e2612..ef608e5757d9 100644 --- a/tests/cpp/tree/test_fit_stump.cc +++ b/tests/cpp/tree/test_fit_stump.cc @@ -15,9 +15,9 @@ void TestFitStump(Context const *ctx) { HostDeviceVector<GradientPair> gpair; auto &h_gpair = gpair.HostVector(); h_gpair.resize(kRows * kTargets); - for (std::size_t t = 0; t < kTargets; ++t) { - for (std::size_t i = 0; i < kRows; ++i) { - h_gpair.at(t * kRows + i) = GradientPair{static_cast<float>(i), 1}; + for (std::size_t i = 0; i < kRows; ++i) { + for (std::size_t t = 0; t < kTargets; ++t) { + h_gpair.at(i * kTargets + t) = GradientPair{static_cast<float>(i), 1}; } } linalg::Vector<float> out; From 7a8e2ed4cc5fea689eaa7bf74cb51c9199616466 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sun, 18 Dec 2022 03:59:01 +0800 Subject: [PATCH 125/133] deterministic. --- python-package/xgboost/testing/updater.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/python-package/xgboost/testing/updater.py b/python-package/xgboost/testing/updater.py index fa141dd7db96..a565a0332d42 100644 --- a/python-package/xgboost/testing/updater.py +++ b/python-package/xgboost/testing/updater.py @@ -25,12 +25,13 @@ def run_reg(X: np.ndarray, y: np.ndarray) -> None: # pylint: disable=invalid-na base_score_1 = reg.get_params()["base_score"] score_1 = reg.evals_result()["validation_0"]["rmse"][0] assert not np.isclose(base_score_0, base_score_1) - assert score_0 < score_1 # must be better + assert score_0 < score_1 # should be better - X, y = make_regression(n_samples=4096) # pylint: disable=unbalanced-tuple-unpacking + # pylint: disable=unbalanced-tuple-unpacking + X, y = make_regression(n_samples=4096, random_state=17) run_reg(X, y) # pylint: disable=unbalanced-tuple-unpacking - X, y = make_regression(n_samples=4096, n_targets=3) + X, y = make_regression(n_samples=4096, n_targets=3, random_state=17) run_reg(X, y) def run_clf(X: np.ndarray, y: np.ndarray) -> None: # pylint: disable=invalid-name @@ -46,10 +47,12 @@ def run_clf(X: np.ndarray, y: np.ndarray) -> None: # pylint: disable=invalid-na base_score_1 = clf.get_params()["base_score"] score_1 = clf.evals_result()["validation_0"]["logloss"][0] assert not np.isclose(base_score_0, base_score_1) - assert score_0 < score_1 # must be better + assert score_0 < score_1 # should be better # pylint: disable=unbalanced-tuple-unpacking - X, y = make_classification(n_samples=4096) + X, y = make_classification(n_samples=4096, random_state=17) run_clf(X, y) - X, y = make_multilabel_classification(n_samples=4096, n_labels=3, n_classes=5) + X, y = make_multilabel_classification( + n_samples=4096, n_labels=3, n_classes=5, random_state=17 + ) run_clf(X, y) From dcdef53ad7d0fed3008c3dbcb812dec42771ccfd Mon Sep 17 00:00:00 2001 From: jiamingy Date: Thu, 5 Jan 2023 18:27:36 +0800 Subject: [PATCH 126/133] cleanup.
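Drop the unused parameter names from the floating-point `DoBitwiseAllReduce` overload to silence compiler warnings, and relax the distributed dask test timeout from 30 to 60 seconds.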
--- src/collective/rabit_communicator.h | 2 +- tests/test_distributed/test_with_dask/test_with_dask.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/collective/rabit_communicator.h b/src/collective/rabit_communicator.h index 712b76eff4da..19004afb7ea9 100644 --- a/src/collective/rabit_communicator.h +++ b/src/collective/rabit_communicator.h @@ -119,7 +119,7 @@ class RabitCommunicator : public Communicator { } template <typename T, std::enable_if_t<std::is_floating_point<T>::value> * = nullptr> - void DoBitwiseAllReduce(void *send_receive_buffer, std::size_t count, Operation op) { + void DoBitwiseAllReduce(void *, std::size_t, Operation) { LOG(FATAL) << "Floating point types do not support bitwise operations."; } diff --git a/tests/test_distributed/test_with_dask/test_with_dask.py b/tests/test_distributed/test_with_dask/test_with_dask.py index b5f8e87267da..244c6f1e2799 100644 --- a/tests/test_distributed/test_with_dask/test_with_dask.py +++ b/tests/test_distributed/test_with_dask/test_with_dask.py @@ -32,7 +32,7 @@ import xgboost as xgb from xgboost import testing as tm -pytestmark = [tm.timeout(30), pytest.mark.skipif(**tm.no_dask())] +pytestmark = [tm.timeout(60), pytest.mark.skipif(**tm.no_dask())] import dask import dask.array as da From 504cc8cafeb341bf0711e9ba3a5a71ff71366fae Mon Sep 17 00:00:00 2001 From: jiamingy Date: Thu, 5 Jan 2023 21:48:11 +0800 Subject: [PATCH 127/133] cleanup. --- CITATION | 1 + tests/python/test_training_continuation.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/CITATION b/CITATION index b2acce7c13fe..189062510236 100644 --- a/CITATION +++ b/CITATION @@ -15,3 +15,4 @@ address = {New York, NY, USA}, keywords = {large-scale machine learning}, } + diff --git a/tests/python/test_training_continuation.py b/tests/python/test_training_continuation.py index 5d330fa29a4b..258af760caa1 100644 --- a/tests/python/test_training_continuation.py +++ b/tests/python/test_training_continuation.py @@ -16,7 +16,6 @@ class TestTrainingContinuation: def generate_parameters(self): xgb_params_01_binary = { 'nthread': 1, - "objective": "binary:logistic", } xgb_params_02_binary = { From 7d8464c3a91581f6e6caad8ba849774f433b780c Mon Sep 17 00:00:00 2001 From: jiamingy Date: Thu, 5 Jan 2023 22:27:45 +0800 Subject: [PATCH 128/133] Get base score.
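With automatic estimation the sklearn `base_score` parameter stays `None`; the learned intercept is only visible in the booster configuration, so the tests read it from `save_config()` instead of `get_params()`. A minimal sketch of the idea (the data here is arbitrary):

    import json

    import numpy as np

    import xgboost as xgb

    rng = np.random.default_rng(1994)
    X, y = rng.normal(size=(128, 4)), rng.normal(size=128)
    reg = xgb.XGBRegressor(n_estimators=1).fit(X, y)
    assert reg.get_params()["base_score"] is None  # never set by the user
    config = json.loads(reg.get_booster().save_config())
    # The estimated intercept lives in the model config.
    print(config["learner"]["learner_model_param"]["base_score"])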
--- python-package/xgboost/testing/updater.py | 20 ++++++++++++++++---- tests/python/test_early_stopping.py | 4 +++- tests/python/test_with_sklearn.py | 7 ++----- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/python-package/xgboost/testing/updater.py b/python-package/xgboost/testing/updater.py index a565a0332d42..1b675e51f887 100644 --- a/python-package/xgboost/testing/updater.py +++ b/python-package/xgboost/testing/updater.py @@ -1,9 +1,21 @@ """Tests for updaters.""" +import json + import numpy as np import xgboost as xgb +def get_basescore(model: xgb.XGBModel) -> float: + """Get base score from an XGBoost sklearn estimator.""" + base_score = float( + json.loads(model.get_booster().save_config())["learner"]["learner_model_param"][ + "base_score" + ] + ) + return base_score + + def check_init_estimation(tree_method: str) -> None: """Test for init estimation.""" from sklearn.datasets import ( @@ -15,14 +27,14 @@ def check_init_estimation(tree_method: str) -> None: def run_reg(X: np.ndarray, y: np.ndarray) -> None: # pylint: disable=invalid-name reg = xgb.XGBRegressor(tree_method=tree_method, max_depth=1, n_estimators=1) reg.fit(X, y, eval_set=[(X, y)]) - base_score_0 = reg.get_params()["base_score"] + base_score_0 = get_basescore(reg) score_0 = reg.evals_result()["validation_0"]["rmse"][0] reg = xgb.XGBRegressor( tree_method=tree_method, max_depth=1, n_estimators=1, boost_from_average=0 ) reg.fit(X, y, eval_set=[(X, y)]) - base_score_1 = reg.get_params()["base_score"] + base_score_1 = get_basescore(reg) score_1 = reg.evals_result()["validation_0"]["rmse"][0] assert not np.isclose(base_score_0, base_score_1) assert score_0 < score_1 # should be better @@ -37,14 +49,14 @@ def run_reg(X: np.ndarray, y: np.ndarray) -> None: # pylint: disable=invalid-na def run_clf(X: np.ndarray, y: np.ndarray) -> None: # pylint: disable=invalid-name clf = xgb.XGBClassifier(tree_method=tree_method, max_depth=1, n_estimators=1) clf.fit(X, y, eval_set=[(X, y)]) - base_score_0 = clf.get_params()["base_score"] + base_score_0 = get_basescore(clf) score_0 = clf.evals_result()["validation_0"]["logloss"][0] clf = xgb.XGBClassifier( tree_method=tree_method, max_depth=1, n_estimators=1, boost_from_average=0 ) clf.fit(X, y, eval_set=[(X, y)]) - base_score_1 = clf.get_params()["base_score"] + base_score_1 = get_basescore(clf) score_1 = clf.evals_result()["validation_0"]["logloss"][0] assert not np.isclose(base_score_0, base_score_1) assert score_0 < score_1 # should be better diff --git a/tests/python/test_early_stopping.py b/tests/python/test_early_stopping.py index 6bc52fdda83b..000d5e347edc 100644 --- a/tests/python/test_early_stopping.py +++ b/tests/python/test_early_stopping.py @@ -1,5 +1,6 @@ import numpy as np import pytest +from xgboost.testing.updater import get_basescore import xgboost as xgb from xgboost import testing as tm @@ -33,7 +34,8 @@ def test_early_stopping_nonparallel(self): early_stopping_rounds=10 ) clf3.fit(X_train, y_train, eval_set=[(X_test, y_test)]) - assert 0.53 > clf3.get_params()["base_score"] > 0.5 + base_score = get_basescore(clf3) + assert 0.53 > base_score > 0.5 clf3 = xgb.XGBClassifier( learning_rate=0.1, diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 0817b44e846b..d0009090f53f 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -9,6 +9,7 @@ import pytest from sklearn.utils.estimator_checks import parametrize_with_checks from xgboost.testing.shared import get_feature_weights, 
validate_data_initialization +from xgboost.testing.updater import get_basescore import xgboost as xgb from xgboost import testing as tm @@ -763,11 +764,7 @@ def test_sklearn_get_default_params(): cls = xgb.XGBClassifier() assert cls.get_params()["base_score"] is None cls.fit(X[:4, ...], y[:4, ...]) - base_score = float( - json.loads(cls.get_booster().save_config())["learner"]["learner_model_param"][ - "base_score" - ] - ) + base_score = get_basescore(cls) np.testing.assert_equal(base_score, 0.5) From a940363388d4cff1197c70af98c5e3ae2050d537 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 5 Jan 2023 22:57:21 +0800 Subject: [PATCH 129/133] get base score. --- python-package/xgboost/testing/dask.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/python-package/xgboost/testing/dask.py b/python-package/xgboost/testing/dask.py index fc57b2199feb..e1f7142943fa 100644 --- a/python-package/xgboost/testing/dask.py +++ b/python-package/xgboost/testing/dask.py @@ -2,6 +2,7 @@ import numpy as np from dask import array as da from distributed import Client +from xgboost.testing.updater import get_basescore import xgboost as xgb @@ -13,7 +14,7 @@ def check_init_estimation_clf(tree_method: str, client: Client) -> None: X, y = make_classification(n_samples=4096 * 2, n_features=32, random_state=1994) clf = xgb.XGBClassifier(n_estimators=1, max_depth=1, tree_method=tree_method) clf.fit(X, y) - base_score = clf.get_params()["base_score"] + base_score = get_basescore(clf) dx = da.from_array(X).rechunk(chunks=(32, None)) dy = da.from_array(y).rechunk(chunks=(32,)) @@ -22,9 +23,8 @@ def check_init_estimation_clf(tree_method: str, client: Client) -> None: ) dclf.client = client dclf.fit(dx, dy) - dbase_score = dclf.get_params()["base_score"] + dbase_score = get_basescore(dclf) np.testing.assert_allclose(base_score, dbase_score) - print(base_score, dbase_score) def check_init_estimation_reg(tree_method: str, client: Client) -> None: @@ -35,7 +35,7 @@ def check_init_estimation_reg(tree_method: str, client: Client) -> None: X, y = make_regression(n_samples=4096 * 2, n_features=32, random_state=1994) reg = xgb.XGBRegressor(n_estimators=1, max_depth=1, tree_method=tree_method) reg.fit(X, y) - base_score = reg.get_params()["base_score"] + base_score = get_basescore(reg) dx = da.from_array(X).rechunk(chunks=(32, None)) dy = da.from_array(y).rechunk(chunks=(32,)) @@ -44,9 +44,8 @@ def check_init_estimation_reg(tree_method: str, client: Client) -> None: ) dreg.client = client dreg.fit(dx, dy) - dbase_score = dreg.get_params()["base_score"] + dbase_score = get_basescore(dreg) np.testing.assert_allclose(base_score, dbase_score) - print(base_score, dbase_score) def check_init_estimation(tree_method: str, client: Client) -> None: From 4990d104c7d26c2790c7f11ba89ef55f89cfcb70 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Thu, 5 Jan 2023 23:37:20 +0800 Subject: [PATCH 130/133] debug CI. 
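Temporarily print the shap and numpy versions at import time so the CI environment can be inspected; the print is removed again once the failure is understood.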
--- tests/python/test_with_shap.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/python/test_with_shap.py b/tests/python/test_with_shap.py index 4a38193c1780..d7749140447c 100644 --- a/tests/python/test_with_shap.py +++ b/tests/python/test_with_shap.py @@ -9,18 +9,25 @@ shap = None pass + pytestmark = pytest.mark.skipif(shap is None, reason="Requires shap package") +print("\nSHAP:", shap.__version__, "NUMPY:", np.__version__) # Check integration is not broken from xgboost side # Changes in binary format may cause problems def test_with_shap(): from sklearn.datasets import fetch_california_housing + X, y = fetch_california_housing(return_X_y=True) dtrain = xgb.DMatrix(X, label=y) model = xgb.train({"learning_rate": 0.01}, dtrain, 10) explainer = shap.TreeExplainer(model) shap_values = explainer.shap_values(X) margin = model.predict(dtrain, output_margin=True) - assert np.allclose(np.sum(shap_values, axis=len(shap_values.shape) - 1), - margin - explainer.expected_value, 1e-3, 1e-3) + assert np.allclose( + np.sum(shap_values, axis=len(shap_values.shape) - 1), + margin - explainer.expected_value, + 1e-3, + 1e-3, + ) From 475c7f2ab5bf88faf65c182f1f23472058a6470b Mon Sep 17 00:00:00 2001 From: jiamingy Date: Fri, 6 Jan 2023 01:00:15 +0800 Subject: [PATCH 131/133] Upgrade shap. --- tests/ci_build/conda_env/linux_cpu_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci_build/conda_env/linux_cpu_test.yml b/tests/ci_build/conda_env/linux_cpu_test.yml index 0c426eb356da..222c51f187ad 100644 --- a/tests/ci_build/conda_env/linux_cpu_test.yml +++ b/tests/ci_build/conda_env/linux_cpu_test.yml @@ -37,7 +37,7 @@ dependencies: - pyarrow - protobuf - cloudpickle -- shap +- shap>=0.41 - modin - pip: - datatable From 7a851a27c59cce028590f329b0b7d808988787d1 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Fri, 6 Jan 2023 01:33:41 +0800 Subject: [PATCH 132/133] revert. --- tests/python/test_with_shap.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/python/test_with_shap.py b/tests/python/test_with_shap.py index d7749140447c..eab98f4878ce 100644 --- a/tests/python/test_with_shap.py +++ b/tests/python/test_with_shap.py @@ -11,7 +11,6 @@ pytestmark = pytest.mark.skipif(shap is None, reason="Requires shap package") -print("\nSHAP:", shap.__version__, "NUMPY:", np.__version__) # Check integration is not broken from xgboost side From 9e62a46b9e9405eb858de735cb213465745fedd4 Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 6 Jan 2023 04:08:08 +0800 Subject: [PATCH 133/133] Fix empty test. 
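With `base_score` now estimated from the training labels, an empty training matrix yields zero gradient statistics, and the fitted stump weight is -sum_grad / max(sum_hess, kRtEps) = 0. Predictions from such a model are therefore 0.0 instead of the old 0.5 constant, so the expected value in the empty-DMatrix test changes accordingly.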
--- tests/python-gpu/test_gpu_updaters.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index 08aad6402144..571c4a17165e 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -173,24 +173,25 @@ def test_empty_dmatrix_prediction(self): kCols = 100 X = np.empty((kRows, kCols)) - y = np.empty((kRows)) + y = np.empty((kRows,)) dtrain = xgb.DMatrix(X, y) - bst = xgb.train({'verbosity': 2, - 'tree_method': 'gpu_hist', - 'gpu_id': 0}, - dtrain, - verbose_eval=True, - num_boost_round=6, - evals=[(dtrain, 'Train')]) + bst = xgb.train( + {"verbosity": 2, "tree_method": "gpu_hist", "gpu_id": 0}, + dtrain, + verbose_eval=True, + num_boost_round=6, + evals=[(dtrain, 'Train')] + ) kRows = 100 X = np.random.randn(kRows, kCols) dtest = xgb.DMatrix(X) predictions = bst.predict(dtest) - np.testing.assert_allclose(predictions, 0.5, 1e-6) + # non-distributed, 0.0 is returned due to base_score estimation with 0 gradient. + np.testing.assert_allclose(predictions, 0.0, 1e-6) @pytest.mark.mgpu @given(tm.dataset_strategy, strategies.integers(0, 10))