Move prediction cache into Learner.
* Clean-ups

- Remove duplicated cache in Learner and GBM.
- Remove ad-hoc fix of invalid cache.
- Remove `PredictFromCache` in predictors.
- Remove the prediction cache for the linear booster altogether, as it only
  moves the prediction into the training process and doesn't provide any
  actual overall speed gain.
- The cache is now unique to Learner, which means its ownership is no longer
  shared with any other component.

* Changes

- Add a version to the prediction cache.
- Use a weak pointer to check for expired DMatrix objects (sketched below).
- Pass a shared pointer instead of a raw pointer.
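
For illustration only, a minimal sketch of the cache entry these bullets describe (hypothetical names; the real declaration is in include/xgboost/predictor.h below). The version counts the boosted tree layers already reflected in the cached predictions, and the weak pointer detects a freed DMatrix without extending its lifetime:

#include <cstdint>
#include <memory>
#include <vector>

struct DMatrix;  // opaque stand-in; only used through pointers here

// Hypothetical stand-in for the new PredictionCacheEntry (illustration only).
struct CacheEntrySketch {
  std::vector<float> predictions;  // cached prediction values
  std::uint32_t version{0};        // tree layers reflected in `predictions`
  std::weak_ptr<DMatrix> ref;      // expires once the owning DMatrix is freed

  // Record that `v` more layers of trees are now reflected in the cache.
  void Update(std::uint32_t v) { version += v; }

  // The cache is only usable while its DMatrix is still alive.
  bool Expired() const { return ref.expired(); }
};

Because `ref` is a `std::weak_ptr`, the cache never keeps a DMatrix alive on its own; ownership stays with whoever passed in the `shared_ptr`.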
trivialfis committed Feb 11, 2020
1 parent 29eeea7 commit cbd5a3c
Showing 25 changed files with 481 additions and 391 deletions.
28 changes: 16 additions & 12 deletions include/xgboost/gbm.h
@@ -1,5 +1,5 @@
 /*!
- * Copyright by Contributors
+ * Copyright 2014-2020 by Contributors
  * \file gbm.h
  * \brief Interface of gradient booster,
  *  that learns through gradient statistics.
@@ -18,6 +18,7 @@
 #include <utility>
 #include <string>
 #include <functional>
+#include <unordered_map>
 #include <memory>
 
 namespace xgboost {
@@ -28,6 +29,8 @@ class ObjFunction;
 
 struct GenericParameter;
 struct LearnerModelParam;
+struct PredictionCacheEntry;
+class PredictionContainer;
 
 /*!
  * \brief interface of gradient boosting model.
@@ -38,7 +41,7 @@ class GradientBooster : public Model, public Configurable {
 
  public:
   /*! \brief virtual destructor */
-  virtual ~GradientBooster() = default;
+  ~GradientBooster() override = default;
   /*!
    * \brief Set the configuration of gradient boosting.
    * User must call configure once before InitModel and Training.
@@ -71,19 +74,22 @@
    * \param obj The objective function, optional, can be nullptr when use customized version
    * the booster may change content of gpair
    */
-  virtual void DoBoost(DMatrix* p_fmat,
-                       HostDeviceVector<GradientPair>* in_gpair,
-                       ObjFunction* obj = nullptr) = 0;
+  virtual void DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
+                       PredictionCacheEntry *prediction) = 0;
 
   /*!
    * \brief generate predictions for given feature matrix
    * \param dmat feature matrix
    * \param out_preds output vector to hold the predictions
-   * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
-   *    we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
+   * \param training Whether the prediction will be used for training. For the dart
+   *                 booster, drop-out is performed during training.
+   * \param ntree_limit limit the number of trees used in prediction,
+   *                    when it equals 0, this means we do not limit
+   *                    the number of trees; this parameter is only valid
+   *                    for gbtree, but not for gblinear
    */
   virtual void PredictBatch(DMatrix* dmat,
-                            HostDeviceVector<bst_float>* out_preds,
+                            PredictionCacheEntry* out_preds,
+                            bool training,
                             unsigned ntree_limit = 0) = 0;
   /*!
@@ -158,8 +164,7 @@
   static GradientBooster* Create(
       const std::string& name,
       GenericParameter const* generic_param,
-      LearnerModelParam const* learner_model_param,
-      const std::vector<std::shared_ptr<DMatrix> >& cache_mats);
+      LearnerModelParam const* learner_model_param);
 
   static void AssertGPUSupport() {
 #ifndef XGBOOST_USE_CUDA
@@ -174,8 +179,7 @@
 struct GradientBoosterReg
     : public dmlc::FunctionRegEntryBase<
           GradientBoosterReg,
-          std::function<GradientBooster* (const std::vector<std::shared_ptr<DMatrix> > &cached_mats,
-                                          LearnerModelParam const* learner_model_param)> > {
+          std::function<GradientBooster* (LearnerModelParam const* learner_model_param)> > {
 };
 
 /*!
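One way to read the reworked DoBoost above: the booster now receives the cache entry directly, so it can refresh the cached training predictions while it boosts, which is what makes a separate `PredictFromCache` step unnecessary. A hedged sketch of that idea (hypothetical types and names, not the actual gbtree code):

#include <cstddef>
#include <cstdint>
#include <vector>

struct PredictionEntry {            // stand-in for PredictionCacheEntry
  std::vector<float> predictions;
  std::uint32_t version{0};
};

// Hypothetical booster step: apply one new layer of trees and refresh the
// cached training predictions in the same pass, so no separate
// "predict from cache" call is needed afterwards.
void DoBoostSketch(const std::vector<float>& new_layer_output,
                   PredictionEntry* entry) {
  if (entry->predictions.empty()) {
    entry->predictions.assign(new_layer_output.size(), 0.0f);
  }
  for (std::size_t i = 0; i < new_layer_output.size(); ++i) {
    entry->predictions[i] += new_layer_output[i];  // accumulate the new layer
  }
  entry->version += 1;  // one more layer is now reflected in the cache
}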
108 changes: 71 additions & 37 deletions include/xgboost/predictor.h
@@ -1,5 +1,5 @@
 /*!
- * Copyright by Contributors
+ * Copyright 2017-2020 by Contributors
  * \file predictor.h
  * \brief Interface of predictor,
  *  performs predictions for a gradient booster.
@@ -32,47 +32,83 @@ namespace xgboost {
  * \brief Contains pointer to input matrix and associated cached predictions.
  */
 struct PredictionCacheEntry {
-  std::shared_ptr<DMatrix> data;
+  // A storage for caching prediction values
   HostDeviceVector<bst_float> predictions;
+  // The version of the current cache, corresponding to the number of layers of trees
+  uint32_t version;
+  // A weak pointer for checking whether the DMatrix object has expired.
+  std::weak_ptr<DMatrix> ref;
+
+  PredictionCacheEntry() : version { 0 } {}
+  /* \brief Update the cache entry by a number of versions.
+   *
+   * \param v Added versions.
+   */
+  void Update(uint32_t v) {
+    version += v;
+  }
 };
 
+/* \brief A container for managed prediction caches.
+ */
+class PredictionContainer {
+  std::unordered_map<DMatrix *, PredictionCacheEntry> container_;
+  void ClearExpiredEntries();
+
+ public:
+  PredictionContainer() = default;
+  /* \brief Add a new DMatrix to the cache.  At the same time this function clears out
+   *        all expired caches by checking the `std::weak_ptr`.  Caching an existing
+   *        DMatrix won't renew it.
+   *
+   *   Passing in a `shared_ptr` is critical here.  First, it is necessary for creating
+   * the `weak_ptr` inside the entry.  More importantly, the lifetime of this cache is
+   * tied to the shared pointer.
+   *
+   *   Another way to make a safe cache is to create a proxy for this entry, with
+   * another shared pointer defined inside, and pass this proxy around instead of the
+   * real entry.  But that seems too messy.  In XGBoost, functions like `UpdateOneIter`
+   * have (memory) safe access to the DMatrix as long as it's passed in as a
+   * `shared_ptr`.
+   *
+   * \param m      shared pointer to the DMatrix that needs to be cached.
+   * \param device Which device the cache should be allocated on.  Pass
+   *               GenericParameter::kCpuId for CPU or a positive integer for a GPU id.
+   *
+   * \return The cache entry for the passed in DMatrix, either an existing cache or a
+   *         newly created one.
+   */
+  PredictionCacheEntry& Cache(std::shared_ptr<DMatrix> m, int32_t device);
+  /* \brief Get a prediction cache entry.  The entry must have already been allocated
+   *        by the `Cache` method; otherwise a dmlc::Error is thrown.
+   *
+   * \param m pointer to the DMatrix.
+   * \return The prediction cache for the passed in DMatrix.
+   */
+  PredictionCacheEntry& Entry(DMatrix* m);
+  /* \brief Get a const reference to the underlying hash map.  Clears expired caches
+   *        before returning.
+   */
+  decltype(container_) const& Container();
+};
+
 /**
  * \class Predictor
 *
- * \brief Performs prediction on individual training instances or batches of
- * instances for GBTree. The predictor also manages a prediction cache
- * associated with input matrices. If possible, it will use previously
- * calculated predictions instead of calculating new predictions.
- * Prediction functions all take a GBTreeModel and a DMatrix as input and
- * output a vector of predictions. The predictor does not modify any state of
- * the model itself.
+ * \brief Performs prediction on individual training instances or batches of instances
+ *        for GBTree.  Prediction functions all take a GBTreeModel and a DMatrix as
+ *        input and output a vector of predictions.  The predictor does not modify any
+ *        state of the model itself.
  */
 
 class Predictor {
  protected:
   /*
    * \brief Runtime parameters.
    */
   GenericParameter const* generic_param_;
-  /**
-   * \brief Map of matrices and associated cached predictions to facilitate
-   * storing and looking up predictions.
-   */
-  std::shared_ptr<std::unordered_map<DMatrix*, PredictionCacheEntry>> cache_;
-
-  std::unordered_map<DMatrix*, PredictionCacheEntry>::iterator FindCache(DMatrix const* dmat) {
-    auto cache_emtry = std::find_if(
-        cache_->begin(), cache_->end(),
-        [dmat](std::pair<DMatrix *, PredictionCacheEntry const &> const &kv) {
-          return kv.second.data.get() == dmat;
-        });
-    return cache_emtry;
-  }
 
  public:
-  Predictor(GenericParameter const* generic_param,
-            std::shared_ptr<std::unordered_map<DMatrix*, PredictionCacheEntry>> cache) :
-      generic_param_{generic_param}, cache_{cache} {}
+  explicit Predictor(GenericParameter const* generic_param) :
+      generic_param_{generic_param} {}
   virtual ~Predictor() = default;
 
   /**
@@ -91,12 +127,11 @@
    * \param model The model to predict from.
    * \param tree_begin The tree begin index.
    * \param ntree_limit (Optional) The ntree limit. 0 means do not
-   * limit trees.
+   *                    limit trees.
    */
-
-  virtual void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
+  virtual void PredictBatch(DMatrix* dmat, PredictionCacheEntry* out_preds,
                             const gbm::GBTreeModel& model, int tree_begin,
-                            unsigned ntree_limit = 0) = 0;
+                            uint32_t const ntree_limit = 0) = 0;
 
   /**
    * \fn virtual void Predictor::UpdatePredictionCache( const gbm::GBTreeModel
@@ -116,7 +151,9 @@
   virtual void UpdatePredictionCache(
       const gbm::GBTreeModel& model,
       std::vector<std::unique_ptr<TreeUpdater>>* updaters,
-      int num_new_trees) = 0;
+      int num_new_trees,
+      DMatrix* m,
+      PredictionCacheEntry* predts) = 0;
 
   /**
    * \fn virtual void Predictor::PredictInstance( const SparsePage::Inst&
@@ -200,18 +237,15 @@
    * \param cache Pointer to prediction cache.
    */
   static Predictor* Create(
-      std::string const& name, GenericParameter const* generic_param,
-      std::shared_ptr<std::unordered_map<DMatrix*, PredictionCacheEntry>> cache);
+      std::string const& name, GenericParameter const* generic_param);
 };
 
 /*!
  * \brief Registry entry for predictor.
  */
 struct PredictorReg
     : public dmlc::FunctionRegEntryBase<
-        PredictorReg, std::function<Predictor*(
-                          GenericParameter const*,
-                          std::shared_ptr<std::unordered_map<DMatrix*, PredictionCacheEntry>>)>> {};
+        PredictorReg, std::function<Predictor*(GenericParameter const*)>> {};
 
 #define XGBOOST_REGISTER_PREDICTOR(UniqueId, Name)      \
   static DMLC_ATTRIBUTE_UNUSED ::xgboost::PredictorReg& \
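To make the Cache/Entry contract above concrete, a small self-contained sketch under stated assumptions (stub types; the real implementation lives in the predictor sources). `Cache` clears expired entries and inserts-or-returns, while `Entry` only looks up and fails loudly if the matrix was never cached:

#include <cstdint>
#include <iterator>
#include <memory>
#include <unordered_map>

struct DMatrix {};                // stub
struct PredictionCacheEntry {     // stub mirroring the declared fields
  std::uint32_t version{0};
  std::weak_ptr<DMatrix> ref;
};

class PredictionContainer {       // sketch of the declared behaviour
  std::unordered_map<DMatrix*, PredictionCacheEntry> container_;

 public:
  PredictionCacheEntry& Cache(std::shared_ptr<DMatrix> m, std::int32_t /*device*/) {
    // Drop entries whose DMatrix has expired.
    for (auto it = container_.begin(); it != container_.end();) {
      it = it->second.ref.expired() ? container_.erase(it) : std::next(it);
    }
    auto& entry = container_[m.get()];  // inserts a fresh entry or returns the old one
    entry.ref = m;
    return entry;
  }
  PredictionCacheEntry& Entry(DMatrix* m) {
    return container_.at(m);  // the real method raises dmlc::Error; here std::out_of_range
  }
};

int main() {
  PredictionContainer cache;
  auto train = std::make_shared<DMatrix>();
  cache.Cache(train, -1).version = 1;  // cache the matrix, mark one boosted layer
  return cache.Entry(train.get()).version == 1 ? 0 : 1;
}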
2 changes: 1 addition & 1 deletion include/xgboost/tree_model.h
@@ -158,7 +158,7 @@ class RegTree : public Model {
   }
   /*! \brief whether this node is deleted */
   XGBOOST_DEVICE bool IsDeleted() const {
-    return sindex_ == std::numeric_limits<unsigned>::max();
+    return sindex_ == std::numeric_limits<uint32_t>::max();
   }
   /*! \brief whether current node is root */
   XGBOOST_DEVICE bool IsRoot() const { return parent_ == kInvalidNodeId; }
13 changes: 6 additions & 7 deletions src/c_api/c_api.cc
@@ -13,6 +13,7 @@
 
 
 #include "xgboost/data.h"
+#include "xgboost/host_device_vector.h"
 #include "xgboost/learner.h"
 #include "xgboost/c_api.h"
 #include "xgboost/logging.h"
@@ -146,7 +147,7 @@
   /*! \brief result holder for returning string pointers */
   std::vector<const char *> ret_vec_charp;
   /*! \brief returning float vector. */
-  std::vector<bst_float> ret_vec_float;
+  HostDeviceVector<bst_float> ret_vec_float;
   /*! \brief temp variable of gradient pairs. */
   std::vector<GradientPair> tmp_gpair;
 };
@@ -553,24 +554,22 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
                              int32_t training,
                              xgboost::bst_ulong *len,
                              const bst_float **out_result) {
-  std::vector<bst_float>& preds =
+  HostDeviceVector<bst_float>& preds =
       XGBAPIThreadLocalStore::Get()->ret_vec_float;
   API_BEGIN();
   CHECK_HANDLE();
   auto *bst = static_cast<Learner*>(handle);
-  HostDeviceVector<bst_float> tmp_preds;
   bst->Predict(
       *static_cast<std::shared_ptr<DMatrix>*>(dmat),
       (option_mask & 1) != 0,
-      &tmp_preds, ntree_limit,
+      &preds, ntree_limit,
       static_cast<bool>(training),
       (option_mask & 2) != 0,
       (option_mask & 4) != 0,
       (option_mask & 8) != 0,
       (option_mask & 16) != 0);
-  preds = tmp_preds.HostVector();
-  *out_result = dmlc::BeginPtr(preds);
-  *len = static_cast<xgboost::bst_ulong>(preds.size());
+  *out_result = dmlc::BeginPtr(preds.HostVector());
+  *len = static_cast<xgboost::bst_ulong>(preds.Size());
   API_END();
 }
 
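The XGBoosterPredict change above removes a per-call copy: predictions used to land in a temporary HostDeviceVector and then be copied into the thread-local std::vector; now the thread-local buffer is itself a HostDeviceVector that Predict writes into directly. A generic sketch of the pattern (illustrative C++, not XGBoost code):

#include <vector>

std::vector<float> thread_local_buf;  // stand-in for the thread-local return buffer

void PredictInto(std::vector<float>* out) { out->assign(4, 0.5f); }  // fake producer

// Before: predict into a temporary, then copy into the return buffer.
const float* ApiBefore() {
  std::vector<float> tmp;
  PredictInto(&tmp);
  thread_local_buf = tmp;           // extra O(n) copy on every call
  return thread_local_buf.data();
}

// After: hand the thread-local buffer to the producer directly.
const float* ApiAfter() {
  PredictInto(&thread_local_buf);   // no copy; the buffer is reused across calls
  return thread_local_buf.data();
}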
[Diffs for the remaining 21 of the 25 changed files are not shown.]
