Skip to content

Commit

Permalink
Merge pull request BVLC#2 from curalate/frcnn
Browse files Browse the repository at this point in the history
FRCNN support for Caffe
  • Loading branch information
mbassov authored Jun 27, 2017
2 parents eeebdab + bee7d8c commit 59e4157
Show file tree
Hide file tree
Showing 41 changed files with 2,634 additions and 61 deletions.
4 changes: 4 additions & 0 deletions include/caffe/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ using std::string;
using std::stringstream;
using std::vector;

// MSVC before Visual Studio 2015 (_MSC_VER 1900) does not provide a C99
// snprintf, so map it to the closest CRT equivalent. Guarding on the
// version matters: MSVC 2015+ *does* ship a conforming snprintf, and
// redefining it there is a compile error.
// NOTE: unlike C99 snprintf, _snprintf does not NUL-terminate the buffer
// when the output is truncated -- callers must not rely on termination
// for maximum-length output on old MSVC.
#if defined(_MSC_VER) && _MSC_VER < 1900
#define snprintf _snprintf
#endif

// A global initialization function that you should call in your main function.
// Currently it initializes google flags and google logging.
void GlobalInit(int* pargc, char*** pargv);
Expand Down
11 changes: 11 additions & 0 deletions include/caffe/layer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,17 @@ class Layer {
param_propagate_down_[param_id] = value;
}

  /// @brief Returns the Phase this layer is currently configured to run in.
  inline Phase phase() { return phase_; }

  /**
   * @brief Set the layer's phase.
   *
   * Enables training and testing with one network, switching the phase
   * in place instead of instantiating two networks, for saving memory.
   */
  virtual inline void set_phase(Phase phase) {
    phase_ = phase;
  }



protected:
/** The protobuf that stores the layer parameters */
Expand Down
57 changes: 57 additions & 0 deletions include/caffe/layers/box_annotator_ohem_layer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#ifndef CAFFE_BOX_ANNOTATOR_OHEM_LAYER_HPP_
#define CAFFE_BOX_ANNOTATOR_OHEM_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/loss_layer.hpp"

namespace caffe {

/**
 * @brief BoxAnnotatorOHEMLayer: Annotate box labels for Online Hard
 *        Example Mining (OHEM) training.
 *
 * Takes exactly four bottom blobs and produces exactly two top blobs
 * (see ExactNumBottomBlobs / ExactNumTopBlobs below).
 *
 * R-FCN
 * Written by Yi Li
 */
template <typename Dtype>
class BoxAnnotatorOHEMLayer : public Layer<Dtype> {
 public:
  explicit BoxAnnotatorOHEMLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "BoxAnnotatorOHEM"; }

  virtual inline int ExactNumBottomBlobs() const { return 4; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  // Cached input geometry.
  int num_;
  int height_;
  int width_;
  int spatial_dim_;    // NOTE(review): presumably height_ * width_ -- confirm in Reshape
  int bbox_channels_;  // channel count of the bbox blobs -- TODO confirm in impl

  int roi_per_img_;    // number of ROIs kept per image for OHEM -- TODO confirm against layer param
  int ignore_label_;   // label value excluded from mining -- TODO confirm against layer param
};

} // namespace caffe

#endif // CAFFE_BOX_ANNOTATOR_OHEM_LAYER_HPP_
1 change: 1 addition & 0 deletions include/caffe/layers/dropout_layer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class DropoutLayer : public NeuronLayer<Dtype> {
  /// the scale for undropped inputs at train time @f$ 1 / (1 - p) @f$
  Dtype scale_;
  /// threshold in the unsigned-integer random domain used to decide drops
  /// NOTE(review): presumably compared against the sampled random mask -- confirm in impl
  unsigned int uint_thres_;
  /// NOTE(review): presumably selects whether scaling by scale_ is applied
  /// at train time (true) rather than at test time -- confirm against DropoutParameter
  bool scale_train_;
};

} // namespace caffe
Expand Down
76 changes: 76 additions & 0 deletions include/caffe/layers/psroi_pooling_layer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#ifndef CAFFE_PSROI_POOLING_LAYER_HPP_
#define CAFFE_PSROI_POOLING_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
* @brief Perform position-sensitive max pooling on regions of interest specified by input, takes
* as input N position-sensitive score maps and a list of R regions of interest.
* ROIPoolingLayer takes 2 inputs and produces 1 output. bottom[0] is
* [N x (C x K^2) x H x W] position-sensitive score maps on which pooling is performed. bottom[1] is
* [R x 5] containing a list R ROI tuples with batch index and coordinates of
* regions of interest. Each row in bottom[1] is a ROI tuple in format
* [batch_index x1 y1 x2 y2], where batch_index corresponds to the index of
* instance in the first input and x1 y1 x2 y2 are 0-indexed coordinates
* of ROI rectangle (including its boundaries). The output top[0] is [R x C x K x K] score maps pooled
* within the ROI tuples.
* @param param provides PSROIPoolingParameter psroi_pooling_param,
* with PSROIPoolingLayer options:
* - output_dim. The pooled output channel number.
* - group_size. The number of groups to encode position-sensitive score maps
* - spatial_scale. Multiplicative spatial scale factor to translate ROI
* coordinates from their input scale to the scale used when pooling.
* R-FCN
* Written by Yi Li
*/

template <typename Dtype>
class PSROIPoolingLayer : public Layer<Dtype> {
 public:
  explicit PSROIPoolingLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  /// @brief Reads psroi_pooling_param (output_dim, group_size, spatial_scale).
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "PSROIPooling"; }

  /// Exactly two bottoms (score maps, ROIs) and one top.
  virtual inline int MinBottomBlobs() const { return 2; }
  virtual inline int MaxBottomBlobs() const { return 2; }
  virtual inline int MinTopBlobs() const { return 1; }
  virtual inline int MaxTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  /// Multiplicative factor translating ROI coords to the pooled scale.
  Dtype spatial_scale_;
  /// The pooled output channel number (C in the class docs).
  int output_dim_;
  /// Number of groups encoding position-sensitive score maps (K).
  int group_size_;

  // bottom[0] geometry; channels_ is expected to be C * K^2 per the class docs.
  int channels_;
  int height_;
  int width_;

  int pooled_height_;
  int pooled_width_;
  /// NOTE(review): presumably records, per pooled output element, the input
  /// channel it was pooled from, for use in Backward -- confirm in impl.
  Blob<int> mapping_channel_;
};

} // namespace caffe

#endif // CAFFE_PSROI_POOLING_LAYER_HPP_
84 changes: 84 additions & 0 deletions include/caffe/layers/roi_pooling_layer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#ifndef CAFFE_ROI_POOLING_LAYER_HPP_
#define CAFFE_ROI_POOLING_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
* @brief Perform max pooling on regions of interest specified by input, takes
* as input N feature maps and a list of R regions of interest.
*
* ROIPoolingLayer takes 2 inputs and produces 1 output. bottom[0] is
* [N x C x H x W] feature maps on which pooling is performed. bottom[1] is
* [R x 5] containing a list R ROI tuples with batch index and coordinates of
* regions of interest. Each row in bottom[1] is a ROI tuple in format
* [batch_index x1 y1 x2 y2], where batch_index corresponds to the index of
* instance in the first input and x1 y1 x2 y2 are 0-indexed coordinates
* of ROI rectangle (including its boundaries).
*
* For each of the R ROIs, max-pooling is performed over pooled_h x pooled_w
* output bins (specified in roi_pooling_param). The pooling bin sizes are
* adaptively set such that they tile ROI rectangle in the indexed feature
* map. The pooling region of vertical bin ph in [0, pooled_h) is computed as
*
* start_ph (included) = y1 + floor(ph * (y2 - y1 + 1) / pooled_h)
* end_ph (excluded) = y1 + ceil((ph + 1) * (y2 - y1 + 1) / pooled_h)
*
* and similar horizontal bins.
*
* @param param provides ROIPoolingParameter roi_pooling_param,
* with ROIPoolingLayer options:
* - pooled_h. The pooled output height.
* - pooled_w. The pooled output width
* - spatial_scale. Multiplicative spatial scale factor to translate ROI
* coordinates from their input scale to the scale used when pooling.
*
* Fast R-CNN
* Written by Ross Girshick
*/

template <typename Dtype>
class ROIPoolingLayer : public Layer<Dtype> {
 public:
  explicit ROIPoolingLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  /// @brief Reads roi_pooling_param (pooled_h, pooled_w, spatial_scale).
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "ROIPooling"; }

  /// Exactly two bottoms (feature maps, ROIs) and one top.
  virtual inline int MinBottomBlobs() const { return 2; }
  virtual inline int MaxBottomBlobs() const { return 2; }
  virtual inline int MinTopBlobs() const { return 1; }
  virtual inline int MaxTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  // bottom[0] feature-map geometry.
  int channels_;
  int height_;
  int width_;
  /// The pooled output height (pooled_h) and width (pooled_w).
  int pooled_height_;
  int pooled_width_;
  /// Multiplicative factor translating ROI coords from input scale to pooling scale.
  Dtype spatial_scale_;
  /// NOTE(review): presumably the argmax input index per pooled output
  /// element, used to route gradients in Backward -- confirm in impl.
  Blob<int> max_idx_;
};

} // namespace caffe

#endif // CAFFE_ROI_POOLING_LAYER_HPP_
65 changes: 65 additions & 0 deletions include/caffe/layers/smooth_l1_loss_layer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#ifndef CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_
#define CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/loss_layer.hpp"

namespace caffe {

/**
* @brief SmoothL1LossLayer
*
* Fast R-CNN
* Written by Ross Girshick
*/
template <typename Dtype>
class SmoothL1LossLayer : public LossLayer<Dtype> {
 public:
  explicit SmoothL1LossLayer(const LayerParameter& param)
      : LossLayer<Dtype>(param), diff_() {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "SmoothL1Loss"; }

  /// -1 disables the exact-count check; the Min/Max pair below applies
  /// instead: predictions and targets are required, two optional extra
  /// bottoms (up to 4 total) are accepted.
  virtual inline int ExactNumBottomBlobs() const { return -1; }
  virtual inline int MinBottomBlobs() const { return 2; }
  virtual inline int MaxBottomBlobs() const { return 4; }

  /**
   * Unlike most loss layers, in the SmoothL1LossLayer we can backpropagate
   * to both inputs -- override to return true and always allow force_backward.
   */
  virtual inline bool AllowForceBackward(const int bottom_index) const {
    return true;
  }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  /// NOTE(review): presumably the element-wise difference of the two
  /// prediction/target bottoms -- confirm in impl.
  Blob<Dtype> diff_;
  /// Per-element smooth-L1 error terms -- TODO confirm in impl.
  Blob<Dtype> errors_;
  /// NOTE(review): presumably a blob of ones used to reduce errors_ to a
  /// scalar loss (e.g. via a dot product) -- confirm in impl.
  Blob<Dtype> ones_;
  /// Whether the optional extra weight bottoms were supplied -- TODO confirm
  /// how this is set in LayerSetUp.
  bool has_weights_;
  /// NOTE(review): presumably sigma^2, the transition-point parameter of the
  /// smooth L1 -- verify against the layer's parameter message.
  Dtype sigma2_;
};

} // namespace caffe

#endif // CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_
Loading

0 comments on commit 59e4157

Please sign in to comment.