Commit 42e6c85

Merge pull request BVLC#99 from zer0n/master

Add Faster RCNN support

zer0n authored Jul 15, 2016
2 parents ddf2ae4 + 86fbcab

Showing 14 changed files with 1,005 additions and 12 deletions.
2 changes: 2 additions & 0 deletions include/caffe/layer.hpp
@@ -316,6 +316,8 @@ class Layer {
param_propagate_down_[param_id] = value;
}

inline Phase phase() { return phase_; }


protected:
/** The protobuf that stores the layer parameters */
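
The new public phase() accessor exposes the previously protected phase_
member, so code holding a Layer pointer can now ask which phase the layer
was configured for. A minimal sketch of a hypothetical caller (the helper
name is illustrative, not part of this commit):

    #include "caffe/layer.hpp"

    // Hypothetical helper: report whether a layer was configured for the
    // TRAIN phase, using the accessor added above instead of the protected
    // phase_ member.
    template <typename Dtype>
    bool IsTrainPhase(caffe::Layer<Dtype>* layer) {
      return layer->phase() == caffe::TRAIN;
    }
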
1 change: 1 addition & 0 deletions include/caffe/layers/dropout_layer.hpp
@@ -73,6 +73,7 @@ class DropoutLayer : public NeuronLayer<Dtype> {
/// the scale for undropped inputs at train time @f$ 1 / (1 - p) @f$
Dtype scale_;
unsigned int uint_thres_;
bool scale_train_;
};

} // namespace caffe
84 changes: 84 additions & 0 deletions include/caffe/layers/roi_pooling_layer.hpp
@@ -0,0 +1,84 @@
#ifndef CAFFE_ROI_POOLING_LAYER_HPP_
#define CAFFE_ROI_POOLING_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
 * @brief Performs max pooling on regions of interest: takes as input N
 * feature maps and a list of R regions of interest.
*
* ROIPoolingLayer takes 2 inputs and produces 1 output. bottom[0] is
* [N x C x H x W] feature maps on which pooling is performed. bottom[1] is
 * [R x 5], containing a list of R ROI tuples with a batch index and the
 * coordinates of a region of interest. Each row in bottom[1] is a ROI tuple
 * in the format [batch_index x1 y1 x2 y2], where batch_index is the index of
 * the instance in the first input and x1 y1 x2 y2 are the 0-indexed
 * coordinates of the ROI rectangle (including its boundaries).
*
* For each of the R ROIs, max-pooling is performed over pooled_h x pooled_w
* output bins (specified in roi_pooling_param). The pooling bin sizes are
 * adaptively set such that they tile the ROI rectangle in the indexed feature
* map. The pooling region of vertical bin ph in [0, pooled_h) is computed as
*
* start_ph (included) = y1 + floor(ph * (y2 - y1 + 1) / pooled_h)
* end_ph (excluded) = y1 + ceil((ph + 1) * (y2 - y1 + 1) / pooled_h)
*
 * and similarly for the horizontal bins.
*
* @param param provides ROIPoolingParameter roi_pooling_param,
* with ROIPoolingLayer options:
* - pooled_h. The pooled output height.
 *   - pooled_w. The pooled output width.
* - spatial_scale. Multiplicative spatial scale factor to translate ROI
* coordinates from their input scale to the scale used when pooling.
*
* Fast R-CNN
* Written by Ross Girshick
*/

template <typename Dtype>
class ROIPoolingLayer : public Layer<Dtype> {
public:
explicit ROIPoolingLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "ROIPooling"; }

virtual inline int MinBottomBlobs() const { return 2; }
virtual inline int MaxBottomBlobs() const { return 2; }
virtual inline int MinTopBlobs() const { return 1; }
virtual inline int MaxTopBlobs() const { return 1; }

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

int channels_;
int height_;
int width_;
int pooled_height_;
int pooled_width_;
Dtype spatial_scale_;
Blob<int> max_idx_;
};

} // namespace caffe

#endif // CAFFE_ROI_POOLING_LAYER_HPP_
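
The bin boundaries described in the header comment can be computed directly.
A minimal sketch of that arithmetic (an illustrative helper, not the layer's
actual implementation):

    #include <cmath>

    // Sketch of the bin-boundary formulas from the comment above. For
    // vertical bin ph in [0, pooled_h), the pooled rows are
    // [start_ph, end_ph) inside the ROI rows [y1, y2].
    void VerticalBinRange(int y1, int y2, int pooled_h, int ph,
                          int* start_ph, int* end_ph) {
      const float roi_h = static_cast<float>(y2 - y1 + 1);
      *start_ph = y1 + static_cast<int>(std::floor(ph * roi_h / pooled_h));
      *end_ph   = y1 + static_cast<int>(std::ceil((ph + 1) * roi_h / pooled_h));
    }

For example, with y1 = 3, y2 = 9 (7 rows) and pooled_h = 3, the bins come out
as [3, 6), [5, 8), and [7, 10): together they cover the ROI exactly, but
adjacent bins may overlap because floor and ceil round in opposite directions.
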
65 changes: 65 additions & 0 deletions include/caffe/layers/smooth_l1_loss_layer.hpp
@@ -0,0 +1,65 @@
#ifndef CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_
#define CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/loss_layer.hpp"

namespace caffe {

/**
 * @brief Computes the smooth L1 loss used by Fast R-CNN for bounding-box
 *        regression.
*
* Fast R-CNN
* Written by Ross Girshick
*/
template <typename Dtype>
class SmoothL1LossLayer : public LossLayer<Dtype> {
public:
explicit SmoothL1LossLayer(const LayerParameter& param)
: LossLayer<Dtype>(param), diff_() {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "SmoothL1Loss"; }

virtual inline int ExactNumBottomBlobs() const { return -1; }
virtual inline int MinBottomBlobs() const { return 2; }
virtual inline int MaxBottomBlobs() const { return 4; }

/**
* Unlike most loss layers, in the SmoothL1LossLayer we can backpropagate
* to both inputs -- override to return true and always allow force_backward.
*/
virtual inline bool AllowForceBackward(const int bottom_index) const {
return true;
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

Blob<Dtype> diff_;
Blob<Dtype> errors_;
Blob<Dtype> ones_;
bool has_weights_;
Dtype sigma2_;
};

} // namespace caffe

#endif // CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_
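
This diff does not include the .cpp/.cu bodies for SmoothL1LossLayer, but the
sigma2_ member matches the smooth L1 definition used in Fast R-CNN. A sketch
of the elementwise function under that assumption (not the layer's code):

    #include <cmath>

    // Assumed elementwise smooth L1 (Fast R-CNN form); sigma2 plays the role
    // of the sigma2_ member above, i.e. sigma squared.
    //   f(x) = 0.5 * sigma2 * x^2     if |x| < 1 / sigma2
    //        = |x| - 0.5 / sigma2     otherwise
    float SmoothL1(float x, float sigma2) {
      const float abs_x = std::fabs(x);
      if (abs_x < 1.0f / sigma2) {
        return 0.5f * sigma2 * x * x;
      }
      return abs_x - 0.5f / sigma2;
    }

The two branches agree at |x| = 1 / sigma2 (both give 0.5 / sigma2), so the
loss is continuous: quadratic near zero, linear in the tails.
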
1 change: 1 addition & 0 deletions python/caffe/_caffe.cpp
@@ -287,6 +287,7 @@ BOOST_PYTHON_MODULE(_caffe) {
bp::def("set_mode_gpu", &set_mode_gpu);
bp::def("set_random_seed", &set_random_seed);
bp::def("set_device", &Caffe::SetDevice);
bp::def("set_random_seed", &Caffe::set_random_seed);

bp::def("layer_type_list", &LayerRegistry<Dtype>::LayerTypeList);

27 changes: 23 additions & 4 deletions src/caffe/layers/dropout_layer.cpp
@@ -16,6 +16,7 @@ void DropoutLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
DCHECK(threshold_ < 1.);
scale_ = 1. / (1. - threshold_);
uint_thres_ = static_cast<unsigned int>(UINT_MAX * threshold_);
scale_train_ = this->layer_param_.dropout_param().scale_train();
}

template <typename Dtype>
@@ -37,11 +38,20 @@ void DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
if (this->phase_ == TRAIN) {
// Create random numbers
caffe_rng_bernoulli(count, 1. - threshold_, mask);
for (int i = 0; i < count; ++i) {
top_data[i] = bottom_data[i] * mask[i] * scale_;
if (scale_train_) {
for (int i = 0; i < count; ++i) {
top_data[i] = bottom_data[i] * mask[i] * scale_;
}
} else {
for (int i = 0; i < count; ++i) {
top_data[i] = bottom_data[i] * mask[i];
}
}
} else {
caffe_copy(bottom[0]->count(), bottom_data, top_data);
if (!scale_train_) {
caffe_scal<Dtype>(count, 1. / scale_, top_data);
}
}
}

@@ -55,11 +65,20 @@ void DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
if (this->phase_ == TRAIN) {
const unsigned int* mask = rand_vec_.cpu_data();
const int count = bottom[0]->count();
for (int i = 0; i < count; ++i) {
bottom_diff[i] = top_diff[i] * mask[i] * scale_;
if (scale_train_) {
for (int i = 0; i < count; ++i) {
bottom_diff[i] = top_diff[i] * mask[i] * scale_;
}
} else {
for (int i = 0; i < count; ++i) {
bottom_diff[i] = top_diff[i] * mask[i];
}
}
} else {
caffe_copy(top[0]->count(), top_diff, bottom_diff);
if (!scale_train_) {
caffe_scal<Dtype>(top[0]->count(), 1. / scale_, bottom_diff);
}
}
}
}
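
The change above selects between two dropout conventions. A minimal sketch of
the per-element behavior (illustrative only, not the layer code; p is the
drop probability and scale = 1 / (1 - p)):

    // scale_train == true  (inverted dropout, the previous behavior):
    //   train: x * mask * scale        test: x
    // scale_train == false (classic dropout):
    //   train: x * mask                test: x * (1 - p)
    float DropoutValue(float x, unsigned int keep, bool train,
                       float p, bool scale_train) {
      const float scale = 1.0f / (1.0f - p);
      if (train) {
        return scale_train ? x * keep * scale : x * keep;
      }
      return scale_train ? x : x / scale;  // x / scale == x * (1 - p)
    }

Either way the expected activation at test time matches training; the flag
only moves the rescaling from the training pass to the test pass.
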
35 changes: 28 additions & 7 deletions src/caffe/layers/dropout_layer.cu
@@ -25,12 +25,23 @@ void DropoutLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
static_cast<unsigned int*>(rand_vec_.mutable_gpu_data());
caffe_gpu_rng_uniform(count, mask);
// set thresholds
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, bottom_data, mask, uint_thres_, scale_, top_data);
if (scale_train_) {
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutForward<Dtype><<<CAFFE_GET_BLOCKS(count),
CAFFE_CUDA_NUM_THREADS>>>(
count, bottom_data, mask, uint_thres_, scale_, top_data);
} else {
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutForward<Dtype><<<CAFFE_GET_BLOCKS(count),
CAFFE_CUDA_NUM_THREADS>>>(
count, bottom_data, mask, uint_thres_, 1.f, top_data);
}
CUDA_POST_KERNEL_CHECK;
} else {
caffe_copy(count, bottom_data, top_data);
if (!scale_train_) {
caffe_gpu_scal<Dtype>(count, 1. / scale_, top_data);
}
}
}

@@ -54,13 +65,23 @@ void DropoutLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const unsigned int* mask =
static_cast<const unsigned int*>(rand_vec_.gpu_data());
const int count = bottom[0]->count();
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutBackward<Dtype><<<CAFFE_GET_BLOCKS(count),
CAFFE_CUDA_NUM_THREADS>>>(
count, top_diff, mask, uint_thres_, scale_, bottom_diff);
if (scale_train_) {
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutBackward<Dtype><<<CAFFE_GET_BLOCKS(count),
CAFFE_CUDA_NUM_THREADS>>>(
count, top_diff, mask, uint_thres_, scale_, bottom_diff);
} else {
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutBackward<Dtype><<<CAFFE_GET_BLOCKS(count),
CAFFE_CUDA_NUM_THREADS>>>(
count, top_diff, mask, uint_thres_, 1.f, bottom_diff);
}
CUDA_POST_KERNEL_CHECK;
} else {
caffe_copy(top[0]->count(), top_diff, bottom_diff);
if (!scale_train_) {
caffe_gpu_scal<Dtype>(top[0]->count(), 1. / scale_, bottom_diff);
}
}
}
}
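
LayerSetUp reads the flag from layer_param_.dropout_param().scale_train(), so
the accompanying caffe.proto change (one of the changed files not shown here)
must add a scale_train field to DropoutParameter. Assuming that field, a
prototxt sketch of a dropout layer opting into test-time scaling (layer and
blob names are illustrative):

    layer {
      name: "drop6"
      type: "Dropout"
      bottom: "fc6"
      top: "fc6"
      dropout_param {
        dropout_ratio: 0.5
        scale_train: false  # scale by (1 - dropout_ratio) at test time instead
      }
    }
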
(Remaining changed files not shown.)
