Skip to content

Commit

Permalink
Merge pull request BVLC#2 from curalate/frcnn
Browse files Browse the repository at this point in the history
FRCNN support for Caffe
  • Loading branch information
mbassov authored Jun 27, 2017
2 parents eeebdab + bee7d8c commit 59e4157
Show file tree
Hide file tree
Showing 41 changed files with 2,634 additions and 61 deletions.
4 changes: 4 additions & 0 deletions include/caffe/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ using std::string;
using std::stringstream;
using std::vector;

// MSVC before Visual Studio 2015 (_MSC_VER 1900) does not provide a C99
// snprintf, so map it to the closest CRT equivalent. Guarding on the
// version matters: MSVC 2015+ *does* ship a conforming snprintf, and
// redefining it there is a compile error.
// NOTE: unlike C99 snprintf, _snprintf does not NUL-terminate the buffer
// when the output is truncated -- callers must not rely on termination
// for maximum-length output on old MSVC.
#if defined(_MSC_VER) && _MSC_VER < 1900
#define snprintf _snprintf
#endif

// A global initialization function that you should call in your main function.
// Currently it initializes google flags and google logging.
void GlobalInit(int* pargc, char*** pargv);
Expand Down
11 changes: 11 additions & 0 deletions include/caffe/layer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,17 @@ class Layer {
param_propagate_down_[param_id] = value;
}

  /// @brief Returns the Phase this layer is currently configured to run in.
  inline Phase phase() { return phase_; }

  /**
   * @brief Set the layer's phase.
   *
   * Enables training and testing with one network, switching the phase
   * in place instead of instantiating two networks, for saving memory.
   */
  virtual inline void set_phase(Phase phase) {
    phase_ = phase;
  }



protected:
/** The protobuf that stores the layer parameters */
Expand Down
57 changes: 57 additions & 0 deletions include/caffe/layers/box_annotator_ohem_layer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#ifndef CAFFE_BOX_ANNOTATOR_OHEM_LAYER_HPP_
#define CAFFE_BOX_ANNOTATOR_OHEM_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/loss_layer.hpp"

namespace caffe {

/**
 * @brief BoxAnnotatorOHEMLayer: Annotate box labels for Online Hard
 *        Example Mining (OHEM) training.
 *
 * Takes exactly four bottom blobs and produces exactly two top blobs
 * (see ExactNumBottomBlobs / ExactNumTopBlobs below).
 *
 * R-FCN
 * Written by Yi Li
 */
template <typename Dtype>
class BoxAnnotatorOHEMLayer : public Layer<Dtype> {
 public:
  explicit BoxAnnotatorOHEMLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "BoxAnnotatorOHEM"; }

  virtual inline int ExactNumBottomBlobs() const { return 4; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  // Cached input geometry.
  int num_;
  int height_;
  int width_;
  int spatial_dim_;    // NOTE(review): presumably height_ * width_ -- confirm in Reshape
  int bbox_channels_;  // channel count of the bbox blobs -- TODO confirm in impl

  int roi_per_img_;    // number of ROIs kept per image for OHEM -- TODO confirm against layer param
  int ignore_label_;   // label value excluded from mining -- TODO confirm against layer param
};

} // namespace caffe

#endif // CAFFE_BOX_ANNOTATOR_OHEM_LAYER_HPP_
1 change: 1 addition & 0 deletions include/caffe/layers/dropout_layer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class DropoutLayer : public NeuronLayer<Dtype> {
  /// the scale for undropped inputs at train time @f$ 1 / (1 - p) @f$
  Dtype scale_;
  /// threshold in the unsigned-integer random domain used to decide drops
  /// NOTE(review): presumably compared against the sampled random mask -- confirm in impl
  unsigned int uint_thres_;
  /// NOTE(review): presumably selects whether scaling by scale_ is applied
  /// at train time (true) rather than at test time -- confirm against DropoutParameter
  bool scale_train_;
};

} // namespace caffe
Expand Down
76 changes: 76 additions & 0 deletions include/caffe/layers/psroi_pooling_layer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#ifndef CAFFE_PSROI_POOLING_LAYER_HPP_
#define CAFFE_PSROI_POOLING_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
* @brief Perform position-sensitive max pooling on regions of interest specified by input, takes
* as input N position-sensitive score maps and a list of R regions of interest.
* ROIPoolingLayer takes 2 inputs and produces 1 output. bottom[0] is
* [N x (C x K^2) x H x W] position-sensitive score maps on which pooling is performed. bottom[1] is
* [R x 5] containing a list R ROI tuples with batch index and coordinates of
* regions of interest. Each row in bottom[1] is a ROI tuple in format
* [batch_index x1 y1 x2 y2], where batch_index corresponds to the index of
* instance in the first input and x1 y1 x2 y2 are 0-indexed coordinates
* of ROI rectangle (including its boundaries). The output top[0] is [R x C x K x K] score maps pooled
* within the ROI tuples.
* @param param provides PSROIPoolingParameter psroi_pooling_param,
* with PSROIPoolingLayer options:
* - output_dim. The pooled output channel number.
* - group_size. The number of groups to encode position-sensitive score maps
* - spatial_scale. Multiplicative spatial scale factor to translate ROI
* coordinates from their input scale to the scale used when pooling.
* R-FCN
* Written by Yi Li
*/

template <typename Dtype>
class PSROIPoolingLayer : public Layer<Dtype> {
 public:
  explicit PSROIPoolingLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  /// @brief Reads psroi_pooling_param (output_dim, group_size, spatial_scale).
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "PSROIPooling"; }

  /// Exactly two bottoms (score maps, ROIs) and one top.
  virtual inline int MinBottomBlobs() const { return 2; }
  virtual inline int MaxBottomBlobs() const { return 2; }
  virtual inline int MinTopBlobs() const { return 1; }
  virtual inline int MaxTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  /// Multiplicative factor translating ROI coords to the pooled scale.
  Dtype spatial_scale_;
  /// The pooled output channel number (C in the class docs).
  int output_dim_;
  /// Number of groups encoding position-sensitive score maps (K).
  int group_size_;

  // bottom[0] geometry; channels_ is expected to be C * K^2 per the class docs.
  int channels_;
  int height_;
  int width_;

  int pooled_height_;
  int pooled_width_;
  /// NOTE(review): presumably records, per pooled output element, the input
  /// channel it was pooled from, for use in Backward -- confirm in impl.
  Blob<int> mapping_channel_;
};

} // namespace caffe

#endif // CAFFE_PSROI_POOLING_LAYER_HPP_
84 changes: 84 additions & 0 deletions include/caffe/layers/roi_pooling_layer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#ifndef CAFFE_ROI_POOLING_LAYER_HPP_
#define CAFFE_ROI_POOLING_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
* @brief Perform max pooling on regions of interest specified by input, takes
* as input N feature maps and a list of R regions of interest.
*
* ROIPoolingLayer takes 2 inputs and produces 1 output. bottom[0] is
* [N x C x H x W] feature maps on which pooling is performed. bottom[1] is
* [R x 5] containing a list R ROI tuples with batch index and coordinates of
* regions of interest. Each row in bottom[1] is a ROI tuple in format
* [batch_index x1 y1 x2 y2], where batch_index corresponds to the index of
* instance in the first input and x1 y1 x2 y2 are 0-indexed coordinates
* of ROI rectangle (including its boundaries).
*
* For each of the R ROIs, max-pooling is performed over pooled_h x pooled_w
* output bins (specified in roi_pooling_param). The pooling bin sizes are
* adaptively set such that they tile ROI rectangle in the indexed feature
* map. The pooling region of vertical bin ph in [0, pooled_h) is computed as
*
* start_ph (included) = y1 + floor(ph * (y2 - y1 + 1) / pooled_h)
* end_ph (excluded) = y1 + ceil((ph + 1) * (y2 - y1 + 1) / pooled_h)
*
* and similar horizontal bins.
*
* @param param provides ROIPoolingParameter roi_pooling_param,
* with ROIPoolingLayer options:
* - pooled_h. The pooled output height.
* - pooled_w. The pooled output width
* - spatial_scale. Multiplicative spatial scale factor to translate ROI
* coordinates from their input scale to the scale used when pooling.
*
* Fast R-CNN
* Written by Ross Girshick
*/

template <typename Dtype>
class ROIPoolingLayer : public Layer<Dtype> {
 public:
  explicit ROIPoolingLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  /// @brief Reads roi_pooling_param (pooled_h, pooled_w, spatial_scale).
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "ROIPooling"; }

  /// Exactly two bottoms (feature maps, ROIs) and one top.
  virtual inline int MinBottomBlobs() const { return 2; }
  virtual inline int MaxBottomBlobs() const { return 2; }
  virtual inline int MinTopBlobs() const { return 1; }
  virtual inline int MaxTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  // bottom[0] feature-map geometry.
  int channels_;
  int height_;
  int width_;
  /// The pooled output height (pooled_h) and width (pooled_w).
  int pooled_height_;
  int pooled_width_;
  /// Multiplicative factor translating ROI coords from input scale to pooling scale.
  Dtype spatial_scale_;
  /// NOTE(review): presumably the argmax input index per pooled output
  /// element, used to route gradients in Backward -- confirm in impl.
  Blob<int> max_idx_;
};

} // namespace caffe

#endif // CAFFE_ROI_POOLING_LAYER_HPP_
65 changes: 65 additions & 0 deletions include/caffe/layers/smooth_l1_loss_layer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#ifndef CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_
#define CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/loss_layer.hpp"

namespace caffe {

/**
* @brief SmoothL1LossLayer
*
* Fast R-CNN
* Written by Ross Girshick
*/
template <typename Dtype>
class SmoothL1LossLayer : public LossLayer<Dtype> {
 public:
  explicit SmoothL1LossLayer(const LayerParameter& param)
      : LossLayer<Dtype>(param), diff_() {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "SmoothL1Loss"; }

  /// -1 disables the exact-count check; the Min/Max pair below applies
  /// instead: predictions and targets are required, two optional extra
  /// bottoms (up to 4 total) are accepted.
  virtual inline int ExactNumBottomBlobs() const { return -1; }
  virtual inline int MinBottomBlobs() const { return 2; }
  virtual inline int MaxBottomBlobs() const { return 4; }

  /**
   * Unlike most loss layers, in the SmoothL1LossLayer we can backpropagate
   * to both inputs -- override to return true and always allow force_backward.
   */
  virtual inline bool AllowForceBackward(const int bottom_index) const {
    return true;
  }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  /// NOTE(review): presumably the element-wise difference of the two
  /// prediction/target bottoms -- confirm in impl.
  Blob<Dtype> diff_;
  /// Per-element smooth-L1 error terms -- TODO confirm in impl.
  Blob<Dtype> errors_;
  /// NOTE(review): presumably a blob of ones used to reduce errors_ to a
  /// scalar loss (e.g. via a dot product) -- confirm in impl.
  Blob<Dtype> ones_;
  /// Whether the optional extra weight bottoms were supplied -- TODO confirm
  /// how this is set in LayerSetUp.
  bool has_weights_;
  /// NOTE(review): presumably sigma^2, the transition-point parameter of the
  /// smooth L1 -- verify against the layer's parameter message.
  Dtype sigma2_;
};

} // namespace caffe

#endif // CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_
Loading

0 comments on commit 59e4157

Please sign in to comment.