forked from BVLC/caffe
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request BVLC#2 from curalate/frcnn
FRCNN support for Caffe
- Loading branch information
Showing
41 changed files
with
2,634 additions
and
61 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
#ifndef CAFFE_BOX_ANNOTATOR_OHEM_LAYER_HPP_ | ||
#define CAFFE_BOX_ANNOTATOR_OHEM_LAYER_HPP_ | ||
|
||
#include <vector> | ||
|
||
#include "caffe/blob.hpp" | ||
#include "caffe/common.hpp" | ||
#include "caffe/layer.hpp" | ||
#include "caffe/proto/caffe.pb.h" | ||
|
||
#include "caffe/layers/loss_layer.hpp" | ||
|
||
namespace caffe { | ||
|
||
/** | ||
* @brief BoxAnnotatorOHEMLayer: Annotate box labels for Online Hard Example Mining (OHEM) training | ||
* R-FCN | ||
* Written by Yi Li | ||
*/ | ||
template <typename Dtype> | ||
class BoxAnnotatorOHEMLayer :public Layer<Dtype>{ | ||
public: | ||
explicit BoxAnnotatorOHEMLayer(const LayerParameter& param) | ||
: Layer<Dtype>(param) {} | ||
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Reshape(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
|
||
virtual inline const char* type() const { return "BoxAnnotatorOHEM"; } | ||
|
||
virtual inline int ExactNumBottomBlobs() const { return 4; } | ||
virtual inline int ExactNumTopBlobs() const { return 2; } | ||
|
||
protected: | ||
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
|
||
int num_; | ||
int height_; | ||
int width_; | ||
int spatial_dim_; | ||
int bbox_channels_; | ||
|
||
int roi_per_img_; | ||
int ignore_label_; | ||
}; | ||
|
||
} // namespace caffe | ||
|
||
#endif // CAFFE_BOX_ANNOTATOR_OHEM_LAYER_HPP_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
#ifndef CAFFE_PSROI_POOLING_LAYER_HPP_ | ||
#define CAFFE_PSROI_POOLING_LAYER_HPP_ | ||
|
||
#include <vector> | ||
|
||
#include "caffe/blob.hpp" | ||
#include "caffe/common.hpp" | ||
#include "caffe/layer.hpp" | ||
#include "caffe/proto/caffe.pb.h" | ||
|
||
namespace caffe { | ||
|
||
/** | ||
* @brief Perform position-sensitive max pooling on regions of interest specified by input, takes | ||
* as input N position-sensitive score maps and a list of R regions of interest. | ||
* ROIPoolingLayer takes 2 inputs and produces 1 output. bottom[0] is | ||
* [N x (C x K^2) x H x W] position-sensitive score maps on which pooling is performed. bottom[1] is | ||
* [R x 5] containing a list R ROI tuples with batch index and coordinates of | ||
* regions of interest. Each row in bottom[1] is a ROI tuple in format | ||
* [batch_index x1 y1 x2 y2], where batch_index corresponds to the index of | ||
* instance in the first input and x1 y1 x2 y2 are 0-indexed coordinates | ||
* of ROI rectangle (including its boundaries). The output top[0] is [R x C x K x K] score maps pooled | ||
* within the ROI tuples. | ||
* @param param provides PSROIPoolingParameter psroi_pooling_param, | ||
* with PSROIPoolingLayer options: | ||
* - output_dim. The pooled output channel number. | ||
* - group_size. The number of groups to encode position-sensitive score maps | ||
* - spatial_scale. Multiplicative spatial scale factor to translate ROI | ||
* coordinates from their input scale to the scale used when pooling. | ||
* R-FCN | ||
* Written by Yi Li | ||
*/ | ||
|
||
template <typename Dtype> | ||
class PSROIPoolingLayer : public Layer<Dtype> { | ||
public: | ||
explicit PSROIPoolingLayer(const LayerParameter& param) | ||
: Layer<Dtype>(param) {} | ||
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Reshape(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
|
||
virtual inline const char* type() const { return "PSROIPooling"; } | ||
|
||
virtual inline int MinBottomBlobs() const { return 2; } | ||
virtual inline int MaxBottomBlobs() const { return 2; } | ||
virtual inline int MinTopBlobs() const { return 1; } | ||
virtual inline int MaxTopBlobs() const { return 1; } | ||
|
||
protected: | ||
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
|
||
Dtype spatial_scale_; | ||
int output_dim_; | ||
int group_size_; | ||
|
||
int channels_; | ||
int height_; | ||
int width_; | ||
|
||
int pooled_height_; | ||
int pooled_width_; | ||
Blob<int> mapping_channel_; | ||
}; | ||
|
||
} // namespace caffe | ||
|
||
#endif // CAFFE_PSROI_POOLING_LAYER_HPP_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
#ifndef CAFFE_ROI_POOLING_LAYER_HPP_ | ||
#define CAFFE_ROI_POOLING_LAYER_HPP_ | ||
|
||
#include <vector> | ||
|
||
#include "caffe/blob.hpp" | ||
#include "caffe/common.hpp" | ||
#include "caffe/layer.hpp" | ||
#include "caffe/proto/caffe.pb.h" | ||
|
||
namespace caffe { | ||
|
||
/** | ||
* @brief Perform max pooling on regions of interest specified by input, takes | ||
* as input N feature maps and a list of R regions of interest. | ||
* | ||
* ROIPoolingLayer takes 2 inputs and produces 1 output. bottom[0] is | ||
* [N x C x H x W] feature maps on which pooling is performed. bottom[1] is | ||
* [R x 5] containing a list R ROI tuples with batch index and coordinates of | ||
* regions of interest. Each row in bottom[1] is a ROI tuple in format | ||
* [batch_index x1 y1 x2 y2], where batch_index corresponds to the index of | ||
* instance in the first input and x1 y1 x2 y2 are 0-indexed coordinates | ||
* of ROI rectangle (including its boundaries). | ||
* | ||
* For each of the R ROIs, max-pooling is performed over pooled_h x pooled_w | ||
* output bins (specified in roi_pooling_param). The pooling bin sizes are | ||
* adaptively set such that they tile ROI rectangle in the indexed feature | ||
* map. The pooling region of vertical bin ph in [0, pooled_h) is computed as | ||
* | ||
* start_ph (included) = y1 + floor(ph * (y2 - y1 + 1) / pooled_h) | ||
* end_ph (excluded) = y1 + ceil((ph + 1) * (y2 - y1 + 1) / pooled_h) | ||
* | ||
* and similar horizontal bins. | ||
* | ||
* @param param provides ROIPoolingParameter roi_pooling_param, | ||
* with ROIPoolingLayer options: | ||
* - pooled_h. The pooled output height. | ||
* - pooled_w. The pooled output width | ||
* - spatial_scale. Multiplicative spatial scale factor to translate ROI | ||
* coordinates from their input scale to the scale used when pooling. | ||
* | ||
* Fast R-CNN | ||
* Written by Ross Girshick | ||
*/ | ||
|
||
template <typename Dtype> | ||
class ROIPoolingLayer : public Layer<Dtype> { | ||
public: | ||
explicit ROIPoolingLayer(const LayerParameter& param) | ||
: Layer<Dtype>(param) {} | ||
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Reshape(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
|
||
virtual inline const char* type() const { return "ROIPooling"; } | ||
|
||
virtual inline int MinBottomBlobs() const { return 2; } | ||
virtual inline int MaxBottomBlobs() const { return 2; } | ||
virtual inline int MinTopBlobs() const { return 1; } | ||
virtual inline int MaxTopBlobs() const { return 1; } | ||
|
||
protected: | ||
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
|
||
int channels_; | ||
int height_; | ||
int width_; | ||
int pooled_height_; | ||
int pooled_width_; | ||
Dtype spatial_scale_; | ||
Blob<int> max_idx_; | ||
}; | ||
|
||
} // namespace caffe | ||
|
||
#endif // CAFFE_ROI_POOLING_LAYER_HPP_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#ifndef CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_ | ||
#define CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_ | ||
|
||
#include <vector> | ||
|
||
#include "caffe/blob.hpp" | ||
#include "caffe/common.hpp" | ||
#include "caffe/layer.hpp" | ||
#include "caffe/proto/caffe.pb.h" | ||
|
||
#include "caffe/layers/loss_layer.hpp" | ||
|
||
namespace caffe { | ||
|
||
/** | ||
* @brief SmoothL1LossLayer | ||
* | ||
* Fast R-CNN | ||
* Written by Ross Girshick | ||
*/ | ||
template <typename Dtype> | ||
class SmoothL1LossLayer : public LossLayer<Dtype> { | ||
public: | ||
explicit SmoothL1LossLayer(const LayerParameter& param) | ||
: LossLayer<Dtype>(param), diff_() {} | ||
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Reshape(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
|
||
virtual inline const char* type() const { return "SmoothL1Loss"; } | ||
|
||
virtual inline int ExactNumBottomBlobs() const { return -1; } | ||
virtual inline int MinBottomBlobs() const { return 2; } | ||
virtual inline int MaxBottomBlobs() const { return 4; } | ||
|
||
/** | ||
* Unlike most loss layers, in the SmoothL1LossLayer we can backpropagate | ||
* to both inputs -- override to return true and always allow force_backward. | ||
*/ | ||
virtual inline bool AllowForceBackward(const int bottom_index) const { | ||
return true; | ||
} | ||
|
||
protected: | ||
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
|
||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
|
||
Blob<Dtype> diff_; | ||
Blob<Dtype> errors_; | ||
Blob<Dtype> ones_; | ||
bool has_weights_; | ||
Dtype sigma2_; | ||
}; | ||
|
||
} // namespace caffe | ||
|
||
#endif // CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_ |
Oops, something went wrong.