Commit 42e6c85

Merge pull request BVLC#99 from zer0n/master

Add Faster RCNN support

zer0n authored Jul 15, 2016
2 parents ddf2ae4 + 86fbcab

Showing 14 changed files with 1,005 additions and 12 deletions.
2 changes: 2 additions & 0 deletions include/caffe/layer.hpp
@@ -316,6 +316,8 @@ class Layer {
param_propagate_down_[param_id] = value;
}

inline Phase phase() { return phase_; }


protected:
/** The protobuf that stores the layer parameters */
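
The new public phase() accessor exposes the previously protected phase_
member, so code holding a Layer pointer can now ask which phase the layer
was configured for. A minimal sketch of a hypothetical caller (the helper
name is illustrative, not part of this commit):

    #include "caffe/layer.hpp"

    // Hypothetical helper: report whether a layer was configured for the
    // TRAIN phase, using the accessor added above instead of the protected
    // phase_ member.
    template <typename Dtype>
    bool IsTrainPhase(caffe::Layer<Dtype>* layer) {
      return layer->phase() == caffe::TRAIN;
    }
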
1 change: 1 addition & 0 deletions include/caffe/layers/dropout_layer.hpp
@@ -73,6 +73,7 @@ class DropoutLayer : public NeuronLayer<Dtype> {
/// the scale for undropped inputs at train time @f$ 1 / (1 - p) @f$
Dtype scale_;
unsigned int uint_thres_;
bool scale_train_;
};

} // namespace caffe
84 changes: 84 additions & 0 deletions include/caffe/layers/roi_pooling_layer.hpp
@@ -0,0 +1,84 @@
#ifndef CAFFE_ROI_POOLING_LAYER_HPP_
#define CAFFE_ROI_POOLING_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
 * @brief Performs max pooling on regions of interest: takes as input N
 * feature maps and a list of R regions of interest.
*
* ROIPoolingLayer takes 2 inputs and produces 1 output. bottom[0] is
* [N x C x H x W] feature maps on which pooling is performed. bottom[1] is
 * [R x 5], containing a list of R ROI tuples with a batch index and the
 * coordinates of a region of interest. Each row in bottom[1] is a ROI tuple
 * in the format [batch_index x1 y1 x2 y2], where batch_index is the index of
 * the instance in the first input and x1 y1 x2 y2 are the 0-indexed
 * coordinates of the ROI rectangle (including its boundaries).
*
* For each of the R ROIs, max-pooling is performed over pooled_h x pooled_w
* output bins (specified in roi_pooling_param). The pooling bin sizes are
 * adaptively set such that they tile the ROI rectangle in the indexed feature
* map. The pooling region of vertical bin ph in [0, pooled_h) is computed as
*
* start_ph (included) = y1 + floor(ph * (y2 - y1 + 1) / pooled_h)
* end_ph (excluded) = y1 + ceil((ph + 1) * (y2 - y1 + 1) / pooled_h)
*
 * and similarly for the horizontal bins.
*
* @param param provides ROIPoolingParameter roi_pooling_param,
* with ROIPoolingLayer options:
* - pooled_h. The pooled output height.
 *   - pooled_w. The pooled output width.
* - spatial_scale. Multiplicative spatial scale factor to translate ROI
* coordinates from their input scale to the scale used when pooling.
*
* Fast R-CNN
* Written by Ross Girshick
*/

template <typename Dtype>
class ROIPoolingLayer : public Layer<Dtype> {
public:
explicit ROIPoolingLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "ROIPooling"; }

virtual inline int MinBottomBlobs() const { return 2; }
virtual inline int MaxBottomBlobs() const { return 2; }
virtual inline int MinTopBlobs() const { return 1; }
virtual inline int MaxTopBlobs() const { return 1; }

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

int channels_;
int height_;
int width_;
int pooled_height_;
int pooled_width_;
Dtype spatial_scale_;
Blob<int> max_idx_;
};

} // namespace caffe

#endif // CAFFE_ROI_POOLING_LAYER_HPP_
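
The bin boundaries described in the header comment can be computed directly.
A minimal sketch of that arithmetic (an illustrative helper, not the layer's
actual implementation):

    #include <cmath>

    // Sketch of the bin-boundary formulas from the comment above. For
    // vertical bin ph in [0, pooled_h), the pooled rows are
    // [start_ph, end_ph) inside the ROI rows [y1, y2].
    void VerticalBinRange(int y1, int y2, int pooled_h, int ph,
                          int* start_ph, int* end_ph) {
      const float roi_h = static_cast<float>(y2 - y1 + 1);
      *start_ph = y1 + static_cast<int>(std::floor(ph * roi_h / pooled_h));
      *end_ph   = y1 + static_cast<int>(std::ceil((ph + 1) * roi_h / pooled_h));
    }

For example, with y1 = 3, y2 = 9 (7 rows) and pooled_h = 3, the bins come out
as [3, 6), [5, 8), and [7, 10): together they cover the ROI exactly, but
adjacent bins may overlap because floor and ceil round in opposite directions.
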
65 changes: 65 additions & 0 deletions include/caffe/layers/smooth_l1_loss_layer.hpp
@@ -0,0 +1,65 @@
#ifndef CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_
#define CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/loss_layer.hpp"

namespace caffe {

/**
 * @brief Computes the smooth L1 loss used by Fast R-CNN for bounding-box
 *        regression.
*
* Fast R-CNN
* Written by Ross Girshick
*/
template <typename Dtype>
class SmoothL1LossLayer : public LossLayer<Dtype> {
public:
explicit SmoothL1LossLayer(const LayerParameter& param)
: LossLayer<Dtype>(param), diff_() {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "SmoothL1Loss"; }

virtual inline int ExactNumBottomBlobs() const { return -1; }
virtual inline int MinBottomBlobs() const { return 2; }
virtual inline int MaxBottomBlobs() const { return 4; }

/**
* Unlike most loss layers, in the SmoothL1LossLayer we can backpropagate
* to both inputs -- override to return true and always allow force_backward.
*/
virtual inline bool AllowForceBackward(const int bottom_index) const {
return true;
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

Blob<Dtype> diff_;
Blob<Dtype> errors_;
Blob<Dtype> ones_;
bool has_weights_;
Dtype sigma2_;
};

} // namespace caffe

#endif // CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_
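
This diff does not include the .cpp/.cu bodies for SmoothL1LossLayer, but the
sigma2_ member matches the smooth L1 definition used in Fast R-CNN. A sketch
of the elementwise function under that assumption (not the layer's code):

    #include <cmath>

    // Assumed elementwise smooth L1 (Fast R-CNN form); sigma2 plays the role
    // of the sigma2_ member above, i.e. sigma squared.
    //   f(x) = 0.5 * sigma2 * x^2     if |x| < 1 / sigma2
    //        = |x| - 0.5 / sigma2     otherwise
    float SmoothL1(float x, float sigma2) {
      const float abs_x = std::fabs(x);
      if (abs_x < 1.0f / sigma2) {
        return 0.5f * sigma2 * x * x;
      }
      return abs_x - 0.5f / sigma2;
    }

The two branches agree at |x| = 1 / sigma2 (both give 0.5 / sigma2), so the
loss is continuous: quadratic near zero, linear in the tails.
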
1 change: 1 addition & 0 deletions python/caffe/_caffe.cpp
@@ -287,6 +287,7 @@ BOOST_PYTHON_MODULE(_caffe) {
bp::def("set_mode_gpu", &set_mode_gpu);
bp::def("set_random_seed", &set_random_seed);
bp::def("set_device", &Caffe::SetDevice);
bp::def("set_random_seed", &Caffe::set_random_seed);

bp::def("layer_type_list", &LayerRegistry<Dtype>::LayerTypeList);

27 changes: 23 additions & 4 deletions src/caffe/layers/dropout_layer.cpp
@@ -16,6 +16,7 @@ void DropoutLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
DCHECK(threshold_ < 1.);
scale_ = 1. / (1. - threshold_);
uint_thres_ = static_cast<unsigned int>(UINT_MAX * threshold_);
scale_train_ = this->layer_param_.dropout_param().scale_train();
}

template <typename Dtype>
@@ -37,11 +38,20 @@ void DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
if (this->phase_ == TRAIN) {
// Create random numbers
caffe_rng_bernoulli(count, 1. - threshold_, mask);
for (int i = 0; i < count; ++i) {
top_data[i] = bottom_data[i] * mask[i] * scale_;
if (scale_train_) {
for (int i = 0; i < count; ++i) {
top_data[i] = bottom_data[i] * mask[i] * scale_;
}
} else {
for (int i = 0; i < count; ++i) {
top_data[i] = bottom_data[i] * mask[i];
}
}
} else {
caffe_copy(bottom[0]->count(), bottom_data, top_data);
if (!scale_train_) {
caffe_scal<Dtype>(count, 1. / scale_, top_data);
}
}
}

@@ -55,11 +65,20 @@ void DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
if (this->phase_ == TRAIN) {
const unsigned int* mask = rand_vec_.cpu_data();
const int count = bottom[0]->count();
for (int i = 0; i < count; ++i) {
bottom_diff[i] = top_diff[i] * mask[i] * scale_;
if (scale_train_) {
for (int i = 0; i < count; ++i) {
bottom_diff[i] = top_diff[i] * mask[i] * scale_;
}
} else {
for (int i = 0; i < count; ++i) {
bottom_diff[i] = top_diff[i] * mask[i];
}
}
} else {
caffe_copy(top[0]->count(), top_diff, bottom_diff);
if (!scale_train_) {
caffe_scal<Dtype>(top[0]->count(), 1. / scale_, bottom_diff);
}
}
}
}
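
The change above selects between two dropout conventions. A minimal sketch of
the per-element behavior (illustrative only, not the layer code; p is the
drop probability and scale = 1 / (1 - p)):

    // scale_train == true  (inverted dropout, the previous behavior):
    //   train: x * mask * scale        test: x
    // scale_train == false (classic dropout):
    //   train: x * mask                test: x * (1 - p)
    float DropoutValue(float x, unsigned int keep, bool train,
                       float p, bool scale_train) {
      const float scale = 1.0f / (1.0f - p);
      if (train) {
        return scale_train ? x * keep * scale : x * keep;
      }
      return scale_train ? x : x / scale;  // x / scale == x * (1 - p)
    }

Either way the expected activation at test time matches training; the flag
only moves the rescaling from the training pass to the test pass.
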
35 changes: 28 additions & 7 deletions src/caffe/layers/dropout_layer.cu
@@ -25,12 +25,23 @@ void DropoutLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
static_cast<unsigned int*>(rand_vec_.mutable_gpu_data());
caffe_gpu_rng_uniform(count, mask);
// set thresholds
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, bottom_data, mask, uint_thres_, scale_, top_data);
if (scale_train_) {
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutForward<Dtype><<<CAFFE_GET_BLOCKS(count),
CAFFE_CUDA_NUM_THREADS>>>(
count, bottom_data, mask, uint_thres_, scale_, top_data);
} else {
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutForward<Dtype><<<CAFFE_GET_BLOCKS(count),
CAFFE_CUDA_NUM_THREADS>>>(
count, bottom_data, mask, uint_thres_, 1.f, top_data);
}
CUDA_POST_KERNEL_CHECK;
} else {
caffe_copy(count, bottom_data, top_data);
if (!scale_train_) {
caffe_gpu_scal<Dtype>(count, 1. / scale_, top_data);
}
}
}

@@ -54,13 +65,23 @@ void DropoutLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const unsigned int* mask =
static_cast<const unsigned int*>(rand_vec_.gpu_data());
const int count = bottom[0]->count();
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutBackward<Dtype><<<CAFFE_GET_BLOCKS(count),
CAFFE_CUDA_NUM_THREADS>>>(
count, top_diff, mask, uint_thres_, scale_, bottom_diff);
if (scale_train_) {
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutBackward<Dtype><<<CAFFE_GET_BLOCKS(count),
CAFFE_CUDA_NUM_THREADS>>>(
count, top_diff, mask, uint_thres_, scale_, bottom_diff);
} else {
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutBackward<Dtype><<<CAFFE_GET_BLOCKS(count),
CAFFE_CUDA_NUM_THREADS>>>(
count, top_diff, mask, uint_thres_, 1.f, bottom_diff);
}
CUDA_POST_KERNEL_CHECK;
} else {
caffe_copy(top[0]->count(), top_diff, bottom_diff);
if (!scale_train_) {
caffe_gpu_scal<Dtype>(top[0]->count(), 1. / scale_, bottom_diff);
}
}
}
}
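
LayerSetUp reads the flag from layer_param_.dropout_param().scale_train(), so
the accompanying caffe.proto change (one of the changed files not shown here)
must add a scale_train field to DropoutParameter. Assuming that field, a
prototxt sketch of a dropout layer opting into test-time scaling (layer and
blob names are illustrative):

    layer {
      name: "drop6"
      type: "Dropout"
      bottom: "fc6"
      top: "fc6"
      dropout_param {
        dropout_ratio: 0.5
        scale_train: false  # scale by (1 - dropout_ratio) at test time instead
      }
    }
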
(Remaining changed files not shown.)
