Faster R-CNN (work from Ross Girshick) #4163

Open. Wants to merge 3 commits into base: master.
2 changes: 2 additions & 0 deletions include/caffe/layer.hpp
@@ -291,6 +291,8 @@ class Layer {
param_propagate_down_[param_id] = value;
}

/// Returns the phase this layer is configured for: TRAIN or TEST.
inline Phase phase() { return phase_; }


protected:
/** The protobuf that stores the layer parameters */
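The accessor simply exposes the protected phase_ member, so code holding a Layer pointer can branch on whether the layer was instantiated for training or testing. A hypothetical caller-side sketch (names illustrative, not part of the PR):

// Hypothetical usage, assuming `layer` is a Layer<Dtype>*:
if (layer->phase() == TRAIN) {
  // enable train-only behavior, e.g. stochastic sampling
}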
1 change: 1 addition & 0 deletions include/caffe/layers/dropout_layer.hpp
@@ -73,6 +73,7 @@ class DropoutLayer : public NeuronLayer<Dtype> {
/// the scale for undropped inputs at train time @f$ 1 / (1 - p) @f$
Dtype scale_;
unsigned int uint_thres_;
/// if true, scale kept activations by 1/(1-p) at train time (inverted
/// dropout); if false, scale outputs by (1-p) at test time instead
bool scale_train_;
};

} // namespace caffe
84 changes: 84 additions & 0 deletions include/caffe/layers/roi_pooling_layer.hpp
@@ -0,0 +1,84 @@
#ifndef CAFFE_ROI_POOLING_LAYER_HPP_
#define CAFFE_ROI_POOLING_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
* @brief Performs max pooling on regions of interest. Takes as input N
* feature maps and a list of R regions of interest.
*
* ROIPoolingLayer takes 2 inputs and produces 1 output. bottom[0] is
* [N x C x H x W] feature maps on which pooling is performed. bottom[1] is
* [R x 5] containing a list of R ROI tuples with batch index and coordinates
* of regions of interest. Each row in bottom[1] is an ROI tuple in the format
* [batch_index x1 y1 x2 y2], where batch_index corresponds to the index of
* the instance in the first input and x1 y1 x2 y2 are 0-indexed coordinates
* of the ROI rectangle (including its boundaries).
*
* For each of the R ROIs, max-pooling is performed over pooled_h x pooled_w
* output bins (specified in roi_pooling_param). The pooling bin sizes are
* adaptively set such that they tile the ROI rectangle in the indexed feature
* map. The pooling region of vertical bin ph in [0, pooled_h) is computed as
*
* start_ph (included) = y1 + floor(ph * (y2 - y1 + 1) / pooled_h)
* end_ph (excluded) = y1 + ceil((ph + 1) * (y2 - y1 + 1) / pooled_h)
*
* and similarly for the horizontal bins.
*
* @param param provides ROIPoolingParameter roi_pooling_param,
* with ROIPoolingLayer options:
* - pooled_h. The pooled output height.
* - pooled_w. The pooled output width.
* - spatial_scale. Multiplicative spatial scale factor to translate ROI
* coordinates from their input scale to the scale used when pooling.
*
* Fast R-CNN
* Written by Ross Girshick
*/

template <typename Dtype>
class ROIPoolingLayer : public Layer<Dtype> {
public:
explicit ROIPoolingLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "ROIPooling"; }

virtual inline int MinBottomBlobs() const { return 2; }
virtual inline int MaxBottomBlobs() const { return 2; }
virtual inline int MinTopBlobs() const { return 1; }
virtual inline int MaxTopBlobs() const { return 1; }

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

int channels_;
int height_;
int width_;
int pooled_height_;
int pooled_width_;
Dtype spatial_scale_;
Blob<int> max_idx_;  // argmax index of each pooled output, used in backward
};

} // namespace caffe

#endif // CAFFE_ROI_POOLING_LAYER_HPP_
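To make the adaptive binning above concrete, here is a small standalone sketch of the bin-boundary formulas from the class comment. BinStart, BinEnd, and the numbers are illustrative, not part of the PR; the layer itself also scales ROI coordinates by spatial_scale first.

#include <cmath>
#include <cstdio>

// Rows [start, end) of vertical bin ph for an ROI covering rows [y1, y2],
// pooled into pooled_h bins, per the formulas in the class comment.
int BinStart(int y1, int y2, int ph, int pooled_h) {
  return y1 + static_cast<int>(
      std::floor(ph * (y2 - y1 + 1) / static_cast<float>(pooled_h)));
}

int BinEnd(int y1, int y2, int ph, int pooled_h) {
  return y1 + static_cast<int>(
      std::ceil((ph + 1) * (y2 - y1 + 1) / static_cast<float>(pooled_h)));
}

int main() {
  // A 7-row ROI (rows 3..9) pooled into 3 vertical bins.
  for (int ph = 0; ph < 3; ++ph) {
    std::printf("bin %d: rows [%d, %d)\n",
                ph, BinStart(3, 9, ph, 3), BinEnd(3, 9, ph, 3));
  }
  return 0;
}

This prints [3, 6), [5, 8), [7, 10): the bins cover every row of the ROI, with one row of overlap where floor and ceil disagree.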
65 changes: 65 additions & 0 deletions include/caffe/layers/smooth_l1_loss_layer.hpp
@@ -0,0 +1,65 @@
#ifndef CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_
#define CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/loss_layer.hpp"

namespace caffe {

/**
* @brief SmoothL1LossLayer: computes the smooth L1 loss used for
* bounding-box regression in Fast R-CNN.
*
* Fast R-CNN
* Written by Ross Girshick
*/
template <typename Dtype>
class SmoothL1LossLayer : public LossLayer<Dtype> {
public:
explicit SmoothL1LossLayer(const LayerParameter& param)
: LossLayer<Dtype>(param), diff_() {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "SmoothL1Loss"; }

virtual inline int ExactNumBottomBlobs() const { return -1; }
virtual inline int MinBottomBlobs() const { return 2; }
virtual inline int MaxBottomBlobs() const { return 4; }

/**
* Unlike most loss layers, the SmoothL1LossLayer can backpropagate to both
* inputs, so we override this to return true and always allow force_backward.
*/
virtual inline bool AllowForceBackward(const int bottom_index) const {
return true;
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

Blob<Dtype> diff_;
Blob<Dtype> errors_;
Blob<Dtype> ones_;
bool has_weights_;
Dtype sigma2_;
};

} // namespace caffe

#endif // CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_
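For reference, the per-element function this layer computes is the sigma-weighted smooth L1 loss from Fast/Faster R-CNN. A minimal standalone sketch (SmoothL1, SmoothL1Grad, and the test values are illustrative, not part of the PR):

#include <cmath>
#include <cstdio>

// Per-element smooth L1 loss with a sigma parameter (sigma2 = sigma^2):
//   f(x) = 0.5 * sigma2 * x^2        if |x| < 1 / sigma2
//        = |x| - 0.5 / sigma2        otherwise
float SmoothL1(float x, float sigma2) {
  const float abs_x = std::fabs(x);
  if (abs_x < 1.0f / sigma2) {
    return 0.5f * sigma2 * x * x;
  }
  return abs_x - 0.5f / sigma2;
}

// Its derivative, which is what flows back to both inputs (with opposite
// signs for predictions and targets):
//   f'(x) = sigma2 * x               if |x| < 1 / sigma2
//         = sign(x)                  otherwise
float SmoothL1Grad(float x, float sigma2) {
  const float abs_x = std::fabs(x);
  if (abs_x < 1.0f / sigma2) {
    return sigma2 * x;
  }
  return (x > 0.0f) - (x < 0.0f);
}

int main() {
  // sigma = 1: quadratic on |x| < 1, linear beyond, continuous at |x| = 1.
  const float xs[] = {-2.0f, -0.5f, 0.0f, 0.5f, 2.0f};
  for (float x : xs) {
    std::printf("f(%+.1f) = %.4f  f'(%+.1f) = %+.2f\n",
                x, SmoothL1(x, 1.0f), x, SmoothL1Grad(x, 1.0f));
  }
  return 0;
}

Reading the members above against this sketch: sigma2_ plays the role of sigma2, and the optional third and fourth bottoms appear to carry per-element inside and outside weights (hence has_weights_ and the 2-to-4 bottom blob range), following the Faster R-CNN reference implementation.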
1 change: 0 additions & 1 deletion python/caffe/_caffe.cpp
@@ -399,7 +399,6 @@ BOOST_PYTHON_MODULE(_caffe) {
bp::def("solver_rank", &Caffe::solver_rank);
bp::def("set_solver_rank", &Caffe::set_solver_rank);
bp::def("set_multiprocess", &Caffe::set_multiprocess);

bp::def("layer_type_list", &LayerRegistry<Dtype>::LayerTypeList);

bp::class_<Net<Dtype>, shared_ptr<Net<Dtype> >, boost::noncopyable >("Net",
27 changes: 23 additions & 4 deletions src/caffe/layers/dropout_layer.cpp
@@ -16,6 +16,7 @@ void DropoutLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
DCHECK(threshold_ < 1.);
scale_ = 1. / (1. - threshold_);
uint_thres_ = static_cast<unsigned int>(UINT_MAX * threshold_);
scale_train_ = this->layer_param_.dropout_param().scale_train();
}

template <typename Dtype>
@@ -37,11 +38,20 @@ void DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
if (this->phase_ == TRAIN) {
// Create random numbers
caffe_rng_bernoulli(count, 1. - threshold_, mask);
if (scale_train_) {
for (int i = 0; i < count; ++i) {
top_data[i] = bottom_data[i] * mask[i] * scale_;
}
} else {
for (int i = 0; i < count; ++i) {
top_data[i] = bottom_data[i] * mask[i];
}
}
} else {
caffe_copy(bottom[0]->count(), bottom_data, top_data);
if (!scale_train_) {
caffe_scal<Dtype>(count, 1. / scale_, top_data);
}
}
}

@@ -55,11 +65,20 @@ void DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
if (this->phase_ == TRAIN) {
const unsigned int* mask = rand_vec_.cpu_data();
const int count = bottom[0]->count();
if (scale_train_) {
for (int i = 0; i < count; ++i) {
bottom_diff[i] = top_diff[i] * mask[i] * scale_;
}
} else {
for (int i = 0; i < count; ++i) {
bottom_diff[i] = top_diff[i] * mask[i];
}
}
} else {
caffe_copy(top[0]->count(), top_diff, bottom_diff);
if (!scale_train_) {
caffe_scal<Dtype>(top[0]->count(), 1. / scale_, bottom_diff);
}
}
}
}
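The scale_train flag switches between the two standard dropout conventions: scale the surviving activations by 1/(1-p) at train time and do nothing at test time (Caffe's existing inverted dropout), or leave them unscaled at train time and multiply by (1-p) at test time. Both keep the expected output the same across phases; a small illustrative check (names and values are not part of the PR):

#include <cstdio>

// Expected train-time output of a unit under the two dropout conventions.
// p is the drop probability, a the raw activation.
float ExpectedTrainOutput(float a, float p, bool scale_train) {
  const float keep = 1.0f - p;                      // survival probability
  const float scale = scale_train ? 1.0f / keep : 1.0f;
  return keep * a * scale;                          // E[mask] * a * scale
}

// Test time: identity under inverted dropout, otherwise scale by (1 - p).
float TestOutput(float a, float p, bool scale_train) {
  return scale_train ? a : (1.0f - p) * a;
}

int main() {
  const float a = 2.0f, p = 0.5f;
  std::printf("scale_train=1: E[train]=%.2f test=%.2f\n",
              ExpectedTrainOutput(a, p, true), TestOutput(a, p, true));
  std::printf("scale_train=0: E[train]=%.2f test=%.2f\n",
              ExpectedTrainOutput(a, p, false), TestOutput(a, p, false));
  return 0;
}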
35 changes: 28 additions & 7 deletions src/caffe/layers/dropout_layer.cu
@@ -25,12 +25,23 @@ void DropoutLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
static_cast<unsigned int*>(rand_vec_.mutable_gpu_data());
caffe_gpu_rng_uniform(count, mask);
// set thresholds
if (scale_train_) {
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutForward<Dtype><<<CAFFE_GET_BLOCKS(count),
CAFFE_CUDA_NUM_THREADS>>>(
count, bottom_data, mask, uint_thres_, scale_, top_data);
} else {
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutForward<Dtype><<<CAFFE_GET_BLOCKS(count),
CAFFE_CUDA_NUM_THREADS>>>(
count, bottom_data, mask, uint_thres_, 1.f, top_data);
}
CUDA_POST_KERNEL_CHECK;
} else {
caffe_copy(count, bottom_data, top_data);
if (!scale_train_) {
caffe_gpu_scal<Dtype>(count, 1. / scale_, top_data);
}
}
}

@@ -54,13 +65,23 @@ void DropoutLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const unsigned int* mask =
static_cast<const unsigned int*>(rand_vec_.gpu_data());
const int count = bottom[0]->count();
if (scale_train_) {
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutBackward<Dtype><<<CAFFE_GET_BLOCKS(count),
CAFFE_CUDA_NUM_THREADS>>>(
count, top_diff, mask, uint_thres_, scale_, bottom_diff);
} else {
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutBackward<Dtype><<<CAFFE_GET_BLOCKS(count),
CAFFE_CUDA_NUM_THREADS>>>(
count, top_diff, mask, uint_thres_, 1.f, bottom_diff);
}
CUDA_POST_KERNEL_CHECK;
} else {
caffe_copy(top[0]->count(), top_diff, bottom_diff);
if (!scale_train_) {
caffe_gpu_scal<Dtype>(top[0]->count(), 1. / scale_, bottom_diff);
}
}
}
}