Skip to content

Commit

Permalink
Merge pull request BVLC#1976 from shelhamer/crop-layer
Browse files Browse the repository at this point in the history
Crop layer for automatically aligning computations
  • Loading branch information
longjon authored and twerdster committed Jul 17, 2015
2 parents 6d92d8f + a1c0fb2 commit a70af4a
Show file tree
Hide file tree
Showing 8 changed files with 334 additions and 1 deletion.
18 changes: 18 additions & 0 deletions include/caffe/common_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ class ConcatLayer : public Layer<Dtype> {
virtual inline const char* type() const { return "Concat"; }
virtual inline int MinBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 1; }
// Coordinate map for this layer: the 2-dimensional identity map
// (scale 1, offset 0 in each dimension).
// NOTE(review): presumably the two dimensions are height and width -- confirm.
virtual inline DiagonalAffineMap<Dtype> coord_map() {
return DiagonalAffineMap<Dtype>::identity(2);
}

protected:
/**
Expand Down Expand Up @@ -162,6 +165,9 @@ class EltwiseLayer : public Layer<Dtype> {
virtual inline const char* type() const { return "Eltwise"; }
virtual inline int MinBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 1; }
// Coordinate map for this layer: the 2-dimensional identity map
// (scale 1, offset 0 in each dimension).
// NOTE(review): presumably the two dimensions are height and width -- confirm.
virtual inline DiagonalAffineMap<Dtype> coord_map() {
return DiagonalAffineMap<Dtype>::identity(2);
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down Expand Up @@ -343,6 +349,9 @@ class MVNLayer : public Layer<Dtype> {
virtual inline const char* type() const { return "MVN"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
// Coordinate map for this layer: the 2-dimensional identity map
// (scale 1, offset 0 in each dimension).
// NOTE(review): presumably the two dimensions are height and width -- confirm.
virtual inline DiagonalAffineMap<Dtype> coord_map() {
return DiagonalAffineMap<Dtype>::identity(2);
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down Expand Up @@ -489,6 +498,9 @@ class SoftmaxLayer : public Layer<Dtype> {
virtual inline const char* type() const { return "Softmax"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
// Coordinate map for this layer: the 2-dimensional identity map
// (scale 1, offset 0 in each dimension).
// NOTE(review): presumably the two dimensions are height and width -- confirm.
virtual inline DiagonalAffineMap<Dtype> coord_map() {
return DiagonalAffineMap<Dtype>::identity(2);
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down Expand Up @@ -555,6 +567,9 @@ class SplitLayer : public Layer<Dtype> {
virtual inline const char* type() const { return "Split"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int MinTopBlobs() const { return 1; }
// Coordinate map for this layer: the 2-dimensional identity map
// (scale 1, offset 0 in each dimension).
// NOTE(review): presumably the two dimensions are height and width -- confirm.
virtual inline DiagonalAffineMap<Dtype> coord_map() {
return DiagonalAffineMap<Dtype>::identity(2);
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down Expand Up @@ -588,6 +603,9 @@ class SliceLayer : public Layer<Dtype> {
virtual inline const char* type() const { return "Slice"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int MinTopBlobs() const { return 2; }
// Coordinate map for this layer: the 2-dimensional identity map
// (scale 1, offset 0 in each dimension).
// NOTE(review): presumably the two dimensions are height and width -- confirm.
virtual inline DiagonalAffineMap<Dtype> coord_map() {
return DiagonalAffineMap<Dtype>::identity(2);
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down
17 changes: 17 additions & 0 deletions include/caffe/layer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,20 @@

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer_factory.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/coords.hpp"
#include "caffe/util/device_alternate.hpp"

namespace caffe {

template <typename Dtype> class Net;

/**
* @brief An interface for the units of computation which can be composed into a
* Net.
Expand Down Expand Up @@ -285,6 +289,16 @@ class Layer {
param_propagate_down_[param_id] = value;
}

/**
 * @brief Base-class coordinate map; layers that have one override this.
 *
 * The default body invokes NOT_IMPLEMENTED and then returns a map built from
 * an empty coefficient vector.
 * NOTE(review): NOT_IMPLEMENTED is defined elsewhere and presumably aborts;
 * confirm before relying on the returned value.
 */
virtual DiagonalAffineMap<Dtype> coord_map() {
NOT_IMPLEMENTED;
// Dummy return value; present only to suppress compiler warnings.
return DiagonalAffineMap<Dtype>(vector<pair<Dtype, Dtype> >());
}

/**
 * @brief Used by Net to give layers a pointer to their owning net.
 *
 * The raw pointer is stored as-is in net_.
 *
 * @param net the net this layer belongs to
 */
void set_net(Net<Dtype>* net) { net_ = net; }

protected:
/** The protobuf that stores the layer parameters */
Expand All @@ -300,6 +314,9 @@ class Layer {
* the objective function. */
vector<Dtype> loss_;

/** The net to which this layer belongs. */
Net<Dtype>* net_;

/** @brief Using the CPU device, compute the layer output. */
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) = 0;
Expand Down
3 changes: 3 additions & 0 deletions include/caffe/neuron_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ class NeuronLayer : public Layer<Dtype> {

virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
// Coordinate map for this layer: the 2-dimensional identity map
// (scale 1, offset 0 in each dimension).
// NOTE(review): presumably the two dimensions are height and width -- confirm.
virtual inline DiagonalAffineMap<Dtype> coord_map() {
return DiagonalAffineMap<Dtype>::identity(2);
}
};

/**
Expand Down
61 changes: 61 additions & 0 deletions include/caffe/util/coords.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#ifndef CAFFE_UTIL_COORDS_H_
#define CAFFE_UTIL_COORDS_H_

#include <algorithm>
#include <utility>
#include <vector>

namespace caffe {

/**
 * @brief A per-dimension affine map: in each dimension x -> scale * x + offset.
 *
 * Every dimension is described by one (scale, offset) pair, stored as
 * (pair.first, pair.second). Maps can be composed with compose() and
 * inverted with inv().
 */
template <typename Dtype>
class DiagonalAffineMap {
 public:
  /**
   * @brief Builds a map from one (scale, offset) pair per dimension.
   * @param coefs the coefficient pairs; copied into the map.
   */
  explicit DiagonalAffineMap(
      const std::vector<std::pair<Dtype, Dtype> >& coefs)
      : coefs_(coefs) { }

  /**
   * @brief Returns the identity map (scale 1, offset 0) over nd dimensions.
   * @param nd number of dimensions.
   */
  static DiagonalAffineMap identity(const int nd) {
    return DiagonalAffineMap(std::vector<std::pair<Dtype, Dtype> >(
        nd, std::pair<Dtype, Dtype>(1, 0)));
  }

  /**
   * @brief Returns the map "this after other", i.e. x -> this(other(x)).
   *
   * Both maps must have the same number of dimensions; this is enforced
   * with CHECK_EQ.
   */
  inline DiagonalAffineMap compose(const DiagonalAffineMap& other) const {
    CHECK_EQ(coefs_.size(), other.coefs_.size())
        << "Attempt to compose DiagonalAffineMaps of different dimensions";
    DiagonalAffineMap<Dtype> out;
    // Size the output up front so transform can write in place.
    out.coefs_.resize(coefs_.size());
    std::transform(coefs_.begin(), coefs_.end(), other.coefs_.begin(),
        out.coefs_.begin(), &compose_coefs);
    return out;
  }

  /**
   * @brief Returns the inverse map, computed independently per dimension.
   *
   * Each scale is divided by, so a zero scale is not checked for here.
   */
  inline DiagonalAffineMap inv() const {
    DiagonalAffineMap<Dtype> out;
    out.coefs_.resize(coefs_.size());
    std::transform(coefs_.begin(), coefs_.end(), out.coefs_.begin(),
        &inv_coefs);
    return out;
  }

  /** @brief Returns a copy of the (scale, offset) pairs, one per dimension. */
  inline std::vector<std::pair<Dtype, Dtype> > coefs() const {
    return coefs_;
  }

 private:
  // Only used internally to build results for compose() and inv().
  DiagonalAffineMap() { }

  // left(right(x)) = l.a * (r.a * x + r.b) + l.b
  //                = (l.a * r.a) * x + (l.a * r.b + l.b)
  static inline std::pair<Dtype, Dtype> compose_coefs(
      std::pair<Dtype, Dtype> left, std::pair<Dtype, Dtype> right) {
    return std::make_pair(left.first * right.first,
                          left.first * right.second + left.second);
  }

  // Inverse of x -> a * x + b is y -> (1 / a) * y - b / a.
  static inline std::pair<Dtype, Dtype> inv_coefs(
      std::pair<Dtype, Dtype> coefs) {
    return std::make_pair(1 / coefs.first, - coefs.second / coefs.first);
  }

  // One (scale, offset) pair per dimension.
  std::vector<std::pair<Dtype, Dtype> > coefs_;
};

/**
 * @brief Builds the 2-dimensional coordinate map of a strided, padded filter.
 *
 * For each of the two dimensions (h first, then w) the map has
 * scale = stride and offset = (kernel - 1) / 2 - pad, with the division
 * carried out in Dtype.
 */
template <typename Dtype>
DiagonalAffineMap<Dtype> FilterMap(const int kernel_h, const int kernel_w,
    const int stride_h, const int stride_w, const int pad_h, const int pad_w) {
  typedef std::pair<Dtype, Dtype> Coef;
  // Offsets are computed in Dtype so a half-integer result is preserved.
  const Dtype offset_h = static_cast<Dtype>(kernel_h - 1) / 2 - pad_h;
  const Dtype offset_w = static_cast<Dtype>(kernel_w - 1) / 2 - pad_w;
  std::vector<Coef> per_axis;
  per_axis.push_back(Coef(stride_h, offset_h));
  per_axis.push_back(Coef(stride_w, offset_w));
  return DiagonalAffineMap<Dtype>(per_axis);
}

} // namespace caffe

#endif // CAFFE_UTIL_COORDS_H_
49 changes: 48 additions & 1 deletion include/caffe/vision_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,10 @@ class ConvolutionLayer : public BaseConvolutionLayer<Dtype> {
: BaseConvolutionLayer<Dtype>(param) {}

virtual inline const char* type() const { return "Convolution"; }
// Coordinate map for this layer: the inverse of the FilterMap built from
// this layer's kernel, stride and pad sizes (h and w).
virtual inline DiagonalAffineMap<Dtype> coord_map() {
return FilterMap<Dtype>(this->kernel_h_, this->kernel_w_, this->stride_h_,
this->stride_w_, this->pad_h_, this->pad_w_).inv();
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down Expand Up @@ -195,8 +199,11 @@ class DeconvolutionLayer : public BaseConvolutionLayer<Dtype> {
public:
explicit DeconvolutionLayer(const LayerParameter& param)
: BaseConvolutionLayer<Dtype>(param) {}

virtual inline const char* type() const { return "Deconvolution"; }
// Coordinate map for this layer: the FilterMap built from this layer's
// kernel, stride and pad sizes (h and w), used directly (not inverted, in
// contrast to ConvolutionLayer::coord_map).
virtual inline DiagonalAffineMap<Dtype> coord_map() {
return FilterMap<Dtype>(this->kernel_h_, this->kernel_w_, this->stride_h_,
this->stride_w_, this->pad_h_, this->pad_w_);
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down Expand Up @@ -316,6 +323,9 @@ class LRNLayer : public Layer<Dtype> {
virtual inline const char* type() const { return "LRN"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
// Coordinate map for this layer: the 2-dimensional identity map
// (scale 1, offset 0 in each dimension).
// NOTE(review): presumably the two dimensions are height and width -- confirm.
virtual inline DiagonalAffineMap<Dtype> coord_map() {
return DiagonalAffineMap<Dtype>::identity(2);
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down Expand Up @@ -398,6 +408,10 @@ class PoolingLayer : public Layer<Dtype> {
return (this->layer_param_.pooling_param().pool() ==
PoolingParameter_PoolMethod_MAX) ? 2 : 1;
}
// Coordinate map for this layer: the inverse of the FilterMap built from
// this layer's kernel, stride and pad sizes (h and w).
virtual inline DiagonalAffineMap<Dtype> coord_map() {
return FilterMap<Dtype>(kernel_h_, kernel_w_, stride_h_, stride_w_,
pad_h_, pad_w_).inv();
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down Expand Up @@ -453,6 +467,39 @@ class CuDNNPoolingLayer : public PoolingLayer<Dtype> {
};
#endif

/**
 * @brief Crops the first bottom to the spatial size of the second bottom.
 *
 * Takes exactly two bottoms and produces one top. The top has the num and
 * channels of bottom[0] and the height and width of bottom[1]. The crop
 * offsets (crop_h_, crop_w_) are derived in LayerSetUp from the coord_map()
 * of the layers connecting the two bottoms.
 */
template <typename Dtype>
class CropLayer : public Layer<Dtype> {
 public:
  // The offsets start at zero so that coord_map() is well defined even
  // before LayerSetUp has computed the real values.
  explicit CropLayer(const LayerParameter& param)
      : Layer<Dtype>(param), crop_h_(0), crop_w_(0) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "Crop"; }
  virtual inline int ExactNumBottomBlobs() const { return 2; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

  /**
   * @brief Coordinate map of the crop: unit scale with offsets
   *        -crop_h_ and -crop_w_ (h first, then w).
   */
  virtual inline DiagonalAffineMap<Dtype> coord_map() {
    vector<pair<Dtype, Dtype> > coefs;
    coefs.push_back(pair<Dtype, Dtype>(1, -crop_h_));
    coefs.push_back(pair<Dtype, Dtype>(1, -crop_w_));
    return DiagonalAffineMap<Dtype>(coefs);
  }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  /// Row and column offset into bottom[0] at which the cropped window starts.
  int crop_h_, crop_w_;
};

/**
* @brief Does spatial pyramid pooling on the input image
* by taking the max, average, etc. within regions
Expand Down
126 changes: 126 additions & 0 deletions src/caffe/layers/crop_layer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#include <algorithm>
#include <map>
#include <set>
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/net.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

/**
 * @brief Derives crop_h_ and crop_w_ from the structure of the owning net.
 *
 * Steps:
 *  1. Map every blob to the first layer that lists it as a top.
 *  2. Walk back from bottom[0] through first bottoms, recording every blob.
 *  3. Walk back from bottom[1] until a recorded blob (the blob of
 *     intersection) is reached; fail if the two are disconnected.
 *  4. Compose the coord_map() of each layer between the intersection and
 *     each bottom, then combine them into the map from bottom[0]
 *     coordinates to bottom[1] coordinates.
 *  5. Require that map to have unit scale and a non-positive integer
 *     offset; the negated offsets become crop_h_ and crop_w_.
 *
 * Requires net_ to have been set (see Layer::set_net).
 *
 * @param bottom the two input blobs: [0] is cropped, [1] is the reference.
 * @param top unused here.
 */
template <typename Dtype>
void CropLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  typedef map<Blob<Dtype>*, int> ProducerMap;
  // Bind the net's per-layer blob lists once instead of re-fetching (and
  // copying entries of) them inside every loop.
  const vector<vector<Blob<Dtype>*> >& net_tops = this->net_->top_vecs();
  const vector<vector<Blob<Dtype>*> >& net_bottoms =
      this->net_->bottom_vecs();

  // Construct a map from top blobs to layer inds, skipping over in-place
  // connections: only the first layer that produces a blob is recorded.
  ProducerMap down_map;
  for (size_t layer_ind = 0; layer_ind < net_tops.size(); ++layer_ind) {
    const vector<Blob<Dtype>*>& tops = net_tops[layer_ind];
    for (size_t top_ind = 0; top_ind < tops.size(); ++top_ind) {
      // insert() leaves an already-present entry untouched.
      down_map.insert(std::make_pair(tops[top_ind],
                                     static_cast<int>(layer_ind)));
    }
  }

  // Walk back from the first bottom, keeping track of all the blobs we pass.
  // TODO this logic can be simplified if all blobs are tops
  set<Blob<Dtype>*> path_blobs;
  Blob<Dtype>* walk_blob = bottom[0];
  path_blobs.insert(walk_blob);
  for (typename ProducerMap::const_iterator it = down_map.find(walk_blob);
       it != down_map.end(); it = down_map.find(walk_blob)) {
    const vector<Blob<Dtype>*>& layer_bottoms = net_bottoms[it->second];
    if (layer_bottoms.empty()) {
      break;  // reached a layer without inputs
    }
    walk_blob = layer_bottoms[0];
    path_blobs.insert(walk_blob);
  }

  // Now walk back from the second bottom, until we find a blob of
  // intersection.
  Blob<Dtype>* inter_blob = bottom[1];
  while (path_blobs.find(inter_blob) == path_blobs.end()) {
    const typename ProducerMap::const_iterator it = down_map.find(inter_blob);
    CHECK(it != down_map.end())
        << "Cannot align apparently disconnected blobs.";
    const vector<Blob<Dtype>*>& layer_bottoms = net_bottoms[it->second];
    CHECK_GT(layer_bottoms.size(), 0)
        << "Cannot align apparently disconnected blobs.";
    inter_blob = layer_bottoms[0];
  }

  // Compute the coord map from the blob of intersection to each bottom.
  vector<DiagonalAffineMap<Dtype> > coord_maps(2,
      DiagonalAffineMap<Dtype>::identity(2));
  for (int i = 0; i < 2; ++i) {
    Blob<Dtype>* cursor = bottom[i];
    while (cursor != inter_blob) {
      const int producer_ind = down_map[cursor];
      coord_maps[i] = coord_maps[i].compose(
          this->net_->layers()[producer_ind]->coord_map());
      cursor = net_bottoms[producer_ind][0];
    }
  }

  // Compute the mapping from first bottom coordinates to second.
  DiagonalAffineMap<Dtype> crop_map =
      coord_maps[1].compose(coord_maps[0].inv());
  // coefs() returns a copy, so fetch it once.
  const vector<std::pair<Dtype, Dtype> > crop_coefs = crop_map.coefs();
  for (int i = 0; i < 2; ++i) {
    const Dtype scale = crop_coefs[i].first;
    const Dtype offset = crop_coefs[i].second;
    // Check for scale mismatch (unfortunately, CHECK_DOUBLE_EQ does not
    // support a message like the other CHECKs).
    CHECK_DOUBLE_EQ(scale, 1);
    CHECK_LE(offset, 0) << "Negative crop width.";
    // Check that the crop width is an integer.
    CHECK_DOUBLE_EQ(offset, round(offset));
  }
  crop_h_ = static_cast<int>(-round(crop_coefs[0].second));
  crop_w_ = static_cast<int>(-round(crop_coefs[1].second));
}

/**
 * @brief Shapes the top as (bottom[0] num, bottom[0] channels,
 *        bottom[1] height, bottom[1] width).
 *
 * Also verifies that the crop window, which starts at (crop_h_, crop_w_) and
 * has the spatial size of bottom[1], lies inside bottom[0]. Without this,
 * the row copies in Forward_cpu / Backward_cpu could run past the rows or
 * columns of bottom[0].
 */
template <typename Dtype>
void CropLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK_LE(crop_h_ + bottom[1]->height(), bottom[0]->height())
      << "Crop window exceeds the height of the first bottom.";
  CHECK_LE(crop_w_ + bottom[1]->width(), bottom[0]->width())
      << "Crop window exceeds the width of the first bottom.";
  top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(),
      bottom[1]->height(), bottom[1]->width());
}

/**
 * @brief Copies the cropped window of bottom[0] into top[0], row by row.
 *
 * For every (n, c, h) of the top, one row of top width is copied from
 * bottom[0] starting at row crop_h_ + h, column crop_w_.
 */
template <typename Dtype>
void CropLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Blob<Dtype>* const src = bottom[0];
  Blob<Dtype>* const dst = top[0];
  const Dtype* src_data = src->cpu_data();
  Dtype* dst_data = dst->mutable_cpu_data();

  const int num = dst->num();
  const int channels = dst->channels();
  const int height = dst->height();
  const int width = dst->width();

  for (int n = 0; n < num; ++n) {
    for (int c = 0; c < channels; ++c) {
      for (int h = 0; h < height; ++h) {
        // Source row is shifted by the crop offsets; destination is not.
        const Dtype* src_row =
            src_data + src->offset(n, c, crop_h_ + h, crop_w_);
        Dtype* dst_row = dst_data + dst->offset(n, c, h);
        caffe_copy(width, src_row, dst_row);
      }
    }
  }
}

/**
 * @brief Scatters the top gradient back into the cropped window of bottom[0].
 *
 * When propagate_down[0] is set, the whole bottom[0] diff is zeroed first and
 * then each top diff row is copied to row crop_h_ + h, column crop_w_ of the
 * bottom diff. Nothing is written otherwise.
 */
template <typename Dtype>
void CropLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  Blob<Dtype>* const grad_src = top[0];
  Blob<Dtype>* const grad_dst = bottom[0];
  // Both diff pointers are fetched up front, before the propagate check.
  const Dtype* src_diff = grad_src->cpu_diff();
  Dtype* dst_diff = grad_dst->mutable_cpu_diff();
  if (!propagate_down[0]) {
    return;
  }

  // Positions outside the crop window receive no gradient.
  caffe_set(grad_dst->count(), static_cast<Dtype>(0), dst_diff);

  const int num = grad_src->num();
  const int channels = grad_src->channels();
  const int height = grad_src->height();
  const int width = grad_src->width();
  for (int n = 0; n < num; ++n) {
    for (int c = 0; c < channels; ++c) {
      for (int h = 0; h < height; ++h) {
        const Dtype* src_row = src_diff + grad_src->offset(n, c, h);
        Dtype* dst_row =
            dst_diff + grad_dst->offset(n, c, crop_h_ + h, crop_w_);
        caffe_copy(width, src_row, dst_row);
      }
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(CropLayer);
#endif

INSTANTIATE_CLASS(CropLayer);
REGISTER_LAYER_CLASS(Crop);

} // namespace caffe
Loading

0 comments on commit a70af4a

Please sign in to comment.