Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Crop layer for automatically aligning computations #1976

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions include/caffe/common_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ class ConcatLayer : public Layer<Dtype> {
virtual inline const char* type() const { return "Concat"; }
virtual inline int MinBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 1; }
/**
 * @brief Coordinate map reported by this layer: the identity on two axes
 *        (scale 1, offset 0 for each).
 *
 * NOTE(review): presumably only meaningful when concatenation is not along
 * one of the two mapped axes -- confirm against the layer's concat axis.
 */
virtual inline DiagonalAffineMap<Dtype> coord_map() {
  const int num_mapped_axes = 2;
  return DiagonalAffineMap<Dtype>::identity(num_mapped_axes);
}

protected:
/**
Expand Down Expand Up @@ -165,6 +168,9 @@ class EltwiseLayer : public Layer<Dtype> {
virtual inline const char* type() const { return "Eltwise"; }
virtual inline int MinBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 1; }
/**
 * @brief Coordinate map reported by this layer: the identity on two axes
 *        (scale 1, offset 0 for each).
 */
virtual inline DiagonalAffineMap<Dtype> coord_map() {
  const int num_mapped_axes = 2;
  return DiagonalAffineMap<Dtype>::identity(num_mapped_axes);
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down Expand Up @@ -289,6 +295,9 @@ class MVNLayer : public Layer<Dtype> {
virtual inline const char* type() const { return "MVN"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
/**
 * @brief Coordinate map reported by this layer: the identity on two axes
 *        (scale 1, offset 0 for each).
 */
virtual inline DiagonalAffineMap<Dtype> coord_map() {
  const int num_mapped_axes = 2;
  return DiagonalAffineMap<Dtype>::identity(num_mapped_axes);
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down Expand Up @@ -351,6 +360,9 @@ class SoftmaxLayer : public Layer<Dtype> {
virtual inline const char* type() const { return "Softmax"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
/**
 * @brief Coordinate map reported by this layer: the identity on two axes
 *        (scale 1, offset 0 for each).
 */
virtual inline DiagonalAffineMap<Dtype> coord_map() {
  const int num_mapped_axes = 2;
  return DiagonalAffineMap<Dtype>::identity(num_mapped_axes);
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down Expand Up @@ -414,6 +426,9 @@ class SplitLayer : public Layer<Dtype> {
virtual inline const char* type() const { return "Split"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int MinTopBlobs() const { return 1; }
/**
 * @brief Coordinate map reported by this layer: the identity on two axes
 *        (scale 1, offset 0 for each).
 */
virtual inline DiagonalAffineMap<Dtype> coord_map() {
  const int num_mapped_axes = 2;
  return DiagonalAffineMap<Dtype>::identity(num_mapped_axes);
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down Expand Up @@ -447,6 +462,9 @@ class SliceLayer : public Layer<Dtype> {
virtual inline const char* type() const { return "Slice"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int MinTopBlobs() const { return 2; }
/**
 * @brief Coordinate map reported by this layer: the identity on two axes
 *        (scale 1, offset 0 for each).
 *
 * NOTE(review): presumably only meaningful when slicing is not along one of
 * the two mapped axes -- confirm against the layer's slice axis.
 */
virtual inline DiagonalAffineMap<Dtype> coord_map() {
  const int num_mapped_axes = 2;
  return DiagonalAffineMap<Dtype>::identity(num_mapped_axes);
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down
17 changes: 17 additions & 0 deletions include/caffe/layer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,20 @@

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer_factory.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/coords.hpp"
#include "caffe/util/device_alternate.hpp"

namespace caffe {

template <typename Dtype> class Net;

/**
* @brief An interface for the units of computation which can be composed into a
* Net.
Expand Down Expand Up @@ -285,6 +289,16 @@ class Layer {
param_propagate_down_[param_id] = value;
}

/**
 * @brief Default coordinate map hook: reports NOT_IMPLEMENTED.
 *
 * Layers that can describe how their output coordinates relate to their
 * input coordinates override this. The empty map returned below exists only
 * so the function has a return statement (it suppresses compiler warnings).
 */
virtual DiagonalAffineMap<Dtype> coord_map() {
  NOT_IMPLEMENTED;
  const vector<pair<Dtype, Dtype> > no_coefs;
  return DiagonalAffineMap<Dtype>(no_coefs);
}

/**
 * @brief Used by Net to give layers a pointer to their owning net.
 *
 * @param net the net pointer to record in net_; it is stored as-is.
 */
void set_net(Net<Dtype>* net) {
  this->net_ = net;
}

protected:
/** The protobuf that stores the layer parameters */
Expand All @@ -300,6 +314,9 @@ class Layer {
* the objective function. */
vector<Dtype> loss_;

/** The net to which this layer belongs. */
Net<Dtype>* net_;

/** @brief Using the CPU device, compute the layer output. */
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) = 0;
Expand Down
3 changes: 3 additions & 0 deletions include/caffe/neuron_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ class NeuronLayer : public Layer<Dtype> {

virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
/**
 * @brief Coordinate map reported by this layer: the identity on two axes
 *        (scale 1, offset 0 for each).
 */
virtual inline DiagonalAffineMap<Dtype> coord_map() {
  const int num_mapped_axes = 2;
  return DiagonalAffineMap<Dtype>::identity(num_mapped_axes);
}
};

/**
Expand Down
61 changes: 61 additions & 0 deletions include/caffe/util/coords.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#ifndef CAFFE_UTIL_COORDS_H_
#define CAFFE_UTIL_COORDS_H_

#include <algorithm>
#include <iterator>
#include <utility>
#include <vector>

namespace caffe {

/**
 * @brief An affine coordinate map that acts on each axis independently.
 *
 * Axis i is described by a pair (a_i, b_i) and maps x to a_i * x + b_i.
 * Instances are immutable once constructed; compose() and inv() return new
 * maps.
 */
template <typename Dtype>
class DiagonalAffineMap {
 public:
  /**
   * @brief Builds a map from per-axis (scale, offset) pairs.
   * @param coefs one (scale, offset) pair per axis; copied into the map.
   */
  explicit DiagonalAffineMap(
      const std::vector<std::pair<Dtype, Dtype> >& coefs)
      : coefs_(coefs) { }

  /**
   * @brief Returns the identity map (scale 1, offset 0) on @p nd axes.
   */
  static DiagonalAffineMap identity(const int nd) {
    return DiagonalAffineMap(std::vector<std::pair<Dtype, Dtype> >(
        nd, std::pair<Dtype, Dtype>(1, 0)));
  }

  /**
   * @brief Returns the map "this after other", i.e. x -> this(other(x)).
   *
   * Both maps must have the same number of axes; otherwise the CHECK fails.
   */
  inline DiagonalAffineMap compose(const DiagonalAffineMap& other) const {
    CHECK_EQ(coefs_.size(), other.coefs_.size())
        << "Attempt to compose DiagonalAffineMaps of different dimensions";
    DiagonalAffineMap<Dtype> out;
    out.coefs_.reserve(coefs_.size());
    std::transform(coefs_.begin(), coefs_.end(), other.coefs_.begin(),
        std::back_inserter(out.coefs_), &compose_coefs);
    return out;
  }

  /**
   * @brief Returns the inverse map, axis by axis.
   *
   * Every scale must be non-zero; the inverse divides by it without checking.
   */
  inline DiagonalAffineMap inv() const {
    DiagonalAffineMap<Dtype> out;
    out.coefs_.reserve(coefs_.size());
    std::transform(coefs_.begin(), coefs_.end(),
        std::back_inserter(out.coefs_), &inv_coefs);
    return out;
  }

  /** @brief Returns a copy of the per-axis (scale, offset) pairs. */
  inline std::vector<std::pair<Dtype, Dtype> > coefs() const {
    return coefs_;
  }

 private:
  // Only used internally to build results that are filled in afterwards.
  DiagonalAffineMap() { }

  // (a, b) composed with (c, d): a * (c * x + d) + b = (a * c) x + (a * d + b).
  static inline std::pair<Dtype, Dtype> compose_coefs(
      std::pair<Dtype, Dtype> left, std::pair<Dtype, Dtype> right) {
    return std::pair<Dtype, Dtype>(left.first * right.first,
        left.first * right.second + left.second);
  }

  // Inverse of x -> a * x + b is y -> (1 / a) * y - b / a.
  static inline std::pair<Dtype, Dtype> inv_coefs(
      std::pair<Dtype, Dtype> coefs) {
    return std::pair<Dtype, Dtype>(1 / coefs.first,
        - coefs.second / coefs.first);
  }

  std::vector<std::pair<Dtype, Dtype> > coefs_;
};

/**
 * @brief Builds the two-axis (height, then width) coordinate map of a
 *        sliding-window filter.
 *
 * For each axis the scale is the stride and the offset is
 * (kernel - 1) / 2 - pad, computed in Dtype so that even kernel sizes yield
 * a half-integer offset.
 *
 * @param kernel_h,kernel_w filter extent along height / width
 * @param stride_h,stride_w filter stride along height / width
 * @param pad_h,pad_w       padding along height / width
 * @return the resulting DiagonalAffineMap with axis 0 = height, 1 = width.
 */
template <typename Dtype>
DiagonalAffineMap<Dtype> FilterMap(const int kernel_h, const int kernel_w,
    const int stride_h, const int stride_w, const int pad_h, const int pad_w) {
  typedef std::pair<Dtype, Dtype> Coef;
  std::vector<Coef> coefs;
  coefs.reserve(2);
  coefs.push_back(Coef(static_cast<Dtype>(stride_h),
      static_cast<Dtype>(kernel_h - 1) / 2 - pad_h));
  coefs.push_back(Coef(static_cast<Dtype>(stride_w),
      static_cast<Dtype>(kernel_w - 1) / 2 - pad_w));
  return DiagonalAffineMap<Dtype>(coefs);
}

} // namespace caffe

#endif // CAFFE_UTIL_COORDS_H_
49 changes: 48 additions & 1 deletion include/caffe/vision_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,10 @@ class ConvolutionLayer : public BaseConvolutionLayer<Dtype> {
: BaseConvolutionLayer<Dtype>(param) {}

virtual inline const char* type() const { return "Convolution"; }
/**
 * @brief Coordinate map of the convolution: the inverse of the FilterMap
 *        built from this layer's kernel size, stride and padding.
 */
virtual inline DiagonalAffineMap<Dtype> coord_map() {
  const DiagonalAffineMap<Dtype> filter_map = FilterMap<Dtype>(
      this->kernel_h_, this->kernel_w_, this->stride_h_, this->stride_w_,
      this->pad_h_, this->pad_w_);
  return filter_map.inv();
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down Expand Up @@ -195,8 +199,11 @@ class DeconvolutionLayer : public BaseConvolutionLayer<Dtype> {
public:
explicit DeconvolutionLayer(const LayerParameter& param)
: BaseConvolutionLayer<Dtype>(param) {}

virtual inline const char* type() const { return "Deconvolution"; }
/**
 * @brief Coordinate map of the deconvolution: the FilterMap built from this
 *        layer's kernel size, stride and padding, used as-is (not inverted,
 *        unlike ConvolutionLayer).
 */
virtual inline DiagonalAffineMap<Dtype> coord_map() {
  const DiagonalAffineMap<Dtype> filter_map = FilterMap<Dtype>(
      this->kernel_h_, this->kernel_w_, this->stride_h_, this->stride_w_,
      this->pad_h_, this->pad_w_);
  return filter_map;
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down Expand Up @@ -314,6 +321,9 @@ class LRNLayer : public Layer<Dtype> {
virtual inline const char* type() const { return "LRN"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
/**
 * @brief Coordinate map reported by this layer: the identity on two axes
 *        (scale 1, offset 0 for each).
 */
virtual inline DiagonalAffineMap<Dtype> coord_map() {
  const int num_mapped_axes = 2;
  return DiagonalAffineMap<Dtype>::identity(num_mapped_axes);
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down Expand Up @@ -396,6 +406,10 @@ class PoolingLayer : public Layer<Dtype> {
return (this->layer_param_.pooling_param().pool() ==
PoolingParameter_PoolMethod_MAX) ? 2 : 1;
}
/**
 * @brief Coordinate map of the pooling: the inverse of the FilterMap built
 *        from this layer's kernel size, stride and padding.
 */
virtual inline DiagonalAffineMap<Dtype> coord_map() {
  const DiagonalAffineMap<Dtype> filter_map = FilterMap<Dtype>(
      kernel_h_, kernel_w_, stride_h_, stride_w_, pad_h_, pad_w_);
  return filter_map.inv();
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Expand Down Expand Up @@ -451,6 +465,39 @@ class CuDNNPoolingLayer : public PoolingLayer<Dtype> {
};
#endif

/**
 * @brief Copies a window of bottom[0] whose height and width equal those of
 *        bottom[1], starting at the offset (crop_h_, crop_w_).
 *
 * The offsets are computed in LayerSetUp from the coord_map() of the layers
 * that connect the two bottoms in the owning net. bottom[1] is used only for
 * its shape.
 */
template <typename Dtype>
class CropLayer : public Layer<Dtype> {
 public:
  // The offsets start at zero so coord_map() is well defined even before
  // LayerSetUp has computed the real values.
  explicit CropLayer(const LayerParameter& param)
      : Layer<Dtype>(param), crop_h_(0), crop_w_(0) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "Crop"; }
  virtual inline int ExactNumBottomBlobs() const { return 2; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

  /**
   * @brief Coordinate map of the crop: unit scale with offsets -crop_h_ and
   *        -crop_w_ on the height and width axes respectively.
   */
  virtual inline DiagonalAffineMap<Dtype> coord_map() {
    vector<pair<Dtype, Dtype> > coefs;
    coefs.reserve(2);
    coefs.push_back(pair<Dtype, Dtype>(1, - crop_h_));
    coefs.push_back(pair<Dtype, Dtype>(1, - crop_w_));
    return DiagonalAffineMap<Dtype>(coefs);
  }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  /// Row / column in bottom[0] at which the copied window starts.
  int crop_h_, crop_w_;
};

} // namespace caffe

#endif // CAFFE_VISION_LAYERS_HPP_
126 changes: 126 additions & 0 deletions src/caffe/layers/crop_layer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#include <algorithm>
#include <map>
#include <set>
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/net.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

/**
 * @brief Derives the crop offsets (crop_h_, crop_w_) from the net topology.
 *
 * Steps:
 *  1. Map every blob to the index of the first layer that lists it as a top
 *     (later, in-place layers are thereby skipped).
 *  2. Walk from bottom[0] towards the net input, always following the first
 *     bottom of the producing layer, and remember every blob visited.
 *  3. Walk from bottom[1] the same way until a remembered blob is reached;
 *     that blob is the common ancestor ("blob of intersection").
 *  4. Compose the coord_map() of each layer between the common ancestor and
 *     each bottom, then combine the two into the map between the bottoms.
 *  5. Require unit scale and a non-positive, integral offset on both axes and
 *     store the negated offsets as the crop.
 *
 * Only the first bottom of each layer is followed, so an ancestor reachable
 * solely through another bottom is not found.
 *
 * @param bottom bottom[0] is the blob to crop, bottom[1] the size reference.
 * @param top    unused here.
 */
template <typename Dtype>
void CropLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  typedef map<Blob<Dtype>*, int> ProducerMap;
  typedef typename ProducerMap::const_iterator ProducerIter;
  typedef vector<vector<Blob<Dtype>*> > BlobVecs;

  CHECK(this->net_)
      << "Crop layer requires a pointer to its owning net (see set_net).";
  const BlobVecs& top_vecs = this->net_->top_vecs();
  const BlobVecs& bottom_vecs = this->net_->bottom_vecs();

  // Construct a map from top blobs to layer inds, skipping over in-place
  // connections: insert() leaves an existing (earlier) entry untouched.
  ProducerMap down_map;
  const int num_layers = static_cast<int>(top_vecs.size());
  for (int layer_ind = 0; layer_ind < num_layers; ++layer_ind) {
    const vector<Blob<Dtype>*>& tops = top_vecs[layer_ind];
    for (size_t top_ind = 0; top_ind < tops.size(); ++top_ind) {
      down_map.insert(
          typename ProducerMap::value_type(tops[top_ind], layer_ind));
    }
  }

  // Walk back from the first bottom, keeping track of all the blobs we pass.
  // TODO this logic can be simplified if all blobs are tops
  set<Blob<Dtype>*> path_blobs;
  Blob<Dtype>* path_blob = bottom[0];
  path_blobs.insert(path_blob);
  for (ProducerIter it = down_map.find(path_blob); it != down_map.end();
       it = down_map.find(path_blob)) {
    const vector<Blob<Dtype>*>& producer_bottoms = bottom_vecs[it->second];
    if (producer_bottoms.empty()) {
      break;
    }
    path_blob = producer_bottoms[0];
    path_blobs.insert(path_blob);
  }

  // Now walk back from the second bottom, until we find a blob of
  // intersection.
  Blob<Dtype>* inter_blob = bottom[1];
  while (path_blobs.find(inter_blob) == path_blobs.end()) {
    const ProducerIter it = down_map.find(inter_blob);
    CHECK(it != down_map.end())
        << "Cannot align apparently disconnected blobs.";
    const vector<Blob<Dtype>*>& producer_bottoms = bottom_vecs[it->second];
    CHECK(!producer_bottoms.empty())
        << "Cannot align apparently disconnected blobs.";
    inter_blob = producer_bottoms[0];
  }

  // Compute the coord map from the blob of intersection to each bottom.
  vector<DiagonalAffineMap<Dtype> > coord_maps(2,
      DiagonalAffineMap<Dtype>::identity(2));
  for (int i = 0; i < 2; ++i) {
    Blob<Dtype>* cur = bottom[i];
    while (cur != inter_blob) {
      // Both walks above established this chain, so the lookup must succeed;
      // find() is used so that a broken chain fails loudly instead of
      // silently inserting layer 0 the way operator[] would.
      const ProducerIter it = down_map.find(cur);
      CHECK(it != down_map.end())
          << "Cannot align apparently disconnected blobs.";
      const int producer = it->second;
      coord_maps[i] = coord_maps[i].compose(
          this->net_->layers()[producer]->coord_map());
      cur = bottom_vecs[producer][0];
    }
  }

  // Compute the mapping from first bottom coordinates to second.
  DiagonalAffineMap<Dtype> crop_map =
      coord_maps[1].compose(coord_maps[0].inv());
  const vector<std::pair<Dtype, Dtype> > crop_coefs = crop_map.coefs();
  for (int i = 0; i < 2; ++i) {
    // Check for scale mismatch (unfortunately, CHECK_DOUBLE_EQ does not
    // support a message like the other CHECKs).
    CHECK_DOUBLE_EQ(crop_coefs[i].first, 1);
    CHECK_LE(crop_coefs[i].second, 0) << "Negative crop width.";
    // Check that the crop width is an integer.
    CHECK_DOUBLE_EQ(crop_coefs[i].second, round(crop_coefs[i].second));
  }
  crop_h_ = static_cast<int>(- round(crop_coefs[0].second));
  crop_w_ = static_cast<int>(- round(crop_coefs[1].second));
}

/**
 * @brief Shapes the top as (bottom[0] num, bottom[0] channels,
 *        bottom[1] height, bottom[1] width).
 *
 * Also verifies that the window starting at (crop_h_, crop_w_) with
 * bottom[1]'s spatial size lies inside bottom[0]. Forward_cpu and
 * Backward_cpu copy whole rows of that window, so a window that sticks out
 * would read or write elements outside the intended rows.
 */
template <typename Dtype>
void CropLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK_LE(crop_h_ + bottom[1]->height(), bottom[0]->height())
      << "Crop window exceeds the height of the first bottom.";
  CHECK_LE(crop_w_ + bottom[1]->width(), bottom[0]->width())
      << "Crop window exceeds the width of the first bottom.";
  top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(),
      bottom[1]->height(), bottom[1]->width());
}

/**
 * @brief Copies the cropped window of bottom[0] into top[0], one row at a
 *        time.
 *
 * For every (n, c, h) of the top, a row of top-width elements is copied from
 * bottom[0] at (n, c, crop_h_ + h, crop_w_).
 */
template <typename Dtype>
void CropLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Blob<Dtype>* const src = bottom[0];
  Blob<Dtype>* const dst = top[0];
  const Dtype* src_data = src->cpu_data();
  Dtype* dst_data = dst->mutable_cpu_data();

  const int out_num = dst->num();
  const int out_channels = dst->channels();
  const int out_height = dst->height();
  const int row_len = dst->width();

  for (int n = 0; n < out_num; ++n) {
    for (int c = 0; c < out_channels; ++c) {
      for (int h = 0; h < out_height; ++h) {
        const Dtype* src_row =
            src_data + src->offset(n, c, crop_h_ + h, crop_w_);
        Dtype* dst_row = dst_data + dst->offset(n, c, h);
        caffe_copy(row_len, src_row, dst_row);
      }
    }
  }
}

/**
 * @brief Scatters the top gradient back into the cropped window of
 *        bottom[0]'s diff; everything outside the window is set to zero.
 *
 * Nothing is written unless propagate_down[0] is set. No gradient is
 * produced for bottom[1].
 */
template <typename Dtype>
void CropLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  Blob<Dtype>* const grad_src = top[0];
  Blob<Dtype>* const grad_dst = bottom[0];
  const Dtype* src_diff = grad_src->cpu_diff();
  Dtype* dst_diff = grad_dst->mutable_cpu_diff();
  if (!propagate_down[0]) {
    return;
  }

  // Clear the whole diff first so the region outside the crop stays zero.
  caffe_set(grad_dst->count(), static_cast<Dtype>(0), dst_diff);

  const int out_num = grad_src->num();
  const int out_channels = grad_src->channels();
  const int out_height = grad_src->height();
  const int row_len = grad_src->width();
  for (int n = 0; n < out_num; ++n) {
    for (int c = 0; c < out_channels; ++c) {
      for (int h = 0; h < out_height; ++h) {
        const Dtype* src_row = src_diff + grad_src->offset(n, c, h);
        Dtype* dst_row =
            dst_diff + grad_dst->offset(n, c, crop_h_ + h, crop_w_);
        caffe_copy(row_len, src_row, dst_row);
      }
    }
  }
}

// Only in CPU_ONLY builds: STUB_GPU is applied to CropLayer.
// NOTE(review): presumably this supplies placeholder GPU methods so the
// class still links without a GPU implementation -- the macro is defined
// outside this file; confirm.
#ifdef CPU_ONLY
STUB_GPU(CropLayer);
#endif

// NOTE(review): presumably these instantiate the template for the supported
// Dtypes and register the layer under the type name "Crop" -- both macros
// are defined outside this file; confirm.
INSTANTIATE_CLASS(CropLayer);
REGISTER_LAYER_CLASS(Crop);

} // namespace caffe
Loading