diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp
index d2c0ce6d0c6..c0389e745a9 100644
--- a/include/caffe/common_layers.hpp
+++ b/include/caffe/common_layers.hpp
@@ -87,6 +87,10 @@ class ConcatLayer : public Layer<Dtype> {
   virtual inline const char* type() const { return "Concat"; }
   virtual inline int MinBottomBlobs() const { return 2; }
   virtual inline int ExactNumTopBlobs() const { return 1; }
+  // Identity coordinate map over 2 dimensions (scale 1, offset 0).
+  virtual inline DiagonalAffineMap<Dtype> coord_map() {
+    return DiagonalAffineMap<Dtype>::identity(2);
+  }
 
  protected:
   /**
@@ -162,6 +166,10 @@ class EltwiseLayer : public Layer<Dtype> {
   virtual inline const char* type() const { return "Eltwise"; }
   virtual inline int MinBottomBlobs() const { return 2; }
   virtual inline int ExactNumTopBlobs() const { return 1; }
+  // Identity coordinate map over 2 dimensions (scale 1, offset 0).
+  virtual inline DiagonalAffineMap<Dtype> coord_map() {
+    return DiagonalAffineMap<Dtype>::identity(2);
+  }
 
  protected:
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
@@ -343,6 +351,10 @@ class MVNLayer : public Layer<Dtype> {
   virtual inline const char* type() const { return "MVN"; }
   virtual inline int ExactNumBottomBlobs() const { return 1; }
   virtual inline int ExactNumTopBlobs() const { return 1; }
+  // Identity coordinate map over 2 dimensions (scale 1, offset 0).
+  virtual inline DiagonalAffineMap<Dtype> coord_map() {
+    return DiagonalAffineMap<Dtype>::identity(2);
+  }
 
  protected:
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
@@ -489,6 +501,10 @@ class SoftmaxLayer : public Layer<Dtype> {
   virtual inline const char* type() const { return "Softmax"; }
   virtual inline int ExactNumBottomBlobs() const { return 1; }
   virtual inline int ExactNumTopBlobs() const { return 1; }
+  // Identity coordinate map over 2 dimensions (scale 1, offset 0).
+  virtual inline DiagonalAffineMap<Dtype> coord_map() {
+    return DiagonalAffineMap<Dtype>::identity(2);
+  }
 
  protected:
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
@@ -555,6 +571,10 @@ class SplitLayer : public Layer<Dtype> {
   virtual inline const char* type() const { return "Split"; }
   virtual inline int ExactNumBottomBlobs() const { return 1; }
   virtual inline int MinTopBlobs() const { return 1; }
+  // Identity coordinate map over 2 dimensions (scale 1, offset 0).
+  virtual inline DiagonalAffineMap<Dtype> coord_map() {
+    return DiagonalAffineMap<Dtype>::identity(2);
+  }
 
  protected:
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
@@ -588,6 +608,10 @@ class SliceLayer : public Layer<Dtype> {
   virtual inline const char* type() const { return "Slice"; }
   virtual inline int ExactNumBottomBlobs() const { return 1; }
   virtual inline int MinTopBlobs() const { return 2; }
+  // Identity coordinate map over 2 dimensions (scale 1, offset 0).
+  virtual inline DiagonalAffineMap<Dtype> coord_map() {
+    return DiagonalAffineMap<Dtype>::identity(2);
+  }
 
  protected:
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp
index e2eba196346..f76c0cac085 100644
--- a/include/caffe/layer.hpp
+++ b/include/caffe/layer.hpp
@@ -3,16 +3,20 @@
 
 #include <algorithm>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
 #include "caffe/layer_factory.hpp"
 #include "caffe/proto/caffe.pb.h"
+#include "caffe/util/coords.hpp"
 #include "caffe/util/device_alternate.hpp"
 
 namespace caffe {
 
+template <typename Dtype> class Net;
+
 /**
  * @brief An interface for the units of computation which can be composed into a
  *        Net.
@@ -285,6 +289,22 @@ class Layer {
     param_propagate_down_[param_id] = value;
   }
 
+  /**
+   * @brief Returns this layer's coordinate map as a DiagonalAffineMap.
+   *
+   * The base implementation only reports NOT_IMPLEMENTED; layers that take
+   * part in coordinate mapping override it.
+   */
+  virtual DiagonalAffineMap<Dtype> coord_map() {
+    NOT_IMPLEMENTED;
+    // suppress warnings
+    return DiagonalAffineMap<Dtype>(vector<pair<Dtype, Dtype> >());
+  }
+
+  /**
+   * @brief Used by Net to give layers a pointer to their owning net.
+   */
+  void set_net(Net<Dtype>* net) { net_ = net; }
 
  protected:
   /** The protobuf that stores the layer parameters */
@@ -300,6 +320,13 @@ class Layer {
    *  the objective function. */
   vector<Dtype> loss_;
 
+  /**
+   * The net to which this layer belongs; assigned through set_net().
+   * NOTE(review): nothing visible in this patch initializes the pointer, so
+   * it should only be read after set_net() -- confirm against the constructor.
+   */
+  Net<Dtype>* net_;
+
   /** @brief Using the CPU device, compute the layer output.
*/
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top) = 0;
diff --git a/include/caffe/neuron_layers.hpp b/include/caffe/neuron_layers.hpp
index c2e0774aaa2..6f69549f797 100644
--- a/include/caffe/neuron_layers.hpp
+++ b/include/caffe/neuron_layers.hpp
@@ -31,6 +31,10 @@ class NeuronLayer : public Layer<Dtype> {
 
   virtual inline int ExactNumBottomBlobs() const { return 1; }
   virtual inline int ExactNumTopBlobs() const { return 1; }
+  // Identity coordinate map over 2 dimensions (scale 1, offset 0).
+  virtual inline DiagonalAffineMap<Dtype> coord_map() {
+    return DiagonalAffineMap<Dtype>::identity(2);
+  }
 };
 
 /**
diff --git a/include/caffe/util/coords.hpp b/include/caffe/util/coords.hpp
new file mode 100644
index 00000000000..5032fc60abd
--- /dev/null
+++ b/include/caffe/util/coords.hpp
@@ -0,0 +1,81 @@
+#ifndef CAFFE_UTIL_COORDS_H_
+#define CAFFE_UTIL_COORDS_H_
+
+#include <algorithm>
+#include <iterator>
+#include <utility>
+#include <vector>
+
+#include "caffe/common.hpp"
+
+namespace caffe {
+
+/**
+ * @brief A per-dimension affine map of coordinates.
+ *
+ * Each dimension i holds a (scale, offset) pair and sends a coordinate x to
+ * coefs[i].first * x + coefs[i].second.
+ */
+template <typename Dtype>
+class DiagonalAffineMap {
+ public:
+  /** @brief Builds a map from one (scale, offset) pair per dimension. */
+  explicit DiagonalAffineMap(const vector<pair<Dtype, Dtype> >& coefs)
+    : coefs_(coefs) { }
+  /** @brief Returns the nd-dimensional map with scale 1 and offset 0. */
+  static DiagonalAffineMap identity(const int nd) {
+    return DiagonalAffineMap(vector<pair<Dtype, Dtype> >(nd, make_pair(1, 0)));
+  }
+
+  /** @brief Returns the map that applies other first and then this map. */
+  inline DiagonalAffineMap compose(const DiagonalAffineMap& other) const {
+    CHECK_EQ(coefs_.size(), other.coefs_.size())
+        << "Attempt to compose DiagonalAffineMaps of different dimensions";
+    DiagonalAffineMap<Dtype> out;
+    out.coefs_.reserve(coefs_.size());
+    std::transform(coefs_.begin(), coefs_.end(), other.coefs_.begin(),
+        std::back_inserter(out.coefs_), &compose_coefs);
+    return out;
+  }
+  /** @brief Returns the inverse map; every scale must be nonzero. */
+  inline DiagonalAffineMap inv() const {
+    DiagonalAffineMap<Dtype> out;
+    out.coefs_.reserve(coefs_.size());
+    std::transform(coefs_.begin(), coefs_.end(),
+        std::back_inserter(out.coefs_), &inv_coefs);
+    return out;
+  }
+  /** @brief Read-only view of the (scale, offset) pairs, one per dimension. */
+  inline const vector<pair<Dtype, Dtype> >& coefs() const { return coefs_; }
+
+ private:
+  // Empty map used by compose() and inv(), which fill coefs_ themselves.
+  DiagonalAffineMap() { }
+  // left after right: a * (c * x + d) + b == (a * c) * x + (a * d + b).
+  static inline pair<Dtype, Dtype> compose_coefs(pair<Dtype, Dtype> left,
+      pair<Dtype, Dtype> right) {
+    return make_pair(left.first * right.first,
+                     left.first * right.second + left.second);
+  }
+  // Inverse of x -> a * x + b is y -> y / a - b / a, undefined for a == 0.
+  static inline pair<Dtype, Dtype> inv_coefs(pair<Dtype, Dtype> coefs) {
+    CHECK_NE(coefs.first, 0) << "Cannot invert a map with zero scale";
+    return make_pair(1 / coefs.first, - coefs.second / coefs.first);
+  }
+  vector<pair<Dtype, Dtype> > coefs_;
+};
+
+/** @brief Per-dimension (h, w) map x -> stride * x + (kernel - 1) / 2 - pad,
+ *         built from a filter's kernel size, stride and padding. */
+template <typename Dtype>
+DiagonalAffineMap<Dtype> FilterMap(const int kernel_h, const int kernel_w,
+    const int stride_h, const int stride_w, const int pad_h, const int pad_w) {
+  vector<pair<Dtype, Dtype> > coefs(2);
+  coefs[0] = make_pair(stride_h, static_cast<Dtype>(kernel_h - 1) / 2 - pad_h);
+  coefs[1] = make_pair(stride_w, static_cast<Dtype>(kernel_w - 1) / 2 - pad_w);
+  return DiagonalAffineMap<Dtype>(coefs);
+}
+
+}  // namespace caffe
+
+#endif  // CAFFE_UTIL_COORDS_H_
diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp
index a6bd86a93f5..0ef713c6b13 100644
--- a/include/caffe/vision_layers.hpp
+++ b/include/caffe/vision_layers.hpp
@@ -162,6 +162,11 @@ class ConvolutionLayer : public BaseConvolutionLayer<Dtype> {
       : BaseConvolutionLayer<Dtype>(param) {}
 
   virtual inline const char* type() const { return "Convolution"; }
+  // Inverse of the FilterMap built from this layer's kernel, stride and pad.
+  virtual inline DiagonalAffineMap<Dtype> coord_map() {
+    return FilterMap<Dtype>(this->kernel_h_, this->kernel_w_, this->stride_h_,
+        this->stride_w_, this->pad_h_, this->pad_w_).inv();
+  }
 
  protected:
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
@@ -195,8 +200,12 @@ class DeconvolutionLayer : public BaseConvolutionLayer<Dtype> {
  public:
   explicit DeconvolutionLayer(const LayerParameter& param)
       : BaseConvolutionLayer<Dtype>(param) {}
-
   virtual inline const char* type() const { return "Deconvolution"; }
+  // FilterMap built from this layer's kernel, stride and pad (not inverted).
+  virtual inline DiagonalAffineMap<Dtype> coord_map() {
+    return FilterMap<Dtype>(this->kernel_h_, this->kernel_w_, this->stride_h_,
+        this->stride_w_, this->pad_h_, this->pad_w_);
+  }
 
  protected:
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
@@ -316,6 +325,10 @@ class LRNLayer : public Layer<Dtype> {
   virtual inline const char* type() const { return "LRN"; }
   virtual inline int ExactNumBottomBlobs() const { return 1; }
   virtual inline int ExactNumTopBlobs() const { return 1; }
+  // Identity coordinate map over 2 dimensions (scale 1, offset 0).
+  virtual inline DiagonalAffineMap<Dtype> coord_map() {
+    return DiagonalAffineMap<Dtype>::identity(2);
+  }
 
  protected:
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
@@ -398,6 +411,11 @@ class PoolingLayer : public Layer<Dtype> {
     return (this->layer_param_.pooling_param().pool() ==
             PoolingParameter_PoolMethod_MAX) ? 2 : 1;
   }
+  // Inverse of the FilterMap built from this layer's kernel, stride and pad.
+  virtual inline DiagonalAffineMap<Dtype> coord_map() {
+    return FilterMap<Dtype>(kernel_h_, kernel_w_, stride_h_, stride_w_,
+        pad_h_, pad_w_).inv();
+  }
 
  protected:
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
@@ -453,6 +471,47 @@ class CuDNNPoolingLayer : public PoolingLayer<Dtype> {
 };
 #endif
 
+/**
+ * @brief Crops bottom[0] to the height and width of bottom[1].
+ *
+ * The offsets crop_h_ and crop_w_ are computed in LayerSetUp from the
+ * coord_map()s of the layers that connect the two bottoms.
+ */
+template <typename Dtype>
+class CropLayer : public Layer<Dtype> {
+ public:
+  explicit CropLayer(const LayerParameter& param)
+      : Layer<Dtype>(param), crop_h_(0), crop_w_(0) {}
+  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  virtual inline const char* type() const { return "Crop"; }
+  virtual inline int ExactNumBottomBlobs() const { return 2; }
+  virtual inline int ExactNumTopBlobs() const { return 1; }
+  // Unit scale with offsets -crop_h_ and -crop_w_.
+  virtual inline DiagonalAffineMap<Dtype> coord_map() {
+    vector<pair<Dtype, Dtype> > coefs;
+    coefs.push_back(make_pair(1, - crop_h_));
+    coefs.push_back(make_pair(1, - crop_w_));
+    return DiagonalAffineMap<Dtype>(coefs);
+  }
+
+ protected:
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+  // Row/column offsets into bottom[0]; zero until LayerSetUp assigns them.
+  int crop_h_, crop_w_;
+};
+
 /**
  * @brief Does spatial pyramid pooling on the input image
  *        by taking the max, average, etc.
within regions
diff --git a/src/caffe/layers/crop_layer.cpp b/src/caffe/layers/crop_layer.cpp
new file mode 100644
index 00000000000..4e35598233b
--- /dev/null
+++ b/src/caffe/layers/crop_layer.cpp
@@ -0,0 +1,160 @@
+#include <algorithm>
+#include <cmath>
+#include <map>
+#include <set>
+#include <utility>
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/net.hpp"
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+/**
+ * @brief Derives crop_h_ and crop_w_ from the net's topology.
+ *
+ * Finds a blob that both bottoms descend from (following each layer's first
+ * bottom), composes the coord_map()s of the layers along both paths, and
+ * requires the resulting map to have unit scale and an integer,
+ * non-positive offset in each of the two dimensions.
+ */
+template <typename Dtype>
+void CropLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  CHECK(this->net_) << "CropLayer requires set_net() before LayerSetUp.";
+  const vector<vector<Blob<Dtype>*> >& top_vecs = this->net_->top_vecs();
+  const vector<vector<Blob<Dtype>*> >& bottom_vecs = this->net_->bottom_vecs();
+  // Construct a map from top blobs to layer inds, skipping over in-place
+  // connections: insert() keeps the first layer recorded for a blob.
+  map<Blob<Dtype>*, int> down_map;
+  for (int layer_ind = 0; layer_ind < top_vecs.size(); ++layer_ind) {
+    // Bound by reference so each layer's top vector is not copied.
+    const vector<Blob<Dtype>*>& tops = top_vecs[layer_ind];
+    for (int top_ind = 0; top_ind < tops.size(); ++top_ind) {
+      down_map.insert(make_pair(tops[top_ind], layer_ind));
+    }
+  }
+  // Walk back from the first bottom, keeping track of all the blobs we pass.
+  set<Blob<Dtype>*> path_blobs;
+  Blob<Dtype>* blob = bottom[0];
+  // TODO this logic can be simplified if all blobs are tops
+  path_blobs.insert(blob);
+  while (down_map.find(blob) != down_map.end()) {
+    const vector<Blob<Dtype>*>& inputs = bottom_vecs[down_map[blob]];
+    // Stop at a layer without bottoms, or when the recorded producer is an
+    // in-place layer whose bottom is this same blob (no progress possible).
+    if (inputs.empty() || inputs[0] == blob) {
+      break;
+    }
+    blob = inputs[0];
+    path_blobs.insert(blob);
+  }
+  // Now walk back from the second bottom, until we find a blob of intersection.
+  Blob<Dtype>* inter_blob = bottom[1];
+  while (path_blobs.find(inter_blob) == path_blobs.end()) {
+    CHECK(down_map.find(inter_blob) != down_map.end())
+        << "Cannot align apparently disconnected blobs.";
+    const vector<Blob<Dtype>*>& inputs = bottom_vecs[down_map[inter_blob]];
+    CHECK_GT(inputs.size(), 0)
+        << "Cannot align apparently disconnected blobs.";
+    // A producer feeding on its own top would make this loop spin forever.
+    CHECK(inputs[0] != inter_blob)
+        << "Cannot align apparently disconnected blobs.";
+    inter_blob = inputs[0];
+  }
+  // Compute the coord map from the blob of intersection to each bottom.
+  vector<DiagonalAffineMap<Dtype> > coord_maps(2,
+      DiagonalAffineMap<Dtype>::identity(2));
+  for (int i = 0; i < 2; ++i) {
+    for (Blob<Dtype>* cur = bottom[i]; cur != inter_blob;
+         cur = bottom_vecs[down_map[cur]][0]) {
+      const shared_ptr<Layer<Dtype> >& layer =
+          this->net_->layers()[down_map[cur]];
+      coord_maps[i] = coord_maps[i].compose(layer->coord_map());
+    }
+  }
+  // Compute the mapping from first bottom coordinates to second.
+  DiagonalAffineMap<Dtype> crop_map =
+      coord_maps[1].compose(coord_maps[0].inv());
+  // Fetch the coefficients once instead of on every access below.
+  const vector<pair<Dtype, Dtype> > crop_coefs = crop_map.coefs();
+  for (int i = 0; i < 2; ++i) {
+    // Check for scale mismatch (unfortunately, CHECK_DOUBLE_EQ does not
+    // support a message like the other CHECKs).
+    CHECK_DOUBLE_EQ(crop_coefs[i].first, 1);
+    CHECK_LE(crop_coefs[i].second, 0) << "Negative crop width.";
+    // Check that the crop width is an integer.
+    CHECK_DOUBLE_EQ(crop_coefs[i].second, round(crop_coefs[i].second));
+  }
+  crop_h_ = - static_cast<int>(round(crop_coefs[0].second));
+  crop_w_ = - static_cast<int>(round(crop_coefs[1].second));
+}
+
+/**
+ * @brief Shapes the top with bottom[0]'s num/channels and bottom[1]'s
+ *        height/width, after checking the cropped window fits in bottom[0].
+ */
+template <typename Dtype>
+void CropLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  // Forward and Backward address bottom[0] starting at (crop_h_, crop_w_), so
+  // a window reaching past its edge would be an out-of-bounds access.
+  CHECK_LE(crop_h_ + bottom[1]->height(), bottom[0]->height())
+      << "Cropped height exceeds the height of the first bottom.";
+  CHECK_LE(crop_w_ + bottom[1]->width(), bottom[0]->width())
+      << "Cropped width exceeds the width of the first bottom.";
+  top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), bottom[1]->height(),
+      bottom[1]->width());
+}
+
+/** @brief Copies the cropped window of bottom[0] into the top, row by row. */
+template <typename Dtype>
+void CropLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  Dtype* top_data = top[0]->mutable_cpu_data();
+  for (int n = 0; n < top[0]->num(); ++n) {
+    for (int c = 0; c < top[0]->channels(); ++c) {
+      for (int h = 0; h < top[0]->height(); ++h) {
+        caffe_copy(top[0]->width(),
+            bottom_data + bottom[0]->offset(n, c, crop_h_ + h, crop_w_),
+            top_data + top[0]->offset(n, c, h));
+      }
+    }
+  }
+}
+
+/**
+ * @brief Zeroes bottom[0]'s diff and copies the top diff into the cropped
+ *        window; does nothing unless propagate_down[0] is set.
+ */
+template <typename Dtype>
+void CropLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+  if (!propagate_down[0]) {
+    return;
+  }
+  // Requested only past the guard so the diffs are untouched when unused.
+  const Dtype* top_diff = top[0]->cpu_diff();
+  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
+  caffe_set(bottom[0]->count(), static_cast<Dtype>(0), bottom_diff);
+  for (int n = 0; n < top[0]->num(); ++n) {
+    for (int c = 0; c < top[0]->channels(); ++c) {
+      for (int h = 0; h < top[0]->height(); ++h) {
+        caffe_copy(top[0]->width(),
+            top_diff + top[0]->offset(n, c, h),
+            bottom_diff + bottom[0]->offset(n, c, crop_h_ + h, crop_w_));
+      }
+    }
+  }
+}
+
+#ifdef CPU_ONLY
+STUB_GPU(CropLayer);
+#endif
+
+INSTANTIATE_CLASS(CropLayer);
+REGISTER_LAYER_CLASS(Crop);
+
+}  // namespace caffe
diff --git a/src/caffe/layers/crop_layer.cu b/src/caffe/layers/crop_layer.cu
new file mode 100644
index 00000000000..2dd3ff95d91
--- /dev/null
+++ b/src/caffe/layers/crop_layer.cu
@@ -0,0 +1,70 @@
+#include <vector>
+
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+// Copy (one line per thread) from one array to another, with arbitrary
+// strides in the last two dimensions.
+// Line `index` starts at (index / height) * outer_stride +
+// (index % height) * inner_stride in each array; width elements are copied.
+template <typename Dtype>
+__global__ void copy_kernel(const int n, const int height, const int width,
+    const int src_outer_stride, const int src_inner_stride,
+    const int dest_outer_stride, const int dest_inner_stride,
+    const Dtype* src, Dtype* dest) {
+  CUDA_KERNEL_LOOP(index, n) {
+    int src_start = index / height * src_outer_stride
+                  + index % height * src_inner_stride;
+    int dest_start = index / height * dest_outer_stride
+                   + index % height * dest_inner_stride;
+    for (int i = 0; i < width; ++i) {
+      dest[dest_start + i] = src[src_start + i];
+    }
+  }
+}
+
+/** @brief Copies the cropped window of bottom[0] into the top on the GPU. */
+template <typename Dtype>
+void CropLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  Dtype* top_data = top[0]->mutable_gpu_data();
+  const int lines = top[0]->count() / top[0]->width();
+
+  // NOLINT_NEXT_LINE(whitespace/operators)
+  copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>(
+      lines, top[0]->height(), top[0]->width(),
+      bottom[0]->height() * bottom[0]->width(), bottom[0]->width(),
+      top[0]->height() * top[0]->width(), top[0]->width(),
+      bottom_data + bottom[0]->offset(0, 0, crop_h_, crop_w_), top_data);
+  CUDA_POST_KERNEL_CHECK;
+}
+
+/**
+ * @brief Zeroes bottom[0]'s diff and copies the top diff into the cropped
+ *        window on the GPU; does nothing unless propagate_down[0] is set.
+ */
+template <typename Dtype>
+void CropLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+  if (!propagate_down[0]) {
+    return;
+  }
+  const Dtype* top_diff = top[0]->gpu_diff();
+  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+  const int lines = top[0]->count() / top[0]->width();
+
+  caffe_gpu_set(bottom[0]->count(), static_cast<Dtype>(0), bottom_diff);
+  // NOLINT_NEXT_LINE(whitespace/operators)
+  copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>(
+      lines, top[0]->height(), top[0]->width(),
+      top[0]->height() * top[0]->width(), top[0]->width(),
+      bottom[0]->height() * bottom[0]->width(), bottom[0]->width(),
+      top_diff, bottom_diff + bottom[0]->offset(0, 0, crop_h_, crop_w_));
+  CUDA_POST_KERNEL_CHECK;
+}
+
+INSTANTIATE_LAYER_GPU_FUNCS(CropLayer);
+
+}  // namespace caffe
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index a18ee63818e..e1ff68cc149 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -86,6 +86,8 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
           << "either 0 or bottom_size times ";
     }
     layers_.push_back(LayerRegistry<Dtype>::CreateLayer(layer_param));
+    // Hand the new layer a pointer to this net (read by CropLayer's setup).
+    layers_[layer_id]->set_net(this);
     layer_names_.push_back(layer_param.name());
     LOG(INFO) << "Creating Layer " << layer_param.name();
     bool need_backward = false;