From 311abb7b377292bd94895a5d6088ac4f4b9e2981 Mon Sep 17 00:00:00 2001
From: Takuya Narihira
Date: Wed, 25 Feb 2015 12:10:49 -0800
Subject: [PATCH 01/21] Fix incorrectly storing empty param_name

---
 src/caffe/net.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index c359be9b575..49b53956858 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -402,7 +402,7 @@ void Net<Dtype>::AppendParam(const NetParameter& param, const int layer_id,
     // (i.e., not given a param_name) or explicitly given a name that we
     // haven't already seen.
     param_owners_.push_back(-1);
-    if (param_size) {
+    if (param_name.size()) {
       param_names_index_[param_name] = net_param_id;
     }
   } else {

From 4a4118f2b15c40d071da5faca0c404bd7e887de9 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Sun, 8 Mar 2015 17:47:20 -0700
Subject: [PATCH 02/21] Increment iter_ before snapshotting, remove +1 logic -- fixes final snapshot being off by one

---
 src/caffe/solver.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index 034390e6824..b3b77a002ec 100644
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
@@ -167,7 +167,7 @@ void Solver<Dtype>::Step(int iters) {
   vector<Dtype> losses;
   Dtype smoothed_loss = 0;
 
-  for (; iter_ < stop_iter; ++iter_) {
+  while (iter_ < stop_iter) {
     if (param_.test_interval() && iter_ % param_.test_interval() == 0
         && (iter_ > 0 || param_.test_initialization())) {
       TestAll();
@@ -210,8 +210,12 @@
     ComputeUpdateValue();
     net_->Update();
 
+    // Increment the internal iter_ counter -- its value should always indicate
+    // the number of times the weights have been updated.
+    ++iter_;
+
     // Save a snapshot if needed.
-    if (param_.snapshot() && (iter_ + 1) % param_.snapshot() == 0) {
+    if (param_.snapshot() && iter_ % param_.snapshot() == 0) {
       Snapshot();
     }
   }
@@ -327,15 +331,14 @@ void Solver<Dtype>::Snapshot() {
   string model_filename, snapshot_filename;
   const int kBufferSize = 20;
   char iter_str_buffer[kBufferSize];
-  // Add one to iter_ to get the number of iterations that have completed.
-  snprintf(iter_str_buffer, kBufferSize, "_iter_%d", iter_ + 1);
+  snprintf(iter_str_buffer, kBufferSize, "_iter_%d", iter_);
   filename += iter_str_buffer;
   model_filename = filename + ".caffemodel";
   LOG(INFO) << "Snapshotting to " << model_filename;
   WriteProtoToBinaryFile(net_param, model_filename.c_str());
   SolverState state;
   SnapshotSolverState(&state);
-  state.set_iter(iter_ + 1);
+  state.set_iter(iter_);
   state.set_learned_net(model_filename);
   state.set_current_step(current_step_);
   snapshot_filename = filename + ".solverstate";

From f61c374983f4d49f81e6dc2f7588ace5c00c7b36 Mon Sep 17 00:00:00 2001
From: Jonathan L Long
Date: Thu, 19 Mar 2015 21:28:36 -0700
Subject: [PATCH 03/21] always call Layer::Reshape in Layer::Forward

There are no cases where Forward is called without Reshape, so we can
simplify the call structure.
---
 include/caffe/layer.hpp              | 1 +
 src/caffe/net.cpp                    | 1 -
 src/caffe/test/test_neuron_layer.cpp | 4 ----
 tools/caffe.cpp                      | 3 ---
 4 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp
index 2d13ef97c05..8f924a75755 100644
--- a/include/caffe/layer.hpp
+++ b/include/caffe/layer.hpp
@@ -406,6 +406,7 @@ template <typename Dtype>
 inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
   Dtype loss = 0;
+  Reshape(bottom, top);
   switch (Caffe::mode()) {
   case Caffe::CPU:
     Forward_cpu(bottom, top);
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index fd00b122630..11503bce9c7 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -470,7 +470,6 @@ Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
   }
   for (int i = start; i <= end; ++i) {
     // LOG(ERROR) << "Forwarding " << layer_names_[i];
-    layers_[i]->Reshape(bottom_vecs_[i], top_vecs_[i]);
     Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]);
     loss += layer_loss;
     if (debug_info_) { ForwardDebugInfo(i); }
diff --git a/src/caffe/test/test_neuron_layer.cpp b/src/caffe/test/test_neuron_layer.cpp
index c9d52f247a6..030f4bbae7f 100644
--- a/src/caffe/test/test_neuron_layer.cpp
+++ b/src/caffe/test/test_neuron_layer.cpp
@@ -541,14 +541,10 @@ TYPED_TEST(NeuronLayerTest, TestPReLUInPlace) {
   caffe_copy(ip2.blobs()[0]->count(), ip.blobs()[0]->cpu_data(),
       ip2.blobs()[0]->mutable_cpu_data());
   // Forward in-place
-  ip.Reshape(this->blob_bottom_vec_, this->blob_top_vec_);
   ip.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
-  prelu.Reshape(this->blob_top_vec_, this->blob_top_vec_);
   prelu.Forward(this->blob_top_vec_, this->blob_top_vec_);
   // Forward non-in-place
-  ip2.Reshape(blob_bottom_vec_2, blob_middle_vec_2);
   ip2.Forward(blob_bottom_vec_2, blob_middle_vec_2);
-  prelu2.Reshape(blob_middle_vec_2, blob_top_vec_2);
   prelu2.Forward(blob_middle_vec_2, blob_top_vec_2);
   // Check numbers
   for (int s = 0; s < blob_top_2->count(); ++s) {
diff --git a/tools/caffe.cpp b/tools/caffe.cpp
index eb9e97f5e27..70b15f890f7 100644
--- a/tools/caffe.cpp
+++ b/tools/caffe.cpp
@@ -252,9 +252,6 @@ int time() {
     forward_timer.Start();
     for (int i = 0; i < layers.size(); ++i) {
       timer.Start();
-      // Although Reshape should be essentially free, we include it here
-      // so that we will notice Reshape performance bugs.
- layers[i]->Reshape(bottom_vecs[i], top_vecs[i]); layers[i]->Forward(bottom_vecs[i], top_vecs[i]); forward_time_per_layer[i] += timer.MicroSeconds(); } From 1a1ce5a4ef3897a0e3b40ebf243786b2f8a37667 Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Wed, 4 Mar 2015 16:18:17 -0800 Subject: [PATCH 04/21] remove spurious net.hpp includes --- include/caffe/data_layers.hpp | 1 - include/caffe/neuron_layers.hpp | 1 - src/caffe/layers/base_data_layer.cpp | 1 - 3 files changed, 3 deletions(-) diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp index 2bb9d948169..24dfe723636 100644 --- a/include/caffe/data_layers.hpp +++ b/include/caffe/data_layers.hpp @@ -14,7 +14,6 @@ #include "caffe/filler.hpp" #include "caffe/internal_thread.hpp" #include "caffe/layer.hpp" -#include "caffe/net.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/db.hpp" diff --git a/include/caffe/neuron_layers.hpp b/include/caffe/neuron_layers.hpp index 8669923d1fa..37553b9ee71 100644 --- a/include/caffe/neuron_layers.hpp +++ b/include/caffe/neuron_layers.hpp @@ -8,7 +8,6 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/layer.hpp" -#include "caffe/net.hpp" #include "caffe/proto/caffe.pb.h" #define HDF5_DATA_DATASET_NAME "data" diff --git a/src/caffe/layers/base_data_layer.cpp b/src/caffe/layers/base_data_layer.cpp index 352200915d7..931e4a9c0ab 100644 --- a/src/caffe/layers/base_data_layer.cpp +++ b/src/caffe/layers/base_data_layer.cpp @@ -2,7 +2,6 @@ #include #include "caffe/data_layers.hpp" -#include "caffe/net.hpp" #include "caffe/util/io.hpp" namespace caffe { From 7e2fceb1e91cfe48eddb3569e29aaef4b9ca1a2a Mon Sep 17 00:00:00 2001 From: Nick Carlevaris-Bianco Date: Fri, 17 Apr 2015 14:16:26 +0930 Subject: [PATCH 05/21] Fixed contrastive loss layer to be the same as proposed in Hadsell et al 2006 --- src/caffe/layers/contrastive_loss_layer.cpp | 9 ++++++--- src/caffe/layers/contrastive_loss_layer.cu | 13 ++++++++----- src/caffe/test/test_contrastive_loss_layer.cpp | 3 ++- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/caffe/layers/contrastive_loss_layer.cpp b/src/caffe/layers/contrastive_loss_layer.cpp index 0692c11c257..d5e3c8a48bb 100644 --- a/src/caffe/layers/contrastive_loss_layer.cpp +++ b/src/caffe/layers/contrastive_loss_layer.cpp @@ -48,7 +48,8 @@ void ContrastiveLossLayer::Forward_cpu( if (static_cast(bottom[2]->cpu_data()[i])) { // similar pairs loss += dist_sq_.cpu_data()[i]; } else { // dissimilar pairs - loss += std::max(margin-dist_sq_.cpu_data()[i], Dtype(0.0)); + Dtype dist = std::max(margin - sqrt(dist_sq_.cpu_data()[i]), 0.0); + loss += dist*dist; } } loss = loss / static_cast(bottom[0]->num()) / Dtype(2); @@ -76,10 +77,12 @@ void ContrastiveLossLayer::Backward_cpu(const vector*>& top, Dtype(0.0), bout + (j*channels)); } else { // dissimilar pairs - if ((margin-dist_sq_.cpu_data()[j]) > Dtype(0.0)) { + Dtype dist = sqrt(dist_sq_.cpu_data()[j]); + Dtype mdist = (margin - dist); + if (mdist > Dtype(0.0)) { caffe_cpu_axpby( channels, - -alpha, + -alpha * mdist / dist, diff_.cpu_data() + (j*channels), Dtype(0.0), bout + (j*channels)); diff --git a/src/caffe/layers/contrastive_loss_layer.cu b/src/caffe/layers/contrastive_loss_layer.cu index 78a55995a0a..255480ced05 100644 --- a/src/caffe/layers/contrastive_loss_layer.cu +++ b/src/caffe/layers/contrastive_loss_layer.cu @@ -37,7 +37,8 @@ void ContrastiveLossLayer::Forward_gpu( if (static_cast(bottom[2]->cpu_data()[i])) { // similar pairs loss += dist_sq_.cpu_data()[i]; } else { // 
dissimilar pairs - loss += std::max(margin-dist_sq_.cpu_data()[i], Dtype(0.0)); + Dtype dist = std::max(margin - sqrt(dist_sq_.cpu_data()[i]), Dtype(0.0)); + loss += dist*dist; } } loss = loss / static_cast(bottom[0]->num()) / Dtype(2); @@ -45,7 +46,7 @@ void ContrastiveLossLayer::Forward_gpu( } template -__global__ void CLLForward(const int count, const int channels, +__global__ void CLLBackward(const int count, const int channels, const Dtype margin, const Dtype alpha, const Dtype* y, const Dtype* diff, const Dtype* dist_sq, Dtype *bottom_diff) { @@ -54,8 +55,10 @@ __global__ void CLLForward(const int count, const int channels, if (static_cast(y[n])) { // similar pairs bottom_diff[i] = alpha * diff[i]; } else { // dissimilar pairs - if ((margin-dist_sq[n]) > 0.0) { - bottom_diff[i] = -alpha * diff[i]; + Dtype dist = sqrt(dist_sq[n]); + Dtype mdist = (margin - dist); + if (mdist > 0.0) { + bottom_diff[i] = -alpha * mdist / dist * diff[i]; } else { bottom_diff[i] = 0; } @@ -75,7 +78,7 @@ void ContrastiveLossLayer::Backward_gpu(const vector*>& top, const Dtype alpha = sign * top[0]->cpu_diff()[0] / static_cast(bottom[0]->num()); // NOLINT_NEXT_LINE(whitespace/operators) - CLLForward<<>>( + CLLBackward<<>>( count, channels, margin, alpha, bottom[2]->gpu_data(), // pair similarity 0 or 1 diff_.gpu_data(), // the cached eltwise difference between a and b diff --git a/src/caffe/test/test_contrastive_loss_layer.cpp b/src/caffe/test/test_contrastive_loss_layer.cpp index d269fbc26f2..5fab25f8832 100644 --- a/src/caffe/test/test_contrastive_loss_layer.cpp +++ b/src/caffe/test/test_contrastive_loss_layer.cpp @@ -79,7 +79,8 @@ TYPED_TEST(ContrastiveLossLayerTest, TestForward) { if (this->blob_bottom_y_->cpu_data()[i]) { // similar pairs loss += dist_sq; } else { - loss += std::max(margin-dist_sq, Dtype(0)); + Dtype dist = std::max(margin - sqrt(dist_sq), 0.0); + loss += dist*dist; } } loss /= static_cast(num) * Dtype(2); From d91c353577baf2b27d303962bb039ba39211871b Mon Sep 17 00:00:00 2001 From: Nick Carlevaris-Bianco Date: Tue, 21 Apr 2015 17:14:47 +0930 Subject: [PATCH 06/21] added epsilon to prevent possible division by zero in gradient calculation --- src/caffe/layers/contrastive_loss_layer.cpp | 2 +- src/caffe/layers/contrastive_loss_layer.cu | 2 +- src/caffe/test/test_contrastive_loss_layer.cpp | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/caffe/layers/contrastive_loss_layer.cpp b/src/caffe/layers/contrastive_loss_layer.cpp index d5e3c8a48bb..4cf04d4dd15 100644 --- a/src/caffe/layers/contrastive_loss_layer.cpp +++ b/src/caffe/layers/contrastive_loss_layer.cpp @@ -82,7 +82,7 @@ void ContrastiveLossLayer::Backward_cpu(const vector*>& top, if (mdist > Dtype(0.0)) { caffe_cpu_axpby( channels, - -alpha * mdist / dist, + -alpha * mdist / (dist + Dtype(1e-4)), diff_.cpu_data() + (j*channels), Dtype(0.0), bout + (j*channels)); diff --git a/src/caffe/layers/contrastive_loss_layer.cu b/src/caffe/layers/contrastive_loss_layer.cu index 255480ced05..83ff3c90f0f 100644 --- a/src/caffe/layers/contrastive_loss_layer.cu +++ b/src/caffe/layers/contrastive_loss_layer.cu @@ -58,7 +58,7 @@ __global__ void CLLBackward(const int count, const int channels, Dtype dist = sqrt(dist_sq[n]); Dtype mdist = (margin - dist); if (mdist > 0.0) { - bottom_diff[i] = -alpha * mdist / dist * diff[i]; + bottom_diff[i] = -alpha * mdist / (dist + Dtype(1e-4)) * diff[i]; } else { bottom_diff[i] = 0; } diff --git a/src/caffe/test/test_contrastive_loss_layer.cpp 
b/src/caffe/test/test_contrastive_loss_layer.cpp index 5fab25f8832..460fc8f32fc 100644 --- a/src/caffe/test/test_contrastive_loss_layer.cpp +++ b/src/caffe/test/test_contrastive_loss_layer.cpp @@ -22,15 +22,15 @@ class ContrastiveLossLayerTest : public MultiDeviceTest { protected: ContrastiveLossLayerTest() - : blob_bottom_data_i_(new Blob(128, 10, 1, 1)), - blob_bottom_data_j_(new Blob(128, 10, 1, 1)), - blob_bottom_y_(new Blob(128, 1, 1, 1)), + : blob_bottom_data_i_(new Blob(512, 2, 1, 1)), + blob_bottom_data_j_(new Blob(512, 2, 1, 1)), + blob_bottom_y_(new Blob(512, 1, 1, 1)), blob_top_loss_(new Blob()) { // fill the values FillerParameter filler_param; - filler_param.set_mean(0.0); - filler_param.set_std(0.3); // distances~=1.0 to test both sides of margin - GaussianFiller filler(filler_param); + filler_param.set_min(-1.0); + filler_param.set_max(1.0); // distances~=1.0 to test both sides of margin + UniformFiller filler(filler_param); filler.Fill(this->blob_bottom_data_i_); blob_bottom_vec_.push_back(blob_bottom_data_i_); filler.Fill(this->blob_bottom_data_j_); From ca673fdd05458980f62a909b64b51b22f0ddd21e Mon Sep 17 00:00:00 2001 From: Nick Carlevaris-Bianco Date: Mon, 4 May 2015 11:41:44 +0930 Subject: [PATCH 07/21] Added support for original implementation, using (margin - d^2), through the legacy_version parameter. --- src/caffe/layers/contrastive_loss_layer.cpp | 26 ++++++++--- src/caffe/layers/contrastive_loss_layer.cu | 31 ++++++++++--- src/caffe/proto/caffe.proto | 9 +++- .../test/test_contrastive_loss_layer.cpp | 43 +++++++++++++++++++ 4 files changed, 96 insertions(+), 13 deletions(-) diff --git a/src/caffe/layers/contrastive_loss_layer.cpp b/src/caffe/layers/contrastive_loss_layer.cpp index 4cf04d4dd15..25e167819d3 100644 --- a/src/caffe/layers/contrastive_loss_layer.cpp +++ b/src/caffe/layers/contrastive_loss_layer.cpp @@ -41,6 +41,8 @@ void ContrastiveLossLayer::Forward_cpu( diff_.mutable_cpu_data()); // a_i-b_i const int channels = bottom[0]->channels(); Dtype margin = this->layer_param_.contrastive_loss_param().margin(); + bool legacy_version = + this->layer_param_.contrastive_loss_param().legacy_version(); Dtype loss(0.0); for (int i = 0; i < bottom[0]->num(); ++i) { dist_sq_.mutable_cpu_data()[i] = caffe_cpu_dot(channels, @@ -48,8 +50,12 @@ void ContrastiveLossLayer::Forward_cpu( if (static_cast(bottom[2]->cpu_data()[i])) { // similar pairs loss += dist_sq_.cpu_data()[i]; } else { // dissimilar pairs - Dtype dist = std::max(margin - sqrt(dist_sq_.cpu_data()[i]), 0.0); - loss += dist*dist; + if (legacy_version) { + loss += std::max(margin - dist_sq_.cpu_data()[i], Dtype(0.0)); + } else { + Dtype dist = std::max(margin - sqrt(dist_sq_.cpu_data()[i]), 0.0); + loss += dist*dist; + } } } loss = loss / static_cast(bottom[0]->num()) / Dtype(2); @@ -60,6 +66,8 @@ template void ContrastiveLossLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { Dtype margin = this->layer_param_.contrastive_loss_param().margin(); + bool legacy_version = + this->layer_param_.contrastive_loss_param().legacy_version(); for (int i = 0; i < 2; ++i) { if (propagate_down[i]) { const Dtype sign = (i == 0) ? 
1 : -1; @@ -77,12 +85,20 @@ void ContrastiveLossLayer::Backward_cpu(const vector*>& top, Dtype(0.0), bout + (j*channels)); } else { // dissimilar pairs - Dtype dist = sqrt(dist_sq_.cpu_data()[j]); - Dtype mdist = (margin - dist); + Dtype mdist(0.0); + Dtype beta(0.0); + if (legacy_version) { + mdist = margin - dist_sq_.cpu_data()[j]; + beta = -alpha; + } else { + Dtype dist = sqrt(dist_sq_.cpu_data()[j]); + mdist = margin - dist; + beta = -alpha * mdist / (dist + Dtype(1e-4)); + } if (mdist > Dtype(0.0)) { caffe_cpu_axpby( channels, - -alpha * mdist / (dist + Dtype(1e-4)), + beta, diff_.cpu_data() + (j*channels), Dtype(0.0), bout + (j*channels)); diff --git a/src/caffe/layers/contrastive_loss_layer.cu b/src/caffe/layers/contrastive_loss_layer.cu index 83ff3c90f0f..931239316ac 100644 --- a/src/caffe/layers/contrastive_loss_layer.cu +++ b/src/caffe/layers/contrastive_loss_layer.cu @@ -32,13 +32,20 @@ void ContrastiveLossLayer::Forward_gpu( Dtype(0.0), dist_sq_.mutable_gpu_data()); // \Sum (a_i-b_i)^2 Dtype margin = this->layer_param_.contrastive_loss_param().margin(); + bool legacy_version = + this->layer_param_.contrastive_loss_param().legacy_version(); Dtype loss(0.0); for (int i = 0; i < bottom[0]->num(); ++i) { if (static_cast(bottom[2]->cpu_data()[i])) { // similar pairs loss += dist_sq_.cpu_data()[i]; } else { // dissimilar pairs - Dtype dist = std::max(margin - sqrt(dist_sq_.cpu_data()[i]), Dtype(0.0)); - loss += dist*dist; + if (legacy_version) { + loss += std::max(margin - dist_sq_.cpu_data()[i], Dtype(0.0)); + } else { + Dtype dist = std::max(margin - sqrt(dist_sq_.cpu_data()[i]), + Dtype(0.0)); + loss += dist*dist; + } } } loss = loss / static_cast(bottom[0]->num()) / Dtype(2); @@ -47,7 +54,7 @@ void ContrastiveLossLayer::Forward_gpu( template __global__ void CLLBackward(const int count, const int channels, - const Dtype margin, const Dtype alpha, + const Dtype margin, const bool legacy_version, const Dtype alpha, const Dtype* y, const Dtype* diff, const Dtype* dist_sq, Dtype *bottom_diff) { CUDA_KERNEL_LOOP(i, count) { @@ -55,10 +62,18 @@ __global__ void CLLBackward(const int count, const int channels, if (static_cast(y[n])) { // similar pairs bottom_diff[i] = alpha * diff[i]; } else { // dissimilar pairs - Dtype dist = sqrt(dist_sq[n]); - Dtype mdist = (margin - dist); + Dtype mdist(0.0); + Dtype beta(0.0); + if (legacy_version) { + mdist = (margin - dist_sq[n]); + beta = -alpha; + } else { + Dtype dist = sqrt(dist_sq[n]); + mdist = (margin - dist); + beta = -alpha * mdist / (dist + Dtype(1e-4)) * diff[i]; + } if (mdist > 0.0) { - bottom_diff[i] = -alpha * mdist / (dist + Dtype(1e-4)) * diff[i]; + bottom_diff[i] = beta; } else { bottom_diff[i] = 0; } @@ -74,12 +89,14 @@ void ContrastiveLossLayer::Backward_gpu(const vector*>& top, const int count = bottom[0]->count(); const int channels = bottom[0]->channels(); Dtype margin = this->layer_param_.contrastive_loss_param().margin(); + const bool legacy_version = + this->layer_param_.contrastive_loss_param().legacy_version(); const Dtype sign = (i == 0) ? 
1 : -1; const Dtype alpha = sign * top[0]->cpu_diff()[0] / static_cast(bottom[0]->num()); // NOLINT_NEXT_LINE(whitespace/operators) CLLBackward<<>>( - count, channels, margin, alpha, + count, channels, margin, legacy_version, alpha, bottom[2]->gpu_data(), // pair similarity 0 or 1 diff_.gpu_data(), // the cached eltwise difference between a and b dist_sq_.gpu_data(), // the cached square distance between a and b diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 5b21cf20028..fe4ce366972 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -401,8 +401,15 @@ message ConcatParameter { // Message that stores parameters used by ContrastiveLossLayer message ContrastiveLossParameter { - //margin for dissimilar pair + // margin for dissimilar pair optional float margin = 1 [default = 1.0]; + // The first implementation of this cost did not exactly match the cost of + // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. + // legacy_version = false (the default) uses (margin - d)^2 as proposed in the + // Hadsell paper. New models should probably use this version. + // legacy_version = true uses (margin - d^2). This is kept to support / + // reproduce existing models and results + optional bool legacy_version = 2 [default = false]; } // Message that stores parameters used by ConvolutionLayer diff --git a/src/caffe/test/test_contrastive_loss_layer.cpp b/src/caffe/test/test_contrastive_loss_layer.cpp index 460fc8f32fc..1e9447cbc51 100644 --- a/src/caffe/test/test_contrastive_loss_layer.cpp +++ b/src/caffe/test/test_contrastive_loss_layer.cpp @@ -100,4 +100,47 @@ TYPED_TEST(ContrastiveLossLayerTest, TestGradient) { this->blob_top_vec_, 1); } +TYPED_TEST(ContrastiveLossLayerTest, TestForwardLegacy) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_contrastive_loss_param()->set_legacy_version(true); + ContrastiveLossLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // manually compute to compare + const Dtype margin = layer_param.contrastive_loss_param().margin(); + const int num = this->blob_bottom_data_i_->num(); + const int channels = this->blob_bottom_data_i_->channels(); + Dtype loss(0); + for (int i = 0; i < num; ++i) { + Dtype dist_sq(0); + for (int j = 0; j < channels; ++j) { + Dtype diff = this->blob_bottom_data_i_->cpu_data()[i*channels+j] - + this->blob_bottom_data_j_->cpu_data()[i*channels+j]; + dist_sq += diff*diff; + } + if (this->blob_bottom_y_->cpu_data()[i]) { // similar pairs + loss += dist_sq; + } else { + loss += std::max(margin - dist_sq, Dtype(0.0)); + } + } + loss /= static_cast(num) * Dtype(2); + EXPECT_NEAR(this->blob_top_loss_->cpu_data()[0], loss, 1e-6); +} + +TYPED_TEST(ContrastiveLossLayerTest, TestGradientLegacy) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_contrastive_loss_param()->set_legacy_version(true); + ContrastiveLossLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + GradientChecker checker(1e-2, 1e-2, 1701); + // check the gradient for the first two bottom layers + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 1); +} + } // namespace caffe From 63ed23bd83d5c30e74a83a976524dadf22d6dd49 Mon Sep 17 00:00:00 2001 From: Takuya Narihira 
Date: Mon, 4 May 2015 11:44:44 -0700 Subject: [PATCH 08/21] Fix redundancy of parameter backward computation --- src/caffe/layers/prelu_layer.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/layers/prelu_layer.cu b/src/caffe/layers/prelu_layer.cu index fd0eda5d191..5fd69d6c4d4 100644 --- a/src/caffe/layers/prelu_layer.cu +++ b/src/caffe/layers/prelu_layer.cu @@ -89,7 +89,7 @@ void PReLULayer::Backward_gpu(const vector*>& top, Dtype* temp_buff = multiplier_.mutable_gpu_diff(); // compute element-wise diff // NOLINT_NEXT_LINE(whitespace/operators) - PReLUParamBackward<<<<>>( cdim, top_diff + top[0]->offset(n), bottom_data + bottom[0]->offset(n), multiplier_.mutable_gpu_diff()); From 4348c6f4c905e9eb2c4dee32614a1880e074b217 Mon Sep 17 00:00:00 2001 From: Takuya Narihira Date: Mon, 4 May 2015 11:45:33 -0700 Subject: [PATCH 09/21] Modify for better readability regarding temporary bufffer for backward computation --- include/caffe/neuron_layers.hpp | 3 ++- src/caffe/layers/prelu_layer.cpp | 3 ++- src/caffe/layers/prelu_layer.cu | 8 ++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/caffe/neuron_layers.hpp b/include/caffe/neuron_layers.hpp index 323215134c7..aff58233e5c 100644 --- a/include/caffe/neuron_layers.hpp +++ b/include/caffe/neuron_layers.hpp @@ -734,7 +734,8 @@ class PReLULayer : public NeuronLayer { const vector& propagate_down, const vector*>& bottom); bool channel_shared_; - Blob multiplier_; // dot multipler for backward computation of params + Blob multiplier_; // dot multiplier for backward computation of params + Blob backward_buff_; // temporary buffer for backward computation Blob bottom_memory_; // memory for in-place computation }; diff --git a/src/caffe/layers/prelu_layer.cpp b/src/caffe/layers/prelu_layer.cpp index 7119a274dd3..7a38f9fac80 100644 --- a/src/caffe/layers/prelu_layer.cpp +++ b/src/caffe/layers/prelu_layer.cpp @@ -45,7 +45,8 @@ void PReLULayer::LayerSetUp(const vector*>& bottom, // Propagate gradients to the parameters (as directed by backward pass). 
this->param_propagate_down_.resize(this->blobs_.size(), true); - multiplier_.Reshape(vector(1, bottom[0]->count() / bottom[0]->num())); + multiplier_.Reshape(vector(1, bottom[0]->count(1))); + backward_buff_.Reshape(vector(1, bottom[0]->count(1))); caffe_set(multiplier_.count(), Dtype(1), multiplier_.mutable_cpu_data()); } diff --git a/src/caffe/layers/prelu_layer.cu b/src/caffe/layers/prelu_layer.cu index 5fd69d6c4d4..dfa238d85bd 100644 --- a/src/caffe/layers/prelu_layer.cu +++ b/src/caffe/layers/prelu_layer.cu @@ -86,22 +86,22 @@ void PReLULayer::Backward_gpu(const vector*>& top, int cdim = channels * dim; Dtype dsum = 0.; for (int n = 0; n < bottom[0]->num(); ++n) { - Dtype* temp_buff = multiplier_.mutable_gpu_diff(); // compute element-wise diff // NOLINT_NEXT_LINE(whitespace/operators) PReLUParamBackward<<>>( cdim, top_diff + top[0]->offset(n), - bottom_data + bottom[0]->offset(n), multiplier_.mutable_gpu_diff()); + bottom_data + bottom[0]->offset(n), + backward_buff_.mutable_gpu_diff()); CUDA_POST_KERNEL_CHECK; if (channel_shared_) { Dtype d; - caffe_gpu_dot(channels * dim, multiplier_.gpu_diff(), + caffe_gpu_dot(channels * dim, backward_buff_.gpu_diff(), multiplier_.gpu_data(), &d); dsum += d; } else { caffe_gpu_gemv(CblasNoTrans, channels, dim, 1., - multiplier_.gpu_diff(), multiplier_.gpu_data(), 1., + backward_buff_.gpu_diff(), multiplier_.gpu_data(), 1., slope_diff); } } From a046c571c5d66a09cd6d2dd51ea1ee0957c574fd Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Wed, 6 May 2015 17:40:12 -0700 Subject: [PATCH 10/21] check that count_ does not overflow in Blob::Reshape --- src/caffe/blob.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp index 6d2b3f502d9..94fdcc35fb6 100644 --- a/src/caffe/blob.cpp +++ b/src/caffe/blob.cpp @@ -26,6 +26,7 @@ void Blob::Reshape(const vector& shape) { shape_.resize(shape.size()); for (int i = 0; i < shape.size(); ++i) { CHECK_GE(shape[i], 0); + CHECK_LE(shape[i], INT_MAX / count_) << "blob size exceeds INT_MAX"; count_ *= shape[i]; shape_[i] = shape[i]; } From a0e9f95817ea593aeef6ad8b4bed0da417e5a66e Mon Sep 17 00:00:00 2001 From: Gustav Larsson Date: Mon, 11 May 2015 11:54:45 -0500 Subject: [PATCH 11/21] This imports the wrong io module in Python 3. The Python standard lib has a module called io, so instead of Python 3 throwing an error, it imports the wrong module without complaining. --- python/caffe/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py index 37e8956da4f..fbe7112e868 100644 --- a/python/caffe/__init__.py +++ b/python/caffe/__init__.py @@ -3,4 +3,4 @@ from .proto.caffe_pb2 import TRAIN, TEST from .classifier import Classifier from .detector import Detector -import io +from . 
import io From 7ed310f2835c6a7843fd3362f290dbfda1585936 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Mon, 11 May 2015 22:35:48 +0200 Subject: [PATCH 12/21] python: PEP8; changed docstring documentation style to NumPyDoc style --- python/caffe/classifier.py | 5 +- python/caffe/detector.py | 35 +++++++------ python/caffe/draw.py | 45 ++++++++++------- python/caffe/io.py | 27 ++++------ python/caffe/pycaffe.py | 68 +++++++++++++++----------- python/caffe/test/test_net.py | 2 + python/caffe/test/test_python_layer.py | 3 ++ python/caffe/test/test_solver.py | 1 + 8 files changed, 103 insertions(+), 83 deletions(-) diff --git a/python/caffe/classifier.py b/python/caffe/classifier.py index 49f8003ce9d..7fb2ccc8ff3 100644 --- a/python/caffe/classifier.py +++ b/python/caffe/classifier.py @@ -29,7 +29,7 @@ def __init__(self, model_file, pretrained_file, image_dims=None, in_ = self.inputs[0] self.transformer = caffe.io.Transformer( {in_: self.blobs[in_].data.shape}) - self.transformer.set_transpose(in_, (2,0,1)) + self.transformer.set_transpose(in_, (2, 0, 1)) if mean is not None: self.transformer.set_mean(in_, mean) if input_scale is not None: @@ -44,7 +44,6 @@ def __init__(self, model_file, pretrained_file, image_dims=None, image_dims = self.crop_dims self.image_dims = image_dims - def predict(self, inputs, oversample=True): """ Predict classification probabilities of inputs. @@ -78,7 +77,7 @@ def predict(self, inputs, oversample=True): input_ = input_[:, crop[0]:crop[2], crop[1]:crop[3], :] # Classify - caffe_in = np.zeros(np.array(input_.shape)[[0,3,1,2]], + caffe_in = np.zeros(np.array(input_.shape)[[0, 3, 1, 2]], dtype=np.float32) for ix, in_ in enumerate(input_): caffe_in[ix] = self.transformer.preprocess(self.inputs[0], in_) diff --git a/python/caffe/detector.py b/python/caffe/detector.py index a67b818b93f..f72b548ac9a 100644 --- a/python/caffe/detector.py +++ b/python/caffe/detector.py @@ -41,7 +41,7 @@ def __init__(self, model_file, pretrained_file, mean=None, in_ = self.inputs[0] self.transformer = caffe.io.Transformer( {in_: self.blobs[in_].data.shape}) - self.transformer.set_transpose(in_, (2,0,1)) + self.transformer.set_transpose(in_, (2, 0, 1)) if mean is not None: self.transformer.set_mean(in_, mean) if input_scale is not None: @@ -53,17 +53,18 @@ def __init__(self, model_file, pretrained_file, mean=None, self.configure_crop(context_pad) - def detect_windows(self, images_windows): """ Do windowed detection over given images and windows. Windows are extracted then warped to the input dimensions of the net. - Take + Parameters + ---------- images_windows: (image filename, window list) iterable. context_crop: size of context border to crop in pixels. - Give + Returns + ------- detections: list of {filename: image filename, window: crop coordinates, predictions: prediction vector} dicts. """ @@ -82,7 +83,7 @@ def detect_windows(self, images_windows): for ix, window_in in enumerate(window_inputs): caffe_in[ix] = self.transformer.preprocess(in_, window_in) out = self.forward_all(**{in_: caffe_in}) - predictions = out[self.outputs[0]].squeeze(axis=(2,3)) + predictions = out[self.outputs[0]].squeeze(axis=(2, 3)) # Package predictions with images and windows. detections = [] @@ -97,16 +98,17 @@ def detect_windows(self, images_windows): ix += 1 return detections - def detect_selective_search(self, image_fnames): """ Do windowed detection over Selective Search proposals by extracting the crop and warping to the input dimensions of the net. 
- Take + Parameters + ---------- image_fnames: list - Give + Returns + ------- detections: list of {filename: image filename, window: crop coordinates, predictions: prediction vector} dicts. """ @@ -120,17 +122,18 @@ def detect_selective_search(self, image_fnames): # Run windowed detection on the selective search list. return self.detect_windows(zip(image_fnames, windows_list)) - def crop(self, im, window): """ Crop a window from the image for detection. Include surrounding context according to the `context_pad` configuration. - Take + Parameters + ---------- im: H x W x K image ndarray to crop. window: bounding box coordinates as ymin, xmin, ymax, xmax. - Give + Returns + ------- crop: cropped window. """ # Crop window from the image. @@ -175,14 +178,14 @@ def crop(self, im, window): return crop - def configure_crop(self, context_pad): """ Configure crop dimensions and amount of context for cropping. If context is included, make the special input mean for context padding. - Take - context_pad: amount of context for cropping. + Parameters + ---------- + context_pad : amount of context for cropping. """ # crop dimensions in_ = self.inputs[0] @@ -204,8 +207,8 @@ def configure_crop(self, context_pad): crop_mean = mean.copy().transpose(inv_transpose) if channel_order is not None: channel_order_inverse = [channel_order.index(i) - for i in range(crop_mean.shape[2])] - crop_mean = crop_mean[:,:, channel_order_inverse] + for i in range(crop_mean.shape[2])] + crop_mean = crop_mean[:, :, channel_order_inverse] if raw_scale is not None: crop_mean /= raw_scale self.crop_mean = crop_mean diff --git a/python/caffe/draw.py b/python/caffe/draw.py index 6a4dbd47351..08b7c1de14b 100644 --- a/python/caffe/draw.py +++ b/python/caffe/draw.py @@ -11,19 +11,23 @@ import pydot # Internal layer and blob styles. -LAYER_STYLE_DEFAULT = {'shape': 'record', 'fillcolor': '#6495ED', - 'style': 'filled'} -NEURON_LAYER_STYLE = {'shape': 'record', 'fillcolor': '#90EE90', - 'style': 'filled'} -BLOB_STYLE = {'shape': 'octagon', 'fillcolor': '#E0E0E0', - 'style': 'filled'} +LAYER_STYLE_DEFAULT = {'shape': 'record', + 'fillcolor': '#6495ED', + 'style': 'filled'} +NEURON_LAYER_STYLE = {'shape': 'record', + 'fillcolor': '#90EE90', + 'style': 'filled'} +BLOB_STYLE = {'shape': 'octagon', + 'fillcolor': '#E0E0E0', + 'style': 'filled'} + def get_pooling_types_dict(): """Get dictionary mapping pooling type number to type name """ desc = caffe_pb2.PoolingParameter.PoolMethod.DESCRIPTOR d = {} - for k,v in desc.values_by_name.items(): + for k, v in desc.values_by_name.items(): d[v.number] = k return d @@ -145,21 +149,24 @@ def get_pydot_graph(caffe_net, rankdir, label_edges=True): label=edge['label'])) return pydot_graph + def draw_net(caffe_net, rankdir, ext='png'): - """Draws a caffe net and returns the image string encoded using the given - extension. + """Draws a caffe net and returns the image string encoded using the given + extension. - Input: + Parameters + ---------- caffe_net: a caffe.proto.caffe_pb2.NetParameter protocol buffer. ext: the image extension. Default 'png'. - """ - return get_pydot_graph(caffe_net, rankdir).create(format=ext) + """ + return get_pydot_graph(caffe_net, rankdir).create(format=ext) + def draw_net_to_file(caffe_net, filename, rankdir='LR'): - """Draws a caffe net, and saves it to file using the format given as the - file extension. Use '.raw' to output raw text that you can manually feed - to graphviz to draw graphs. 
- """ - ext = filename[filename.rfind('.')+1:] - with open(filename, 'wb') as fid: - fid.write(draw_net(caffe_net, rankdir, ext)) + """Draws a caffe net, and saves it to file using the format given as the + file extension. Use '.raw' to output raw text that you can manually feed + to graphviz to draw graphs. + """ + ext = filename[filename.rfind('.')+1:] + with open(filename, 'wb') as fid: + fid.write(draw_net(caffe_net, rankdir, ext)) diff --git a/python/caffe/io.py b/python/caffe/io.py index 6ae2cf13cc0..e5feff38796 100644 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -8,16 +8,16 @@ from caffe.proto import caffe_pb2 except: import sys - if sys.version_info >= (3,0): + if sys.version_info >= (3, 0): print("Failed to include caffe_pb2, things might go wrong!") else: raise -## proto / datum / ndarray conversion +## proto / datum / ndarray conversion def blobproto_to_array(blob, return_diff=False): - """Convert a blob proto to an array. In default, we will just return the data, - unless return_diff is True, in which case we will return the diff. + """Convert a blob proto to an array. In default, we will just return the + data, unless return_diff is True, in which case we will return the diff. """ if return_diff: return np.array(blob.diff).reshape( @@ -35,7 +35,7 @@ def array_to_blobproto(arr, diff=None): if arr.ndim != 4: raise ValueError('Incorrect array shape.') blob = caffe_pb2.BlobProto() - blob.num, blob.channels, blob.height, blob.width = arr.shape; + blob.num, blob.channels, blob.height, blob.width = arr.shape blob.data.extend(arr.astype(float).flat) if diff is not None: blob.diff.extend(diff.astype(float).flat) @@ -81,7 +81,7 @@ def datum_to_array(datum): as one can easily get it by calling datum.label. """ if len(datum.data): - return np.fromstring(datum.data, dtype = np.uint8).reshape( + return np.fromstring(datum.data, dtype=np.uint8).reshape( datum.channels, datum.height, datum.width) else: return np.array(datum.float_data).astype(float).reshape( @@ -97,8 +97,9 @@ class Transformer: Note: this is mostly for illustrative purposes and it is likely better to define your own input preprocessing routine for your needs. - Take - net: a Net for which the input should be prepared + Parameters + ---------- + net : a Net for which the input should be prepared """ def __init__(self, inputs): self.inputs = inputs @@ -108,13 +109,11 @@ def __init__(self, inputs): self.mean = {} self.input_scale = {} - def __check_input(self, in_): if in_ not in self.inputs: raise Exception('{} is not one of the net inputs: {}'.format( in_, self.inputs)) - def preprocess(self, in_, data): """ Format input for Caffe: @@ -155,7 +154,6 @@ def preprocess(self, in_, data): caffe_in *= input_scale return caffe_in - def deprocess(self, in_, data): """ Invert Caffe formatting; see preprocess(). @@ -179,7 +177,6 @@ def deprocess(self, in_, data): decaf_in = decaf_in.transpose([transpose[t] for t in transpose]) return decaf_in - def set_transpose(self, in_, order): """ Set the input channel order for e.g. RGB to BGR conversion @@ -195,7 +192,6 @@ def set_transpose(self, in_, order): 'dimensions as the input.') self.transpose[in_] = order - def set_channel_swap(self, in_, order): """ Set the input channel order for e.g. RGB to BGR conversion @@ -213,7 +209,6 @@ def set_channel_swap(self, in_, order): 'dimensions as the input channels.') self.channel_swap[in_] = order - def set_raw_scale(self, in_, scale): """ Set the scale of raw features s.t. the input blob = input * scale. 
@@ -228,7 +223,6 @@ def set_raw_scale(self, in_, scale): self.__check_input(in_) self.raw_scale[in_] = scale - def set_mean(self, in_, mean): """ Set the mean to subtract for centering the data. @@ -254,7 +248,6 @@ def set_mean(self, in_, mean): raise ValueError('Mean shape incompatible with input shape.') self.mean[in_] = mean - def set_input_scale(self, in_, scale): """ Set the scale of preprocessed inputs s.t. the blob = blob * scale. @@ -359,7 +352,7 @@ def oversample(images, crop_dims): # Extract crops crops = np.empty((10 * len(images), crop_dims[0], crop_dims[1], - im_shape[-1]), dtype=np.float32) + im_shape[-1]), dtype=np.float32) ix = 0 for im in images: for crop in crops_ix: diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index 3c19261f690..e8a676a26d2 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -5,9 +5,9 @@ from collections import OrderedDict try: - from itertools import izip_longest + from itertools import izip_longest except: - from itertools import zip_longest as izip_longest + from itertools import zip_longest as izip_longest import numpy as np from ._caffe import Net, SGDSolver @@ -53,16 +53,19 @@ def _Net_forward(self, blobs=None, start=None, end=None, **kwargs): """ Forward pass: prepare inputs and run the net forward. - Take - blobs: list of blobs to return in addition to output blobs. - kwargs: Keys are input blob names and values are blob ndarrays. - For formatting inputs for Caffe, see Net.preprocess(). - If None, input is taken from data layers. - start: optional name of layer at which to begin the forward pass - end: optional name of layer at which to finish the forward pass (inclusive) - - Give - outs: {blob name: blob ndarray} dict. + Parameters + ---------- + blobs : list of blobs to return in addition to output blobs. + kwargs : Keys are input blob names and values are blob ndarrays. + For formatting inputs for Caffe, see Net.preprocess(). + If None, input is taken from data layers. + start : optional name of layer at which to begin the forward pass + end : optional name of layer at which to finish the forward pass + (inclusive) + + Returns + ------- + outs : {blob name: blob ndarray} dict. """ if blobs is None: blobs = [] @@ -99,14 +102,17 @@ def _Net_backward(self, diffs=None, start=None, end=None, **kwargs): """ Backward pass: prepare diffs and run the net backward. - Take - diffs: list of diffs to return in addition to bottom diffs. - kwargs: Keys are output blob names and values are diff ndarrays. + Parameters + ---------- + diffs : list of diffs to return in addition to bottom diffs. + kwargs : Keys are output blob names and values are diff ndarrays. If None, top diffs are taken from forward loss. - start: optional name of layer at which to begin the backward pass - end: optional name of layer at which to finish the backward pass (inclusive) + start : optional name of layer at which to begin the backward pass + end : optional name of layer at which to finish the backward pass + (inclusive) - Give + Returns + ------- outs: {blob name: diff ndarray} dict. """ if diffs is None: @@ -146,13 +152,15 @@ def _Net_forward_all(self, blobs=None, **kwargs): """ Run net forward in batches. - Take - blobs: list of blobs to extract as in forward() - kwargs: Keys are input blob names and values are blob ndarrays. - Refer to forward(). + Parameters + ---------- + blobs : list of blobs to extract as in forward() + kwargs : Keys are input blob names and values are blob ndarrays. + Refer to forward(). 
- Give - all_outs: {blob name: list of blobs} dict. + Returns + ------- + all_outs : {blob name: list of blobs} dict. """ # Collect outputs from batches all_outs = {out: [] for out in set(self.outputs + (blobs or []))} @@ -175,14 +183,16 @@ def _Net_forward_backward_all(self, blobs=None, diffs=None, **kwargs): """ Run net forward + backward in batches. - Take + Parameters + ---------- blobs: list of blobs to extract as in forward() diffs: list of diffs to extract as in backward() kwargs: Keys are input (for forward) and output (for backward) blob names and values are ndarrays. Refer to forward() and backward(). Prefilled variants are called for lack of input or output blobs. - Give + Returns + ------- all_blobs: {blob name: blob ndarray} dict. all_diffs: {blob name: diff ndarray} dict. """ @@ -229,11 +239,13 @@ def _Net_batch(self, blobs): """ Batch blob lists according to net's batch size. - Take + Parameters + ---------- blobs: Keys blob names and values are lists of blobs (of any length). Naturally, all the lists should have the same length. - Give (yield) + Yields + ------ batch: {blob name: list of blobs} dict for a single batch. """ num = len(blobs.itervalues().next()) diff --git a/python/caffe/test/test_net.py b/python/caffe/test/test_net.py index 62b407da8aa..cc367477752 100644 --- a/python/caffe/test/test_net.py +++ b/python/caffe/test/test_net.py @@ -5,6 +5,7 @@ import caffe + def simple_net_file(num_output): """Make a simple net prototxt, based on test_net.cpp, returning the name of the (temporary) file.""" @@ -31,6 +32,7 @@ def simple_net_file(num_output): f.close() return f.name + class TestNet(unittest.TestCase): def setUp(self): self.num_output = 13 diff --git a/python/caffe/test/test_python_layer.py b/python/caffe/test/test_python_layer.py index dd99f6f15b9..6fba49143bb 100644 --- a/python/caffe/test/test_python_layer.py +++ b/python/caffe/test/test_python_layer.py @@ -4,6 +4,7 @@ import caffe + class SimpleLayer(caffe.Layer): """A layer that just multiplies by ten""" @@ -19,6 +20,7 @@ def forward(self, bottom, top): def backward(self, top, propagate_down, bottom): bottom[0].diff[...] = 10 * top[0].diff + def python_net_file(): with tempfile.NamedTemporaryFile(delete=False) as f: f.write("""name: 'pythonnet' force_backward: true @@ -31,6 +33,7 @@ def python_net_file(): python_param { module: 'test_python_layer' layer: 'SimpleLayer' } }""") return f.name + class TestPythonLayer(unittest.TestCase): def setUp(self): net_file = python_net_file() diff --git a/python/caffe/test/test_solver.py b/python/caffe/test/test_solver.py index d59f23d973a..09b974dad66 100644 --- a/python/caffe/test/test_solver.py +++ b/python/caffe/test/test_solver.py @@ -6,6 +6,7 @@ import caffe from test_net import simple_net_file + class TestSolver(unittest.TestCase): def setUp(self): self.num_output = 13 From 7cf8b830cd24b73ebed89ef19e6bc8bd25d938cd Mon Sep 17 00:00:00 2001 From: Takuya Narihira Date: Wed, 13 May 2015 21:16:28 -0700 Subject: [PATCH 13/21] [pycaffe] use bp::object instead of PyObject* for self in Python layer This simply allows direct use of the nicer bp::object interface. 
--- include/caffe/python_layer.hpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/include/caffe/python_layer.hpp b/include/caffe/python_layer.hpp index 816ef453720..19cf18c9742 100644 --- a/include/caffe/python_layer.hpp +++ b/include/caffe/python_layer.hpp @@ -14,12 +14,12 @@ template class PythonLayer : public Layer { public: PythonLayer(PyObject* self, const LayerParameter& param) - : Layer(param), self_(self) { } + : Layer(param), self_(bp::handle<>(bp::borrowed(self))) { } virtual void LayerSetUp(const vector*>& bottom, const vector*>& top) { try { - bp::call_method(self_, "setup", bottom, top); + self_.attr("setup")(bottom, top); } catch (bp::error_already_set) { PyErr_Print(); throw; @@ -29,7 +29,7 @@ class PythonLayer : public Layer { virtual void Reshape(const vector*>& bottom, const vector*>& top) { try { - bp::call_method(self_, "reshape", bottom, top); + self_.attr("reshape")(bottom, top); } catch (bp::error_already_set) { PyErr_Print(); throw; @@ -42,7 +42,7 @@ class PythonLayer : public Layer { virtual void Forward_cpu(const vector*>& bottom, const vector*>& top) { try { - bp::call_method(self_, "forward", bottom, top); + self_.attr("forward")(bottom, top); } catch (bp::error_already_set) { PyErr_Print(); throw; @@ -51,8 +51,7 @@ class PythonLayer : public Layer { virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { try { - bp::call_method(self_, "backward", top, propagate_down, - bottom); + self_.attr("backward")(top, propagate_down, bottom); } catch (bp::error_already_set) { PyErr_Print(); throw; @@ -60,7 +59,7 @@ class PythonLayer : public Layer { } private: - PyObject* self_; + bp::object self_; }; } // namespace caffe From e266a4d368b22a8940468d68f41fe4392f5200da Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Wed, 13 May 2015 22:08:57 -0700 Subject: [PATCH 14/21] remove superfluous empty destructors The removed definitions do nothing; these classes already have virtual destructors inherited from their respective base classes. --- include/caffe/data_layers.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp index 2bb9d948169..4dcf5501522 100644 --- a/include/caffe/data_layers.hpp +++ b/include/caffe/data_layers.hpp @@ -29,7 +29,6 @@ template class BaseDataLayer : public Layer { public: explicit BaseDataLayer(const LayerParameter& param); - virtual ~BaseDataLayer() {} // LayerSetUp: implements common data layer setup functionality, and calls // DataLayerSetUp to do special data layer setup for individual layer types. // This method may not be overridden except by the BasePrefetchingDataLayer. @@ -58,7 +57,6 @@ class BasePrefetchingDataLayer : public: explicit BasePrefetchingDataLayer(const LayerParameter& param) : BaseDataLayer(param) {} - virtual ~BasePrefetchingDataLayer() {} // LayerSetUp: implements common data layer setup functionality, and calls // DataLayerSetUp to do special data layer setup for individual layer types. // This method may not be overridden. From 6153231594b98c0933be21685708282bc6160b6c Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Thu, 12 Mar 2015 17:59:28 -0700 Subject: [PATCH 15/21] remove bogus implementation of SigmoidCrossEntropyLossLayer::Forward_gpu It was a verbatim copy of Forward_cpu; there is no proper GPU implementation. 
--- include/caffe/loss_layers.hpp | 2 -- .../sigmoid_cross_entropy_loss_layer.cpp | 2 +- .../sigmoid_cross_entropy_loss_layer.cu | 22 +------------------ 3 files changed, 2 insertions(+), 24 deletions(-) diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp index d3eecd2e510..86c34241168 100644 --- a/include/caffe/loss_layers.hpp +++ b/include/caffe/loss_layers.hpp @@ -605,8 +605,6 @@ class SigmoidCrossEntropyLossLayer : public LossLayer { /// @copydoc SigmoidCrossEntropyLossLayer virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); /** * @brief Computes the sigmoid cross-entropy loss error gradient w.r.t. the diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp index 077d949981c..cc236fe1e8e 100644 --- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp +++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp @@ -71,7 +71,7 @@ void SigmoidCrossEntropyLossLayer::Backward_cpu( } #ifdef CPU_ONLY -STUB_GPU(SigmoidCrossEntropyLossLayer); +STUB_GPU_BACKWARD(SigmoidCrossEntropyLossLayer, Backward); #endif INSTANTIATE_CLASS(SigmoidCrossEntropyLossLayer); diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu index 08f7f492297..547fa80c72f 100644 --- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu +++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu @@ -8,26 +8,6 @@ namespace caffe { -template -void SigmoidCrossEntropyLossLayer::Forward_gpu( - const vector*>& bottom, const vector*>& top) { - // The forward pass computes the sigmoid outputs. - sigmoid_bottom_vec_[0] = bottom[0]; - sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_); - // Compute the loss (negative log likelihood) - const int count = bottom[0]->count(); - const int num = bottom[0]->num(); - // Stable version of loss computation from input data - const Dtype* input_data = bottom[0]->cpu_data(); - const Dtype* target = bottom[1]->cpu_data(); - Dtype loss = 0; - for (int i = 0; i < count; ++i) { - loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) - - log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0))); - } - top[0]->mutable_cpu_data()[0] = loss / num; -} - template void SigmoidCrossEntropyLossLayer::Backward_gpu( const vector*>& top, const vector& propagate_down, @@ -51,7 +31,7 @@ void SigmoidCrossEntropyLossLayer::Backward_gpu( } } -INSTANTIATE_LAYER_GPU_FUNCS(SigmoidCrossEntropyLossLayer); +INSTANTIATE_LAYER_GPU_BACKWARD(SigmoidCrossEntropyLossLayer); } // namespace caffe From 438cf0e9a660676b1526eb8178b020ad2d745f6f Mon Sep 17 00:00:00 2001 From: PETER_GAO Date: Sat, 21 Mar 2015 16:00:05 -0700 Subject: [PATCH 16/21] Spatial Pyramid Pooling Layer --- include/caffe/vision_layers.hpp | 66 ++++++++++ src/caffe/layers/spp_layer.cpp | 193 ++++++++++++++++++++++++++++++ src/caffe/proto/caffe.proto | 20 +++- src/caffe/test/test_spp_layer.cpp | 131 ++++++++++++++++++++ 4 files changed, 409 insertions(+), 1 deletion(-) create mode 100644 src/caffe/layers/spp_layer.cpp create mode 100644 src/caffe/test/test_spp_layer.cpp diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp index 6cb507a5780..e9023230439 100644 --- a/include/caffe/vision_layers.hpp +++ b/include/caffe/vision_layers.hpp @@ -451,6 +451,72 @@ class CuDNNPoolingLayer : public PoolingLayer { }; #endif +/** + * @brief Does spatial pyramid pooling on the 
input image + * by taking the max, average, etc. within regions + * so that the result vector of different sized + * images are of the same size. + */ +template +class SPPLayer : public Layer { + public: + explicit SPPLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "SPP"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int MinTopBlobs() const { return 1; } + // MAX POOL layers can output an extra top blob for the mask; + // others can only output the pooled inputs. + virtual inline int MaxTopBlobs() const { + return (this->layer_param_.pooling_param().pool() == + PoolingParameter_PoolMethod_MAX) ? 2 : 1; + } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + // calculates the kernel and stride dimensions for the pooling layer, + // returns a correctly configured LayerParameter for a PoolingLayer + virtual LayerParameter GetPoolingParam(const int pyramid_level, + const int bottom_h, const int bottom_w, const SPPParameter spp_param); + + int pyramid_height_; + int bottom_h_, bottom_w_; + int channels_; + int kernel_h_, kernel_w_; + int pad_h_, pad_w_; + + /// the internal Split layer that feeds the pooling layers + shared_ptr > split_layer_; + /// top vector holder used in call to the underlying SplitLayer::Forward + vector*> split_top_vec_; + /// bottom vector holder used in call to the underlying PoolingLayer::Forward + vector*>*> pooling_bottom_vecs_; + /// the internal Pooling layers of different kernel sizes + vector > > pooling_layers_; + /// top vector holders used in call to the underlying PoolingLayer::Forward + vector*>*> pooling_top_vecs_; + /// pooling_outputs stores the outputs of the PoolingLayers + vector*> pooling_outputs_; + /// the internal Flatten layers that the Pooling layers feed into + vector*> flatten_layers_; + /// top vector holders used in call to the underlying FlattenLayer::Forward + vector*>*> flatten_top_vecs_; + /// flatten_outputs stores the outputs of the FlattenLayers + vector*> flatten_outputs_; + /// bottom vector holder used in call to the underlying ConcatLayer::Forward + vector*> concat_bottom_vec_; + /// the internal Concat layers that the Flatten layers feed into + shared_ptr > concat_layer_; +}; + } // namespace caffe #endif // CAFFE_VISION_LAYERS_HPP_ diff --git a/src/caffe/layers/spp_layer.cpp b/src/caffe/layers/spp_layer.cpp new file mode 100644 index 00000000000..795dd71693e --- /dev/null +++ b/src/caffe/layers/spp_layer.cpp @@ -0,0 +1,193 @@ +#include +#include +#include + +#include "caffe/common.hpp" +#include "caffe/layer.hpp" +#include "caffe/syncedmem.hpp" +#include "caffe/util/math_functions.hpp" +#include "caffe/vision_layers.hpp" + +namespace caffe { + +using std::min; +using std::max; + +template +LayerParameter SPPLayer::GetPoolingParam(const int pyramid_level, + const int bottom_h, const int bottom_w, const SPPParameter spp_param) { + LayerParameter pooling_param; + int num_bins = pow(2, pyramid_level); + + // find padding and kernel size so that the pooling is + // performed across the entire image + int kernel_h = ceil(bottom_h / static_cast(num_bins)); + // remainder_h is the min number of pixels that need to be padded before + // entire image height is 
pooled over with the chosen kernel dimension + int remainder_h = kernel_h * num_bins - bottom_h; + // pooling layer pads (2 * pad_h) pixels on the top and bottom of the + // image. + int pad_h = (remainder_h + 1) / 2; + + // similar logic for width + int kernel_w = ceil(bottom_w / static_cast(num_bins)); + int remainder_w = kernel_w * num_bins - bottom_w; + int pad_w = (remainder_w + 1) / 2; + + pooling_param.mutable_pooling_param()->set_pad_h(pad_h); + pooling_param.mutable_pooling_param()->set_pad_w(pad_w); + pooling_param.mutable_pooling_param()->set_kernel_h(kernel_h); + pooling_param.mutable_pooling_param()->set_kernel_w(kernel_w); + pooling_param.mutable_pooling_param()->set_stride_h(kernel_h); + pooling_param.mutable_pooling_param()->set_stride_w(kernel_w); + + switch (spp_param.pool()) { + case SPPParameter_PoolMethod_MAX: + pooling_param.mutable_pooling_param()->set_pool( + PoolingParameter_PoolMethod_MAX); + break; + case SPPParameter_PoolMethod_AVE: + pooling_param.mutable_pooling_param()->set_pool( + PoolingParameter_PoolMethod_AVE); + break; + case SPPParameter_PoolMethod_STOCHASTIC: + pooling_param.mutable_pooling_param()->set_pool( + PoolingParameter_PoolMethod_STOCHASTIC); + break; + default: + LOG(FATAL) << "Unknown pooling method."; + } + + return pooling_param; +} + +template +void SPPLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + SPPParameter spp_param = this->layer_param_.spp_param(); + + bottom_h_ = bottom[0]->height(); + bottom_w_ = bottom[0]->width(); + CHECK_GT(bottom_h_, 0) << "Input dimensions cannot be zero."; + CHECK_GT(bottom_w_, 0) << "Input dimensions cannot be zero."; + + pyramid_height_ = spp_param.pyramid_height(); + split_top_vec_.clear(); + pooling_bottom_vecs_.clear(); + pooling_layers_.clear(); + pooling_top_vecs_.clear(); + pooling_outputs_.clear(); + flatten_layers_.clear(); + flatten_top_vecs_.clear(); + flatten_outputs_.clear(); + concat_bottom_vec_.clear(); + + // split layer output holders setup + for (int i = 0; i < pyramid_height_; i++) { + split_top_vec_.push_back(new Blob()); + } + + // split layer setup + LayerParameter split_param; + split_layer_.reset(new SplitLayer(split_param)); + split_layer_->SetUp(bottom, split_top_vec_); + + for (int i = 0; i < pyramid_height_; i++) { + // pooling layer input holders setup + pooling_bottom_vecs_.push_back(new vector*>); + pooling_bottom_vecs_[i]->push_back(split_top_vec_[i]); + + // pooling layer output holders setup + pooling_outputs_.push_back(new Blob()); + pooling_top_vecs_.push_back(new vector*>); + pooling_top_vecs_[i]->push_back(pooling_outputs_[i]); + + // pooling layer setup + LayerParameter pooling_param = GetPoolingParam( + i, bottom_h_, bottom_w_, spp_param); + + pooling_layers_.push_back(shared_ptr > ( + new PoolingLayer(pooling_param))); + pooling_layers_[i]->SetUp(*pooling_bottom_vecs_[i], *pooling_top_vecs_[i]); + + // flatten layer output holders setup + flatten_outputs_.push_back(new Blob()); + flatten_top_vecs_.push_back(new vector*>); + flatten_top_vecs_[i]->push_back(flatten_outputs_[i]); + + // flatten layer setup + LayerParameter flatten_param; + flatten_layers_.push_back(new FlattenLayer(flatten_param)); + flatten_layers_[i]->SetUp(*pooling_top_vecs_[i], *flatten_top_vecs_[i]); + + // concat layer input holders setup + concat_bottom_vec_.push_back(flatten_outputs_[i]); + } + + // concat layer setup + LayerParameter concat_param; + concat_layer_.reset(new ConcatLayer(concat_param)); + concat_layer_->SetUp(concat_bottom_vec_, top); +} + +template +void 
SPPLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; + channels_ = bottom[0]->channels(); + bottom_h_ = bottom[0]->height(); + bottom_w_ = bottom[0]->width(); + SPPParameter spp_param = this->layer_param_.spp_param(); + split_layer_->Reshape(bottom, split_top_vec_); + for (int i = 0; i < pyramid_height_; i++) { + LayerParameter pooling_param = GetPoolingParam( + i, bottom_h_, bottom_w_, spp_param); + + pooling_layers_[i].reset( + new PoolingLayer(pooling_param)); + pooling_layers_[i]->SetUp( + *pooling_bottom_vecs_[i], *pooling_top_vecs_[i]); + pooling_layers_[i]->Reshape( + *pooling_bottom_vecs_[i], *pooling_top_vecs_[i]); + flatten_layers_[i]->Reshape( + *pooling_top_vecs_[i], *flatten_top_vecs_[i]); + } + concat_layer_->Reshape(concat_bottom_vec_, top); +} + +template +void SPPLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + split_layer_->Forward(bottom, split_top_vec_); + for (int i = 0; i < pyramid_height_; i++) { + pooling_layers_[i]->Forward( + *pooling_bottom_vecs_[i], *pooling_top_vecs_[i]); + flatten_layers_[i]->Forward( + *pooling_top_vecs_[i], *flatten_top_vecs_[i]); + } + concat_layer_->Forward(concat_bottom_vec_, top); +} + +template +void SPPLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + if (!propagate_down[0]) { + return; + } + vector concat_propagate_down(pyramid_height_, true); + concat_layer_->Backward(top, concat_propagate_down, concat_bottom_vec_); + for (int i = 0; i < pyramid_height_; i++) { + flatten_layers_[i]->Backward( + *flatten_top_vecs_[i], propagate_down, *pooling_top_vecs_[i]); + pooling_layers_[i]->Backward( + *pooling_top_vecs_[i], propagate_down, *pooling_bottom_vecs_[i]); + } + split_layer_->Backward(split_top_vec_, propagate_down, bottom); +} + + +INSTANTIATE_CLASS(SPPLayer); +REGISTER_LAYER_CLASS(SPP); + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 5b21cf20028..b11fdf62dec 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -259,7 +259,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available layer-specific ID: 132 (last added: prelu_param) +// LayerParameter next available layer-specific ID: 133 (last added: spp_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -328,6 +328,7 @@ message LayerParameter { optional ReLUParameter relu_param = 123; optional SigmoidParameter sigmoid_param = 124; optional SoftmaxParameter softmax_param = 125; + optional SPPParameter spp_param = 132; optional SliceParameter slice_param = 126; optional TanHParameter tanh_param = 127; optional ThresholdParameter threshold_param = 128; @@ -768,6 +769,23 @@ message WindowDataParameter { optional string root_folder = 13 [default = ""]; } +// Message that stores parameters used by SPPLayer +message SPPParameter { + enum PoolMethod { + MAX = 0; + AVE = 1; + STOCHASTIC = 2; + } + optional uint32 pyramid_height = 1; + optional PoolMethod pool = 2 [default = MAX]; // The pooling method + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 6 [default = DEFAULT]; +} + // DEPRECATED: use LayerParameter. 
message V1LayerParameter { repeated string bottom = 2; diff --git a/src/caffe/test/test_spp_layer.cpp b/src/caffe/test/test_spp_layer.cpp new file mode 100644 index 00000000000..b2585f1a5fa --- /dev/null +++ b/src/caffe/test/test_spp_layer.cpp @@ -0,0 +1,131 @@ +#include +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/vision_layers.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class SPPLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + SPPLayerTest() + : blob_bottom_(new Blob()), + blob_bottom_2_(new Blob()), + blob_bottom_3_(new Blob()), + blob_top_(new Blob()) {} + virtual void SetUp() { + Caffe::set_random_seed(1701); + blob_bottom_->Reshape(2, 3, 9, 8); + blob_bottom_2_->Reshape(4, 3, 1024, 765); + blob_bottom_3_->Reshape(10, 3, 7, 7); + // fill the values + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + blob_bottom_vec_.push_back(blob_bottom_); + blob_bottom_vec_2_.push_back(blob_bottom_2_); + blob_bottom_vec_3_.push_back(blob_bottom_3_); + blob_top_vec_.push_back(blob_top_); + } + virtual ~SPPLayerTest() { delete blob_bottom_; delete blob_top_; } + + Blob* const blob_bottom_; + Blob* const blob_bottom_2_; + Blob* const blob_bottom_3_; + Blob* const blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_bottom_vec_2_; + vector*> blob_bottom_vec_3_; + vector*> blob_top_vec_; +}; + +TYPED_TEST_CASE(SPPLayerTest, TestDtypesAndDevices); + +TYPED_TEST(SPPLayerTest, TestSetup) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_spp_param()->set_pyramid_height(3); + SPPLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + // expected number of pool results is geometric sum + // (1 - r ** n)/(1 - r) where r = 4 and n = pyramid_height + // (1 - 4 ** 3)/(1 - 4) = 21 + // multiply bottom num_channels * expected_pool_results + // to get expected num_channels (3 * 21 = 63) + EXPECT_EQ(this->blob_top_->num(), 2); + EXPECT_EQ(this->blob_top_->channels(), 63); + EXPECT_EQ(this->blob_top_->height(), 1); + EXPECT_EQ(this->blob_top_->width(), 1); +} + +TYPED_TEST(SPPLayerTest, TestEqualOutputDims) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_spp_param()->set_pyramid_height(5); + SPPLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_2_, this->blob_top_vec_); + // expected number of pool results is geometric sum + // (1 - r ** n)/(1 - r) where r = 4 and n = pyramid_height + // (1 - 4 ** 5)/(1 - 4) = 341 + // multiply bottom num_channels * expected_pool_results + // to get expected num_channels (3 * 341 = 1023) + EXPECT_EQ(this->blob_top_->num(), 4); + EXPECT_EQ(this->blob_top_->channels(), 1023); + EXPECT_EQ(this->blob_top_->height(), 1); + EXPECT_EQ(this->blob_top_->width(), 1); +} + +TYPED_TEST(SPPLayerTest, TestEqualOutputDims2) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_spp_param()->set_pyramid_height(3); + SPPLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_3_, this->blob_top_vec_); + // expected number of pool results is geometric sum + // (1 - r ** n)/(1 - r) where r = 4 and n = pyramid_height + // (1 - 4 ** 3)/(1 - 4) = 21 + // multiply bottom num_channels * expected_pool_results + // to get 
expected num_channels (3 * 21 = 63) + EXPECT_EQ(this->blob_top_->num(), 10); + EXPECT_EQ(this->blob_top_->channels(), 63); + EXPECT_EQ(this->blob_top_->height(), 1); + EXPECT_EQ(this->blob_top_->width(), 1); +} + +TYPED_TEST(SPPLayerTest, TestForwardBackward) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_spp_param()->set_pyramid_height(3); + SPPLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + vector propagate_down(this->blob_bottom_vec_.size(), true); + layer.Backward(this->blob_top_vec_, propagate_down, + this->blob_bottom_vec_); +} + +TYPED_TEST(SPPLayerTest, TestGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + SPPParameter* spp_param = layer_param.mutable_spp_param(); + spp_param->set_pyramid_height(3); + SPPLayer layer(layer_param); + GradientChecker checker(1e-4, 1e-2); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + + +} // namespace caffe From 4fb3c9e6a5ac80804c910639d14651c2ecdcb5f3 Mon Sep 17 00:00:00 2001 From: Simon Safar Date: Wed, 15 Oct 2014 20:15:14 -0700 Subject: [PATCH 17/21] Added a Reshape layer for copying-free modification of blob dimensions. --- docs/tutorial/layers.md | 42 +++++++++ include/caffe/common_layers.hpp | 35 ++++++++ src/caffe/layers/reshape_layer.cpp | 113 ++++++++++++++++++++++++ src/caffe/layers/reshape_layer.cu | 23 +++++ src/caffe/proto/caffe.proto | 16 +++- src/caffe/test/test_reshape_layer.cpp | 120 ++++++++++++++++++++++++++ 6 files changed, 348 insertions(+), 1 deletion(-) create mode 100644 src/caffe/layers/reshape_layer.cpp create mode 100644 src/caffe/layers/reshape_layer.cu create mode 100644 src/caffe/test/test_reshape_layer.cpp diff --git a/docs/tutorial/layers.md b/docs/tutorial/layers.md index 839939f5ad6..422ee01f201 100644 --- a/docs/tutorial/layers.md +++ b/docs/tutorial/layers.md @@ -419,6 +419,48 @@ The `SPLIT` layer is a utility layer that splits an input blob to multiple outpu The `FLATTEN` layer is a utility layer that flattens an input of shape `n * c * h * w` to a simple vector output of shape `n * (c*h*w) * 1 * 1`. +#### Reshape + +* LayerType: `RESHAPE` +* CPU implementation: `./src/caffe/layers/reshape_layer.cpp` +* CUDA GPU implementation: `./src/caffe/layers/reshape_layer.cu` +* Parameters (`ReshapeParameter reshape_param`) + - Optional: (also see detailed description below) + - `num` [default 0] + - `channels` [default 0] + - `width` [default 0] + - `height` [default 0] + +* Input + - a single blob with arbitrary dimensions +* Output + - the same blob, with modified dimensions, as specified by `reshape_param` + +* Sample + + layers { + name: "reshape" + type: RESHAPE + bottom: "input" + top: "output" + + reshape_param { + num: 0 # copy the dimension from below + channels: 2 + width: 3 + height: -1 # infer it from the other dimensions + } + } + +The `RESHAPE` layer can be used to change the dimensions of its input, without changing its data. Just like the `FLATTEN` layer, only the dimensions are changed, no data is copied in the process. + +Output dimensions are specified by the `ReshapeParam` proto. Positive numbers are used directly, setting the corresponding dimension of the output blob. 
In addition, two special values are accepted for any of the target dimension values: + +* **0** means "copy the respective dimension of the bottom layer". That is, if the bottom layer has 2 channels, the top one will have 2 channels too, given `channels: 0` as target dimension. Since the default value of all the target dimensions is 0, omitting any of the target dimensions will also cause it to be copied. +* **-1** stands for "infer this from the other dimensions". This behavior is similar to that of -1 in *numpy*'s or `[]` for *MATLAB*'s reshape: this dimension is calculated to keep the overall element count the same as in the bottom layer. If this is not possible, an error is raised. Also, at most one -1 can be used in a reshape operation. + +As another example, giving `num: 0, channels: -1, height: 1, width: 1` as parameters makes the layer behave in exactly the same way as the `FLATTEN` layer. + #### Concatenation * LayerType: `CONCAT` diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index cae1c3e4ee6..945c0cef1b6 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -297,6 +297,41 @@ class MVNLayer : public Layer { Blob sum_multiplier_; }; +/* + * @brief Reshapes the input Blob into an arbitrary-sized output Blob. + * + * Note: similarly to FlattenLayer, this layer does not change the input values + * (see FlattenLayer, Blob::ShareData and Blob::ShareDiff). + */ +template +class ReshapeLayer : public Layer { + public: + explicit ReshapeLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& bottom, + const vector& propagate_down, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + void FillInSingleUnspecifiedDimension(int bottom_count); + + int num_out; + int channels_out; + int height_out; + int width_out; +}; + /** * @brief Ignores bottom blobs while producing no top blobs. (This is useful * to suppress outputs during testing.) diff --git a/src/caffe/layers/reshape_layer.cpp b/src/caffe/layers/reshape_layer.cpp new file mode 100644 index 00000000000..7e8704e058a --- /dev/null +++ b/src/caffe/layers/reshape_layer.cpp @@ -0,0 +1,113 @@ +#include + +#include "caffe/common_layers.hpp" +#include "caffe/layer.hpp" + +namespace caffe { + +template +void ReshapeLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + CHECK_EQ(bottom.size(), 1) << "Reshape Layer takes a single blob as input."; + CHECK_EQ(top.size(), 1) << "Reshape Layer takes a single blob as output."; + + num_out = this->layer_param_.reshape_param().num(); + // Dimensions set to 0 (either by default or explicitly) will be copied from + // the bottom layer. 
+ if (num_out == 0) { + num_out = bottom[0]->num(); + } + + channels_out = this->layer_param_.reshape_param().channels(); + if (channels_out == 0) { + channels_out = bottom[0]->channels(); + } + + width_out = this->layer_param_.reshape_param().width(); + if (width_out == 0) { + width_out = bottom[0]->width(); + } + + height_out = this->layer_param_.reshape_param().height(); + if (height_out == 0) { + height_out = bottom[0]->height(); + } + + FillInSingleUnspecifiedDimension(bottom[0]->count()); +} + +template +void ReshapeLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + top[0]->Reshape(num_out, channels_out, height_out, width_out); + + const size_t out_count = num_out * channels_out * height_out * width_out; + CHECK_EQ(out_count, bottom[0]->count()) << + "Bottom layer count isn't equal to predicted; output layer size is " << + num_out << "x" << channels_out << "x" << height_out << "x" << width_out; +} + +template +void ReshapeLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + top[0]->ShareData(*bottom[0]); +} + +template +void ReshapeLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + bottom[0]->ShareDiff(*top[0]); +} + +/** + * @brief Fill in a single dimension left unspecified. + * + * If a dimension is set to -1, it will be filled in with a value inferred from + * the count of the bottom layer (if the product of the nonzero dimensions is a + * divisor of the count). + * + * @param bottom_count Count of the bottom layer. + */ +template +void ReshapeLayer::FillInSingleUnspecifiedDimension(int bottom_count) { + int* const dimensions[] = {&num_out, &channels_out, &width_out, &height_out}; + const size_t N_DIMENSIONS = 4; + + // How many -1 dimensions do we have. + int n_unspecified = 0; + // Product of the remaining dimensions. + int product_without_unspecified_dim = 1; + + for (size_t i = 0; i < N_DIMENSIONS; i++) { + if (*(dimensions[i]) == -1) { + n_unspecified++; + } else { + product_without_unspecified_dim *= *(dimensions[i]); + } + } + + if (n_unspecified == 0) { + // Everything is filled out, nothing to do. + return; + } + + CHECK_EQ(n_unspecified, 1) << "Only one dimension can be set -1."; + CHECK_EQ(bottom_count % product_without_unspecified_dim, 0) << + "Bottom layer count " << bottom_count << " not divisible by product " << + product_without_unspecified_dim; + + // Fill up the one remaining dimension. 
+ for (size_t i = 0; i < N_DIMENSIONS; i++) { + if (*(dimensions[i]) == -1) { + *(dimensions[i]) = bottom_count / product_without_unspecified_dim; + } + } +} + +#ifdef CPU_ONLY +STUB_GPU(ReshapeLayer); +#endif + +INSTANTIATE_CLASS(ReshapeLayer); +REGISTER_LAYER_CLASS(RESHAPE, ReshapeLayer); +} // namespace caffe diff --git a/src/caffe/layers/reshape_layer.cu b/src/caffe/layers/reshape_layer.cu new file mode 100644 index 00000000000..3023ce3ae88 --- /dev/null +++ b/src/caffe/layers/reshape_layer.cu @@ -0,0 +1,23 @@ +#include + +#include "caffe/common_layers.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void ReshapeLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + top[0]->ShareData(*bottom[0]); +} + +template +void ReshapeLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + bottom[0]->ShareDiff(*top[0]); +} + +INSTANTIATE_LAYER_GPU_FUNCS(ReshapeLayer); + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 4516106428c..e8bf240c1b3 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -259,7 +259,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available layer-specific ID: 133 (last added: spp_param) +// LayerParameter next available layer-specific ID: 134 (last added: reshape_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -326,6 +326,7 @@ message LayerParameter { optional PReLUParameter prelu_param = 131; optional PythonParameter python_param = 130; optional ReLUParameter relu_param = 123; + optional ReshapeParameter reshape_param = 133; optional SigmoidParameter sigmoid_param = 124; optional SoftmaxParameter softmax_param = 125; optional SPPParameter spp_param = 132; @@ -690,6 +691,19 @@ message ReLUParameter { optional Engine engine = 2 [default = DEFAULT]; } +// Message that stores parameters used by ReshapeLayer +message ReshapeParameter { + // Specify the output dimensions. If some of the following parameters are + // omitted or set to 0 explicitly, the corresponding dimension from the bottom + // layer is used (unchanged). Also, if exactly one of them is set to -1, its + // value is calculated from the count of the bottom layer and the remaining + // dimensions, if possible. 
+ optional int32 num = 1 [default = 0]; + optional int32 channels = 2 [default = 0]; + optional int32 height = 3 [default = 0]; + optional int32 width = 4 [default = 0]; +} + // Message that stores parameters used by SigmoidLayer message SigmoidParameter { enum Engine { diff --git a/src/caffe/test/test_reshape_layer.cpp b/src/caffe/test/test_reshape_layer.cpp new file mode 100644 index 00000000000..878d40bb4d5 --- /dev/null +++ b/src/caffe/test/test_reshape_layer.cpp @@ -0,0 +1,120 @@ +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/common_layers.hpp" +#include "caffe/filler.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class ReshapeLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + protected: + ReshapeLayerTest() + : blob_bottom_(new Blob(2, 3, 6, 5)), + blob_top_(new Blob()) { + // fill the values + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + blob_bottom_vec_.push_back(blob_bottom_); + blob_top_vec_.push_back(blob_top_); + } + + virtual ~ReshapeLayerTest() { delete blob_bottom_; delete blob_top_; } + + Blob* const blob_bottom_; + Blob* const blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +TYPED_TEST_CASE(ReshapeLayerTest, TestDtypesAndDevices); + +TYPED_TEST(ReshapeLayerTest, TestFlattenOutputSizes) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ReshapeParameter* reshape_param = + layer_param.mutable_reshape_param(); + reshape_param->set_channels(-1); + reshape_param->set_height(1); + reshape_param->set_width(1); + + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->num(), 2); + EXPECT_EQ(this->blob_top_->channels(), 3 * 6 * 5); + EXPECT_EQ(this->blob_top_->height(), 1); + EXPECT_EQ(this->blob_top_->width(), 1); +} + +TYPED_TEST(ReshapeLayerTest, TestFlattenValues) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ReshapeParameter* reshape_param = + layer_param.mutable_reshape_param(); + reshape_param->set_channels(-1); + reshape_param->set_height(1); + reshape_param->set_width(1); + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int c = 0; c < 3 * 6 * 5; ++c) { + EXPECT_EQ(this->blob_top_->data_at(0, c, 0, 0), + this->blob_bottom_->data_at(0, c / (6 * 5), (c / 5) % 6, c % 5)); + EXPECT_EQ(this->blob_top_->data_at(1, c, 0, 0), + this->blob_bottom_->data_at(1, c / (6 * 5), (c / 5) % 6, c % 5)); + } +} + +// Test whether setting output dimensions to 0 either explicitly or implicitly +// copies the respective dimension of the input layer. +TYPED_TEST(ReshapeLayerTest, TestCopyDimensions) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ReshapeParameter* reshape_param = + layer_param.mutable_reshape_param(); + // Omitting num to test implicit zeroes. 
+ reshape_param->set_channels(0); + reshape_param->set_height(0); + reshape_param->set_width(0); + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + + EXPECT_EQ(this->blob_top_->num(), 2); + EXPECT_EQ(this->blob_top_->channels(), 3); + EXPECT_EQ(this->blob_top_->height(), 6); + EXPECT_EQ(this->blob_top_->width(), 5); +} + +// When a dimension is set to -1, we should infer its value from the other +// dimensions (including those that get copied from below). +TYPED_TEST(ReshapeLayerTest, TestInferenceOfUnspecified) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ReshapeParameter* reshape_param = + layer_param.mutable_reshape_param(); + // Since omitted, num is implicitly set to 0 (thus, copies 2). + reshape_param->set_channels(3); + reshape_param->set_height(10); + reshape_param->set_width(-1); + + // Count is 180, thus height should be 180 / (2*3*10) = 3. + + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + + EXPECT_EQ(this->blob_top_->num(), 2); + EXPECT_EQ(this->blob_top_->channels(), 3); + EXPECT_EQ(this->blob_top_->height(), 10); + EXPECT_EQ(this->blob_top_->width(), 3); +} + +} // namespace caffe From fa6169ee799f97f80d33d6b4525c7fd4b891774a Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 25 Mar 2015 17:44:37 -0700 Subject: [PATCH 18/21] ReshapeLayer fixups for ND blobs --- docs/tutorial/layers.md | 33 +++---- include/caffe/common_layers.hpp | 31 +++--- src/caffe/layers/reshape_layer.cpp | 135 ++++++++------------------ src/caffe/layers/reshape_layer.cu | 23 ----- src/caffe/proto/caffe.proto | 14 +-- src/caffe/test/test_reshape_layer.cpp | 42 ++++---- 6 files changed, 101 insertions(+), 177 deletions(-) delete mode 100644 src/caffe/layers/reshape_layer.cu diff --git a/docs/tutorial/layers.md b/docs/tutorial/layers.md index 422ee01f201..c4529e6afc0 100644 --- a/docs/tutorial/layers.md +++ b/docs/tutorial/layers.md @@ -421,15 +421,11 @@ The `FLATTEN` layer is a utility layer that flattens an input of shape `n * c * #### Reshape -* LayerType: `RESHAPE` -* CPU implementation: `./src/caffe/layers/reshape_layer.cpp` -* CUDA GPU implementation: `./src/caffe/layers/reshape_layer.cu` +* Layer type: `Reshape` +* Implementation: `./src/caffe/layers/reshape_layer.cpp` * Parameters (`ReshapeParameter reshape_param`) - Optional: (also see detailed description below) - - `num` [default 0] - - `channels` [default 0] - - `width` [default 0] - - `height` [default 0] + - `shape` * Input - a single blob with arbitrary dimensions @@ -438,28 +434,29 @@ The `FLATTEN` layer is a utility layer that flattens an input of shape `n * c * * Sample - layers { + layer { name: "reshape" - type: RESHAPE + type: "Reshape" bottom: "input" top: "output" - reshape_param { - num: 0 # copy the dimension from below - channels: 2 - width: 3 - height: -1 # infer it from the other dimensions + shape { + dim: 0 # copy the dimension from below + dim: 2 + dim: 3 + dim: -1 # infer it from the other dimensions + } } } -The `RESHAPE` layer can be used to change the dimensions of its input, without changing its data. Just like the `FLATTEN` layer, only the dimensions are changed, no data is copied in the process. +The `Reshape` layer can be used to change the dimensions of its input, without changing its data. Just like the `Flatten` layer, only the dimensions are changed; no data is copied in the process. Output dimensions are specified by the `ReshapeParam` proto. 
Positive numbers are used directly, setting the corresponding dimension of the output blob. In addition, two special values are accepted for any of the target dimension values: -* **0** means "copy the respective dimension of the bottom layer". That is, if the bottom layer has 2 channels, the top one will have 2 channels too, given `channels: 0` as target dimension. Since the default value of all the target dimensions is 0, omitting any of the target dimensions will also cause it to be copied. -* **-1** stands for "infer this from the other dimensions". This behavior is similar to that of -1 in *numpy*'s or `[]` for *MATLAB*'s reshape: this dimension is calculated to keep the overall element count the same as in the bottom layer. If this is not possible, an error is raised. Also, at most one -1 can be used in a reshape operation. +* **0** means "copy the respective dimension of the bottom layer". That is, if the bottom has 2 as its 1st dimension, the top will have 2 as its 1st dimension as well, given `dim: 0` as the 1st target dimension. +* **-1** stands for "infer this from the other dimensions". This behavior is similar to that of -1 in *numpy*'s or `[]` for *MATLAB*'s reshape: this dimension is calculated to keep the overall element count the same as in the bottom layer. At most one -1 can be used in a reshape operation. -As another example, giving `num: 0, channels: -1, height: 1, width: 1` as parameters makes the layer behave in exactly the same way as the `FLATTEN` layer. +As another example, specifying `reshape_param { shape { dim: 0 dim: -1 } }` makes the layer behave in exactly the same way as the `Flatten` layer. #### Concatenation diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index 945c0cef1b6..ccdfd62d5be 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -313,23 +313,28 @@ class ReshapeLayer : public Layer { virtual void Reshape(const vector*>& bottom, const vector*>& top); + virtual inline const char* type() const { return "Reshape"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + protected: virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& bottom, - const vector& propagate_down, - const vector*>& top); + const vector*>& top) {} + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) {} virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); + const vector*>& top) {} virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - void FillInSingleUnspecifiedDimension(int bottom_count); - - int num_out; - int channels_out; - int height_out; - int width_out; + const vector& propagate_down, const vector*>& bottom) {} + + /// @brief the current output shape + vector top_shape_; + /// @brief vector of axes indices whose dimensions we'll copy from the bottom + vector copy_axes_; + /// @brief the index of the axis whose dimension we infer, or -1 if none + int inferred_axis_; + /// @brief the product of the "constant" output dimensions + int constant_count_; }; /** diff --git a/src/caffe/layers/reshape_layer.cpp b/src/caffe/layers/reshape_layer.cpp index 7e8704e058a..618edf31824 100644 --- a/src/caffe/layers/reshape_layer.cpp +++ b/src/caffe/layers/reshape_layer.cpp @@ -7,107 +7,58 @@ namespace caffe { template void ReshapeLayer::LayerSetUp(const vector*>& bottom, 
- const vector*>& top) { - CHECK_EQ(bottom.size(), 1) << "Reshape Layer takes a single blob as input."; - CHECK_EQ(top.size(), 1) << "Reshape Layer takes a single blob as output."; - - num_out = this->layer_param_.reshape_param().num(); - // Dimensions set to 0 (either by default or explicitly) will be copied from - // the bottom layer. - if (num_out == 0) { - num_out = bottom[0]->num(); - } - - channels_out = this->layer_param_.reshape_param().channels(); - if (channels_out == 0) { - channels_out = bottom[0]->channels(); - } - - width_out = this->layer_param_.reshape_param().width(); - if (width_out == 0) { - width_out = bottom[0]->width(); - } - - height_out = this->layer_param_.reshape_param().height(); - if (height_out == 0) { - height_out = bottom[0]->height(); + const vector*>& top) { + inferred_axis_ = -1; + copy_axes_.clear(); + const BlobShape& top_blob_shape = this->layer_param_.reshape_param().shape(); + const int top_num_axes = top_blob_shape.dim_size(); + top_shape_.resize(top_num_axes); + constant_count_ = 1; + for (int i = 0; i < top_num_axes; ++i) { + top_shape_[i] = top_blob_shape.dim(i); + if (top_shape_[i] == 0) { + copy_axes_.push_back(i); + } else if (top_shape_[i] == -1) { + CHECK_EQ(inferred_axis_, -1) << "new shape contains multiple " + << "-1 dims; at most a single (1) value of -1 may be specified"; + inferred_axis_ = i; + } else { + constant_count_ *= top_shape_[i]; + } } - - FillInSingleUnspecifiedDimension(bottom[0]->count()); } template void ReshapeLayer::Reshape(const vector*>& bottom, - const vector*>& top) { - top[0]->Reshape(num_out, channels_out, height_out, width_out); - - const size_t out_count = num_out * channels_out * height_out * width_out; - CHECK_EQ(out_count, bottom[0]->count()) << - "Bottom layer count isn't equal to predicted; output layer size is " << - num_out << "x" << channels_out << "x" << height_out << "x" << width_out; -} - -template -void ReshapeLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - top[0]->ShareData(*bottom[0]); -} - -template -void ReshapeLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - bottom[0]->ShareDiff(*top[0]); -} - -/** - * @brief Fill in a single dimension left unspecified. - * - * If a dimension is set to -1, it will be filled in with a value inferred from - * the count of the bottom layer (if the product of the nonzero dimensions is a - * divisor of the count). - * - * @param bottom_count Count of the bottom layer. - */ -template -void ReshapeLayer::FillInSingleUnspecifiedDimension(int bottom_count) { - int* const dimensions[] = {&num_out, &channels_out, &width_out, &height_out}; - const size_t N_DIMENSIONS = 4; - - // How many -1 dimensions do we have. - int n_unspecified = 0; - // Product of the remaining dimensions. - int product_without_unspecified_dim = 1; - - for (size_t i = 0; i < N_DIMENSIONS; i++) { - if (*(dimensions[i]) == -1) { - n_unspecified++; - } else { - product_without_unspecified_dim *= *(dimensions[i]); - } - } - - if (n_unspecified == 0) { - // Everything is filled out, nothing to do. 
- return; + const vector*>& top) { + for (int i = 0; i < copy_axes_.size(); ++i) { + const int copy_axis_index = copy_axes_[i]; + CHECK_GT(bottom[0]->num_axes(), copy_axis_index) << "new shape contains " + << "a 0, but there is no corresponding bottom axis to copy"; + top_shape_[copy_axis_index] = bottom[0]->shape(copy_axis_index); } - - CHECK_EQ(n_unspecified, 1) << "Only one dimension can be set -1."; - CHECK_EQ(bottom_count % product_without_unspecified_dim, 0) << - "Bottom layer count " << bottom_count << " not divisible by product " << - product_without_unspecified_dim; - - // Fill up the one remaining dimension. - for (size_t i = 0; i < N_DIMENSIONS; i++) { - if (*(dimensions[i]) == -1) { - *(dimensions[i]) = bottom_count / product_without_unspecified_dim; + if (inferred_axis_ >= 0) { + // A -1 dim was specified; infer the correct dimension by computing the + // product of the other dimensions. + int explicit_count = constant_count_; + for (int i = 0; i < copy_axes_.size(); ++i) { + const int copy_axis_index = copy_axes_[i]; + explicit_count *= top_shape_[copy_axis_index]; } + CHECK_EQ(0, bottom[0]->count() % explicit_count) << "bottom count (" + << bottom[0]->count() << ") must be divisible by the product of " + << "the specified dimensions (" << explicit_count << ")"; + const int inferred_dim = bottom[0]->count() / explicit_count; + top_shape_[inferred_axis_] = inferred_dim; } + top[0]->Reshape(top_shape_); + CHECK_EQ(top[0]->count(), bottom[0]->count()) + << "output count must match input count"; + top[0]->ShareData(*bottom[0]); + top[0]->ShareDiff(*bottom[0]); } -#ifdef CPU_ONLY -STUB_GPU(ReshapeLayer); -#endif - INSTANTIATE_CLASS(ReshapeLayer); -REGISTER_LAYER_CLASS(RESHAPE, ReshapeLayer); +REGISTER_LAYER_CLASS(Reshape); + } // namespace caffe diff --git a/src/caffe/layers/reshape_layer.cu b/src/caffe/layers/reshape_layer.cu deleted file mode 100644 index 3023ce3ae88..00000000000 --- a/src/caffe/layers/reshape_layer.cu +++ /dev/null @@ -1,23 +0,0 @@ -#include - -#include "caffe/common_layers.hpp" -#include "caffe/layer.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { - -template -void ReshapeLayer::Forward_gpu(const vector*>& bottom, - const vector*>& top) { - top[0]->ShareData(*bottom[0]); -} - -template -void ReshapeLayer::Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - bottom[0]->ShareDiff(*top[0]); -} - -INSTANTIATE_LAYER_GPU_FUNCS(ReshapeLayer); - -} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index e8bf240c1b3..d36f1d511df 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -693,15 +693,11 @@ message ReLUParameter { // Message that stores parameters used by ReshapeLayer message ReshapeParameter { - // Specify the output dimensions. If some of the following parameters are - // omitted or set to 0 explicitly, the corresponding dimension from the bottom - // layer is used (unchanged). Also, if exactly one of them is set to -1, its - // value is calculated from the count of the bottom layer and the remaining - // dimensions, if possible. - optional int32 num = 1 [default = 0]; - optional int32 channels = 2 [default = 0]; - optional int32 height = 3 [default = 0]; - optional int32 width = 4 [default = 0]; + // Specify the output dimensions. If some of the dimensions are set to 0, + // the corresponding dimension from the bottom layer is used (unchanged). 
+ // Exactly one dimension may be set to -1, in which case its value is + // inferred from the count of the bottom layer and the remaining dimensions. + optional BlobShape shape = 1; } // Message that stores parameters used by SigmoidLayer diff --git a/src/caffe/test/test_reshape_layer.cpp b/src/caffe/test/test_reshape_layer.cpp index 878d40bb4d5..0c8e2427aa7 100644 --- a/src/caffe/test/test_reshape_layer.cpp +++ b/src/caffe/test/test_reshape_layer.cpp @@ -41,11 +41,11 @@ TYPED_TEST_CASE(ReshapeLayerTest, TestDtypesAndDevices); TYPED_TEST(ReshapeLayerTest, TestFlattenOutputSizes) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - ReshapeParameter* reshape_param = - layer_param.mutable_reshape_param(); - reshape_param->set_channels(-1); - reshape_param->set_height(1); - reshape_param->set_width(1); + BlobShape* blob_shape = layer_param.mutable_reshape_param()->mutable_shape(); + blob_shape->add_dim(0); + blob_shape->add_dim(-1); + blob_shape->add_dim(1); + blob_shape->add_dim(1); ReshapeLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); @@ -58,11 +58,11 @@ TYPED_TEST(ReshapeLayerTest, TestFlattenOutputSizes) { TYPED_TEST(ReshapeLayerTest, TestFlattenValues) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - ReshapeParameter* reshape_param = - layer_param.mutable_reshape_param(); - reshape_param->set_channels(-1); - reshape_param->set_height(1); - reshape_param->set_width(1); + BlobShape* blob_shape = layer_param.mutable_reshape_param()->mutable_shape(); + blob_shape->add_dim(0); + blob_shape->add_dim(-1); + blob_shape->add_dim(1); + blob_shape->add_dim(1); ReshapeLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); @@ -79,12 +79,11 @@ TYPED_TEST(ReshapeLayerTest, TestFlattenValues) { TYPED_TEST(ReshapeLayerTest, TestCopyDimensions) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - ReshapeParameter* reshape_param = - layer_param.mutable_reshape_param(); - // Omitting num to test implicit zeroes. - reshape_param->set_channels(0); - reshape_param->set_height(0); - reshape_param->set_width(0); + BlobShape* blob_shape = layer_param.mutable_reshape_param()->mutable_shape(); + blob_shape->add_dim(0); + blob_shape->add_dim(0); + blob_shape->add_dim(0); + blob_shape->add_dim(0); ReshapeLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); @@ -99,12 +98,11 @@ TYPED_TEST(ReshapeLayerTest, TestCopyDimensions) { TYPED_TEST(ReshapeLayerTest, TestInferenceOfUnspecified) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - ReshapeParameter* reshape_param = - layer_param.mutable_reshape_param(); - // Since omitted, num is implicitly set to 0 (thus, copies 2). - reshape_param->set_channels(3); - reshape_param->set_height(10); - reshape_param->set_width(-1); + BlobShape* blob_shape = layer_param.mutable_reshape_param()->mutable_shape(); + blob_shape->add_dim(0); + blob_shape->add_dim(3); + blob_shape->add_dim(10); + blob_shape->add_dim(-1); // Count is 180, thus height should be 180 / (2*3*10) = 3. 
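To make the shape semantics introduced by this patch concrete, here is a minimal standalone sketch -- illustrative only, not part of the patch; the helper name `InferReshape` and the plain `std::vector<int>` interface are invented for the example -- of the rule the reworked ReshapeLayer::Reshape applies for a whole-blob reshape: a `dim` of 0 copies the corresponding bottom axis, and a single `dim` of -1 is filled in so that the total element count of the bottom blob is preserved.

    #include <cassert>
    #include <vector>

    // Illustrative sketch of the 0 = "copy this axis" and -1 = "infer this
    // axis" semantics described above (whole-blob reshape only).
    std::vector<int> InferReshape(const std::vector<int>& bottom,
                                  const std::vector<int>& spec) {
      int bottom_count = 1;
      for (int d : bottom) bottom_count *= d;
      std::vector<int> top(spec);
      int inferred = -1;
      int known_count = 1;
      for (int i = 0; i < static_cast<int>(top.size()); ++i) {
        if (top[i] == 0) {
          assert(i < static_cast<int>(bottom.size()));
          top[i] = bottom[i];  // copy this axis from the bottom blob
        }
        if (top[i] == -1) { inferred = i; continue; }  // fill in afterwards
        known_count *= top[i];
      }
      if (inferred >= 0) {
        assert(bottom_count % known_count == 0);
        top[inferred] = bottom_count / known_count;
      }
      return top;
    }

For example, a bottom shape of {2, 3, 6, 5} (count 180) with spec {0, 3, 10, -1} yields {2, 3, 10, 3}, matching the expectation in TestInferenceOfUnspecified above.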
From 6b64f121c272b3d1464004554b9e6a9c4033a8f5 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Thu, 26 Mar 2015 02:25:48 -0700 Subject: [PATCH 19/21] basic tests (Forward, Gradient) for ReshapeLayer --- src/caffe/test/test_reshape_layer.cpp | 57 +++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/caffe/test/test_reshape_layer.cpp b/src/caffe/test/test_reshape_layer.cpp index 0c8e2427aa7..8635792a66e 100644 --- a/src/caffe/test/test_reshape_layer.cpp +++ b/src/caffe/test/test_reshape_layer.cpp @@ -115,4 +115,61 @@ TYPED_TEST(ReshapeLayerTest, TestInferenceOfUnspecified) { EXPECT_EQ(this->blob_top_->width(), 3); } +TYPED_TEST(ReshapeLayerTest, TestForward) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + BlobShape* shape = layer_param.mutable_reshape_param()->mutable_shape(); + shape->add_dim(6); + shape->add_dim(2); + shape->add_dim(3); + shape->add_dim(5); + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_EQ(this->blob_top_->cpu_data()[i], + this->blob_bottom_->cpu_data()[i]); + } +} + +TYPED_TEST(ReshapeLayerTest, TestForwardAfterReshape) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + BlobShape* shape = layer_param.mutable_reshape_param()->mutable_shape(); + shape->add_dim(6); + shape->add_dim(2); + shape->add_dim(3); + shape->add_dim(5); + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // We know the above produced the correct result from TestForward. + // Reshape the bottom and call layer.Reshape, then try again. 
+ vector new_bottom_shape(1, 2 * 3 * 6 * 5); + this->blob_bottom_->Reshape(new_bottom_shape); + layer.Reshape(this->blob_bottom_vec_, this->blob_top_vec_); + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_EQ(this->blob_top_->cpu_data()[i], + this->blob_bottom_->cpu_data()[i]); + } +} + +TYPED_TEST(ReshapeLayerTest, TestGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + BlobShape* shape = layer_param.mutable_reshape_param()->mutable_shape(); + shape->add_dim(6); + shape->add_dim(2); + shape->add_dim(3); + shape->add_dim(5); + ReshapeLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-2); + checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + } // namespace caffe From 21032b2b0911cd4d907df46c114b8e96e55c2313 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Thu, 26 Mar 2015 01:13:18 -0700 Subject: [PATCH 20/21] Add ReshapeParameter axis and num_axes to reshape only a particular span of the input shape --- include/caffe/common_layers.hpp | 2 - src/caffe/layers/reshape_layer.cpp | 53 ++++++++++--- src/caffe/proto/caffe.proto | 58 +++++++++++++- src/caffe/test/test_reshape_layer.cpp | 105 ++++++++++++++++++++++++++ 4 files changed, 204 insertions(+), 14 deletions(-) diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index ccdfd62d5be..8da6d68096b 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -327,8 +327,6 @@ class ReshapeLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) {} - /// @brief the current output shape - vector top_shape_; /// @brief vector of axes indices whose dimensions we'll copy from the bottom vector copy_axes_; /// @brief the index of the axis whose dimension we infer, or -1 if none diff --git a/src/caffe/layers/reshape_layer.cpp b/src/caffe/layers/reshape_layer.cpp index 618edf31824..ffe970f2689 100644 --- a/src/caffe/layers/reshape_layer.cpp +++ b/src/caffe/layers/reshape_layer.cpp @@ -12,18 +12,17 @@ void ReshapeLayer::LayerSetUp(const vector*>& bottom, copy_axes_.clear(); const BlobShape& top_blob_shape = this->layer_param_.reshape_param().shape(); const int top_num_axes = top_blob_shape.dim_size(); - top_shape_.resize(top_num_axes); constant_count_ = 1; for (int i = 0; i < top_num_axes; ++i) { - top_shape_[i] = top_blob_shape.dim(i); - if (top_shape_[i] == 0) { + const int top_dim = top_blob_shape.dim(i); + if (top_dim == 0) { copy_axes_.push_back(i); - } else if (top_shape_[i] == -1) { + } else if (top_dim == -1) { CHECK_EQ(inferred_axis_, -1) << "new shape contains multiple " << "-1 dims; at most a single (1) value of -1 may be specified"; inferred_axis_ = i; } else { - constant_count_ *= top_shape_[i]; + constant_count_ *= top_dim; } } } @@ -31,27 +30,59 @@ void ReshapeLayer::LayerSetUp(const vector*>& bottom, template void ReshapeLayer::Reshape(const vector*>& bottom, const vector*>& top) { + const int input_start_axis = this->layer_param_.reshape_param().axis(); + const int start_axis = (input_start_axis >= 0) ? 
input_start_axis : + bottom[0]->num_axes() + input_start_axis + 1; + CHECK_GE(start_axis, 0) << "axis " << input_start_axis << " out of range"; + CHECK_LE(start_axis, bottom[0]->num_axes()) << "axis " << input_start_axis + << " out of range for " << bottom[0]->num_axes() << "-D input blob"; + const int num_axes = this->layer_param_.reshape_param().num_axes(); + CHECK_GE(num_axes, -1) << "num_axes must be >= 0, or -1 for all"; + const int end_axis = + (num_axes == -1) ? bottom[0]->num_axes() : (start_axis + num_axes); + CHECK_LE(end_axis, bottom[0]->num_axes()) + << "end_axis = axis + num_axes is out of range"; + const int num_axes_replaced = end_axis - start_axis; + const int num_axes_retained = bottom[0]->num_axes() - num_axes_replaced; + const BlobShape& top_blob_shape = this->layer_param_.reshape_param().shape(); + const int num_new_axes = top_blob_shape.dim_size(); + vector top_shape(num_axes_retained + num_new_axes); + int top_shape_index = 0; + for (int i = 0; i < start_axis; ++i) { + top_shape[top_shape_index++] = bottom[0]->shape(i); + } + for (int i = 0; i < num_new_axes; ++i) { + top_shape[top_shape_index++] = top_blob_shape.dim(i); + } + for (int i = end_axis; i < bottom[0]->num_axes(); ++i) { + top_shape[top_shape_index++] = bottom[0]->shape(i); + } + CHECK_EQ(top_shape_index, top_shape.size()); for (int i = 0; i < copy_axes_.size(); ++i) { const int copy_axis_index = copy_axes_[i]; - CHECK_GT(bottom[0]->num_axes(), copy_axis_index) << "new shape contains " - << "a 0, but there is no corresponding bottom axis to copy"; - top_shape_[copy_axis_index] = bottom[0]->shape(copy_axis_index); + CHECK_GT(bottom[0]->num_axes(), start_axis + copy_axis_index) + << "new shape contains a 0, but there was no corresponding bottom axis " + << "to copy"; + top_shape[start_axis + copy_axis_index] = + bottom[0]->shape(start_axis + copy_axis_index); } if (inferred_axis_ >= 0) { // A -1 dim was specified; infer the correct dimension by computing the // product of the other dimensions. int explicit_count = constant_count_; + explicit_count *= bottom[0]->count(0, start_axis); + explicit_count *= bottom[0]->count(end_axis); for (int i = 0; i < copy_axes_.size(); ++i) { const int copy_axis_index = copy_axes_[i]; - explicit_count *= top_shape_[copy_axis_index]; + explicit_count *= top_shape[start_axis + copy_axis_index]; } CHECK_EQ(0, bottom[0]->count() % explicit_count) << "bottom count (" << bottom[0]->count() << ") must be divisible by the product of " << "the specified dimensions (" << explicit_count << ")"; const int inferred_dim = bottom[0]->count() / explicit_count; - top_shape_[inferred_axis_] = inferred_dim; + top_shape[start_axis + inferred_axis_] = inferred_dim; } - top[0]->Reshape(top_shape_); + top[0]->Reshape(top_shape); CHECK_EQ(top[0]->count(), bottom[0]->count()) << "output count must match input count"; top[0]->ShareData(*bottom[0]); diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index d36f1d511df..d43e560a1fa 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -696,8 +696,64 @@ message ReshapeParameter { // Specify the output dimensions. If some of the dimensions are set to 0, // the corresponding dimension from the bottom layer is used (unchanged). // Exactly one dimension may be set to -1, in which case its value is - // inferred from the count of the bottom layer and the remaining dimensions. + // inferred from the count of the bottom blob and the remaining dimensions. 
+ // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8: + // + // layer { + // type: "Reshape" bottom: "input" top: "output" + // reshape_param { ... } + // } + // + // If "input" is 2D with shape 2 x 8, then the following reshape_param + // specifications are all equivalent, producing a 3D blob "output" with shape + // 2 x 2 x 4: + // + // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } + // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } + // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } + // reshape_param { shape { dim: -1 dim: 0 dim: 2 } } + // optional BlobShape shape = 1; + + // axis and num_axes control the portion of the bottom blob's shape that are + // replaced by (included in) the reshape. By default (axis == 0 and + // num_axes == -1), the entire bottom blob shape is included in the reshape, + // and hence the shape field must specify the entire output shape. + // + // axis may be non-zero to retain some portion of the beginning of the input + // shape (and may be negative to index from the end; e.g., -1 to begin the + // reshape after the last axis, including nothing in the reshape, + // -2 to include only the last axis, etc.). + // + // For example, suppose "input" is a 2D blob with shape 2 x 8. + // Then the following ReshapeLayer specifications are all equivalent, + // producing a blob "output" with shape 2 x 2 x 4: + // + // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } + // reshape_param { shape { dim: 2 dim: 4 } axis: 1 } + // reshape_param { shape { dim: 2 dim: 4 } axis: -3 } + // + // num_axes specifies the extent of the reshape. + // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on + // input axes in the range [axis, axis+num_axes]. + // num_axes may also be -1, the default, to include all remaining axes + // (starting from axis). + // + // For example, suppose "input" is a 2D blob with shape 2 x 8. + // Then the following ReshapeLayer specifications are equivalent, + // producing a blob "output" with shape 1 x 2 x 8. 
+ // + // reshape_param { shape { dim: 1 dim: 2 dim: 8 } } + // reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 } + // reshape_param { shape { dim: 1 } num_axes: 0 } + // + // On the other hand, these would produce output blob shape 2 x 1 x 8: + // + // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } + // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } + // + optional int32 axis = 2 [default = 0]; + optional int32 num_axes = 3 [default = -1]; } // Message that stores parameters used by SigmoidLayer diff --git a/src/caffe/test/test_reshape_layer.cpp b/src/caffe/test/test_reshape_layer.cpp index 8635792a66e..9d08ec60d4e 100644 --- a/src/caffe/test/test_reshape_layer.cpp +++ b/src/caffe/test/test_reshape_layer.cpp @@ -115,6 +115,111 @@ TYPED_TEST(ReshapeLayerTest, TestInferenceOfUnspecified) { EXPECT_EQ(this->blob_top_->width(), 3); } +TYPED_TEST(ReshapeLayerTest, TestInferenceOfUnspecifiedWithStartAxis) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_reshape_param()->set_axis(1); + BlobShape* blob_shape = layer_param.mutable_reshape_param()->mutable_shape(); + blob_shape->add_dim(3); + blob_shape->add_dim(10); + blob_shape->add_dim(-1); + + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + + ASSERT_EQ(this->blob_top_->num_axes(), 4); + EXPECT_EQ(this->blob_top_->num(), 2); + EXPECT_EQ(this->blob_top_->channels(), 3); + EXPECT_EQ(this->blob_top_->height(), 10); + EXPECT_EQ(this->blob_top_->width(), 3); +} + +TYPED_TEST(ReshapeLayerTest, TestInsertSingletonAxesStart) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_reshape_param()->set_axis(0); + layer_param.mutable_reshape_param()->set_num_axes(0); + BlobShape* blob_shape = layer_param.mutable_reshape_param()->mutable_shape(); + blob_shape->add_dim(1); + blob_shape->add_dim(1); + blob_shape->add_dim(1); + + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + + ASSERT_EQ(this->blob_top_->num_axes(), 7); + EXPECT_EQ(this->blob_top_->shape(0), 1); + EXPECT_EQ(this->blob_top_->shape(1), 1); + EXPECT_EQ(this->blob_top_->shape(2), 1); + EXPECT_EQ(this->blob_top_->shape(3), 2); + EXPECT_EQ(this->blob_top_->shape(4), 3); + EXPECT_EQ(this->blob_top_->shape(5), 6); + EXPECT_EQ(this->blob_top_->shape(6), 5); +} + +TYPED_TEST(ReshapeLayerTest, TestInsertSingletonAxesMiddle) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_reshape_param()->set_axis(2); + layer_param.mutable_reshape_param()->set_num_axes(0); + BlobShape* blob_shape = layer_param.mutable_reshape_param()->mutable_shape(); + blob_shape->add_dim(1); + blob_shape->add_dim(1); + blob_shape->add_dim(1); + + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + + ASSERT_EQ(this->blob_top_->num_axes(), 7); + EXPECT_EQ(this->blob_top_->shape(0), 2); + EXPECT_EQ(this->blob_top_->shape(1), 3); + EXPECT_EQ(this->blob_top_->shape(2), 1); + EXPECT_EQ(this->blob_top_->shape(3), 1); + EXPECT_EQ(this->blob_top_->shape(4), 1); + EXPECT_EQ(this->blob_top_->shape(5), 6); + EXPECT_EQ(this->blob_top_->shape(6), 5); +} + +TYPED_TEST(ReshapeLayerTest, TestInsertSingletonAxesEnd) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_reshape_param()->set_axis(-1); + layer_param.mutable_reshape_param()->set_num_axes(0); + BlobShape* blob_shape = 
layer_param.mutable_reshape_param()->mutable_shape(); + blob_shape->add_dim(1); + blob_shape->add_dim(1); + blob_shape->add_dim(1); + + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + + ASSERT_EQ(this->blob_top_->num_axes(), 7); + EXPECT_EQ(this->blob_top_->shape(0), 2); + EXPECT_EQ(this->blob_top_->shape(1), 3); + EXPECT_EQ(this->blob_top_->shape(2), 6); + EXPECT_EQ(this->blob_top_->shape(3), 5); + EXPECT_EQ(this->blob_top_->shape(4), 1); + EXPECT_EQ(this->blob_top_->shape(5), 1); + EXPECT_EQ(this->blob_top_->shape(6), 1); +} + +TYPED_TEST(ReshapeLayerTest, TestFlattenMiddle) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_reshape_param()->set_axis(1); + layer_param.mutable_reshape_param()->set_num_axes(2); + BlobShape* blob_shape = layer_param.mutable_reshape_param()->mutable_shape(); + blob_shape->add_dim(-1); + + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + + ASSERT_EQ(this->blob_top_->num_axes(), 3); + EXPECT_EQ(this->blob_top_->shape(0), 2); + EXPECT_EQ(this->blob_top_->shape(1), 3 * 6); + EXPECT_EQ(this->blob_top_->shape(2), 5); +} + TYPED_TEST(ReshapeLayerTest, TestForward) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; From cf0e6b7215b8c571b02bae84d7674d867156660f Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 25 Mar 2015 18:45:29 -0700 Subject: [PATCH 21/21] Update docs for ND blobs (#1970) and layer type is a string (#1694) --- docs/tutorial/data.md | 12 +-- docs/tutorial/layers.md | 160 +++++++++++++++---------------- docs/tutorial/loss.md | 20 ++-- docs/tutorial/net_layer_blob.md | 30 +++--- examples/mnist/readme.md | 50 +++++----- examples/siamese/readme.md | 66 +++++++------ matlab/caffe/hdf5creation/demo.m | 4 +- 7 files changed, 174 insertions(+), 168 deletions(-) diff --git a/docs/tutorial/data.md b/docs/tutorial/data.md index 40605f7cd73..3bf7d932eda 100644 --- a/docs/tutorial/data.md +++ b/docs/tutorial/data.md @@ -10,15 +10,15 @@ New input types are supported by developing a new data layer -- the rest of the This data layer definition - layers { + layer { name: "mnist" - # DATA layer loads leveldb or lmdb storage DBs for high-throughput. - type: DATA + # Data layer loads leveldb or lmdb storage DBs for high-throughput. + type: "Data" # the 1st top is the data itself: the name is only convention top: "data" # the 2nd top is the ground truth: the name is only convention top: "label" - # the DATA layer configuration + # the Data layer configuration data_param { # path to the DB source: "examples/mnist/mnist_train_lmdb" @@ -46,9 +46,9 @@ The (data, label) pairing is a convenience for classification models. **Transformations**: data preprocessing is parametrized by transformation messages within the data layer definition. - layers { + layer { name: "data" - type: DATA + type: "Data" [...] 
transform_param { scale: 0.1 diff --git a/docs/tutorial/layers.md b/docs/tutorial/layers.md index 839939f5ad6..74d236c1194 100644 --- a/docs/tutorial/layers.md +++ b/docs/tutorial/layers.md @@ -23,7 +23,7 @@ In contrast, other layers (with few exceptions) ignore the spatial structure of #### Convolution -* LayerType: `CONVOLUTION` +* Layer type: `Convolution` * CPU implementation: `./src/caffe/layers/convolution_layer.cpp` * CUDA GPU implementation: `./src/caffe/layers/convolution_layer.cu` * Parameters (`ConvolutionParameter convolution_param`) @@ -43,15 +43,15 @@ In contrast, other layers (with few exceptions) ignore the spatial structure of - `n * c_o * h_o * w_o`, where `h_o = (h_i + 2 * pad_h - kernel_h) / stride_h + 1` and `w_o` likewise. * Sample (as seen in `./examples/imagenet/imagenet_train_val.prototxt`) - layers { + layer { name: "conv1" - type: CONVOLUTION + type: "Convolution" bottom: "data" top: "conv1" - blobs_lr: 1 # learning rate multiplier for the filters - blobs_lr: 2 # learning rate multiplier for the biases - weight_decay: 1 # weight decay multiplier for the filters - weight_decay: 0 # weight decay multiplier for the biases + # learning rate and decay multipliers for the filters + param { lr_mult: 1 decay_mult: 1 } + # learning rate and decay multipliers for the biases + param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 96 # learn 96 filters kernel_size: 11 # each filter is 11x11 @@ -67,11 +67,11 @@ In contrast, other layers (with few exceptions) ignore the spatial structure of } } -The `CONVOLUTION` layer convolves the input image with a set of learnable filters, each producing one feature map in the output image. +The `Convolution` layer convolves the input image with a set of learnable filters, each producing one feature map in the output image. #### Pooling -* LayerType: `POOLING` +* Layer type: `Pooling` * CPU implementation: `./src/caffe/layers/pooling_layer.cpp` * CUDA GPU implementation: `./src/caffe/layers/pooling_layer.cu` * Parameters (`PoolingParameter pooling_param`) @@ -87,9 +87,9 @@ The `CONVOLUTION` layer convolves the input image with a set of learnable filter - `n * c * h_o * w_o`, where h_o and w_o are computed in the same way as convolution. * Sample (as seen in `./examples/imagenet/imagenet_train_val.prototxt`) - layers { + layer { name: "pool1" - type: POOLING + type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { @@ -101,7 +101,7 @@ The `CONVOLUTION` layer convolves the input image with a set of learnable filter #### Local Response Normalization (LRN) -* LayerType: `LRN` +* Layer type: `LRN` * CPU Implementation: `./src/caffe/layers/lrn_layer.cpp` * CUDA GPU Implementation: `./src/caffe/layers/lrn_layer.cu` * Parameters (`LRNParameter lrn_param`) @@ -115,7 +115,7 @@ The local response normalization layer performs a kind of "lateral inhibition" b #### im2col -`IM2COL` is a helper for doing the image-to-column transformation that you most likely do not need to know about. This is used in Caffe's original convolution to do matrix multiplication by laying out all patches into a matrix. +`Im2col` is a helper for doing the image-to-column transformation that you most likely do not need to know about. This is used in Caffe's original convolution to do matrix multiplication by laying out all patches into a matrix. 
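Before moving on to the loss layers, a quick sanity check of the output-size formula quoted above for `Convolution` (and reused by `Pooling`): `h_o = (h_i + 2 * pad_h - kernel_h) / stride_h + 1`. The snippet below is a hedged, standalone illustration and not part of Caffe; the 227x227 input is only an assumed example crop size.

    #include <cstdio>

    // Evaluates the output-size formula given above for one spatial dimension.
    int OutputDim(int input, int pad, int kernel, int stride) {
      return (input + 2 * pad - kernel) / stride + 1;
    }

    int main() {
      // conv1 sample above: 11x11 kernel, stride 4, no padding
      std::printf("conv1: %d\n", OutputDim(227, 0, 11, 4));  // -> 55
      // pool1 sample above: 3x3 kernel, stride 2
      std::printf("pool1: %d\n", OutputDim(55, 0, 3, 2));    // -> 27
      return 0;
    }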
### Loss Layers @@ -123,19 +123,19 @@ Loss drives learning by comparing an output to a target and assigning cost to mi #### Softmax -* LayerType: `SOFTMAX_LOSS` +* Layer type: `SoftmaxWithLoss` The softmax loss layer computes the multinomial logistic loss of the softmax of its inputs. It's conceptually identical to a softmax layer followed by a multinomial logistic loss layer, but provides a more numerically stable gradient. #### Sum-of-Squares / Euclidean -* LayerType: `EUCLIDEAN_LOSS` +* Layer type: `EuclideanLoss` The Euclidean loss layer computes the sum of squares of differences of its two inputs, $$\frac 1 {2N} \sum_{i=1}^N \| x^1_i - x^2_i \|_2^2$$. #### Hinge / Margin -* LayerType: `HINGE_LOSS` +* Layer type: `HingeLoss` * CPU implementation: `./src/caffe/layers/hinge_loss_layer.cpp` * CUDA GPU implementation: none yet * Parameters (`HingeLossParameter hinge_loss_param`) @@ -149,17 +149,17 @@ The Euclidean loss layer computes the sum of squares of differences of its two i * Samples # L1 Norm - layers { + layer { name: "loss" - type: HINGE_LOSS + type: "HingeLoss" bottom: "pred" bottom: "label" } # L2 Norm - layers { + layer { name: "loss" - type: HINGE_LOSS + type: "HingeLoss" bottom: "pred" bottom: "label" top: "loss" @@ -172,15 +172,15 @@ The hinge loss layer computes a one-vs-all hinge or squared hinge loss. #### Sigmoid Cross-Entropy -`SIGMOID_CROSS_ENTROPY_LOSS` +`SigmoidCrossEntropyLoss` #### Infogain -`INFOGAIN_LOSS` +`InfogainLoss` #### Accuracy and Top-k -`ACCURACY` scores the output as the accuracy of output with respect to target -- it is not actually a loss and has no backward step. +`Accuracy` scores the output as the accuracy of output with respect to target -- it is not actually a loss and has no backward step. ### Activation / Neuron Layers @@ -193,7 +193,7 @@ In general, activation / Neuron layers are element-wise operators, taking one bo #### ReLU / Rectified-Linear and Leaky-ReLU -* LayerType: `RELU` +* Layer type: `ReLU` * CPU implementation: `./src/caffe/layers/relu_layer.cpp` * CUDA GPU implementation: `./src/caffe/layers/relu_layer.cu` * Parameters (`ReLUParameter relu_param`) @@ -201,66 +201,66 @@ In general, activation / Neuron layers are element-wise operators, taking one bo - `negative_slope` [default 0]: specifies whether to leak the negative part by multiplying it with the slope value rather than setting it to 0. * Sample (as seen in `./examples/imagenet/imagenet_train_val.prototxt`) - layers { + layer { name: "relu1" - type: RELU + type: "ReLU" bottom: "conv1" top: "conv1" } -Given an input value x, The `RELU` layer computes the output as x if x > 0 and negative_slope * x if x <= 0. When the negative slope parameter is not set, it is equivalent to the standard ReLU function of taking max(x, 0). It also supports in-place computation, meaning that the bottom and the top blob could be the same to preserve memory consumption. +Given an input value x, The `ReLU` layer computes the output as x if x > 0 and negative_slope * x if x <= 0. When the negative slope parameter is not set, it is equivalent to the standard ReLU function of taking max(x, 0). It also supports in-place computation, meaning that the bottom and the top blob could be the same to preserve memory consumption. 
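Because the rule just described is purely element-wise, it is easy to see why in-place computation is safe: each output depends only on the matching input. A minimal sketch follows (plain C++, not the actual `relu_layer` kernel; `relu_forward` is an illustrative name):

    #include <cstddef>

    // y = x for x > 0, y = negative_slope * x otherwise
    // (negative_slope = 0 gives the standard max(x, 0)).
    // Passing top == bottom overwrites the input in place, which is the
    // memory-saving usage shown in the sample above (bottom "conv1", top "conv1").
    void relu_forward(const float* bottom, float* top, std::size_t count,
                      float negative_slope) {
      for (std::size_t i = 0; i < count; ++i) {
        const float x = bottom[i];
        top[i] = x > 0 ? x : negative_slope * x;
      }
    }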
#### Sigmoid -* LayerType: `SIGMOID` +* Layer type: `Sigmoid` * CPU implementation: `./src/caffe/layers/sigmoid_layer.cpp` * CUDA GPU implementation: `./src/caffe/layers/sigmoid_layer.cu` * Sample (as seen in `./examples/imagenet/mnist_autoencoder.prototxt`) - layers { + layer { name: "encode1neuron" bottom: "encode1" top: "encode1neuron" - type: SIGMOID + type: "Sigmoid" } -The `SIGMOID` layer computes the output as sigmoid(x) for each input element x. +The `Sigmoid` layer computes the output as sigmoid(x) for each input element x. #### TanH / Hyperbolic Tangent -* LayerType: `TANH` +* Layer type: `TanH` * CPU implementation: `./src/caffe/layers/tanh_layer.cpp` * CUDA GPU implementation: `./src/caffe/layers/tanh_layer.cu` * Sample - layers { + layer { name: "layer" bottom: "in" top: "out" - type: TANH + type: "TanH" } -The `TANH` layer computes the output as tanh(x) for each input element x. +The `TanH` layer computes the output as tanh(x) for each input element x. #### Absolute Value -* LayerType: `ABSVAL` +* Layer type: `AbsVal` * CPU implementation: `./src/caffe/layers/absval_layer.cpp` * CUDA GPU implementation: `./src/caffe/layers/absval_layer.cu` * Sample - layers { + layer { name: "layer" bottom: "in" top: "out" - type: ABSVAL + type: "AbsVal" } -The `ABSVAL` layer computes the output as abs(x) for each input element x. +The `AbsVal` layer computes the output as abs(x) for each input element x. #### Power -* LayerType: `POWER` +* Layer type: `Power` * CPU implementation: `./src/caffe/layers/power_layer.cpp` * CUDA GPU implementation: `./src/caffe/layers/power_layer.cu` * Parameters (`PowerParameter power_param`) @@ -270,11 +270,11 @@ The `ABSVAL` layer computes the output as abs(x) for each input element x. - `shift` [default 0] * Sample - layers { + layer { name: "layer" bottom: "in" top: "out" - type: POWER + type: "Power" power_param { power: 1 scale: 1 @@ -282,16 +282,16 @@ The `ABSVAL` layer computes the output as abs(x) for each input element x. } } -The `POWER` layer computes the output as (shift + scale * x) ^ power for each input element x. +The `Power` layer computes the output as (shift + scale * x) ^ power for each input element x. #### BNLL -* LayerType: `BNLL` +* Layer type: `BNLL` * CPU implementation: `./src/caffe/layers/bnll_layer.cpp` * CUDA GPU implementation: `./src/caffe/layers/bnll_layer.cu` * Sample - layers { + layer { name: "layer" bottom: "in" top: "out" @@ -309,7 +309,7 @@ Common input preprocessing (mean subtraction, scaling, random cropping, and mirr #### Database -* LayerType: `DATA` +* Layer type: `Data` * Parameters - Required - `source`: the name of the directory containing the database @@ -322,7 +322,7 @@ Common input preprocessing (mean subtraction, scaling, random cropping, and mirr #### In-Memory -* LayerType: `MEMORY_DATA` +* Layer type: `MemoryData` * Parameters - Required - `batch_size`, `channels`, `height`, `width`: specify the size of input chunks to read from memory @@ -331,7 +331,7 @@ The memory data layer reads data directly from memory, without copying it. In or #### HDF5 Input -* LayerType: `HDF5_DATA` +* Layer type: `HDF5Data` * Parameters - Required - `source`: the name of the file to read from @@ -339,7 +339,7 @@ The memory data layer reads data directly from memory, without copying it. 
In or #### HDF5 Output -* LayerType: `HDF5_OUTPUT` +* Layer type: `HDF5Output` * Parameters - Required - `file_name`: name of file to write to @@ -348,7 +348,7 @@ The HDF5 output layer performs the opposite function of the other layers in this #### Images -* LayerType: `IMAGE_DATA` +* Layer type: `ImageData` * Parameters - Required - `source`: name of a text file, with each line giving an image filename and label @@ -360,17 +360,17 @@ The HDF5 output layer performs the opposite function of the other layers in this #### Windows -`WINDOW_DATA` +`WindowData` #### Dummy -`DUMMY_DATA` is for development and debugging. See `DummyDataParameter`. +`DummyData` is for development and debugging. See `DummyDataParameter`. ### Common Layers #### Inner Product -* LayerType: `INNER_PRODUCT` +* Layer type: `InnerProduct` * CPU implementation: `./src/caffe/layers/inner_product_layer.cpp` * CUDA GPU implementation: `./src/caffe/layers/inner_product_layer.cu` * Parameters (`InnerProductParameter inner_product_param`) @@ -387,13 +387,13 @@ The HDF5 output layer performs the opposite function of the other layers in this - `n * c_o * 1 * 1` * Sample - layers { + layer { name: "fc8" - type: INNER_PRODUCT - blobs_lr: 1 # learning rate multiplier for the filters - blobs_lr: 2 # learning rate multiplier for the biases - weight_decay: 1 # weight decay multiplier for the filters - weight_decay: 0 # weight decay multiplier for the biases + type: "InnerProduct" + # learning rate and decay multipliers for the weights + param { lr_mult: 1 decay_mult: 1 } + # learning rate and decay multipliers for the biases + param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 1000 weight_filler { @@ -409,79 +409,79 @@ The HDF5 output layer performs the opposite function of the other layers in this top: "fc8" } -The `INNER_PRODUCT` layer (also usually referred to as the fully connected layer) treats the input as a simple vector and produces an output in the form of a single vector (with the blob's height and width set to 1). +The `InnerProduct` layer (also usually referred to as the fully connected layer) treats the input as a simple vector and produces an output in the form of a single vector (with the blob's height and width set to 1). #### Splitting -The `SPLIT` layer is a utility layer that splits an input blob to multiple output blobs. This is used when a blob is fed into multiple output layers. +The `Split` layer is a utility layer that splits an input blob to multiple output blobs. This is used when a blob is fed into multiple output layers. #### Flattening -The `FLATTEN` layer is a utility layer that flattens an input of shape `n * c * h * w` to a simple vector output of shape `n * (c*h*w) * 1 * 1`. +The `Flatten` layer is a utility layer that flattens an input of shape `n * c * h * w` to a simple vector output of shape `n * (c*h*w)` #### Concatenation -* LayerType: `CONCAT` +* Layer type: `Concat` * CPU implementation: `./src/caffe/layers/concat_layer.cpp` * CUDA GPU implementation: `./src/caffe/layers/concat_layer.cu` * Parameters (`ConcatParameter concat_param`) - Optional - - `concat_dim` [default 1]: 0 for concatenation along num and 1 for channels. + - `axis` [default 1]: 0 for concatenation along num and 1 for channels. * Input - `n_i * c_i * h * w` for each input blob i from 1 to K. * Output - - if `concat_dim = 0`: `(n_1 + n_2 + ... + n_K) * c_1 * h * w`, and all input `c_i` should be the same. - - if `concat_dim = 1`: `n_1 * (c_1 + c_2 + ... + c_K) * h * w`, and all input `n_i` should be the same. 
+ - if `axis = 0`: `(n_1 + n_2 + ... + n_K) * c_1 * h * w`, and all input `c_i` should be the same. + - if `axis = 1`: `n_1 * (c_1 + c_2 + ... + c_K) * h * w`, and all input `n_i` should be the same. * Sample - layers { + layer { name: "concat" bottom: "in1" bottom: "in2" top: "out" - type: CONCAT + type: "Concat" concat_param { - concat_dim: 1 + axis: 1 } } -The `CONCAT` layer is a utility layer that concatenates its multiple input blobs to one single output blob. Currently, the layer supports concatenation along num or channels only. +The `Concat` layer is a utility layer that concatenates its multiple input blobs to one single output blob. #### Slicing -The `SLICE` layer is a utility layer that slices an input layer to multiple output layers along a given dimension (currently num or channel only) with given slice indices. +The `Slice` layer is a utility layer that slices an input layer to multiple output layers along a given dimension (currently num or channel only) with given slice indices. * Sample - layers { + layer { name: "slicer_label" - type: SLICE + type: "Slice" bottom: "label" ## Example of label with a shape N x 3 x 1 x 1 top: "label1" top: "label2" top: "label3" slice_param { - slice_dim: 1 - slice_point: 1 - slice_point: 2 + axis: 1 + slice_point: 1 + slice_point: 2 } } -`slice_dim` indicates the target dimension and can assume only two values: 0 for num or 1 for channel; `slice_point` indicates indexes in the selected dimension (the number of indexes must be equal to the number of top blobs minus one). +`axis` indicates the target axis; `slice_point` indicates indexes in the selected dimension (the number of indices must be equal to the number of top blobs minus one). #### Elementwise Operations -`ELTWISE` +`Eltwise` #### Argmax -`ARGMAX` +`ArgMax` #### Softmax -`SOFTMAX` +`Softmax` #### Mean-Variance Normalization diff --git a/docs/tutorial/loss.md b/docs/tutorial/loss.md index aac561774bb..d2d0e77fbed 100644 --- a/docs/tutorial/loss.md +++ b/docs/tutorial/loss.md @@ -10,30 +10,30 @@ Hence, the goal of learning is to find a setting of the weights that *minimizes* The loss in Caffe is computed by the Forward pass of the network. Each layer takes a set of input (`bottom`) blobs and produces a set of output (`top`) blobs. Some of these layers' outputs may be used in the loss function. -A typical choice of loss function for one-versus-all classification tasks is the `SOFTMAX_LOSS` function, used in a network definition as follows, for example: +A typical choice of loss function for one-versus-all classification tasks is the `SoftmaxWithLoss` function, used in a network definition as follows, for example: - layers { + layer { name: "loss" - type: SOFTMAX_LOSS + type: "SoftmaxWithLoss" bottom: "pred" bottom: "label" top: "loss" } -In a `SOFTMAX_LOSS` function, the `top` blob is a scalar (dimensions $$1 \times 1 \times 1 \times 1$$) which averages the loss (computed from predicted labels `pred` and actuals labels `label`) over the entire mini-batch. +In a `SoftmaxWithLoss` function, the `top` blob is a scalar (empty shape) which averages the loss (computed from predicted labels `pred` and actuals labels `label`) over the entire mini-batch. ### Loss weights -For nets with multiple layers producing a loss (e.g., a network that both classifies the input using a `SOFTMAX_LOSS` layer and reconstructs it using a `EUCLIDEAN_LOSS` layer), *loss weights* can be used to specify their relative importance. 
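As a rough illustration of the scalar that `SoftmaxWithLoss` produces, the sketch below (plain C++ on vectors rather than blobs; `softmax_loss`, `pred`, and `label` are illustrative names, and this is not the Caffe kernel) computes the multinomial logistic loss of the softmax of the predictions, averaged over the mini-batch:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Average over the batch of -log(softmax(pred[n])[label[n]]),
    // using a max shift and log-sum-exp for numerical stability.
    // Assumes each pred[n] is a non-empty row of class scores and
    // label[n] is a valid class index.
    float softmax_loss(const std::vector<std::vector<float> >& pred,
                       const std::vector<int>& label) {
      float loss = 0.f;
      for (std::size_t n = 0; n < pred.size(); ++n) {
        const float m = *std::max_element(pred[n].begin(), pred[n].end());
        float sum = 0.f;
        for (std::size_t c = 0; c < pred[n].size(); ++c) {
          sum += std::exp(pred[n][c] - m);
        }
        loss += std::log(sum) - (pred[n][label[n]] - m);
      }
      return pred.empty() ? 0.f : loss / pred.size();
    }

This per-net scalar is the value that the loss weights discussed next rescale.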
+For nets with multiple layers producing a loss (e.g., a network that both classifies the input using a `SoftmaxWithLoss` layer and reconstructs it using a `EuclideanLoss` layer), *loss weights* can be used to specify their relative importance. -By convention, Caffe layer types with the suffix `_LOSS` contribute to the loss function, but other layers are assumed to be purely used for intermediate computations. +By convention, Caffe layer types with the suffix `Loss` contribute to the loss function, but other layers are assumed to be purely used for intermediate computations. However, any layer can be used as a loss by adding a field `loss_weight: ` to a layer definition for each `top` blob produced by the layer. -Layers with the suffix `_LOSS` have an implicit `loss_weight: 1` for the first `top` blob (and `loss_weight: 0` for any additional `top`s); other layers have an implicit `loss_weight: 0` for all `top`s. -So, the above `SOFTMAX_LOSS` layer could be equivalently written as: +Layers with the suffix `Loss` have an implicit `loss_weight: 1` for the first `top` blob (and `loss_weight: 0` for any additional `top`s); other layers have an implicit `loss_weight: 0` for all `top`s. +So, the above `SoftmaxWithLoss` layer could be equivalently written as: - layers { + layer { name: "loss" - type: SOFTMAX_LOSS + type: "SoftmaxWithLoss" bottom: "pred" bottom: "label" top: "loss" diff --git a/docs/tutorial/net_layer_blob.md b/docs/tutorial/net_layer_blob.md index 1f0966f88a4..e8b7bd316a9 100644 --- a/docs/tutorial/net_layer_blob.md +++ b/docs/tutorial/net_layer_blob.md @@ -11,22 +11,20 @@ We will go over the details of these components in more detail. ## Blob storage and communication -A Blob is a wrapper over the actual data being processed and passed along by Caffe, and also under the hood provides synchronization capability between the CPU and the GPU. Mathematically, a blob is a 4-dimensional array that stores things in the order of (Num, Channels, Height and Width), from major to minor, and stored in a C-contiguous fashion. The main reason for putting Num (the name is due to legacy reasons, and is equivalent to the notation of "batch" as in minibatch SGD). +A Blob is a wrapper over the actual data being processed and passed along by Caffe, and also under the hood provides synchronization capability between the CPU and the GPU. Mathematically, a blob is an N-dimensional array stored in a C-contiguous fashion. -Caffe stores and communicates data in 4-dimensional arrays called blobs. Blobs provide a unified memory interface, holding data e.g. batches of images, model parameters, and derivatives for optimization. +Caffe stores and communicates data using blobs. Blobs provide a unified memory interface holding data; e.g., batches of images, model parameters, and derivatives for optimization. Blobs conceal the computational and mental overhead of mixed CPU/GPU operation by synchronizing from the CPU host to the GPU device as needed. Memory on the host and device is allocated on demand (lazily) for efficient memory usage. -The conventional blob dimensions for data are number N x channel K x height H x width W. Blob memory is row-major in layout so the last / rightmost dimension changes fastest. For example, the value at index (n, k, h, w) is physically located at index ((n * K + k) * H + h) * W + w. +The conventional blob dimensions for batches of image data are number N x channel K x height H x width W. Blob memory is row-major in layout, so the last / rightmost dimension changes fastest. 
For example, in a 4D blob, the value at index (n, k, h, w) is physically located at index ((n * K + k) * H + h) * W + w. - Number / N is the batch size of the data. Batch processing achieves better throughput for communication and device processing. For an ImageNet training batch of 256 images B = 256. - Channel / K is the feature dimension e.g. for RGB images K = 3. -Note that although we have designed blobs with its dimensions corresponding to image applications, they are named purely for notational purpose and it is totally valid for you to do non-image applications. For example, if you simply need fully-connected networks like the conventional multi-layer perceptron, use blobs of dimensions (Num, Channels, 1, 1) and call the InnerProductLayer (which we will cover soon). +Note that although many blobs in Caffe examples are 4D with axes for image applications, it is totally valid to use blobs for non-image applications. For example, if you simply need fully-connected networks like the conventional multi-layer perceptron, use 2D blobs (shape (N, D)) and call the InnerProductLayer (which we will cover soon). -Caffe operations are general with respect to the channel dimension / K. Grayscale and hyperspectral imagery are fine. Caffe can likewise model and process arbitrary vectors in blobs with singleton. That is, the shape of blob holding 1000 vectors of 16 feature dimensions is 1000 x 16 x 1 x 1. - -Parameter blob dimensions vary according to the type and configuration of the layer. For a convolution layer with 96 filters of 11 x 11 spatial dimension and 3 inputs the blob is 96 x 3 x 11 x 11. For an inner product / fully-connected layer with 1000 output channels and 1024 input channels the parameter blob is 1 x 1 x 1000 x 1024. +Parameter blob dimensions vary according to the type and configuration of the layer. For a convolution layer with 96 filters of 11 x 11 spatial dimension and 3 inputs the blob is 96 x 3 x 11 x 11. For an inner product / fully-connected layer with 1000 output channels and 1024 input channels the parameter blob is 1000 x 1024. For custom data it may be necessary to hack your own input preparation tool or data layer. However once your data is in your job is done. The modularity of layers accomplishes the rest of the work for you. @@ -95,9 +93,9 @@ A simple logistic regression classifier is defined by name: "LogReg" - layers { + layer { name: "mnist" - type: DATA + type: "Data" top: "data" top: "label" data_param { @@ -105,18 +103,18 @@ is defined by batch_size: 64 } } - layers { + layer { name: "ip" - type: INNER_PRODUCT + type: "InnerProduct" bottom: "data" top: "ip" inner_product_param { num_output: 2 } } - layers { + layer { name: "loss" - type: SOFTMAX_LOSS + type: "SoftmaxWithLoss" bottom: "ip" bottom: "label" top: "loss" @@ -135,19 +133,19 @@ Model initialization is handled by `Net::Init()`. 
The initialization mainly does I0902 22:52:17.935807 2079114000 data_layer.cpp:135] Opening leveldb input_leveldb I0902 22:52:17.937155 2079114000 data_layer.cpp:195] output data size: 64,1,28,28 I0902 22:52:17.938570 2079114000 net.cpp:103] Top shape: 64 1 28 28 (50176) - I0902 22:52:17.938593 2079114000 net.cpp:103] Top shape: 64 1 1 1 (64) + I0902 22:52:17.938593 2079114000 net.cpp:103] Top shape: 64 (64) I0902 22:52:17.938611 2079114000 net.cpp:67] Creating Layer ip I0902 22:52:17.938617 2079114000 net.cpp:394] ip <- data I0902 22:52:17.939177 2079114000 net.cpp:356] ip -> ip I0902 22:52:17.939196 2079114000 net.cpp:96] Setting up ip - I0902 22:52:17.940289 2079114000 net.cpp:103] Top shape: 64 2 1 1 (128) + I0902 22:52:17.940289 2079114000 net.cpp:103] Top shape: 64 2 (128) I0902 22:52:17.941270 2079114000 net.cpp:67] Creating Layer loss I0902 22:52:17.941305 2079114000 net.cpp:394] loss <- ip I0902 22:52:17.941314 2079114000 net.cpp:394] loss <- label I0902 22:52:17.941323 2079114000 net.cpp:356] loss -> loss # set up the loss and configure the backward pass I0902 22:52:17.941328 2079114000 net.cpp:96] Setting up loss - I0902 22:52:17.941328 2079114000 net.cpp:103] Top shape: 1 1 1 1 (1) + I0902 22:52:17.941328 2079114000 net.cpp:103] Top shape: (1) I0902 22:52:17.941329 2079114000 net.cpp:109] with loss weight 1 I0902 22:52:17.941779 2079114000 net.cpp:170] loss needs backward computation. I0902 22:52:17.941787 2079114000 net.cpp:170] ip needs backward computation. diff --git a/examples/mnist/readme.md b/examples/mnist/readme.md index ef7f5da67d5..269e53ab9b9 100644 --- a/examples/mnist/readme.md +++ b/examples/mnist/readme.md @@ -38,9 +38,9 @@ Specifically, we will write a `caffe::NetParameter` (or in python, `caffe.proto. Currently, we will read the MNIST data from the lmdb we created earlier in the demo. This is defined by a data layer: - layers { + layer { name: "mnist" - type: DATA + type: "Data" data_param { source: "mnist_train_lmdb" backend: LMDB @@ -57,14 +57,14 @@ Specifically, this layer has name `mnist`, type `data`, and it reads the data fr Let's define the first convolution layer: - layers { + layer { name: "conv1" - type: CONVOLUTION - blobs_lr: 1. - blobs_lr: 2. + type: "Convolution" + param { lr_mult: 1 } + param { lr_mult: 2 } convolution_param { num_output: 20 - kernelsize: 5 + kernel_size: 5 stride: 1 weight_filler { type: "xavier" @@ -81,15 +81,15 @@ This layer takes the `data` blob (it is provided by the data layer), and produce The fillers allow us to randomly initialize the value of the weights and bias. For the weight filler, we will use the `xavier` algorithm that automatically determines the scale of initialization based on the number of input and output neurons. For the bias filler, we will simply initialize it as constant, with the default filling value 0. -`blobs_lr` are the learning rate adjustments for the layer's learnable parameters. In this case, we will set the weight learning rate to be the same as the learning rate given by the solver during runtime, and the bias learning rate to be twice as large as that - this usually leads to better convergence rates. +`lr_mult`s are the learning rate adjustments for the layer's learnable parameters. In this case, we will set the weight learning rate to be the same as the learning rate given by the solver during runtime, and the bias learning rate to be twice as large as that - this usually leads to better convergence rates. ### Writing the Pooling Layer Phew. 
Pooling layers are actually much easier to define: - layers { + layer { name: "pool1" - type: POOLING + type: "Pooling" pooling_param { kernel_size: 2 stride: 2 @@ -107,11 +107,11 @@ Similarly, you can write up the second convolution and pooling layers. Check `$C Writing a fully connected layer is also simple: - layers { + layer { name: "ip1" - type: INNER_PRODUCT - blobs_lr: 1. - blobs_lr: 2. + type: "InnerProduct" + param { lr_mult: 1 } + param { lr_mult: 2 } inner_product_param { num_output: 500 weight_filler { @@ -125,15 +125,15 @@ Writing a fully connected layer is also simple: top: "ip1" } -This defines a fully connected layer (for some legacy reason, Caffe calls it an `innerproduct` layer) with 500 outputs. All other lines look familiar, right? +This defines a fully connected layer (known in Caffe as an `InnerProduct` layer) with 500 outputs. All other lines look familiar, right? ### Writing the ReLU Layer A ReLU Layer is also simple: - layers { + layer { name: "relu1" - type: RELU + type: "ReLU" bottom: "ip1" top: "ip1" } @@ -142,11 +142,11 @@ Since ReLU is an element-wise operation, we can do *in-place* operations to save After the ReLU layer, we will write another innerproduct layer: - layers { + layer { name: "ip2" - type: INNER_PRODUCT - blobs_lr: 1. - blobs_lr: 2. + type: "InnerProduct" + param { lr_mult: 1 } + param { lr_mult: 2 } inner_product_param { num_output: 10 weight_filler { @@ -164,9 +164,9 @@ After the ReLU layer, we will write another innerproduct layer: Finally, we will write the loss! - layers { + layer { name: "loss" - type: SOFTMAX_LOSS + type: "SoftmaxWithLoss" bottom: "ip2" bottom: "label" } @@ -178,7 +178,7 @@ The `softmax_loss` layer implements both the softmax and the multinomial logisti Layer definitions can include rules for whether and when they are included in the network definition, like the one below: - layers { + layer { // ...layer definition... include: { phase: TRAIN } } @@ -190,7 +190,7 @@ In the above example, this layer will be included only in `TRAIN` phase. If we change `TRAIN` with `TEST`, then this layer will be used only in test phase. By default, that is without layer rules, a layer is always included in the network. Thus, `lenet_train_test.prototxt` has two `DATA` layers defined (with different `batch_size`), one for the training phase and one for the testing phase. -Also, there is an `ACCURACY` layer which is included only in `TEST` phase for reporting the model accuracy every 100 iteration, as defined in `lenet_solver.prototxt`. +Also, there is an `Accuracy` layer which is included only in `TEST` phase for reporting the model accuracy every 100 iteration, as defined in `lenet_solver.prototxt`. ## Define the MNIST Solver diff --git a/examples/siamese/readme.md b/examples/siamese/readme.md index ce98ec10819..83db8c94395 100644 --- a/examples/siamese/readme.md +++ b/examples/siamese/readme.md @@ -39,13 +39,19 @@ exactly the same as the [LeNet model](mnist.html), the only difference is that we have replaced the top layers that produced probabilities over the 10 digit classes with a linear "feature" layer that produces a 2 dimensional vector. - layers { + layer { name: "feat" - type: INNER_PRODUCT + type: "InnerProduct" bottom: "ip2" top: "feat" - blobs_lr: 1 - blobs_lr: 2 + param { + name: "feat_w" + lr_mult: 1 + } + param { + name: "feat_b" + lr_mult: 2 + } inner_product_param { num_output: 2 } @@ -64,17 +70,19 @@ earlier. 
Each entry in this database contains the image data for a pair of images (`pair_data`) and a binary label saying if they belong to the same class or different classes (`sim`). - layers { + layer { name: "pair_data" - type: DATA + type: "Data" top: "pair_data" top: "sim" - data_param { - source: "examples/siamese/mnist-siamese-train-leveldb" + include { phase: TRAIN } + transform_param { scale: 0.00390625 + } + data_param { + source: "examples/siamese/mnist_siamese_train_leveldb" batch_size: 64 } - include: { phase: TRAIN } } In order to pack a pair of images into the same blob in the database we pack one @@ -83,16 +91,16 @@ so we add a slice layer after the data layer. This takes the `pair_data` and slices it along the channel dimension so that we have a single image in `data` and its paired image in `data_p.` - layers { - name: "slice_pair" - type: SLICE - bottom: "pair_data" - top: "data" - top: "data_p" - slice_param { - slice_dim: 1 - slice_point: 1 - } + layer { + name: "slice_pair" + type: "Slice" + bottom: "pair_data" + top: "data" + top: "data_p" + slice_param { + slice_dim: 1 + slice_point: 1 + } } ### Building the First Side of the Siamese Net @@ -105,17 +113,17 @@ parameters allows Caffe to share the parameters between layers on both sides of the siamese net. In the definition this looks like: ... - param: "conv1_w" - param: "conv1_b" + param { name: "conv1_w" ... } + param { name: "conv1_b" ... } ... - param: "conv2_w" - param: "conv2_b" + param { name: "conv2_w" ... } + param { name: "conv2_b" ... } ... - param: "ip1_w" - param: "ip1_b" + param { name: "ip1_w" ... } + param { name: "ip1_b" ... } ... - param: "ip2_w" - param: "ip2_b" + param { name: "ip2_w" ... } + param { name: "ip2_b" ... } ... ### Building the Second Side of the Siamese Net @@ -133,9 +141,9 @@ an Invariant Mapping". This loss function encourages matching pairs to be close together in feature space while pushing non-matching pairs apart. This cost function is implemented with the `CONTRASTIVE_LOSS` layer: - layers { + layer { name: "loss" - type: CONTRASTIVE_LOSS + type: "ContrastiveLoss" contrastive_loss_param { margin: 1.0 } diff --git a/matlab/caffe/hdf5creation/demo.m b/matlab/caffe/hdf5creation/demo.m index f554b87e5f6..4f9f7b5a454 100644 --- a/matlab/caffe/hdf5creation/demo.m +++ b/matlab/caffe/hdf5creation/demo.m @@ -52,9 +52,9 @@ fprintf('HDF5 filename listed in %s \n', 'list.txt'); % NOTE: In net definition prototxt, use list.txt as input to HDF5_DATA as: -% layers { +% layer { % name: "data" -% type: HDF5_DATA +% type: "HDF5Data" % top: "data" % top: "labelvec" % hdf5_data_param {