From 5843b52558491b636bebcc5cf4ba706b074a717b Mon Sep 17 00:00:00 2001
From: Jonathan L Long
Date: Mon, 29 Dec 2014 17:05:47 -0800
Subject: [PATCH 1/3] add missing value support to SoftmaxLossLayer

---
 include/caffe/loss_layers.hpp           |  4 ++++
 src/caffe/layers/softmax_loss_layer.cpp | 28 ++++++++++++++++++++-----
 src/caffe/proto/caffe.proto             | 11 +++++++++-
 3 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp
index 13b108afd2d..3e4414ae6c1 100644
--- a/include/caffe/loss_layers.hpp
+++ b/include/caffe/loss_layers.hpp
@@ -758,6 +758,10 @@ class SoftmaxWithLossLayer : public LossLayer<Dtype> {
   vector<Blob<Dtype>*> softmax_bottom_vec_;
   /// top vector holder used in call to the underlying SoftmaxLayer::Forward
   vector<Blob<Dtype>*> softmax_top_vec_;
+  /// Whether to ignore instances with a certain label.
+  bool has_ignore_label_;
+  /// The label indicating that an instance should be ignored.
+  int ignore_label_;
 };
 
 }  // namespace caffe
diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp
index db8dd8bb626..bc511cf603b 100644
--- a/src/caffe/layers/softmax_loss_layer.cpp
+++ b/src/caffe/layers/softmax_loss_layer.cpp
@@ -17,6 +17,12 @@ void SoftmaxWithLossLayer<Dtype>::LayerSetUp(
   softmax_top_vec_.clear();
   softmax_top_vec_.push_back(&prob_);
   softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_);
+
+  has_ignore_label_ =
+    this->layer_param_.loss_param().has_ignore_label();
+  if (has_ignore_label_) {
+    ignore_label_ = this->layer_param_.loss_param().ignore_label();
+  }
 }
 
 template <typename Dtype>
@@ -40,15 +46,19 @@ void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
   int num = prob_.num();
   int dim = prob_.count() / num;
   int spatial_dim = prob_.height() * prob_.width();
+  int count = 0;
   Dtype loss = 0;
   for (int i = 0; i < num; ++i) {
     for (int j = 0; j < spatial_dim; j++) {
       const int label_value = static_cast<int>(label[i * spatial_dim + j]);
+      if (has_ignore_label_ && label_value == ignore_label_) {
+        continue;
+      }
       DCHECK_GE(label_value, 0);
-      DCHECK_GT(dim, label_value * spatial_dim);
-      loss -= log(std::max(prob_data[i * dim +
-          label_value * spatial_dim + j],
+      DCHECK_LT(label_value, prob_.channels());
+      loss -= log(std::max(prob_data[i * dim + label_value * spatial_dim + j],
                            Dtype(FLT_MIN)));
+      ++count;
     }
   }
   top[0]->mutable_cpu_data()[0] = loss / num / spatial_dim;
@@ -73,10 +83,18 @@ void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
     int num = prob_.num();
     int dim = prob_.count() / num;
     int spatial_dim = prob_.height() * prob_.width();
+    int count = 0;
     for (int i = 0; i < num; ++i) {
       for (int j = 0; j < spatial_dim; ++j) {
-        bottom_diff[i * dim + static_cast<int>(label[i * spatial_dim + j])
-            * spatial_dim + j] -= 1;
+        const int label_value = static_cast<int>(label[i * spatial_dim + j]);
+        if (has_ignore_label_ && label_value == ignore_label_) {
+          for (int c = 0; c < bottom[0]->channels(); ++c) {
+            bottom_diff[i * dim + c * spatial_dim + j] = 0;
+          }
+        } else {
+          bottom_diff[i * dim + label_value * spatial_dim + j] -= 1;
+          ++count;
+        }
       }
     }
     // Scale gradient
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 8086ad66579..cfd5cca2518 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -206,7 +206,7 @@ message NetStateRule {
 // NOTE
 // Update the next available ID when you add a new LayerParameter field.
 //
-// LayerParameter next available ID: 42 (last added: exp_param)
+// LayerParameter next available ID: 43 (last added: loss_param)
 message LayerParameter {
   repeated string bottom = 2; // the name of the bottom blobs
   repeated string top = 3; // the name of the top blobs
@@ -331,6 +331,9 @@ message LayerParameter {
   // Parameters for data pre-processing.
   optional TransformationParameter transform_param = 36;
 
+  // Parameters shared by loss layers.
+  optional LossParameter loss_param = 42;
+
   // Note: certain layers may have more than one computational engine
   // for their implementation. These layers include an Engine type and
   // engine parameter for selecting the implementation.
@@ -361,6 +364,12 @@ message TransformationParameter {
   repeated float mean_value = 5;
 }
 
+// Message that stores parameters shared by loss layers
+message LossParameter {
+  // If specified, ignore instances with the given label.
+  optional int32 ignore_label = 1;
+}
+
 // Message that stores parameters used by AccuracyLayer
 message AccuracyParameter {
   // When computing accuracy, count as correct by comparing the true label to
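With this patch, instances carrying a designated label are skipped by both the loss sum and the gradient. A minimal sketch of how the new field might be used in a net definition (the layer and blob names and the ignore value 255 are illustrative only, not part of the patch, and the exact layer-type syntax depends on the Caffe revision in use):

    layers {
      name: "loss"          # hypothetical layer name
      type: SOFTMAX_LOSS    # SoftmaxWithLossLayer
      bottom: "score"       # hypothetical prediction blob
      bottom: "label"       # hypothetical label blob
      top: "loss"
      loss_param {
        ignore_label: 255   # e.g. an unlabeled/"void" class in segmentation data
      }
    }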
From 34321e4ecf26494ded5a05221010ccc9ea99c144 Mon Sep 17 00:00:00 2001
From: Jonathan L Long
Date: Mon, 29 Dec 2014 17:08:21 -0800
Subject: [PATCH 2/3] add spatial normalization option to SoftmaxLossLayer

With missing values (and batches of varying spatial dimension), normalizing
each batch across instances can inappropriately give different instances
different weights, so we give the option of simply normalizing by the batch
size instead.
---
 include/caffe/loss_layers.hpp           |  3 +++
 src/caffe/layers/softmax_loss_layer.cpp | 13 +++++++++++--
 src/caffe/proto/caffe.proto             |  3 +++
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp
index 3e4414ae6c1..321f7068273 100644
--- a/include/caffe/loss_layers.hpp
+++ b/include/caffe/loss_layers.hpp
@@ -762,6 +762,9 @@ class SoftmaxWithLossLayer : public LossLayer<Dtype> {
   bool has_ignore_label_;
   /// The label indicating that an instance should be ignored.
   int ignore_label_;
+  /// Whether to normalize the loss by the total number of values present
+  /// (otherwise just by the batch size).
+  bool normalize_;
 };
 
 }  // namespace caffe
diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp
index bc511cf603b..74893af7820 100644
--- a/src/caffe/layers/softmax_loss_layer.cpp
+++ b/src/caffe/layers/softmax_loss_layer.cpp
@@ -23,6 +23,7 @@ void SoftmaxWithLossLayer<Dtype>::LayerSetUp(
   if (has_ignore_label_) {
     ignore_label_ = this->layer_param_.loss_param().ignore_label();
   }
+  normalize_ = this->layer_param_.loss_param().normalize();
 }
 
 template <typename Dtype>
@@ -61,7 +62,11 @@ void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
       ++count;
     }
   }
-  top[0]->mutable_cpu_data()[0] = loss / num / spatial_dim;
+  if (normalize_) {
+    top[0]->mutable_cpu_data()[0] = loss / count;
+  } else {
+    top[0]->mutable_cpu_data()[0] = loss / num;
+  }
   if (top.size() == 2) {
     top[1]->ShareData(prob_);
   }
@@ -99,7 +104,11 @@ void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
     }
     // Scale gradient
     const Dtype loss_weight = top[0]->cpu_diff()[0];
-    caffe_scal(prob_.count(), loss_weight / num / spatial_dim, bottom_diff);
+    if (normalize_) {
+      caffe_scal(prob_.count(), loss_weight / count, bottom_diff);
+    } else {
+      caffe_scal(prob_.count(), loss_weight / num, bottom_diff);
+    }
   }
 }
 
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index cfd5cca2518..2bfd7b36fa3 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -368,6 +368,9 @@ message TransformationParameter {
 message LossParameter {
   // If specified, ignore instances with the given label.
   optional int32 ignore_label = 1;
+  // If true, normalize each batch across all instances (including spatial
+  // dimensions, but not ignored instances); else, divide by batch size only.
+  optional bool normalize = 2 [default = true];
 }
 
 // Message that stores parameters used by AccuracyLayer
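Taken together with the previous patch, LossParameter now controls both which instances contribute and how the summed loss is scaled: with the default normalize: true, the loss and gradient are divided by the number of non-ignored instances, while normalize: false divides by the batch size (num) only, so each instance keeps the same weight no matter how many instances in its batch are ignored. For example, with num = 2 and 19 of 24 spatial positions carrying valid labels, the sum is divided by 19 in the first case and by 2 in the second. A sketch of a layer definition exercising both fields (layer/blob names and the ignore value are again illustrative only):

    layers {
      name: "loss"
      type: SOFTMAX_LOSS
      bottom: "score"
      bottom: "label"
      top: "loss"
      loss_param {
        ignore_label: 255   # hypothetical "void" label
        normalize: false    # divide the summed loss by batch size (num) only
      }
    }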
From c7f63da671d7dc6345d645b89d3641bcd8b7cc27 Mon Sep 17 00:00:00 2001
From: Jonathan L Long
Date: Tue, 30 Dec 2014 12:31:53 -0800
Subject: [PATCH 3/3] clean up formatting in SoftmaxLossLayer

---
 src/caffe/layers/softmax_loss_layer.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp
index 74893af7820..55eb0918ad2 100644
--- a/src/caffe/layers/softmax_loss_layer.cpp
+++ b/src/caffe/layers/softmax_loss_layer.cpp
@@ -74,8 +74,7 @@ void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
 
 template <typename Dtype>
 void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
-    const vector<bool>& propagate_down,
-    const vector<Blob<Dtype>*>& bottom) {
+    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
   if (propagate_down[1]) {
     LOG(FATAL) << this->type_name()
                << " Layer cannot backpropagate to label inputs.";