From 5843b52558491b636bebcc5cf4ba706b074a717b Mon Sep 17 00:00:00 2001
From: Jonathan L Long
Date: Mon, 29 Dec 2014 17:05:47 -0800
Subject: [PATCH 1/3] add missing value support to SoftmaxLossLayer

---
 include/caffe/loss_layers.hpp           |  4 ++++
 src/caffe/layers/softmax_loss_layer.cpp | 28 ++++++++++++++++++++-----
 src/caffe/proto/caffe.proto             | 11 +++++++++-
 3 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp
index 13b108afd2d..3e4414ae6c1 100644
--- a/include/caffe/loss_layers.hpp
+++ b/include/caffe/loss_layers.hpp
@@ -758,6 +758,10 @@ class SoftmaxWithLossLayer : public LossLayer<Dtype> {
   vector<Blob<Dtype>*> softmax_bottom_vec_;
   /// top vector holder used in call to the underlying SoftmaxLayer::Forward
   vector<Blob<Dtype>*> softmax_top_vec_;
+  /// Whether to ignore instances with a certain label.
+  bool has_ignore_label_;
+  /// The label indicating that an instance should be ignored.
+  int ignore_label_;
 };
 
 }  // namespace caffe
diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp
index db8dd8bb626..bc511cf603b 100644
--- a/src/caffe/layers/softmax_loss_layer.cpp
+++ b/src/caffe/layers/softmax_loss_layer.cpp
@@ -17,6 +17,12 @@ void SoftmaxWithLossLayer<Dtype>::LayerSetUp(
   softmax_top_vec_.clear();
   softmax_top_vec_.push_back(&prob_);
   softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_);
+
+  has_ignore_label_ =
+    this->layer_param_.loss_param().has_ignore_label();
+  if (has_ignore_label_) {
+    ignore_label_ = this->layer_param_.loss_param().ignore_label();
+  }
 }
 
 template <typename Dtype>
@@ -40,15 +46,19 @@ void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
   int num = prob_.num();
   int dim = prob_.count() / num;
   int spatial_dim = prob_.height() * prob_.width();
+  int count = 0;
   Dtype loss = 0;
   for (int i = 0; i < num; ++i) {
     for (int j = 0; j < spatial_dim; j++) {
       const int label_value = static_cast<int>(label[i * spatial_dim + j]);
+      if (has_ignore_label_ && label_value == ignore_label_) {
+        continue;
+      }
       DCHECK_GE(label_value, 0);
-      DCHECK_GT(dim, label_value * spatial_dim);
-      loss -= log(std::max(prob_data[i * dim +
-          label_value * spatial_dim + j],
+      DCHECK_LT(label_value, prob_.channels());
+      loss -= log(std::max(prob_data[i * dim + label_value * spatial_dim + j],
                            Dtype(FLT_MIN)));
+      ++count;
     }
   }
   top[0]->mutable_cpu_data()[0] = loss / num / spatial_dim;
@@ -73,10 +83,18 @@ void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
     int num = prob_.num();
     int dim = prob_.count() / num;
     int spatial_dim = prob_.height() * prob_.width();
+    int count = 0;
     for (int i = 0; i < num; ++i) {
       for (int j = 0; j < spatial_dim; ++j) {
-        bottom_diff[i * dim + static_cast<int>(label[i * spatial_dim + j])
-            * spatial_dim + j] -= 1;
+        const int label_value = static_cast<int>(label[i * spatial_dim + j]);
+        if (has_ignore_label_ && label_value == ignore_label_) {
+          for (int c = 0; c < bottom[0]->channels(); ++c) {
+            bottom_diff[i * dim + c * spatial_dim + j] = 0;
+          }
+        } else {
+          bottom_diff[i * dim + label_value * spatial_dim + j] -= 1;
+          ++count;
+        }
       }
     }
     // Scale gradient
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 8086ad66579..cfd5cca2518 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -206,7 +206,7 @@ message NetStateRule {
 // NOTE
 // Update the next available ID when you add a new LayerParameter field.
 //
-// LayerParameter next available ID: 42 (last added: exp_param)
+// LayerParameter next available ID: 43 (last added: loss_param)
 message LayerParameter {
   repeated string bottom = 2; // the name of the bottom blobs
   repeated string top = 3; // the name of the top blobs
@@ -331,6 +331,9 @@ message LayerParameter {
   // Parameters for data pre-processing.
   optional TransformationParameter transform_param = 36;
 
+  // Parameters shared by loss layers.
+  optional LossParameter loss_param = 42;
+
   // Note: certain layers may have more than one computational engine
   // for their implementation. These layers include an Engine type and
   // engine parameter for selecting the implementation.
@@ -361,6 +364,12 @@ message TransformationParameter {
   repeated float mean_value = 5;
 }
 
+// Message that stores parameters shared by loss layers
+message LossParameter {
+  // If specified, ignore instances with the given label.
+  optional int32 ignore_label = 1;
+}
+
 // Message that stores parameters used by AccuracyLayer
 message AccuracyParameter {
   // When computing accuracy, count as correct by comparing the true label to
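With this patch, instances carrying a designated label are skipped by both the loss sum and the gradient. A minimal sketch of how the new field might be used in a net definition (the layer and blob names and the ignore value 255 are illustrative only, not part of the patch, and the exact layer-type syntax depends on the Caffe revision in use):

    layers {
      name: "loss"          # hypothetical layer name
      type: SOFTMAX_LOSS    # SoftmaxWithLossLayer
      bottom: "score"       # hypothetical prediction blob
      bottom: "label"       # hypothetical label blob
      top: "loss"
      loss_param {
        ignore_label: 255   # e.g. an unlabeled/"void" class in segmentation data
      }
    }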
From 34321e4ecf26494ded5a05221010ccc9ea99c144 Mon Sep 17 00:00:00 2001
From: Jonathan L Long
Date: Mon, 29 Dec 2014 17:08:21 -0800
Subject: [PATCH 2/3] add spatial normalization option to SoftmaxLossLayer

With missing values (and batches of varying spatial dimension), normalizing
each batch across instances can inappropriately give different instances
different weights, so we give the option of simply normalizing by the batch
size instead.
---
 include/caffe/loss_layers.hpp           |  3 +++
 src/caffe/layers/softmax_loss_layer.cpp | 13 +++++++++++--
 src/caffe/proto/caffe.proto             |  3 +++
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp
index 3e4414ae6c1..321f7068273 100644
--- a/include/caffe/loss_layers.hpp
+++ b/include/caffe/loss_layers.hpp
@@ -762,6 +762,9 @@ class SoftmaxWithLossLayer : public LossLayer<Dtype> {
   bool has_ignore_label_;
   /// The label indicating that an instance should be ignored.
   int ignore_label_;
+  /// Whether to normalize the loss by the total number of values present
+  /// (otherwise just by the batch size).
+  bool normalize_;
 };
 
 }  // namespace caffe
diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp
index bc511cf603b..74893af7820 100644
--- a/src/caffe/layers/softmax_loss_layer.cpp
+++ b/src/caffe/layers/softmax_loss_layer.cpp
@@ -23,6 +23,7 @@ void SoftmaxWithLossLayer<Dtype>::LayerSetUp(
   if (has_ignore_label_) {
     ignore_label_ = this->layer_param_.loss_param().ignore_label();
   }
+  normalize_ = this->layer_param_.loss_param().normalize();
 }
 
 template <typename Dtype>
@@ -61,7 +62,11 @@ void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
       ++count;
     }
   }
-  top[0]->mutable_cpu_data()[0] = loss / num / spatial_dim;
+  if (normalize_) {
+    top[0]->mutable_cpu_data()[0] = loss / count;
+  } else {
+    top[0]->mutable_cpu_data()[0] = loss / num;
+  }
   if (top.size() == 2) {
     top[1]->ShareData(prob_);
   }
@@ -99,7 +104,11 @@ void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
     }
     // Scale gradient
     const Dtype loss_weight = top[0]->cpu_diff()[0];
-    caffe_scal(prob_.count(), loss_weight / num / spatial_dim, bottom_diff);
+    if (normalize_) {
+      caffe_scal(prob_.count(), loss_weight / count, bottom_diff);
+    } else {
+      caffe_scal(prob_.count(), loss_weight / num, bottom_diff);
+    }
   }
 }
 
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index cfd5cca2518..2bfd7b36fa3 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -368,6 +368,9 @@ message TransformationParameter {
 message LossParameter {
   // If specified, ignore instances with the given label.
   optional int32 ignore_label = 1;
+  // If true, normalize each batch across all instances (including spatial
+  // dimensions, but not ignored instances); else, divide by batch size only.
+  optional bool normalize = 2 [default = true];
 }
 
 // Message that stores parameters used by AccuracyLayer
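Taken together with the previous patch, LossParameter now controls both which instances contribute and how the summed loss is scaled: with the default normalize: true, the loss and gradient are divided by the number of non-ignored instances, while normalize: false divides by the batch size (num) only, so each instance keeps the same weight no matter how many instances in its batch are ignored. For example, with num = 2 and 19 of 24 spatial positions carrying valid labels, the sum is divided by 19 in the first case and by 2 in the second. A sketch of a layer definition exercising both fields (layer/blob names and the ignore value are again illustrative only):

    layers {
      name: "loss"
      type: SOFTMAX_LOSS
      bottom: "score"
      bottom: "label"
      top: "loss"
      loss_param {
        ignore_label: 255   # hypothetical "void" label
        normalize: false    # divide the summed loss by batch size (num) only
      }
    }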
From c7f63da671d7dc6345d645b89d3641bcd8b7cc27 Mon Sep 17 00:00:00 2001
From: Jonathan L Long
Date: Tue, 30 Dec 2014 12:31:53 -0800
Subject: [PATCH 3/3] clean up formatting in SoftmaxLossLayer

---
 src/caffe/layers/softmax_loss_layer.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp
index 74893af7820..55eb0918ad2 100644
--- a/src/caffe/layers/softmax_loss_layer.cpp
+++ b/src/caffe/layers/softmax_loss_layer.cpp
@@ -74,8 +74,7 @@ void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
 
 template <typename Dtype>
 void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
-    const vector<bool>& propagate_down,
-    const vector<Blob<Dtype>*>& bottom) {
+    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
   if (propagate_down[1]) {
     LOG(FATAL) << this->type_name()
                << " Layer cannot backpropagate to label inputs.";