
Commit

Merge pull request BVLC#1654 from longjon/softmax-missing-values
Add missing value support to SoftmaxLossLayer
longjon committed Jan 3, 2015
2 parents 3f2f7fa + c7f63da commit 0ed9883
Showing 3 changed files with 55 additions and 10 deletions.
7 changes: 7 additions & 0 deletions include/caffe/loss_layers.hpp
@@ -758,6 +758,13 @@ class SoftmaxWithLossLayer : public LossLayer<Dtype> {
vector<Blob<Dtype>*> softmax_bottom_vec_;
/// top vector holder used in call to the underlying SoftmaxLayer::Forward
vector<Blob<Dtype>*> softmax_top_vec_;
/// Whether to ignore instances with a certain label.
bool has_ignore_label_;
/// The label indicating that an instance should be ignored.
int ignore_label_;
/// Whether to normalize the loss by the total number of values present
/// (otherwise just by the batch size).
bool normalize_;
};

} // namespace caffe
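As a concrete reading of the normalize_ comment above: with a hypothetical batch of num = 2 images, a 3x4 score map (spatial_dim = 12), and 5 ignored positions, normalizing divides the summed loss by the 19 counted values, while turning it off divides by the batch size of 2; the pre-patch code always divided by num * spatial_dim = 24. A one-line sketch of that choice (not part of the patch; the helper name is made up):

// Divisor implied by the comments above: the number of non-ignored values
// when normalizing, otherwise just the batch size.
inline int LossDivisor(bool normalize, int num, int valid_count) {
  return normalize ? valid_count : num;
}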
44 changes: 35 additions & 9 deletions src/caffe/layers/softmax_loss_layer.cpp
@@ -17,6 +17,13 @@ void SoftmaxWithLossLayer<Dtype>::LayerSetUp(
softmax_top_vec_.clear();
softmax_top_vec_.push_back(&prob_);
softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_);

has_ignore_label_ =
this->layer_param_.loss_param().has_ignore_label();
if (has_ignore_label_) {
ignore_label_ = this->layer_param_.loss_param().ignore_label();
}
normalize_ = this->layer_param_.loss_param().normalize();
}

template <typename Dtype>
@@ -40,27 +47,34 @@ void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
int num = prob_.num();
int dim = prob_.count() / num;
int spatial_dim = prob_.height() * prob_.width();
int count = 0;
Dtype loss = 0;
for (int i = 0; i < num; ++i) {
for (int j = 0; j < spatial_dim; j++) {
const int label_value = static_cast<int>(label[i * spatial_dim + j]);
if (has_ignore_label_ && label_value == ignore_label_) {
continue;
}
DCHECK_GE(label_value, 0);
DCHECK_GT(dim, label_value * spatial_dim);
loss -= log(std::max(prob_data[i * dim +
label_value * spatial_dim + j],
DCHECK_LT(label_value, prob_.channels());
loss -= log(std::max(prob_data[i * dim + label_value * spatial_dim + j],
Dtype(FLT_MIN)));
++count;
}
}
top[0]->mutable_cpu_data()[0] = loss / num / spatial_dim;
if (normalize_) {
top[0]->mutable_cpu_data()[0] = loss / count;
} else {
top[0]->mutable_cpu_data()[0] = loss / num;
}
if (top.size() == 2) {
top[1]->ShareData(prob_);
}
}

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
if (propagate_down[1]) {
LOG(FATAL) << this->type_name()
<< " Layer cannot backpropagate to label inputs.";
@@ -73,15 +87,27 @@ void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
int num = prob_.num();
int dim = prob_.count() / num;
int spatial_dim = prob_.height() * prob_.width();
int count = 0;
for (int i = 0; i < num; ++i) {
for (int j = 0; j < spatial_dim; ++j) {
bottom_diff[i * dim + static_cast<int>(label[i * spatial_dim + j])
* spatial_dim + j] -= 1;
const int label_value = static_cast<int>(label[i * spatial_dim + j]);
if (has_ignore_label_ && label_value == ignore_label_) {
for (int c = 0; c < bottom[0]->channels(); ++c) {
bottom_diff[i * dim + c * spatial_dim + j] = 0;
}
} else {
bottom_diff[i * dim + label_value * spatial_dim + j] -= 1;
++count;
}
}
}
// Scale gradient
const Dtype loss_weight = top[0]->cpu_diff()[0];
caffe_scal(prob_.count(), loss_weight / num / spatial_dim, bottom_diff);
if (normalize_) {
caffe_scal(prob_.count(), loss_weight / count, bottom_diff);
} else {
caffe_scal(prob_.count(), loss_weight / num, bottom_diff);
}
}
}

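Taken together, the two hunks above make positions whose label equals ignore_label contribute neither loss nor gradient, and divide the result either by the number of counted values (normalize_ true) or by the batch size. The following standalone sketch reproduces that behaviour on a toy example; it illustrates the semantics but is not Caffe code, and the blob shape, probabilities, labels, and ignore value are all hypothetical.

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const int num = 1, channels = 3, spatial_dim = 2;  // one image, 3 classes, 2 positions
  const int dim = channels * spatial_dim;
  const int ignore_label = 255;
  const bool normalize = true;
  // Softmax probabilities, laid out as prob[i * dim + c * spatial_dim + j].
  std::vector<double> prob = {0.7, 0.1,   // class 0 at positions 0 and 1
                              0.2, 0.1,   // class 1
                              0.1, 0.8};  // class 2
  std::vector<int> label = {0, 255};      // the second position is ignored

  double loss = 0;
  int count = 0;
  std::vector<double> diff = prob;        // softmax gradient starts as the probabilities
  for (int i = 0; i < num; ++i) {
    for (int j = 0; j < spatial_dim; ++j) {
      const int label_value = label[i * spatial_dim + j];
      if (label_value == ignore_label) {
        for (int c = 0; c < channels; ++c) {
          diff[i * dim + c * spatial_dim + j] = 0;  // no gradient at ignored positions
        }
        continue;                                   // and no loss contribution either
      }
      loss -= std::log(std::max(prob[i * dim + label_value * spatial_dim + j],
                                static_cast<double>(FLT_MIN)));
      diff[i * dim + label_value * spatial_dim + j] -= 1;  // p - 1 at the true class
      ++count;
    }
  }
  const int divisor = normalize ? count : num;
  std::printf("loss = %f (divided by %d)\n", loss / divisor, divisor);
  for (double d : diff) {
    std::printf("%.2f ", d / divisor);  // scaled like the caffe_scal call, with loss_weight taken as 1
  }
  std::printf("\n");
  return 0;
}

With these values the program reports a loss of -log(0.7) over the single counted position, a per-class gradient of (-0.30, 0.20, 0.10) at that position, and zeros at the ignored one.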
14 changes: 13 additions & 1 deletion src/caffe/proto/caffe.proto
@@ -206,7 +206,7 @@ message NetStateRule {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available ID: 42 (last added: exp_param)
// LayerParameter next available ID: 43 (last added: loss_param)
message LayerParameter {
repeated string bottom = 2; // the name of the bottom blobs
repeated string top = 3; // the name of the top blobs
@@ -333,6 +333,9 @@ message LayerParameter {
// Parameters for data pre-processing.
optional TransformationParameter transform_param = 36;

// Parameters shared by loss layers.
optional LossParameter loss_param = 42;

// Note: certain layers may have more than one computational engine
// for their implementation. These layers include an Engine type and
// engine parameter for selecting the implementation.
@@ -363,6 +366,15 @@ message TransformationParameter {
repeated float mean_value = 5;
}

// Message that stores parameters shared by loss layers
message LossParameter {
// If specified, ignore instances with the given label.
optional int32 ignore_label = 1;
// If true, normalize each batch across all instances (including spatial
// dimensions, but not ignored instances); else, divide by batch size only.
optional bool normalize = 2 [default = true];
}

// Message that stores parameters used by AccuracyLayer
message AccuracyParameter {
// When computing accuracy, count as correct by comparing the true label to
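The new LossParameter is wired into LayerParameter as field 42 above, so the settings can be given directly in a net definition. Below is a minimal sketch of that usage, parsing a layer specification with the standard protobuf text-format API; the layer name, type, bottoms, and the ignore value of 255 are assumptions for illustration, and only the loss_param field names come from this diff.

#include <google/protobuf/text_format.h>
#include <iostream>
#include <string>
#include "caffe/proto/caffe.pb.h"  // generated from caffe.proto

int main() {
  // A layer spec in prototxt (protobuf text) form; everything except the
  // loss_param field names is hypothetical.
  const std::string spec =
      "name: 'loss' "
      "type: SOFTMAX_LOSS "
      "bottom: 'score' bottom: 'label' "
      "loss_param { ignore_label: 255 normalize: false }";
  caffe::LayerParameter layer_param;
  if (!google::protobuf::TextFormat::ParseFromString(spec, &layer_param)) {
    std::cerr << "failed to parse layer spec" << std::endl;
    return 1;
  }
  std::cout << "ignore_label = " << layer_param.loss_param().ignore_label()
            << ", normalize = " << std::boolalpha
            << layer_param.loss_param().normalize() << std::endl;
  return 0;
}

With no loss_param block at all, normalize() still reads back as true via its proto default, while has_ignore_label() stays false.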

1 comment on commit 0ed9883


@bittnt commented on 0ed9883, Jan 23, 2015


@longjon Hi, I think it would be better to give has_ignore_label_ and normalize_ default values. I have tested this, and it crashes if ignore_label is not set.
