Added batchnorm layer. #1867

Closed · wants to merge 1 commit
31 changes: 31 additions & 0 deletions include/caffe/neuron_layers.hpp
@@ -605,6 +605,37 @@ class ThresholdLayer : public NeuronLayer<Dtype> {
  Dtype threshold_;
};

template <typename Dtype>
class BatchnormLayer : public NeuronLayer<Dtype> {
 public:
  explicit BatchnormLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top);
  virtual inline LayerParameter_LayerType type() const {
    return LayerParameter_LayerType_BATCHNORM;
  }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top);
  // GPU implementations are not provided yet; with these declarations
  // commented out, the Layer base class falls back to the CPU path.
  // virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
  //     vector<Blob<Dtype>*>* top);

  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
  // virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
  //     const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);

  int num_;
  int bottom_size_;
  Dtype var_epsilon_;
  Blob<Dtype> batch_mean_;
  Blob<Dtype> buffer_blob_;
  Blob<Dtype> batch_variance_;
};

} // namespace caffe

#endif // CAFFE_NEURON_LAYERS_HPP_
2 changes: 2 additions & 0 deletions src/caffe/layer_factory.cpp
@@ -185,6 +185,8 @@ Layer<Dtype>* GetLayer(const LayerParameter& param) {
    return new AbsValLayer<Dtype>(param);
  case LayerParameter_LayerType_ARGMAX:
    return new ArgMaxLayer<Dtype>(param);
  case LayerParameter_LayerType_BATCHNORM:
    return new BatchnormLayer<Dtype>(param);
  case LayerParameter_LayerType_BNLL:
    return new BNLLLayer<Dtype>(param);
  case LayerParameter_LayerType_CONCAT:
159 changes: 159 additions & 0 deletions src/caffe/layers/batchnorm_layer.cpp
@@ -0,0 +1,159 @@
#include <algorithm>
#include <cmath>
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

template <typename Dtype>
void BatchnormLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  num_ = bottom[0]->num();
  bottom_size_ = bottom[0]->count() / bottom[0]->num();

  // Initialize gamma (blobs_[0]) to 1 and beta (blobs_[1]) to 0.
  this->blobs_.resize(2);
  for (int i = 0; i < 2; ++i) {
    this->blobs_[i].reset(new Blob<Dtype>(1, bottom_size_, 1, 1));
    caffe_set(this->blobs_[i]->count(), i == 0 ? Dtype(1) : Dtype(0),
        this->blobs_[i]->mutable_cpu_data());
  }

  batch_mean_.Reshape(1, bottom_size_, 1, 1);
  buffer_blob_.Reshape(1, bottom_size_, 1, 1);
  batch_variance_.Reshape(1, bottom_size_, 1, 1);
  var_epsilon_ = Dtype(0.1);

  // Propagate gradients to both parameter blobs during the backward pass.
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}
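A note on shapes, restating the setup above: the parameters and the batch statistics are all 1 x bottom_size_ blobs, where

bottom_size_ = count / num = channels * height * width,

so every element of an example is normalized independently across the batch dimension only; there is no averaging over spatial locations.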

template <typename Dtype>
void BatchnormLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* gamma_data = this->blobs_[0]->cpu_data();
  const Dtype* beta_data = this->blobs_[1]->cpu_data();
  Dtype* top_data = (*top)[0]->mutable_cpu_data();

  Dtype* mean_data = batch_mean_.mutable_cpu_data();
  Dtype* variance_data = batch_variance_.mutable_cpu_data();
  Dtype* buffer = buffer_blob_.mutable_cpu_data();

  caffe_set(bottom_size_, Dtype(0), mean_data);
  caffe_set(bottom_size_, Dtype(0), variance_data);

  // Accumulate per-feature sums of x and x^2 over the batch.
  for (int n = 0; n < num_; ++n) {
    caffe_add(bottom_size_, bottom_data + bottom[0]->offset(n), mean_data,
        mean_data);
    caffe_sqr(bottom_size_, bottom_data + bottom[0]->offset(n), buffer);
    caffe_add(bottom_size_, buffer, variance_data, variance_data);
  }
  caffe_cpu_scale(bottom_size_, Dtype(1) / Dtype(num_), mean_data, mean_data);
  caffe_cpu_scale(bottom_size_, Dtype(1) / Dtype(num_), variance_data,
      variance_data);

  // var = E[x^2] - (E[x])^2; after this block variance_data holds
  // sqrt(var + epsilon), i.e. the per-feature standard deviation.
  caffe_sqr(bottom_size_, mean_data, buffer);
  caffe_sub(bottom_size_, variance_data, buffer, variance_data);
  caffe_add_scalar(bottom_size_, var_epsilon_, variance_data);
  caffe_powx(bottom_size_, variance_data, Dtype(0.5), variance_data);

  // y = gamma * (x - mean) / std + beta for each example in the batch.
  for (int n = 0; n < num_; ++n) {
    caffe_sub(bottom_size_, bottom_data + bottom[0]->offset(n), mean_data,
        buffer);
    caffe_div(bottom_size_, buffer, variance_data, buffer);
    caffe_mul(bottom_size_, buffer, gamma_data, buffer);
    caffe_add(bottom_size_, buffer, beta_data,
        top_data + (*top)[0]->offset(n));
  }
}
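In equation form, the forward pass above computes, per feature j and example n (a restatement of the code, with the biased variance obtained via the shortcut formula and epsilon folded in before the square root):

\mu_j = \frac{1}{N}\sum_{n=1}^{N} x_{nj}, \qquad
\sigma_j = \sqrt{\frac{1}{N}\sum_{n=1}^{N} x_{nj}^2 - \mu_j^2 + \varepsilon}, \qquad
y_{nj} = \gamma_j \, \frac{x_{nj} - \mu_j}{\sigma_j} + \beta_j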

template <typename Dtype>
void BatchnormLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    vector<Blob<Dtype>*>* bottom) {
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* variance_data = batch_variance_.cpu_data();  // holds std
  const Dtype* gamma_data = this->blobs_[0]->cpu_data();
  const Dtype* beta_data = this->blobs_[1]->cpu_data();

  Dtype* dl_dvar = batch_variance_.mutable_cpu_diff();
  Dtype* dl_dmean = batch_mean_.mutable_cpu_diff();
  Dtype* buffer = buffer_blob_.mutable_cpu_data();

  Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
  Dtype* gamma_diff = this->blobs_[0]->mutable_cpu_diff();
  Dtype* beta_diff = this->blobs_[1]->mutable_cpu_diff();

  caffe_set((*bottom)[0]->count(), Dtype(0), bottom_diff);
  caffe_set(this->blobs_[0]->count(), Dtype(0), gamma_diff);
  caffe_set(this->blobs_[1]->count(), Dtype(0), beta_diff);
  caffe_set(bottom_size_, Dtype(0), dl_dvar);
  caffe_set(bottom_size_, Dtype(0), dl_dmean);

  // Note: x_hat is reconstructed from the output as (y - beta) / gamma,
  // which assumes every gamma stays nonzero.
  for (int n = 0; n < num_; ++n) {
    // fill gamma_diff: dL/dgamma = sum_n dL/dy * x_hat
    caffe_sub(bottom_size_, top_data + top[0]->offset(n), beta_data, buffer);
    caffe_div(bottom_size_, buffer, gamma_data, buffer);
    caffe_mul(bottom_size_, buffer, top_diff + top[0]->offset(n), buffer);
    caffe_add(bottom_size_, buffer, gamma_diff, gamma_diff);

    // fill beta_diff: dL/dbeta = sum_n dL/dy
    caffe_add(bottom_size_, top_diff + top[0]->offset(n), beta_diff,
        beta_diff);
  }

  // fill bottom_diff direct term: dL/dy * gamma / std
  for (int n = 0; n < num_; ++n) {
    caffe_mul(bottom_size_, top_diff + top[0]->offset(n), gamma_data, buffer);
    caffe_div(bottom_size_, buffer, variance_data, buffer);
    caffe_add(bottom_size_, buffer, bottom_diff + (*bottom)[0]->offset(n),
        bottom_diff + (*bottom)[0]->offset(n));
  }

  // fill bottom_diff variance contribution term:
  // dL/dvar = -0.5 * std^-3 * sum_n dL/dy * gamma * (x - mean)
  for (int n = 0; n < num_; ++n) {
    caffe_sub(bottom_size_, top_data + top[0]->offset(n), beta_data, buffer);
    caffe_mul(bottom_size_, buffer, variance_data, buffer);
    caffe_mul(bottom_size_, buffer, top_diff + top[0]->offset(n), buffer);
    caffe_add(bottom_size_, buffer, dl_dvar, dl_dvar);
  }
  caffe_powx(bottom_size_, variance_data, Dtype(-3.0), buffer);
  caffe_mul(bottom_size_, dl_dvar, buffer, dl_dvar);
  caffe_cpu_scale(bottom_size_, Dtype(-0.5), dl_dvar, dl_dvar);
  for (int n = 0; n < num_; ++n) {
    // Each example receives dL/dvar * 2 * (x - mean) / num; the buffer
    // recovers (x - mean) as x_hat * std = (y - beta) / gamma * std.
    caffe_sub(bottom_size_, top_data + top[0]->offset(n), beta_data, buffer);
    caffe_div(bottom_size_, buffer, gamma_data, buffer);
    caffe_mul(bottom_size_, buffer, variance_data, buffer);
    caffe_cpu_scale(bottom_size_, Dtype(2) / Dtype(num_), buffer, buffer);
    caffe_mul(bottom_size_, buffer, dl_dvar, buffer);
    caffe_add(bottom_size_, buffer, bottom_diff + (*bottom)[0]->offset(n),
        bottom_diff + (*bottom)[0]->offset(n));
  }

  // fill bottom_diff mean contribution term:
  // dL/dmean = -sum_n dL/dy * gamma / std, spread evenly over the batch
  for (int n = 0; n < num_; ++n) {
    caffe_mul(bottom_size_, top_diff + top[0]->offset(n), gamma_data, buffer);
    caffe_div(bottom_size_, buffer, variance_data, buffer);
    caffe_sub(bottom_size_, dl_dmean, buffer, dl_dmean);
  }
  caffe_cpu_scale(bottom_size_, Dtype(1) / Dtype(num_), dl_dmean, dl_dmean);
  for (int n = 0; n < num_; ++n) {
    caffe_add(bottom_size_, dl_dmean, bottom_diff + (*bottom)[0]->offset(n),
        bottom_diff + (*bottom)[0]->offset(n));
  }
}
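These loops implement the batch-normalization gradients of Ioffe & Szegedy (2015), with \hat{x}_{nj} = (y_{nj} - \beta_j) / \gamma_j recovered from the top blob instead of being cached. In the notation of the forward pass:

\frac{\partial L}{\partial \gamma_j} = \sum_n \frac{\partial L}{\partial y_{nj}} \hat{x}_{nj}, \qquad
\frac{\partial L}{\partial \beta_j} = \sum_n \frac{\partial L}{\partial y_{nj}},

\frac{\partial L}{\partial \sigma_j^2} = -\frac{1}{2\sigma_j^3} \sum_n \frac{\partial L}{\partial y_{nj}} \, \gamma_j \, (x_{nj} - \mu_j), \qquad
\frac{\partial L}{\partial \mu_j} = -\frac{\gamma_j}{\sigma_j} \sum_n \frac{\partial L}{\partial y_{nj}},

\frac{\partial L}{\partial x_{nj}} = \frac{\gamma_j}{\sigma_j} \frac{\partial L}{\partial y_{nj}}
  + \frac{\partial L}{\partial \sigma_j^2} \frac{2 (x_{nj} - \mu_j)}{N}
  + \frac{1}{N} \frac{\partial L}{\partial \mu_j}.

The \partial L / \partial \sigma_j^2 contribution to \partial L / \partial \mu_j vanishes because \sum_n (x_{nj} - \mu_j) = 0, which is why it does not appear in the code.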

// Note: no STUB_GPU here. Because the Forward_gpu / Backward_gpu
// declarations are commented out in the header, STUB_GPU would define
// undeclared members and break CPU_ONLY builds; the Layer base class
// already falls back to the CPU implementations.

INSTANTIATE_CLASS(BatchnormLayer);


} // namespace caffe
1 change: 1 addition & 0 deletions src/caffe/proto/caffe.proto
@@ -234,6 +234,7 @@ message LayerParameter {
    ABSVAL = 35;
    ACCURACY = 1;
    ARGMAX = 30;
    BATCHNORM = 104;
    BNLL = 2;
    CONCAT = 3;
    CONTRASTIVE_LOSS = 37;
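For reference, once BATCHNORM is registered a net definition could request the layer like this (a minimal sketch in the V1 prototxt format this enum belongs to; the layer and blob names are illustrative):

layers {
  name: "conv1_bn"
  type: BATCHNORM
  bottom: "conv1"
  top: "conv1_bn"
}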
74 changes: 74 additions & 0 deletions src/caffe/test/test_batchnorm_layer.cpp
@@ -0,0 +1,74 @@
#include <algorithm>
#include <cstring>
#include <vector>

#include "gtest/gtest.h"

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/vision_layers.hpp"

#include "caffe/test/test_caffe_main.hpp"
#include "caffe/test/test_gradient_check_util.hpp"

#define BATCH_SIZE 2
#define INPUT_DATA_SIZE 3

namespace caffe {

template <typename TypeParam>
class BatchnormLayerTest : public MultiDeviceTest<TypeParam> {
  typedef typename TypeParam::Dtype Dtype;

 protected:
  BatchnormLayerTest()
      : epsilon_(Dtype(1e-5)),
        blob_bottom_(new Blob<Dtype>()),
        blob_top_(new Blob<Dtype>()) {}
  virtual void SetUp() {
    Caffe::set_random_seed(1601);
    blob_bottom_->Reshape(BATCH_SIZE, INPUT_DATA_SIZE, 1, 1);
    // fill the values
    FillerParameter filler_param;
    GaussianFiller<Dtype> filler(filler_param);
    filler.Fill(this->blob_bottom_);
    blob_bottom_vec_.push_back(blob_bottom_);
    blob_top_vec_.push_back(blob_top_);
  }
  virtual ~BatchnormLayerTest() { delete blob_bottom_; delete blob_top_; }

  Dtype epsilon_;
  Blob<Dtype>* const blob_bottom_;
  Blob<Dtype>* const blob_top_;
  vector<Blob<Dtype>*> blob_bottom_vec_;
  vector<Blob<Dtype>*> blob_top_vec_;
};


TYPED_TEST_CASE(BatchnormLayerTest, TestDtypesAndDevices);

TYPED_TEST(BatchnormLayerTest, TestSetupAcrossChannels) {
  typedef typename TypeParam::Dtype Dtype;
  LayerParameter layer_param;
  BatchnormLayer<Dtype> layer(layer_param);
  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
  EXPECT_EQ(this->blob_top_->num(), BATCH_SIZE);
  EXPECT_EQ(this->blob_top_->channels(), INPUT_DATA_SIZE);
  EXPECT_EQ(this->blob_top_->height(), 1);
  EXPECT_EQ(this->blob_top_->width(), 1);
}

TYPED_TEST(BatchnormLayerTest, TestGradientAcrossChannels) {
  typedef typename TypeParam::Dtype Dtype;
  LayerParameter layer_param;
  BatchnormLayer<Dtype> layer(layer_param);
  GradientChecker<Dtype> checker(1e-2, 1e-2);
  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
      &(this->blob_top_vec_));
}

} // namespace caffe