From b866d14ae86df3bb1548117f22818c2fefb5778b Mon Sep 17 00:00:00 2001 From: Dmytro Mishkin Date: Fri, 15 May 2015 16:14:02 +0300 Subject: [PATCH] Remove unnecessary variance computation from backward in MVN layer --- include/caffe/common_layers.hpp | 1 + src/caffe/layers/mvn_layer.cpp | 23 ++--------------------- src/caffe/layers/mvn_layer.cu | 23 +---------------------- src/caffe/proto/caffe.proto | 3 +++ 4 files changed, 7 insertions(+), 43 deletions(-) diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index 8da6d68096b..e6b42c14587 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -295,6 +295,7 @@ class MVNLayer : public Layer { /// sum_multiplier is used to carry out sum using BLAS Blob sum_multiplier_; + Dtype eps_; }; /* diff --git a/src/caffe/layers/mvn_layer.cpp b/src/caffe/layers/mvn_layer.cpp index b74d7b4f300..3e79bddcdde 100644 --- a/src/caffe/layers/mvn_layer.cpp +++ b/src/caffe/layers/mvn_layer.cpp @@ -22,6 +22,7 @@ void MVNLayer::Reshape(const vector*>& bottom, bottom[0]->height(), bottom[0]->width()); Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data(); caffe_set(sum_multiplier_.count(), Dtype(1), multiplier_data); + eps_ = this->layer_param_.mvn_param().eps(); } template @@ -36,7 +37,6 @@ void MVNLayer::Forward_cpu(const vector*>& bottom, num = bottom[0]->num() * bottom[0]->channels(); int dim = bottom[0]->count() / num; - Dtype eps = 1e-10; if (this->layer_param_.mvn_param().normalize_variance()) { // put the squares of bottom into temp_ @@ -66,7 +66,7 @@ void MVNLayer::Forward_cpu(const vector*>& bottom, caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5), variance_.mutable_cpu_data()); - caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data()); + caffe_add_scalar(variance_.count(), eps_, variance_.mutable_cpu_data()); caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, 1., variance_.cpu_data(), sum_multiplier_.cpu_data(), 0., @@ -102,7 +102,6 
@@ void MVNLayer::Backward_cpu(const vector*>& top, num = bottom[0]->num() * bottom[0]->channels(); int dim = bottom[0]->count() / num; - Dtype eps = 1e-10; if (this->layer_param_.mvn_param().normalize_variance()) { caffe_mul(temp_.count(), top_data, top_diff, bottom_diff); @@ -125,24 +124,6 @@ void MVNLayer::Backward_cpu(const vector*>& top, // put the squares of bottom into temp_ caffe_powx(temp_.count(), bottom_data, Dtype(2), temp_.mutable_cpu_data()); - - // computes variance using var(X) = E(X^2) - (EX)^2 - caffe_cpu_gemv(CblasNoTrans, num, dim, 1. / dim, bottom_data, - sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data()); // EX - caffe_cpu_gemv(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(), - sum_multiplier_.cpu_data(), 0., - variance_.mutable_cpu_data()); // E(X^2) - caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2), - temp_.mutable_cpu_data()); // (EX)^2 - caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(), - variance_.mutable_cpu_data()); // variance - - // normalize variance - caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5), - variance_.mutable_cpu_data()); - - caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data()); - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, 1., variance_.cpu_data(), sum_multiplier_.cpu_data(), 0., temp_.mutable_cpu_data()); diff --git a/src/caffe/layers/mvn_layer.cu b/src/caffe/layers/mvn_layer.cu index 0667f50380f..3888a0c7106 100644 --- a/src/caffe/layers/mvn_layer.cu +++ b/src/caffe/layers/mvn_layer.cu @@ -36,8 +36,6 @@ void MVNLayer::Forward_gpu(const vector*>& bottom, caffe_gpu_sub(mean_.count(), variance_.gpu_data(), temp_.gpu_data(), variance_.mutable_gpu_data()); // variance - Dtype eps = 1e-10; - // do mean and variance normalization // subtract mean caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, -1., @@ -50,7 +48,7 @@ void MVNLayer::Forward_gpu(const vector*>& bottom, caffe_gpu_powx(variance_.count(), variance_.gpu_data(), Dtype(0.5), 
variance_.mutable_gpu_data()); - caffe_gpu_add_scalar(variance_.count(), eps, variance_.mutable_gpu_data()); + caffe_gpu_add_scalar(variance_.count(), eps_, variance_.mutable_gpu_data()); caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, 1., variance_.gpu_data(), sum_multiplier_.gpu_data(), 0., @@ -87,8 +85,6 @@ void MVNLayer::Backward_gpu(const vector*>& top, int dim = bottom[0]->count() / num; - Dtype eps = 1e-10; - if (this->layer_param_.mvn_param().normalize_variance()) { caffe_gpu_mul(temp_.count(), top_data, top_diff, bottom_diff); caffe_gpu_gemv(CblasNoTrans, num, dim, 1., bottom_diff, @@ -111,23 +107,6 @@ void MVNLayer::Backward_gpu(const vector*>& top, caffe_gpu_powx(temp_.count(), bottom_data, Dtype(2), temp_.mutable_gpu_data()); - // computes variance using var(X) = E(X^2) - (EX)^2 - caffe_gpu_gemv(CblasNoTrans, num, dim, 1. / dim, bottom_data, - sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data()); // EX - caffe_gpu_gemv(CblasNoTrans, num, dim, 1. / dim, temp_.gpu_data(), - sum_multiplier_.gpu_data(), 0., - variance_.mutable_gpu_data()); // E(X^2) - caffe_gpu_powx(mean_.count(), mean_.gpu_data(), Dtype(2), - temp_.mutable_gpu_data()); // (EX)^2 - caffe_gpu_sub(mean_.count(), variance_.gpu_data(), temp_.gpu_data(), - variance_.mutable_gpu_data()); // variance - - // normalize variance - caffe_gpu_powx(variance_.count(), variance_.gpu_data(), Dtype(0.5), - variance_.mutable_gpu_data()); - - caffe_gpu_add_scalar(variance_.count(), eps, variance_.mutable_gpu_data()); - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, 1., variance_.gpu_data(), sum_multiplier_.gpu_data(), 0., temp_.mutable_gpu_data()); diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index d43e560a1fa..1923627fd97 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -629,6 +629,9 @@ message MVNParameter { // This parameter can be set to true to perform DNN-like MVN optional bool across_channels = 2 [default = false]; + + // 
Epsilon added to the standard deviation to avoid dividing by zero + optional float eps = 3 [default = 1e-9]; } // Message that stores parameters used by PoolingLayer