Add ScalarLayer to multiply two Blobs, broadcasting the shape of the second as needed
1 parent 0486127 · commit b4a5b6a
Showing 5 changed files with 535 additions and 1 deletion.
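In plain terms, the new layer computes top[0] = bottom[0] * bottom[1], where bottom[1]'s shape must match a contiguous run of bottom[0]'s axes beginning at a configurable axis; each value of bottom[1] then scales the corresponding inner slice of bottom[0]. The standalone sketch below illustrates those semantics without Caffe's Blob machinery (scalar_forward and its flattened-dimension parameters are hypothetical names for exposition, not part of the commit):

#include <cassert>
#include <vector>

// Illustrative sketch: bottom has been flattened to
// (outer_dim, scalar_dim, inner_dim); scalars holds scalar_dim values,
// one per broadcast slice.
void scalar_forward(const std::vector<float>& bottom,
                    const std::vector<float>& scalars,
                    int outer_dim, int scalar_dim, int inner_dim,
                    std::vector<float>* top) {
  assert(bottom.size() ==
         static_cast<size_t>(outer_dim) * scalar_dim * inner_dim);
  assert(scalars.size() == static_cast<size_t>(scalar_dim));
  top->resize(bottom.size());
  for (int n = 0; n < outer_dim; ++n) {
    for (int d = 0; d < scalar_dim; ++d) {
      for (int i = 0; i < inner_dim; ++i) {
        const int idx = (n * scalar_dim + d) * inner_dim + i;
        (*top)[idx] = bottom[idx] * scalars[d];
      }
    }
  }
}

For example, with bottom[0] of shape (N, C, H, W), bottom[1] of shape (C,), and axis = 1, this reduces to per-channel scaling with outer_dim = N, scalar_dim = C, and inner_dim = H * W.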
scalar_layer.cpp (new file, CPU implementation)
@@ -0,0 +1,119 @@
#include <algorithm>
#include <vector>

#include "caffe/common_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {
template <typename Dtype>
void ScalarLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // TODO: make ScalarLayer usable in-place.
  // Currently, in-place computation is broken during Backward with
  // propagate_down[0] && propagate_down[1], as bottom[0]'s diff is used for
  // temporary storage of an intermediate result, overwriting top[0]'s diff
  // if using in-place computation.
  CHECK_NE(bottom[0], top[0]) << "ScalarLayer cannot be used in-place";
  axis_ =
      bottom[0]->CanonicalAxisIndex(this->layer_param_.scalar_param().axis());
  CHECK_GE(bottom[0]->num_axes(), axis_ + bottom[1]->num_axes())
      << "bottom[1]'s shape extends past bottom[0]'s shape when applied "
      << "starting with bottom[0] axis = " << axis_;
  for (int i = 0; i < bottom[1]->num_axes(); ++i) {
    CHECK_EQ(bottom[0]->shape(axis_ + i), bottom[1]->shape(i))
        << "dimension mismatch between bottom[0]->shape(" << axis_ + i
        << ") and bottom[1]->shape(" << i << ")";
  }
  outer_dim_ = bottom[0]->count(0, axis_);
  scalar_dim_ = bottom[1]->count();
  inner_dim_ = bottom[0]->count(axis_ + bottom[1]->num_axes());
  top[0]->ReshapeLike(*bottom[0]);
  sum_result_.Reshape(vector<int>(1, outer_dim_ * scalar_dim_));
  const int sum_mult_size = std::max(outer_dim_, inner_dim_);
  sum_multiplier_.Reshape(vector<int>(1, sum_mult_size));
  if (sum_multiplier_.cpu_data()[sum_mult_size - 1] != Dtype(1)) {
    caffe_set(sum_mult_size, Dtype(1), sum_multiplier_.mutable_cpu_data());
  }
}
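// Worked example for the Reshape above (added for exposition): with
// bottom[0] of shape (2, 3, 4, 5), bottom[1] of shape (3, 4), and axis = 1,
// the checks pass and Reshape yields outer_dim_ = 2, scalar_dim_ = 3 * 4 = 12,
// and inner_dim_ = 5: each of the 12 scalar values multiplies a contiguous
// run of 5 elements, and that pattern repeats for both outer slices.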

template <typename Dtype>
void ScalarLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* scalar_data = bottom[1]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  for (int n = 0; n < outer_dim_; ++n) {
    for (int d = 0; d < scalar_dim_; ++d) {
      const Dtype factor = scalar_data[d];
      caffe_cpu_scale(inner_dim_, factor, bottom_data, top_data);
      bottom_data += inner_dim_;
      top_data += inner_dim_;
    }
  }
}

template <typename Dtype>
void ScalarLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* bottom_data = bottom[0]->cpu_data();
    // Hack: store big eltwise product in bottom[0] diff, except in the special
    // case where this layer itself does the eltwise product, in which case we
    // can store it directly in the scalar diff, and we're done.
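    // In index form (added for exposition): d(loss)/d(scalar[d]) is the sum
    // over n in [0, outer_dim_) and i in [0, inner_dim_) of
    //   top_diff[(n * scalar_dim_ + d) * inner_dim_ + i] *
    //   bottom_data[(n * scalar_dim_ + d) * inner_dim_ + i].
    // The code below computes the full elementwise product once, then
    // reduces over inner_dim_ (a gemv against a ones vector) and, if needed,
    // over outer_dim_ (a second, transposed gemv).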
    const bool is_eltwise = (inner_dim_ == 1 && outer_dim_ == 1);
    Dtype* product = is_eltwise ?
        bottom[1]->mutable_cpu_diff() : bottom[0]->mutable_cpu_diff();
    caffe_mul(top[0]->count(), top_diff, bottom_data, product);
    if (!is_eltwise) {
      Dtype* sum_result = NULL;
      if (inner_dim_ == 1) {
        sum_result = product;
      } else if (sum_result_.count() == 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scalar_diff = bottom[1]->mutable_cpu_diff();
        *scalar_diff = caffe_cpu_dot(inner_dim_, product, sum_mult);
      } else {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        sum_result = (outer_dim_ == 1) ?
            bottom[1]->mutable_cpu_diff() : sum_result_.mutable_cpu_data();
        caffe_cpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
            Dtype(1), product, sum_mult, Dtype(0), sum_result);
      }
      if (outer_dim_ != 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scalar_diff = bottom[1]->mutable_cpu_diff();
        if (scalar_dim_ == 1) {
          *scalar_diff = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
        } else {
          caffe_cpu_gemv(CblasTrans, outer_dim_, scalar_dim_,
              Dtype(1), sum_result, sum_mult, Dtype(0), scalar_diff);
        }
      }
    }
  }
  if (propagate_down[0]) {
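    // Gradient w.r.t. bottom[0] (added comment): each element of top_diff is
    // scaled by the same broadcast scalar used in the forward pass, so the
    // loop below mirrors Forward_cpu with top_diff as input.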
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* scalar_data = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    for (int n = 0; n < outer_dim_; ++n) {
      for (int d = 0; d < scalar_dim_; ++d) {
        const Dtype factor = scalar_data[d];
        caffe_cpu_scale(inner_dim_, factor, top_diff, bottom_diff);
        bottom_diff += inner_dim_;
        top_diff += inner_dim_;
      }
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(ScalarLayer);
#endif

INSTANTIATE_CLASS(ScalarLayer);
REGISTER_LAYER_CLASS(Scalar);

}  // namespace caffe
scalar_layer.cu (new file, GPU implementation)
@@ -0,0 +1,86 @@
#include <cfloat>
#include <vector>

#include "caffe/common_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {
template <typename Dtype>
__global__ void ScalarForward(const int n, const Dtype* in,
    const Dtype* scalars, const int scalar_dim, const int inner_dim,
    Dtype* out) {
  CUDA_KERNEL_LOOP(index, n) {
    const int scalar_index = (index / inner_dim) % scalar_dim;
    out[index] = in[index] * scalars[scalar_index];
  }
}
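// Index decomposition in the kernel above (added for exposition): a linear
// output index k can be written k = (n * scalar_dim + d) * inner_dim + i,
// so (k / inner_dim) % scalar_dim recovers d, the position in the
// broadcast scalars.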

template <typename Dtype>
void ScalarLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const int count = top[0]->count();
  const Dtype* bottom_data = bottom[0]->gpu_data();
  const Dtype* scalar_data = bottom[1]->gpu_data();
  Dtype* top_data = top[0]->mutable_gpu_data();
  ScalarForward<Dtype>  // NOLINT_NEXT_LINE(whitespace/operators)
      <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, bottom_data, scalar_data, scalar_dim_, inner_dim_, top_data);
}

template <typename Dtype>
void ScalarLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    const Dtype* top_diff = top[0]->gpu_diff();
    const Dtype* bottom_data = bottom[0]->gpu_data();
    // Hack: store big eltwise product in bottom[0] diff, except in the special
    // case where this layer itself does the eltwise product, in which case we
    // can store it directly in the scalar diff, and we're done.
    const bool is_eltwise = (inner_dim_ == 1 && outer_dim_ == 1);
    Dtype* product = is_eltwise ?
        bottom[1]->mutable_gpu_diff() : bottom[0]->mutable_gpu_diff();
    caffe_gpu_mul(top[0]->count(), top_diff, bottom_data, product);
    if (!is_eltwise) {
      Dtype* sum_result = NULL;
      if (inner_dim_ == 1) {
        sum_result = product;
      } else if (sum_result_.count() == 1) {
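        // Note (added): caffe_gpu_dot returns its scalar result to a host
        // pointer, which is why the diff is written through
        // mutable_cpu_diff() here even though the inputs live on the GPU.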
        const Dtype* sum_mult = sum_multiplier_.gpu_data();
        Dtype* scalar_diff = bottom[1]->mutable_cpu_diff();
        caffe_gpu_dot(inner_dim_, product, sum_mult, scalar_diff);
      } else {
        const Dtype* sum_mult = sum_multiplier_.gpu_data();
        sum_result = (outer_dim_ == 1) ?
            bottom[1]->mutable_gpu_diff() : sum_result_.mutable_gpu_data();
        caffe_gpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
            Dtype(1), product, sum_mult, Dtype(0), sum_result);
      }
      if (outer_dim_ != 1) {
        const Dtype* sum_mult = sum_multiplier_.gpu_data();
        if (scalar_dim_ == 1) {
          Dtype* scalar_diff = bottom[1]->mutable_cpu_diff();
          caffe_gpu_dot(outer_dim_, sum_mult, sum_result, scalar_diff);
        } else {
          Dtype* scalar_diff = bottom[1]->mutable_gpu_diff();
          caffe_gpu_gemv(CblasTrans, outer_dim_, scalar_dim_,
              Dtype(1), sum_result, sum_mult, Dtype(0), scalar_diff);
        }
      }
    }
  }
  if (propagate_down[0]) {
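    // Reuses the forward kernel (added comment): the gradient w.r.t.
    // bottom[0] is top_diff scaled elementwise by the broadcast scalars,
    // which is exactly what ScalarForward computes.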
    const int count = top[0]->count();
    const Dtype* top_diff = top[0]->gpu_diff();
    const Dtype* scalar_data = bottom[1]->gpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    ScalarForward<Dtype>  // NOLINT_NEXT_LINE(whitespace/operators)
        <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
        count, top_diff, scalar_data, scalar_dim_, inner_dim_, bottom_diff);
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(ScalarLayer);

}  // namespace caffe