Merge pull request #2217 from jeffdonahue/ssafar-reshape-rebase
Rebase @ssafar's ReshapeLayer
jeffdonahue committed May 15, 2015
2 parents 35a5df5 + 21032b2 commit 329bac7
Showing 5 changed files with 519 additions and 1 deletion.
39 changes: 39 additions & 0 deletions docs/tutorial/layers.md
@@ -419,6 +419,45 @@ The `SPLIT` layer is a utility layer that splits an input blob to multiple outpu

The `FLATTEN` layer is a utility layer that flattens an input of shape `n * c * h * w` to a simple vector output of shape `n * (c*h*w) * 1 * 1`.

#### Reshape

* Layer type: `Reshape`
* Implementation: `./src/caffe/layers/reshape_layer.cpp`
* Parameters (`ReshapeParameter reshape_param`)
    - Optional: (also see detailed description below)
        - `shape`

* Input
    - a single blob with arbitrary dimensions
* Output
    - the same blob, with modified dimensions, as specified by `reshape_param`

* Sample

    layer {
      name: "reshape"
      type: "Reshape"
      bottom: "input"
      top: "output"
      reshape_param {
        shape {
          dim: 0  # copy the dimension from below
          dim: 2
          dim: 3
          dim: -1 # infer it from the other dimensions
        }
      }
    }

The `Reshape` layer changes the dimensions of its input without changing its data. Just like the `Flatten` layer, only the dimensions are altered; no data is copied in the process.

Output dimensions are specified by the `ReshapeParameter` proto. Positive numbers are used directly, setting the corresponding dimension of the output blob. In addition, two special values are accepted for any of the target dimension values:

* **0** means "copy the respective dimension of the bottom blob". That is, if the bottom blob has 2 as its 1st dimension, the top blob will have 2 as its 1st dimension as well, given `dim: 0` as the 1st target dimension.
* **-1** stands for "infer this from the other dimensions". This behavior is similar to that of `-1` in *numpy*'s `reshape` or `[]` in *MATLAB*'s `reshape`: this dimension is calculated to keep the overall element count the same as in the bottom blob. At most one `-1` can be used in a reshape operation.

As another example, specifying `reshape_param { shape { dim: 0 dim: -1 } }` makes the layer behave in exactly the same way as the `Flatten` layer.
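
The two special values can also be combined, as in the sketch below (a hypothetical example: the layer and blob names and the `64 x 3 x 28 x 28` input shape are invented for illustration). Here `dim: 0` copies the batch dimension, the two fixed dims set the spatial size, and `dim: -1` is inferred as `3*28*28 / (14*14) = 12`, giving an output of shape `64 x 12 x 14 x 14`:

    layer {
      name: "reshape_example"
      type: "Reshape"
      bottom: "data"
      top: "reshaped"
      reshape_param {
        shape {
          dim: 0   # copied from the bottom blob: 64
          dim: -1  # inferred: 3*28*28 / (14*14) = 12
          dim: 14
          dim: 14
        }
      }
    }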

#### Concatenation

* LayerType: `CONCAT`
38 changes: 38 additions & 0 deletions include/caffe/common_layers.hpp
@@ -297,6 +297,44 @@ class MVNLayer : public Layer<Dtype> {
  Blob<Dtype> sum_multiplier_;
};

/**
* @brief Reshapes the input Blob into an arbitrary-sized output Blob.
*
* Note: similarly to FlattenLayer, this layer does not change the input values
* (see FlattenLayer, Blob::ShareData and Blob::ShareDiff).
*/
template <typename Dtype>
class ReshapeLayer : public Layer<Dtype> {
 public:
  explicit ReshapeLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "Reshape"; }
  virtual inline int ExactNumBottomBlobs() const { return 1; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
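  // Forward and Backward are intentionally no-ops: the top blob shares the
  // bottom blob's data and diff (set up in Reshape), so there is nothing to
  // compute in either direction.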
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}

  /// @brief vector of axes indices whose dimensions we'll copy from the bottom
  vector<int> copy_axes_;
  /// @brief the index of the axis whose dimension we infer, or -1 if none
  int inferred_axis_;
  /// @brief the product of the "constant" output dimensions
  int constant_count_;
};

/**
* @brief Ignores bottom blobs while producing no top blobs. (This is useful
* to suppress outputs during testing.)
95 changes: 95 additions & 0 deletions src/caffe/layers/reshape_layer.cpp
@@ -0,0 +1,95 @@
#include <vector>

#include "caffe/common_layers.hpp"
#include "caffe/layer.hpp"

namespace caffe {

template <typename Dtype>
void ReshapeLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  inferred_axis_ = -1;
  copy_axes_.clear();
  const BlobShape& top_blob_shape = this->layer_param_.reshape_param().shape();
  const int top_num_axes = top_blob_shape.dim_size();
  constant_count_ = 1;
  for (int i = 0; i < top_num_axes; ++i) {
    const int top_dim = top_blob_shape.dim(i);
    if (top_dim == 0) {
      copy_axes_.push_back(i);
    } else if (top_dim == -1) {
      CHECK_EQ(inferred_axis_, -1) << "new shape contains multiple "
          << "-1 dims; at most a single (1) value of -1 may be specified";
      inferred_axis_ = i;
    } else {
      constant_count_ *= top_dim;
    }
  }
}

template <typename Dtype>
void ReshapeLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
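  // Normalize a possibly negative "axis" parameter: negative values count
  // back from one past the last bottom axis (e.g., axis == -1 starts the
  // reshape after the final axis, appending the new axes at the end).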
  const int input_start_axis = this->layer_param_.reshape_param().axis();
  const int start_axis = (input_start_axis >= 0) ? input_start_axis :
      bottom[0]->num_axes() + input_start_axis + 1;
  CHECK_GE(start_axis, 0) << "axis " << input_start_axis << " out of range";
  CHECK_LE(start_axis, bottom[0]->num_axes()) << "axis " << input_start_axis
      << " out of range for " << bottom[0]->num_axes() << "-D input blob";
  const int num_axes = this->layer_param_.reshape_param().num_axes();
  CHECK_GE(num_axes, -1) << "num_axes must be >= 0, or -1 for all";
  const int end_axis =
      (num_axes == -1) ? bottom[0]->num_axes() : (start_axis + num_axes);
  CHECK_LE(end_axis, bottom[0]->num_axes())
      << "end_axis = axis + num_axes is out of range";
  const int num_axes_replaced = end_axis - start_axis;
  const int num_axes_retained = bottom[0]->num_axes() - num_axes_replaced;
  const BlobShape& top_blob_shape = this->layer_param_.reshape_param().shape();
  const int num_new_axes = top_blob_shape.dim_size();
  vector<int> top_shape(num_axes_retained + num_new_axes);
  int top_shape_index = 0;
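  // Assemble the output shape: bottom axes before start_axis are kept, the
  // dims from reshape_param replace the range [start_axis, end_axis), and
  // bottom axes from end_axis onward are kept.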
  for (int i = 0; i < start_axis; ++i) {
    top_shape[top_shape_index++] = bottom[0]->shape(i);
  }
  for (int i = 0; i < num_new_axes; ++i) {
    top_shape[top_shape_index++] = top_blob_shape.dim(i);
  }
  for (int i = end_axis; i < bottom[0]->num_axes(); ++i) {
    top_shape[top_shape_index++] = bottom[0]->shape(i);
  }
  CHECK_EQ(top_shape_index, top_shape.size());
  for (int i = 0; i < copy_axes_.size(); ++i) {
    const int copy_axis_index = copy_axes_[i];
    CHECK_GT(bottom[0]->num_axes(), start_axis + copy_axis_index)
        << "new shape contains a 0, but there was no corresponding bottom axis "
        << "to copy";
    top_shape[start_axis + copy_axis_index] =
        bottom[0]->shape(start_axis + copy_axis_index);
  }
  if (inferred_axis_ >= 0) {
    // A -1 dim was specified; infer the correct dimension by computing the
    // product of the other dimensions.
    int explicit_count = constant_count_;
    explicit_count *= bottom[0]->count(0, start_axis);
    explicit_count *= bottom[0]->count(end_axis);
    for (int i = 0; i < copy_axes_.size(); ++i) {
      const int copy_axis_index = copy_axes_[i];
      explicit_count *= top_shape[start_axis + copy_axis_index];
    }
    CHECK_EQ(0, bottom[0]->count() % explicit_count) << "bottom count ("
        << bottom[0]->count() << ") must be divisible by the product of "
        << "the specified dimensions (" << explicit_count << ")";
    const int inferred_dim = bottom[0]->count() / explicit_count;
    top_shape[start_axis + inferred_axis_] = inferred_dim;
  }
  top[0]->Reshape(top_shape);
  CHECK_EQ(top[0]->count(), bottom[0]->count())
      << "output count must match input count";
  top[0]->ShareData(*bottom[0]);
  top[0]->ShareDiff(*bottom[0]);
}

INSTANTIATE_CLASS(ReshapeLayer);
REGISTER_LAYER_CLASS(Reshape);

} // namespace caffe
68 changes: 67 additions & 1 deletion src/caffe/proto/caffe.proto
@@ -259,7 +259,7 @@ message ParamSpec {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 133 (last added: spp_param)
// LayerParameter next available layer-specific ID: 134 (last added: reshape_param)
message LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the layer type
@@ -326,6 +326,7 @@ message LayerParameter {
  optional PReLUParameter prelu_param = 131;
  optional PythonParameter python_param = 130;
  optional ReLUParameter relu_param = 123;
  optional ReshapeParameter reshape_param = 133;
  optional SigmoidParameter sigmoid_param = 124;
  optional SoftmaxParameter softmax_param = 125;
  optional SPPParameter spp_param = 132;
@@ -690,6 +691,71 @@ message ReLUParameter {
  optional Engine engine = 2 [default = DEFAULT];
}

// Message that stores parameters used by ReshapeLayer
message ReshapeParameter {
  // Specify the output dimensions. If some of the dimensions are set to 0,
  // the corresponding dimension from the bottom layer is used (unchanged).
  // Exactly one dimension may be set to -1, in which case its value is
  // inferred from the count of the bottom blob and the remaining dimensions.
  // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8:
  //
  //   layer {
  //     type: "Reshape" bottom: "input" top: "output"
  //     reshape_param { ... }
  //   }
  //
  // Then the following reshape_param specifications are all equivalent,
  // producing a 3D blob "output" with shape 2 x 2 x 4:
  //
  //   reshape_param { shape { dim:  2  dim: 2  dim:  4 } }
  //   reshape_param { shape { dim:  0  dim: 2  dim:  4 } }
  //   reshape_param { shape { dim:  0  dim: 2  dim: -1 } }
  //   reshape_param { shape { dim: -1  dim: 0  dim:  2 } }
  //
  optional BlobShape shape = 1;

  // axis and num_axes control the portion of the bottom blob's shape that is
  // replaced by (included in) the reshape. By default (axis == 0 and
  // num_axes == -1), the entire bottom blob shape is included in the reshape,
  // and hence the shape field must specify the entire output shape.
  //
  // axis may be non-zero to retain some portion of the beginning of the input
  // shape (and may be negative to index from the end; e.g., -1 to begin the
  // reshape after the last axis, including nothing in the reshape,
  // -2 to include only the last axis, etc.).
  //
  // For example, suppose "input" is a 2D blob with shape 2 x 8.
  // Then the following ReshapeLayer specifications are all equivalent,
  // producing a blob "output" with shape 2 x 2 x 4:
  //
  //   reshape_param { shape { dim: 2  dim: 2  dim: 4 } }
  //   reshape_param { shape { dim: 2  dim: 4 } axis:  1 }
  //   reshape_param { shape { dim: 2  dim: 4 } axis: -3 }
  //
  // num_axes specifies the extent of the reshape.
  // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on
  // input axes in the range [axis, axis + num_axes), i.e., on num_axes axes
  // starting at axis.
  // num_axes may also be -1, the default, to include all remaining axes
  // (starting from axis).
  //
  // For example, suppose "input" is a 2D blob with shape 2 x 8.
  // Then the following ReshapeLayer specifications are equivalent,
  // producing a blob "output" with shape 1 x 2 x 8:
  //
  //   reshape_param { shape { dim: 1  dim: 2  dim: 8 } }
  //   reshape_param { shape { dim: 1  dim: 2 } num_axes: 1 }
  //   reshape_param { shape { dim: 1 } num_axes: 0 }
  //
  // On the other hand, these would produce output blob shape 2 x 1 x 8:
  //
  //   reshape_param { shape { dim: 2  dim: 1  dim: 8 } }
  //   reshape_param { shape { dim: 1 } axis: 1  num_axes: 0 }
  //
  optional int32 axis = 2 [default = 0];
  optional int32 num_axes = 3 [default = -1];
}

// Message that stores parameters used by SigmoidLayer
message SigmoidParameter {
  enum Engine {