Refactor convolution layer and add deconvolution layer #1615

Merged (6 commits) on Feb 1, 2015
include/caffe/vision_layers.hpp (133 additions, 32 deletions)

@@ -16,6 +16,101 @@

namespace caffe {

/**
* @brief Abstract base class that factors out the BLAS code common to
* ConvolutionLayer and DeconvolutionLayer.
*/
template <typename Dtype>
class BaseConvolutionLayer : public Layer<Dtype> {
public:
explicit BaseConvolutionLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline int MinBottomBlobs() const { return 1; }
virtual inline int MinTopBlobs() const { return 1; }
virtual inline bool EqualNumBottomTopBlobs() const { return true; }

protected:
// Helper functions that abstract away the column buffer and gemm arguments.
// The last argument in forward_cpu_gemm is so that we can skip the im2col if
// we just called weight_cpu_gemm with the same input.
void forward_cpu_gemm(const Dtype* input, const Dtype* weights,
Dtype* output, bool skip_im2col = false);
void forward_cpu_bias(Dtype* output, const Dtype* bias);
void backward_cpu_gemm(const Dtype* input, const Dtype* weights,
Dtype* output);
void weight_cpu_gemm(const Dtype* input, const Dtype* output, Dtype*
weights);
void backward_cpu_bias(Dtype* bias, const Dtype* input);

#ifndef CPU_ONLY
void forward_gpu_gemm(const Dtype* col_input, const Dtype* weights,
Dtype* output, bool skip_im2col = false);
void forward_gpu_bias(Dtype* output, const Dtype* bias);
void backward_gpu_gemm(const Dtype* input, const Dtype* weights,
Dtype* col_output);
void weight_gpu_gemm(const Dtype* col_input, const Dtype* output, Dtype*
weights);
void backward_gpu_bias(Dtype* bias, const Dtype* input);
#endif

// reverse_dimensions should return true iff we are implementing deconv, so
// that conv helpers know which dimensions are which.
virtual bool reverse_dimensions() = 0;
// Compute height_out_ and width_out_ from other parameters.
virtual void compute_output_shape() = 0;

int kernel_h_, kernel_w_;
int stride_h_, stride_w_;
int num_;
int channels_;
int pad_h_, pad_w_;
int height_, width_;
int group_;
int num_output_;
int height_out_, width_out_;
bool bias_term_;
bool is_1x1_;

private:
// wrap im2col/col2im so we don't have to remember the (long) argument lists
inline void conv_im2col_cpu(const Dtype* data, Dtype* col_buff) {
im2col_cpu(data, conv_in_channels_, conv_in_height_, conv_in_width_,
kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, col_buff);
  }

[Inline review thread, attached to the wrapper above]

Contributor:
Would it be slightly more elegant to give BaseConvolutionLayer a private im2col_layer_ which {Dec,C}onvolutionLayer call Forward and Backward on, instead of these functions? (Possibly also saving some duplicated setup logic & private variables.)

Relatedly, I always kind of thought we should have a separate BiasLayer internally called by both InnerProductLayer and ConvolutionLayer, factoring out that little bit of logic, and allowing one to use biases without multiplicative weights, for whatever that's worth.

Just a minor thought -- definitely doesn't need to be done here as this is nice cleanup regardless, and of course deconvolution layer will be a welcome feature.

This looks good to merge to me if you think it's ready.

Contributor (author):
Yes, I think the im2col layer would probably be a bit better, and I agree with factoring out the bias, although I'd rather save those things for later PRs.

Other than that, I think I ought to add some tests that at least call deconv forward and backward, and then it'll be ready.

inline void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) {
col2im_cpu(col_buff, conv_in_channels_, conv_in_height_, conv_in_width_,
kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, data);
}
#ifndef CPU_ONLY
inline void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) {
im2col_gpu(data, conv_in_channels_, conv_in_height_, conv_in_width_,
kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, col_buff);
}
inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) {
col2im_gpu(col_buff, conv_in_channels_, conv_in_height_, conv_in_width_,
kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, data);
}
#endif

int conv_out_channels_;
int conv_in_channels_;
int conv_out_spatial_dim_;
int conv_in_height_;
int conv_in_width_;
int kernel_dim_;
int weight_offset_;
int col_offset_;
int output_offset_;

Blob<Dtype> col_buffer_;
Blob<Dtype> bias_multiplier_;
};
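
For reference, here is a minimal sketch of how one of these helpers can be written in terms of the wrappers and offset fields above. It is an illustration assuming Caffe's caffe_cpu_gemm BLAS wrapper, not necessarily the exact code of this PR (whose definitions live in a .cpp file outside this excerpt):

// Sketch: CPU forward pass for a single image. im2col unrolls the input
// into col_buffer_, then one GEMM per group multiplies the filter matrix
// by the column buffer.
template <typename Dtype>
void BaseConvolutionLayer<Dtype>::forward_cpu_gemm(const Dtype* input,
    const Dtype* weights, Dtype* output, bool skip_im2col) {
  const Dtype* col_buff = input;
  if (!is_1x1_) {
    // A 1x1 convolution is already a plain matrix product, so the column
    // buffer is bypassed; otherwise unroll the input, unless the caller
    // just did so for weight_cpu_gemm on the same input.
    if (!skip_im2col) {
      conv_im2col_cpu(input, col_buffer_.mutable_cpu_data());
    }
    col_buff = col_buffer_.cpu_data();
  }
  for (int g = 0; g < group_; ++g) {
    // (conv_out_channels_ / group_) x (kernel_dim_ / group_) filters times
    // (kernel_dim_ / group_) x conv_out_spatial_dim_ columns, per group.
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
        conv_out_channels_ / group_, conv_out_spatial_dim_,
        kernel_dim_ / group_, (Dtype)1.,
        weights + weight_offset_ * g, col_buff + col_offset_ * g,
        (Dtype)0., output + output_offset_ * g);
  }
}

backward_cpu_gemm is the transposed product followed by conv_col2im_cpu, which is exactly what lets DeconvolutionLayer below reuse these helpers with the dimensions reversed.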

/**
* @brief Convolves the input image with a bank of learned filters,
* and (optionally) adds biases.
@@ -33,7 +128,7 @@ namespace caffe {
* the output channel N' columns of the output matrix.
*/
template <typename Dtype>
-class ConvolutionLayer : public Layer<Dtype> {
+class ConvolutionLayer : public BaseConvolutionLayer<Dtype> {
public:
/**
* @param param provides ConvolutionParameter convolution_param,
@@ -64,18 +159,10 @@ class ConvolutionLayer : public Layer<Dtype> {
* kernels + stream parallelism) engines.
*/
explicit ConvolutionLayer(const LayerParameter& param)
-      : Layer<Dtype>(param) {}
-  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
-      const vector<Blob<Dtype>*>& top);
-  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
-      const vector<Blob<Dtype>*>& top);
-
+      : BaseConvolutionLayer<Dtype>(param) {}
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_CONVOLUTION;
}
-  virtual inline int MinBottomBlobs() const { return 1; }
-  virtual inline int MinTopBlobs() const { return 1; }
-  virtual inline bool EqualNumBottomTopBlobs() const { return true; }

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
@@ -86,30 +173,44 @@ class ConvolutionLayer : public Layer<Dtype> {
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual inline bool reverse_dimensions() { return false; }
virtual void compute_output_shape();
};
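
compute_output_shape is where the two subclasses diverge. For convolution it is the standard downsampling formula; a sketch, assuming the usual Caffe convention that integer division drops windows which do not fit:

// Sketch: out = (in + 2 * pad - kernel) / stride + 1 per spatial axis.
// (this-> is required because the members live in a dependent base class.)
template <typename Dtype>
void ConvolutionLayer<Dtype>::compute_output_shape() {
  this->height_out_ = (this->height_ + 2 * this->pad_h_ - this->kernel_h_)
      / this->stride_h_ + 1;
  this->width_out_ = (this->width_ + 2 * this->pad_w_ - this->kernel_w_)
      / this->stride_w_ + 1;
}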

-  int kernel_h_, kernel_w_;
-  int stride_h_, stride_w_;
-  int num_;
-  int channels_;
-  int pad_h_, pad_w_;
-  int height_, width_;
-  int group_;
-  int num_output_;
-  int height_out_, width_out_;
-  bool bias_term_;
-  bool is_1x1_;
/**
* @brief Convolve the input with a bank of learned filters, and (optionally)
* add biases, treating filters and convolution parameters in the
 * opposite sense from ConvolutionLayer.
*
* ConvolutionLayer computes each output value by dotting an input window with
* a filter; DeconvolutionLayer multiplies each input value by a filter
* elementwise, and sums over the resulting output windows. In other words,
* DeconvolutionLayer is ConvolutionLayer with the forward and backward passes
* reversed. DeconvolutionLayer reuses ConvolutionParameter for its
 * parameters, but they take the opposite sense from ConvolutionLayer (so
* padding is removed from the output rather than added to the input, and
* stride results in upsampling rather than downsampling).
*/
template <typename Dtype>
class DeconvolutionLayer : public BaseConvolutionLayer<Dtype> {
public:
explicit DeconvolutionLayer(const LayerParameter& param)
: BaseConvolutionLayer<Dtype>(param) {}
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_DECONVOLUTION;
}

-  /// M_ is the channel dimension of the output for a single group, which is the
-  /// leading dimension of the filter matrix.
-  int M_;
-  /// K_ is the dimension of an unrolled input for a single group, which is the
-  /// leading dimension of the data matrix.
-  int K_;
-  /// N_ is the spatial dimension of the output, the H x W, which are the last
-  /// dimensions of the data and filter matrices.
-  int N_;
-  Blob<Dtype> col_buffer_;
-  Blob<Dtype> bias_multiplier_;
protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual inline bool reverse_dimensions() { return true; }
virtual void compute_output_shape();
};
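
Since reverse_dimensions() returns true, the base class treats the top blob as the "input" to the shared im2col/GEMM plumbing, and compute_output_shape inverts the convolution formula so that stride upsamples and pad crops the output. A sketch under the same assumptions as the snippets above:

// Sketch: out = stride * (in - 1) + kernel - 2 * pad, the inverse of the
// convolution output-size formula (stride enlarges, pad crops).
template <typename Dtype>
void DeconvolutionLayer<Dtype>::compute_output_shape() {
  this->height_out_ = this->stride_h_ * (this->height_ - 1) + this->kernel_h_
      - 2 * this->pad_h_;
  this->width_out_ = this->stride_w_ * (this->width_ - 1) + this->kernel_w_
      - 2 * this->pad_w_;
}

As a usage illustration, a hypothetical prototxt snippet (layer and blob names made up) in the proto format of this era would give learned 2x upsampling, since out = 2 * (in - 1) + 4 - 2 = 2 * in:

layers {
  name: "upsample"
  type: DECONVOLUTION
  bottom: "feat"
  top: "feat_2x"
  convolution_param {
    num_output: 64
    kernel_size: 4
    stride: 2
    pad: 1
  }
}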

#ifdef USE_CUDNN