PaddlePaddle · abhinavarora · Jan 23, 2018 · Dec 23, 2017 · Jan 3, 2018 · Jan 3, 2018
diff --git a/paddle/operators/conv_transpose_op.cc b/paddle/operators/conv_transpose_op.cc
@@ -117,8 +117,8 @@ The input(X) size and output(Out) size may be different.
  Output shape: $(N, C_{out}, H_{out}, W_{out})$
  Where
  $$
- H_{out} = (H_{in} - 1) * strides[0] - 2 * paddings[0] + H_f \\
- W_{out} = (W_{in} - 1) * strides[1] - 2 * paddings[1] + W_f
+ H_{out} = (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\
+ W_{out} = (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1
  $$
 )DOC");
 }
@@ -187,9 +187,9 @@ The input(X) size and output(Out) size may be different.
  Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$
  Where
  $$
- D_{out} = (D_{in} - 1) * strides[0] - 2 * paddings[0] + D_f \\
- H_{out} = (H_{in} - 1) * strides[1] - 2 * paddings[1] + H_f \\
- W_{out} = (W_{in} - 1) * strides[2] - 2 * paddings[2] + W_f
+ D_{out} = (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\
+ H_{out} = (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\
+ W_{out} = (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1
  $$
 )DOC");
 }

diff --git a/paddle/operators/conv_transpose_op.h b/paddle/operators/conv_transpose_op.h
@@ -133,9 +133,9 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
  if (data_dim == 2U) {
  // col2im: col_matrix -> dy
  // from (c * k_h * k_w, h * w) to (c, o_h, o_w)
- col2im(dev_ctx, col, std::vector<int>{dilations[0], dilations[1]},
- strides, std::vector<int>{paddings[0], paddings[1], paddings[0],
-  paddings[1]},
+ col2im(dev_ctx, col, dilations, strides,
+ std::vector<int>{paddings[0], paddings[1], paddings[0],
+ paddings[1]},
  &output_batch);
  } else if (data_dim == 3U) {
  // col2vol: col_matrix -> dy
@@ -239,8 +239,7 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
  if (data_dim == 2U) {
  // im2col: dy -> col matrix
  // from (c, o_h, o_w) to (c * k_h * k_w, h * w)
- im2col(dev_ctx, output_grad_batch,
- std::vector<int>{dilations[0], dilations[1]}, strides,
+ im2col(dev_ctx, output_grad_batch, dilations, strides,
  std::vector<int>{paddings[0], paddings[1], paddings[0],
  paddings[1]},
  &col);

diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
@@ -607,58 +607,61 @@ def conv2d(input,
  <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_ .
  If bias attribution and activation type are provided, bias is added to the output of the convolution,
  and the corresponding activation function is applied to the final result.
- For each input :math:`X`, the equation is:
 
+ For each input :math:`X`, the equation is:
 
  .. math::
 
  Out = \sigma (W \\ast X + b)
 
  In the above equation:
 
-  * :math:`X`: Input value, a tensor with NCHW format.
-  * :math:`W`: Filter value, a tensor with MCHW format.
-  * :math:`\\ast`: Convolution operation.
-  * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
-  * :math:`\\sigma`: Activation function.
-  * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
+ * :math:`X`: Input value, a tensor with NCHW format.
+ * :math:`W`: Filter value, a tensor with MCHW format.
+ * :math:`\\ast`: Convolution operation.
+ * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
+ * :math:`\\sigma`: Activation function.
+ * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
 
  Example:
 
- Input:
- Input shape: $(N, C_{in}, H_{in}, W_{in})$
+ - Input:
+
+ Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
 
- Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
+ Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
+
+ - Output:
+ Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
 
- Output:
- Output shape: $(N, C_{out}, H_{out}, W_{out})$
  Where
- .. math::
+
+ .. math::
 
  H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
  W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
 
  Args:
-  input(Variable): The input image with [N, C, H, W] format.
-  num_filters(int): The number of filter. It is as same as the output
-  image channel.
-  filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
-  it must contain two integers, (filter_size_H, filter_size_W).
-  Otherwise, the filter will be a square.
-  stride(int|tuple): The stride size. If stride is a tuple, it must
-  contain two integers, (stride_H, stride_W). Otherwise, the
-  stride_H = stride_W = stride. Default: stride = 1.
-  padding(int|tuple): The padding size. If padding is a tuple, it must
-  contain two integers, (padding_H, padding_W). Otherwise, the
-  padding_H = padding_W = padding. Default: padding = 0.
-  groups(int): The groups number of the Conv2d Layer. According to grouped
-  convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
-  the first half of the filters is only connected to the first half
-  of the input channels, while the second half of the filters is only
-  connected to the second half of the input channels. Default: groups=1
-  param_attr(ParamAttr): The parameters to the Conv2d Layer. Default: None
-  bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None
-  act(str): Activation type. Default: None
+ input(Variable): The input image with [N, C, H, W] format.
+ num_filters(int): The number of filters. It is the same as the number
+ of output image channels.
+ filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
+ it must contain two integers, (filter_size_H, filter_size_W).
+ Otherwise, the filter will be a square.
+ stride(int|tuple): The stride size. If stride is a tuple, it must
+ contain two integers, (stride_H, stride_W). Otherwise, the
+ stride_H = stride_W = stride. Default: stride = 1.
+ padding(int|tuple): The padding size. If padding is a tuple, it must
+ contain two integers, (padding_H, padding_W). Otherwise, the
+ padding_H = padding_W = padding. Default: padding = 0.
+ groups(int): The groups number of the Conv2d Layer. According to grouped
+ convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
+ the first half of the filters is only connected to the first half
+ of the input channels, while the second half of the filters is only
+ connected to the second half of the input channels. Default: groups=1
+ param_attr(ParamAttr): The parameters to the Conv2d Layer. Default: None
+ bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None
+ act(str): Activation type. Default: None
 
  Returns:
  Variable: The tensor variable storing the convolution and \
@@ -673,7 +676,6 @@ def conv2d(input,
  data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
  conv2d = fluid.layers.conv2d(input=data, num_filters=2, filter_size=3, act="relu")
  """
-
  if stride is None:
  stride = [1, 1]
  helper = LayerHelper('conv2d', **locals())
@@ -1003,36 +1005,81 @@ def conv2d_transpose(input,
  dilation=None,
  param_attr=None):
  """
- The transpose of conv2d layer.
+ **Convolution2D transpose layer**
+
+ The convolution2D transpose layer calculates the output based on the input,
+ filter, and dilations, strides, paddings. Input(Input) and output(Output)
+ are in NCHW format. Where N is batch size, C is the number of channels,
+ H is the height of the feature, and W is the width of the feature.
+ Parameters(dilations, strides, paddings) each contain two elements, which
+ represent height and width, respectively. For details of the convolution transpose
+ layer, please refer to the following explanation and the references `therein <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_.
+
+ For each input :math:`X`, the equation is:
+
+ .. math::
+
+ Out = W \\ast X
+
+ In the above equation:
+
+ * :math:`X`: Input value, a tensor with NCHW format.
+ * :math:`W`: Filter value, a tensor with MCHW format.
+ * :math:`\\ast` : Convolution transpose operation.
+ * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
 
- This layer is also known as deconvolution layer.
+ Example:
+
+ - Input:
+
+ Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
+
+ Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)`
+
+ - Output:
+
+ Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
+
+ Where
+
+ .. math::
+
+ H_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\
+ W_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1
 
  Args:
- input(Variable): The input image with [N, C, H, W] format.
- num_filters(int): The number of filter. It is as same as the output
- image channel.
- output_size(int|tuple|None): The output image size. If output size is a
- tuple, it must contain two integers, (image_H, image_W). This
- parameter only works when filter_size is None.
- filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
- it must contain two integers, (filter_size_H, filter_size_W).
- Otherwise, the filter will be a square. None if use output size to
- calculate filter_size
- padding(int|tuple): The padding size. If padding is a tuple, it must
- contain two integers, (padding_H, padding_W). Otherwise, the
- padding_H = padding_W = padding.
- stride(int|tuple): The stride size. If stride is a tuple, it must
- contain two integers, (stride_H, stride_W). Otherwise, the
- stride_H = stride_W = stride.
- dilation(int|tuple): The dilation size. If dilation is a tuple, it must
- contain two integers, (dilation_H, dilation_W). Otherwise, the
- dilation_H = dilation_W = dilation.
- param_attr: Parameter Attribute.
- main_program(Program): the main program
- startup_program(Program): the startup program
+ input(Variable): The input image with [N, C, H, W] format.
+ num_filters(int): The number of filters. It is the same as the number
+ of output image channels.
+ output_size(int|tuple|None): The output image size. If output size is a
+ tuple, it must contain two integers, (image_H, image_W). This
+ parameter only works when filter_size is None.
+ filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
+ it must contain two integers, (filter_size_H, filter_size_W).
+ Otherwise, the filter will be a square. None if use output size to
+ calculate filter_size.
+ padding(int|tuple): The padding size. If padding is a tuple, it must
+ contain two integers, (padding_H, padding_W). Otherwise, the
+ padding_H = padding_W = padding. Default: padding = 0.
+ stride(int|tuple): The stride size. If stride is a tuple, it must
+ contain two integers, (stride_H, stride_W). Otherwise, the
+ stride_H = stride_W = stride. Default: stride = 1.
+ dilation(int|tuple): The dilation size. If dilation is a tuple, it must
+ contain two integers, (dilation_H, dilation_W). Otherwise, the
+ dilation_H = dilation_W = dilation. Default: dilation = 1.
+ param_attr(ParamAttr): The parameters to the Conv2d_transpose Layer. Default: None
 
  Returns:
- Variable: Output image.
+ Variable: The tensor variable storing the convolution transpose result.
+
+ Raises:
+ ValueError: If the shapes of input, filter_size, stride, padding and groups mismatch.
+
+ Examples:
+ .. code-block:: python
+
+ data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
+ conv2d_transpose = fluid.layers.conv2d_transpose(input=data, num_filters=2, filter_size=3)
  """
  helper = LayerHelper("conv2d_transpose", **locals())
  if not isinstance(input, Variable):