Skip to content

Commit

Permalink
[Relay][TOPI]Fix meaning of conv2d_transpose output_padding parameter (
Browse files Browse the repository at this point in the history
…#4318)

* Add output_padding to generic

* Add output_padding to the reference impl

* Add output_padding to arm_cpu

* Add output_padding to the test

* Add output_padding for cuda

* Add output_padding for x86

* Make use of the new output_padding argument in Relay

* Adjust conv2d_transpose Relay test

* Fix lint errors

* Fix the VTA declaration of conv2d_transpose

* Add support for output padding in conv2d_transpose

* Some output padding values will break the IR pass

* Fix new conv2d_transpose test

* Update tophub

* Fix conv1d output_padding too.

* Fix the conv1d_transpose reference function.

* Fix the cuda impl

* Fix the TOPI test for conv1d

* Update the versions in tophub.py

Co-authored-by: Thierry Moreau <tmoreau@octoml.ai>
  • Loading branch information
2 people authored and vinx13 committed Jan 11, 2020
1 parent 4073125 commit dcf7fbf
Show file tree
Hide file tree
Showing 16 changed files with 120 additions and 92 deletions.
6 changes: 3 additions & 3 deletions python/tvm/autotvm/tophub.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,16 @@

# the version of each package
PACKAGE_VERSION = {
'arm_cpu': "v0.04",
'arm_cpu': "v0.05",
'llvm': "v0.03",

'cuda': "v0.06",
'cuda': "v0.07",
'rocm': "v0.03",
'opencl': "v0.03",
'mali': "v0.05",
'intel_graphics': "v0.01",

'vta': "v0.06",
'vta': "v0.07",
}

logger = logging.getLogger('autotvm')
Expand Down
12 changes: 4 additions & 8 deletions python/tvm/relay/op/nn/_nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ def compute_conv2d_transpose(attrs, inputs, out_dtype, target):
padding = get_const_tuple(attrs.padding)
strides = get_const_tuple(attrs.strides)
dilation = get_const_tuple(attrs.dilation)
output_padding = get_const_tuple(attrs.output_padding)
groups = attrs.groups
layout = attrs.data_layout
out_dtype = attrs.out_dtype
Expand All @@ -312,10 +313,7 @@ def compute_conv2d_transpose(attrs, inputs, out_dtype, target):
assert dilation == (1, 1), "not support dilate now"
assert groups == 1, "only support groups == 1 for now"
out = topi.nn.conv2d_transpose_nchw(
inputs[0], inputs[1], strides, padding, out_dtype)
output_padding = get_const_tuple(attrs.output_padding)
out = topi.nn.pad(out,
[0, 0, 0, 0], [0, 0, output_padding[0], output_padding[1]])
inputs[0], inputs[1], strides, padding, out_dtype, output_padding)
return [out]


Expand Down Expand Up @@ -408,10 +406,8 @@ def compute_conv1d_transpose(attrs, inputs, out_dtype, target):
assert dilation == (1,), "conv1d_transpose dilation is not supported"
assert groups == 1, "conv1d_transpose groups == 1 only supported"
out = topi.nn.conv1d_transpose_ncw(
inputs[0], inputs[1], strides, padding, out_dtype)
output_padding = get_const_tuple(attrs.output_padding)
out = topi.nn.pad(out,
[0, 0, 0], [0, 0, output_padding[0]])
inputs[0], inputs[1], strides, padding, out_dtype,
get_const_tuple(attrs.output_padding))
return [out]


Expand Down
25 changes: 12 additions & 13 deletions tests/python/relay/test_op_level2.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,11 +475,8 @@ def test_conv2d_transpose_nchw_run():
dtype = "float32"
data = np.random.uniform(size=dshape).astype(dtype)
kernel = np.random.uniform(size=kshape).astype(dtype)
c_np = topi.testing.conv2d_transpose_nchw_python(
data, kernel, 2, 1)
d_np = np.zeros(shape=oshape)
d_np[:,:,0:c_np.shape[2],0:c_np.shape[3]] = c_np
ref_res = d_np
ref_res = topi.testing.conv2d_transpose_nchw_python(
data, kernel, 2, 1, (2, 2))

for target, ctx in ctx_list():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
Expand All @@ -504,9 +501,14 @@ def test_conv2d_transpose_nhwc_run():
data = np.random.uniform(size=dshape_nhwc).astype(dtype)
kernel = np.random.uniform(size=kshape_hwoi).astype(dtype)
# use true kshape layout here - HWOI
c_np = topi.testing.conv2d_transpose_nhwc_python(data, kernel, 'HWOI', 2, 1)
d_np = np.zeros(shape=oshape_nhwc)
d_np[:,0:c_np.shape[1],0:c_np.shape[2],:] = c_np

ref_res = topi.testing.conv2d_transpose_nhwc_python(data, kernel, 'HWOI',
2, 1, output_padding=(2, 2))

for target, ctx in ctx_list():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data, kernel)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)


def test_conv1d_transpose_ncw_run():
Expand All @@ -522,11 +524,8 @@ def test_conv1d_transpose_ncw_run():
dtype = "float32"
data = np.random.uniform(size=dshape).astype(dtype)
kernel = np.random.uniform(size=kshape).astype(dtype)
c_np = topi.testing.conv1d_transpose_ncw_python(
data, kernel, 2, 1)
d_np = np.zeros(shape=oshape)
d_np[:,:,0:c_np.shape[2]] = c_np
ref_res = d_np
ref_res = topi.testing.conv1d_transpose_ncw_python(
data, kernel, 2, 1, output_padding=(2,))

for target, ctx in ctx_list():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
Expand Down
21 changes: 14 additions & 7 deletions topi/python/topi/arm_cpu/conv2d_transpose.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
from .conv2d_spatial_pack import schedule_conv2d_spatial_pack_nchw

@autotvm.task.register_topi_compute(conv2d_transpose_nchw, "arm_cpu", "direct")
def conv2d_transpose_nchw_arm(cfg, Input, Filter, strides, padding, out_dtype):
def conv2d_transpose_nchw_arm(cfg, Input, Filter, strides, padding, out_dtype,
output_padding=(0, 0)):
"""Transposed 2D convolution nchw forward operator.
Parameters
Expand All @@ -47,27 +48,33 @@ def conv2d_transpose_nchw_arm(cfg, Input, Filter, strides, padding, out_dtype):
out_dtype: str
The output data type. This is used for mixed precision.
output_padding : tuple of int
Used to get the right output shape in gradients
Returns
-------
Output : tvm.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
return _decl_spatial_pack(cfg, Input, Filter, strides, padding, "NCHW", out_dtype, 2)
return _decl_spatial_pack(cfg, Input, Filter, strides, padding, "NCHW", out_dtype, 2,
output_padding)

def _decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype, num_tile):
def _decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype, num_tile,
output_padding):
assert layout == "NCHW", "Only support NCHW"
out_dtype = out_dtype or data.dtype

N, CI, IH, IW = get_const_tuple(data.shape)
_, CO, KH, KW = get_const_tuple(kernel.shape)
opad_h, opad_w = output_padding

pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(padding, (KH, KW))
bpad_top, bpad_bottom = KH - 1 - pad_top, KH - 1 - pad_bottom
bpad_left, bpad_right = KW - 1 - pad_left, KW - 1 - pad_right
bpad_top, bpad_bottom = KH - 1 - pad_top, KH - 1 - pad_bottom + opad_h
bpad_left, bpad_right = KW - 1 - pad_left, KW - 1 - pad_right + opad_w
HSTR, WSTR = strides if isinstance(strides, (tuple, list)) else (strides, strides)

OH = (IH - 1) * HSTR - pad_top - pad_bottom + KH
OW = (IW - 1) * WSTR - pad_left - pad_right + KW
OH = (IH - 1) * HSTR - pad_top - pad_bottom + KH + opad_h
OW = (IW - 1) * WSTR - pad_left - pad_right + KW + opad_w

dilated_input = dilate(data, [1, 1, HSTR, WSTR])
data_pad = pad(dilated_input, [0, 0, bpad_top, bpad_left], [0, 0, bpad_bottom, bpad_right])
Expand Down
7 changes: 4 additions & 3 deletions topi/python/topi/cuda/conv1d_transpose_ncw.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from ..util import get_const_tuple, traverse_inline

@autotvm.task.register_topi_compute(nn.conv1d_transpose_ncw, ['cuda', 'gpu'], "direct")
def conv1d_transpose_ncw_cuda(cfg, data, kernel, stride, padding, out_dtype):
def conv1d_transpose_ncw_cuda(cfg, data, kernel, stride, padding, out_dtype, output_padding=(0,)):
"""Transposed 1D convolution ncw forward operator.
Parameters
Expand Down Expand Up @@ -53,10 +53,11 @@ def conv1d_transpose_ncw_cuda(cfg, data, kernel, stride, padding, out_dtype):
cfg.stride = stride
batch, inp_channels, inp_width = get_const_tuple(data.shape)
_, out_channels, kernel_size = get_const_tuple(kernel.shape)
opad = output_padding[0]
pad_left, pad_right = nn.get_pad_tuple1d(padding, kernel_size)
out_width = (inp_width - 1) * stride + kernel_size - pad_left - pad_right
out_width = (inp_width - 1) * stride + kernel_size - pad_left - pad_right + opad
pad_left = kernel_size - 1 - pad_left
pad_right = kernel_size - 1 - pad_right
pad_right = kernel_size - 1 - pad_right + opad
dilated_width = stride * (inp_width - 1) + 1
data = tvm.compute(
(batch, inp_channels, pad_left + dilated_width + pad_right),
Expand Down
12 changes: 7 additions & 5 deletions topi/python/topi/cuda/conv2d_transpose_nchw.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@


@autotvm.task.register_topi_compute(nn.conv2d_transpose_nchw, ['cuda', 'gpu'], "direct")
def conv2d_transpose_nchw_cuda(cfg, Input, Filter, strides, padding, out_dtype):
def conv2d_transpose_nchw_cuda(cfg, Input, Filter, strides, padding, out_dtype,
output_padding=(0, 0)):
"""Transposed 2D convolution nchw forward operator.
Parameters
Expand All @@ -51,16 +52,17 @@ def conv2d_transpose_nchw_cuda(cfg, Input, Filter, strides, padding, out_dtype):
batch, in_c, in_h, in_w = get_const_tuple(Input.shape)
_, out_c, filter_h, filter_w = get_const_tuple(Filter.shape)
stride_h, stride_w = strides
opad_h, opad_w = output_padding

# attach stride info to config, this is used in schedule space definition
cfg.stride = strides

# padding stage
fpad_top, fpad_left, fpad_bottom, fpad_right = nn.get_pad_tuple(padding, (filter_h, filter_w))
bpad_top = filter_h - 1 - fpad_top
bpad_bottom = filter_h - 1 - fpad_bottom
bpad_bottom = filter_h - 1 - fpad_bottom + opad_h
bpad_left = filter_w - 1 - fpad_left
bpad_right = filter_w - 1 - fpad_right
bpad_right = filter_w - 1 - fpad_right + opad_w

# padding stage
FirstPad = nn.pad(Input,
Expand Down Expand Up @@ -95,8 +97,8 @@ def _dilate(*indices):
return data(*index_tuple)

# convolution stage
out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h
out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w
out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h + opad_h
out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w + opad_w
dc = tvm.reduce_axis((0, in_c), name='dc')
dh = tvm.reduce_axis((0, filter_h), name='dh')
dw = tvm.reduce_axis((0, filter_w), name='dw')
Expand Down
6 changes: 4 additions & 2 deletions topi/python/topi/nn/conv1d_transpose.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@


@tvm.target.generic_func
def conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype):
def conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype,
output_padding=(0,)):
"""Transposed 1D convolution ncw forward operator.
Parameters
Expand Down Expand Up @@ -56,11 +57,12 @@ def conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype):
stride = stride[0]
batch, channels_in, data_width = data.shape
_, channels_out, kernel_width = kernel.shape
opad = output_padding[0]
channels_out = simplify(channels_out)
data = dilate(data, [1, 1, stride], name='data_dilate')
pad_left, pad_right = get_pad_tuple1d(padding, (kernel_width,))
pad_left = kernel_width - 1 - pad_left
pad_right = kernel_width - 1 - pad_right
pad_right = kernel_width - 1 - pad_right + opad
data = pad(data, [0, 0, pad_left], [0, 0, pad_right], name='data_pad')

# transpose kernel, switch kernel layout to IOW
Expand Down
24 changes: 14 additions & 10 deletions topi/python/topi/nn/conv2d_transpose.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@


@tvm.target.generic_func
def conv2d_transpose_nchw(Input, Filter, strides, padding, out_dtype):
def conv2d_transpose_nchw(Input, Filter, strides, padding, out_dtype, output_padding=(0, 0)):
"""Transposed 2D convolution nchw forward operator.
Parameters
Expand All @@ -46,28 +46,33 @@ def conv2d_transpose_nchw(Input, Filter, strides, padding, out_dtype):
out_dtype : str
The output data type. This is used for mixed precision.
output_padding : tuple of ints
Used to get the right output shape for gradients
Returns
-------
Output : tvm.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
return declaration_conv2d_transpose_impl(Input, Filter, strides, padding, out_dtype)
return declaration_conv2d_transpose_impl(Input, Filter, strides, padding, out_dtype,
output_padding=output_padding)


def conv2d_transpose_nchw_preprocess(data, kernel, strides, padding, out_dtype):
def conv2d_transpose_nchw_preprocess(data, kernel, strides, padding, out_dtype, output_padding):
"""Preprocess data and kernel to make the compute pattern
of conv2d_transpose the same as conv2d"""
batch, in_c, in_h, in_w = data.shape
_, out_c, filter_h, filter_w = kernel.shape
stride_h, stride_w = strides
opad_h, opad_w = output_padding
# dilate data
data_dilate = dilate(data, [1, 1, stride_h, stride_w], name='data_dilate')
# pad data
fpad_top, fpad_left, fpad_bottom, fpad_right = get_pad_tuple(padding, (filter_h, filter_w))
bpad_top = filter_h - 1 - fpad_top
bpad_bottom = filter_h - 1 - fpad_bottom
bpad_bottom = filter_h - 1 - fpad_bottom + opad_h
bpad_left = filter_w - 1 - fpad_left
bpad_right = filter_w - 1 - fpad_right
bpad_right = filter_w - 1 - fpad_right + opad_w
data_pad = pad(data_dilate, \
[0, 0, bpad_top, bpad_left], \
[0, 0, bpad_bottom, bpad_right], \
Expand All @@ -79,18 +84,17 @@ def conv2d_transpose_nchw_preprocess(data, kernel, strides, padding, out_dtype):
return data_pad, kernel_transform


def declaration_conv2d_transpose_impl(data, kernel, strides, padding, out_dtype):
def declaration_conv2d_transpose_impl(data, kernel, strides, padding, out_dtype, output_padding):
"""Implementation of conv2d transpose"""
data_pad, kernel_transform = \
conv2d_transpose_nchw_preprocess(data, kernel, strides, padding, out_dtype)
conv2d_transpose_nchw_preprocess(data, kernel, strides, padding, out_dtype, output_padding)
batch, in_c, in_h, in_w = data_pad.shape
out_c, _, filter_h, filter_w = kernel_transform.shape
stride_h, stride_w = strides

# convolution stage
out_c = simplify(out_c)
out_h = simplify(in_h - filter_h + 1)
out_w = simplify(in_w - filter_w + 1)
out_h = simplify(in_h - filter_h + 1 + output_padding[0])
out_w = simplify(in_w - filter_w + 1 + output_padding[1])
dc = tvm.reduce_axis((0, in_c), name='dc')
dh = tvm.reduce_axis((0, filter_h), name='dh')
dw = tvm.reduce_axis((0, filter_w), name='dw')
Expand Down
7 changes: 4 additions & 3 deletions topi/python/topi/testing/conv1d_transpose_ncw_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import topi
from topi.nn.util import get_pad_tuple1d

def conv1d_transpose_ncw_python(a_np, w_np, stride, padding):
def conv1d_transpose_ncw_python(a_np, w_np, stride, padding, output_padding):
"""Transposed 1D convolution operator in NCW layout.
Parameters
Expand All @@ -47,6 +47,7 @@ def conv1d_transpose_ncw_python(a_np, w_np, stride, padding):
"""
batch, in_c, in_w = a_np.shape
_, out_c, filter_w = w_np.shape
opad = output_padding[0]
if isinstance(stride, int):
stride_w = stride
else:
Expand All @@ -56,11 +57,11 @@ def conv1d_transpose_ncw_python(a_np, w_np, stride, padding):
dilated_a_np = topi.testing.dilate_python(a_np, [1, 1, stride_w])
# padding stage
bpad_left = filter_w - 1 - fpad_left
bpad_right = filter_w - 1 - fpad_right
bpad_right = filter_w - 1 - fpad_right + opad
padded_a_np = np.zeros((batch, in_c, dilated_a_np.shape[2]+bpad_left+bpad_right))
padded_a_np[:, :, bpad_left:dilated_a_np.shape[2]+bpad_left] = dilated_a_np
# convolution stage
out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w
out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w + opad
b_np = np.zeros((batch, out_c, out_w))
for n in range(batch):
for f in range(out_c):
Expand Down
17 changes: 10 additions & 7 deletions topi/python/topi/testing/conv2d_transpose_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from topi.nn.util import get_pad_tuple


def conv2d_transpose_nchw_python(a_np, w_np, stride, padding):
def conv2d_transpose_nchw_python(a_np, w_np, stride, padding, output_padding=(0, 0)):
"""Transposed convolution operator in NCHW layout.
Parameters
Expand Down Expand Up @@ -50,21 +50,22 @@ def conv2d_transpose_nchw_python(a_np, w_np, stride, padding):
stride_h = stride_w = stride
else:
stride_h, stride_w = stride
opad_h, opad_w = output_padding
# dilate stage
dilated_a_np = topi.testing.dilate_python(a_np, [1, 1, stride_h, stride_w])
# padding stage
fpad_top, fpad_left, fpad_bottom, fpad_right = get_pad_tuple(padding, (filter_h, filter_w))
bpad_top = filter_h - 1 - fpad_top
bpad_bottom = filter_h - 1 - fpad_bottom
bpad_bottom = filter_h - 1 - fpad_bottom + opad_h
bpad_left = filter_w - 1 - fpad_left
bpad_right = filter_w - 1 - fpad_right
bpad_right = filter_w - 1 - fpad_right + opad_w
padded_a_np = np.zeros((batch, in_c, dilated_a_np.shape[2]+bpad_top+bpad_bottom, \
dilated_a_np.shape[3]+bpad_left+bpad_right))
padded_a_np[:, :, bpad_top:dilated_a_np.shape[2]+bpad_top, \
bpad_left:dilated_a_np.shape[3]+bpad_left] = dilated_a_np
# convolution stage
out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h
out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w
out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h + opad_h
out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w + opad_w
b_np = np.zeros((batch, out_c, out_h, out_w))
for n in range(batch):
for f in range(out_c):
Expand All @@ -75,7 +76,8 @@ def conv2d_transpose_nchw_python(a_np, w_np, stride, padding):
return b_np


def conv2d_transpose_nhwc_python(a_nhwc, weight, weight_format, stride, padding):
def conv2d_transpose_nhwc_python(a_nhwc, weight, weight_format, stride, padding,
output_padding=(0, 0)):
"""Transposed convolution operator in NHWC layout.
Parameters
Expand Down Expand Up @@ -117,6 +119,7 @@ def conv2d_transpose_nhwc_python(a_nhwc, weight, weight_format, stride, padding)
else:
raise ValueError('Valid weight_formats are HWIO, HWOI, OIHW or IOHW')

res_nchw = conv2d_transpose_nchw_python(a_nchw, w_iohw, stride, padding)
res_nchw = conv2d_transpose_nchw_python(a_nchw, w_iohw, stride, padding,
output_padding=output_padding)
res_nhwc = np.transpose(res_nchw, (0, 2, 3, 1))
return res_nhwc
Loading

0 comments on commit dcf7fbf

Please sign in to comment.