Skip to content

Commit

Permalink
Add CUDA conv2d for NHWC layout (apache#4737)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexgl-github authored and zhiics committed Mar 2, 2020
1 parent 7bb2798 commit b0b067d
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 2 deletions.
36 changes: 36 additions & 0 deletions topi/python/topi/cuda/conv2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ def conv2d_cuda(cfg, data, kernel, strides, padding, dilation, layout='NCHW', ou
return nn.conv2d_nchw(data, kernel, strides, padding, dilation, out_dtype)
if layout == 'HWCN':
return nn.conv2d_hwcn(data, kernel, strides, padding, dilation, out_dtype)
if layout == 'NHWC':
return nn.conv2d_nhwc(data, kernel, strides, padding, dilation, out_dtype)
raise ValueError("not support this layout {} yet".format(layout))


Expand Down Expand Up @@ -161,3 +163,37 @@ def _callback(op):

traverse_inline(s, outs[0].op, _callback)
return s


@autotvm.register_topi_schedule(generic.schedule_conv2d_nhwc, ["cuda", "gpu"],
                                ["direct"])
def schedule_conv2d_nhwc_cuda(cfg, outs):
    """Create the CUDA schedule for a conv2d computation in NHWC layout.

    When the current target lists 'cudnn' among its libs, scheduling is
    handed to ``generic.schedule_extern``. Otherwise a fresh schedule is
    created and every op tagged 'conv2d_nhwc' that ``traverse_inline``
    visits is scheduled with ``schedule_direct_cuda``.

    Parameters
    ----------
    cfg : ConfigEntity
        The config for this template.

    outs : Array of Tensor
        The computation graph description of conv2d in the format of an
        array of tensors. A single Tensor is also accepted and is wrapped
        in a list.

    Returns
    -------
    s : Schedule
        The computation schedule for conv2d.
    """
    # The cudnn path is decided before `outs` is normalized, so the extern
    # scheduler receives the argument exactly as the caller passed it.
    if 'cudnn' in tvm.target.current_target().libs:
        return generic.schedule_extern(outs)

    if isinstance(outs, tvm.tensor.Tensor):
        outs = [outs]
    sched = tvm.create_schedule([out.op for out in outs])

    def _schedule_conv(op):
        # Only the conv2d_nhwc op gets the direct CUDA schedule here.
        if op.tag != 'conv2d_nhwc':
            return
        schedule_direct_cuda(cfg, sched, op.output(0))

    traverse_inline(sched, outs[0].op, _schedule_conv)
    return sched
5 changes: 3 additions & 2 deletions topi/tests/python/test_topi_conv2d_nhwc.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def verify_conv2d_nhwc(batch, in_channel, in_size, num_filter, kernel, stride, p

A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A')
W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W')
B = topi.nn.conv2d_nhwc(A, W, stride, padding, dilation)

a_shape = get_const_tuple(A.shape)
w_shape = get_const_tuple(W.shape)
Expand All @@ -50,6 +49,8 @@ def check_device(device):
return
print("Running on target: %s" % device)
with tvm.target.create(device):
B = topi.nn.conv2d(A, W, (stride, stride), padding,
(dilation, dilation), layout='NHWC', out_dtype=dtype)
s = topi.generic.schedule_conv2d_nhwc([B])
ctx = tvm.context(device, 0)
a = tvm.nd.array(a_np, ctx)
Expand All @@ -59,7 +60,7 @@ def check_device(device):
func(a, w, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

for device in ['llvm']:
for device in ['llvm', 'cuda']:
check_device(device)


Expand Down

0 comments on commit b0b067d

Please sign in to comment.