diff --git a/topi/python/topi/cuda/conv2d.py b/topi/python/topi/cuda/conv2d.py
index 3117a298830f..21f55643b23b 100644
--- a/topi/python/topi/cuda/conv2d.py
+++ b/topi/python/topi/cuda/conv2d.py
@@ -122,6 +122,8 @@ def conv2d_cuda(cfg, data, kernel, strides, padding, dilation, layout='NCHW', ou
         return nn.conv2d_nchw(data, kernel, strides, padding, dilation, out_dtype)
     if layout == 'HWCN':
         return nn.conv2d_hwcn(data, kernel, strides, padding, dilation, out_dtype)
+    if layout == 'NHWC':
+        return nn.conv2d_nhwc(data, kernel, strides, padding, dilation, out_dtype)
     raise ValueError("not support this layout {} yet".format(layout))
 
 
@@ -161,3 +163,37 @@ def _callback(op):
 
     traverse_inline(s, outs[0].op, _callback)
     return s
+
+
+@autotvm.register_topi_schedule(generic.schedule_conv2d_nhwc, ["cuda", "gpu"],
+                                ["direct"])
+def schedule_conv2d_nhwc_cuda(cfg, outs):
+    """TOPI schedule for CUDA conv2d_nhwc
+
+    Parameters
+    ----------
+    cfg: ConfigEntity
+        The config for this template
+
+    outs: Array of Tensor
+        The computation graph description of conv2d
+        in the format of an array of tensors.
+
+    Returns
+    -------
+    s: Schedule
+        The computation schedule for conv2d.
+    """
+    target = tvm.target.current_target()
+    if 'cudnn' in target.libs:
+        return generic.schedule_extern(outs)
+
+    outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
+    s = tvm.create_schedule([x.op for x in outs])
+
+    def _callback(op):
+        if op.tag == 'conv2d_nhwc':
+            schedule_direct_cuda(cfg, s, op.output(0))
+
+    traverse_inline(s, outs[0].op, _callback)
+    return s
diff --git a/topi/tests/python/test_topi_conv2d_nhwc.py b/topi/tests/python/test_topi_conv2d_nhwc.py
index 8c6e0090640c..2a44d60487f8 100644
--- a/topi/tests/python/test_topi_conv2d_nhwc.py
+++ b/topi/tests/python/test_topi_conv2d_nhwc.py
@@ -29,7 +29,6 @@ def verify_conv2d_nhwc(batch, in_channel, in_size, num_filter, kernel, stride, p
 
     A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A')
     W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W')
-    B = topi.nn.conv2d_nhwc(A, W, stride, padding, dilation)
 
     a_shape = get_const_tuple(A.shape)
     w_shape = get_const_tuple(W.shape)
@@ -50,6 +49,8 @@ def check_device(device):
             return
         print("Running on target: %s" % device)
        with tvm.target.create(device):
+            B = topi.nn.conv2d(A, W, (stride, stride), padding,
+                               (dilation, dilation), layout='NHWC', out_dtype=dtype)
             s = topi.generic.schedule_conv2d_nhwc([B])
         ctx = tvm.context(device, 0)
         a = tvm.nd.array(a_np, ctx)
@@ -59,7 +60,7 @@ def check_device(device):
         func(a, w, b)
         tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
 
-    for device in ['llvm']:
+    for device in ['llvm', 'cuda']:
         check_device(device)
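
For anyone who wants to exercise the new path outside the test suite, here is a minimal end-to-end sketch mirroring the updated test. The shapes, stride/padding values, and the `func` name are illustrative assumptions, not taken from this PR; it presumes the same `tvm`/`topi` APIs the diff itself uses:

```python
import numpy as np
import tvm
import topi

# Illustrative NHWC input (batch, height, width, channel) and HWIO kernel
# (kernel_h, kernel_w, in_channel, num_filter) -- shapes are assumptions.
A = tvm.placeholder((1, 56, 56, 64), name='A')
W = tvm.placeholder((3, 3, 64, 128), name='W')

with tvm.target.create('cuda'):
    # The layout='NHWC' branch added above dispatches to nn.conv2d_nhwc.
    B = topi.nn.conv2d(A, W, (1, 1), 'SAME', (1, 1),
                       layout='NHWC', out_dtype='float32')
    # Resolves to the schedule_conv2d_nhwc_cuda registered in this patch.
    s = topi.generic.schedule_conv2d_nhwc([B])

ctx = tvm.context('cuda', 0)
a = tvm.nd.array(np.random.uniform(size=(1, 56, 56, 64)).astype('float32'), ctx)
w = tvm.nd.array(np.random.uniform(size=(3, 3, 64, 128)).astype('float32'), ctx)
b = tvm.nd.array(np.zeros((1, 56, 56, 128), dtype='float32'), ctx)
func = tvm.build(s, [A, W, B], 'cuda')
func(a, w, b)
```

With stride 1 and 'SAME' padding the output keeps the 56x56 spatial extent, so `b` is (1, 56, 56, 128). Note that on a target built with `-libs=cudnn` the new schedule short-circuits to `generic.schedule_extern` instead of the direct CUDA template, per the `'cudnn' in target.libs` check in the diff.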