[Relay] Fix index order in conv2d computation for Arm CPU.
When dilation is greater than 1 in conv2d with NHWC
layout, the index order used to access the data array
in the convolution computation is incorrect.

'data_vec' is defined as

lambda n, oho, owo, kh, kw, ic, ohi, owi:

But accessed as

data_vec[n, oho, owo, kh, kw, ohi, owi, ic]

This patch fixes the index order and modifies the test
so that it can run on an AArch64 CPU.
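
To make the mismatch concrete, here is a minimal numpy sketch (shapes
and indices are illustrative, not from the patch): reading the packed
buffer with the trailing axes permuted from (ic, ohi, owi) to
(ohi, owi, ic) selects a different element whenever the extents differ,
so the convolution silently sums the wrong inputs.

    import numpy as np

    # Illustrative extents for the last five data_vec axes; the point is
    # only that ic, ohi and owi have different sizes.
    KH, KW, IC, OHI, OWI = 3, 3, 4, 2, 8
    data_vec = np.arange(KH * KW * IC * OHI * OWI).reshape(KH, KW, IC, OHI, OWI)

    kh, kw, ic, ohi, owi = 1, 2, 3, 1, 1
    correct = data_vec[kh, kw, ic, ohi, owi]  # definition order (post-patch)
    swapped = data_vec[kh, kw, ohi, owi, ic]  # pre-patch access order
    # The two reads hit different elements; for owi >= 2 the swapped
    # order would even index past the OHI axis.
    print(correct, swapped)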
Anastasia Stulova committed Jul 6, 2021
1 parent c586834 commit dc557ae
Showing 2 changed files with 8 additions and 13 deletions.
2 changes: 1 addition & 1 deletion python/tvm/topi/arm_cpu/conv2d_spatial_pack.py
@@ -344,7 +344,7 @@ def conv2d_spatial_pack_nhwc(cfg, data, kernel, strides, padding, dilation, out_
     conv = te.compute(
         ovshape,
         lambda n, oho, owo, oco, ohi, owi, oci: te.sum(
-            data_vec[n, oho, owo, kh, kw, ohi, owi, ic].astype(out_dtype)
+            data_vec[n, oho, owo, kh, kw, ic, ohi, owi].astype(out_dtype)
             * kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype),
             axis=[ic, kh, kw],
         ),
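
The one-line fix above only changes which element each reduction step
reads, so a direct way to validate it is against TOPI's numpy reference
with dilation greater than 1. A hedged sketch (shapes, stride and
padding are illustrative): the reference dilates the HWIO kernel first
and then runs the plain NHWC convolution, the same recipe the test's
get_ref_data() helper follows.

    import numpy as np
    import tvm.topi.testing

    # Illustrative NHWC input and HWIO kernel.
    a_np = np.random.uniform(size=(1, 16, 16, 8)).astype("float32")
    w_np = np.random.uniform(size=(3, 3, 8, 16)).astype("float32")

    # Expand the spatial kernel axes for dilation=2, then convolve.
    dw_np = tvm.topi.testing.dilate_python(w_np, (2, 2, 1, 1))
    b_np = tvm.topi.testing.conv2d_nhwc_python(a_np, dw_np, stride=1, padding="SAME")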
19 changes: 7 additions & 12 deletions tests/python/topi/python/test_topi_conv2d_nhwc.py
@@ -58,26 +58,21 @@ def get_ref_data():
 
     a_np, w_np, b_np = get_ref_data()
 
-    def check_device(device):
-        if not tvm.testing.device_enabled(device):
-            print("Skip because %s is not enabled" % device)
-            return
-        print("Running on target: %s" % device)
-        with tvm.target.Target(device):
-            fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv2d_nhwc_implement)
+    def check_device(target, dev):
+        print("Running on target: %s" % target)
+        with tvm.target.Target(target):
+            fcompute, fschedule = tvm.topi.testing.dispatch(target, _conv2d_nhwc_implement)
             B = fcompute(A, W, stride, padding, dilation, dtype)
             s = fschedule([B])
-        dev = tvm.device(device, 0)
         a = tvm.nd.array(a_np, dev)
         w = tvm.nd.array(w_np, dev)
         b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), dev)
-        func = tvm.build(s, [A, W, B], device)
+        func = tvm.build(s, [A, W, B], target)
         func(a, w, b)
         tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5)
 
-    for device in ["llvm", "cuda"]:
-        check_device(device)
-
+    for target, dev in tvm.testing.enabled_targets():
+        check_device(target, dev)
 
 @tvm.testing.uses_gpu
 def test_conv2d_nhwc():
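
The rewritten loop relies on tvm.testing.enabled_targets(), which
yields (target, device) pairs only for backends enabled in the current
build and environment; on an AArch64 host the "llvm" entry therefore
runs the Arm CPU schedule natively instead of being skipped by the old
hardcoded ["llvm", "cuda"] list. A minimal usage sketch:

    import tvm.testing

    # Prints, e.g., "llvm cpu(0)" on a CPU-only host, plus "cuda gpu(0)"
    # when a CUDA build and device are available.
    for target, dev in tvm.testing.enabled_targets():
        print(target, dev)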
