[Relay] Fix index order in conv2d computation for Arm CPU.
When dilation is greater than 1 in conv2d with NHWC
layout, the index order used to access the data array
in the convolution computation is incorrect.

'data_vec' is defined as

lambda n, oho, owo, kh, kw, ic, ohi, owi:

But accessed as

data_vec[n, oho, owo, kh, kw, ohi, owi, ic]

This patch fixes the index order and modifies the test
so that it can run on an AArch64 CPU.
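
To make the mismatch concrete, here is a minimal numpy sketch (shapes
and indices are illustrative, not from the patch): reading the packed
buffer with the trailing axes permuted from (ic, ohi, owi) to
(ohi, owi, ic) selects a different element whenever the extents differ,
so the convolution silently sums the wrong inputs.

    import numpy as np

    # Illustrative extents for the last five data_vec axes; the point is
    # only that ic, ohi and owi have different sizes.
    KH, KW, IC, OHI, OWI = 3, 3, 4, 2, 8
    data_vec = np.arange(KH * KW * IC * OHI * OWI).reshape(KH, KW, IC, OHI, OWI)

    kh, kw, ic, ohi, owi = 1, 2, 3, 1, 1
    correct = data_vec[kh, kw, ic, ohi, owi]  # definition order (post-patch)
    swapped = data_vec[kh, kw, ohi, owi, ic]  # pre-patch access order
    # The two reads hit different elements; for owi >= 2 the swapped
    # order would even index past the OHI axis.
    print(correct, swapped)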
Anastasia Stulova committed Jul 6, 2021
1 parent c586834 commit dc557ae
Showing 2 changed files with 8 additions and 13 deletions.
2 changes: 1 addition & 1 deletion python/tvm/topi/arm_cpu/conv2d_spatial_pack.py
@@ -344,7 +344,7 @@ def conv2d_spatial_pack_nhwc(cfg, data, kernel, strides, padding, dilation, out_
     conv = te.compute(
         ovshape,
         lambda n, oho, owo, oco, ohi, owi, oci: te.sum(
-            data_vec[n, oho, owo, kh, kw, ohi, owi, ic].astype(out_dtype)
+            data_vec[n, oho, owo, kh, kw, ic, ohi, owi].astype(out_dtype)
             * kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype),
             axis=[ic, kh, kw],
         ),
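
The one-line fix above only changes which element each reduction step
reads, so a direct way to validate it is against TOPI's numpy reference
with dilation greater than 1. A hedged sketch (shapes, stride and
padding are illustrative): the reference dilates the HWIO kernel first
and then runs the plain NHWC convolution, the same recipe the test's
get_ref_data() helper follows.

    import numpy as np
    import tvm.topi.testing

    # Illustrative NHWC input and HWIO kernel.
    a_np = np.random.uniform(size=(1, 16, 16, 8)).astype("float32")
    w_np = np.random.uniform(size=(3, 3, 8, 16)).astype("float32")

    # Expand the spatial kernel axes for dilation=2, then convolve.
    dw_np = tvm.topi.testing.dilate_python(w_np, (2, 2, 1, 1))
    b_np = tvm.topi.testing.conv2d_nhwc_python(a_np, dw_np, stride=1, padding="SAME")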
19 changes: 7 additions & 12 deletions tests/python/topi/python/test_topi_conv2d_nhwc.py
@@ -58,26 +58,21 @@ def get_ref_data():
 
     a_np, w_np, b_np = get_ref_data()
 
-    def check_device(device):
-        if not tvm.testing.device_enabled(device):
-            print("Skip because %s is not enabled" % device)
-            return
-        print("Running on target: %s" % device)
-        with tvm.target.Target(device):
-            fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv2d_nhwc_implement)
+    def check_device(target, dev):
+        print("Running on target: %s" % target)
+        with tvm.target.Target(target):
+            fcompute, fschedule = tvm.topi.testing.dispatch(target, _conv2d_nhwc_implement)
             B = fcompute(A, W, stride, padding, dilation, dtype)
             s = fschedule([B])
-        dev = tvm.device(device, 0)
         a = tvm.nd.array(a_np, dev)
         w = tvm.nd.array(w_np, dev)
         b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), dev)
-        func = tvm.build(s, [A, W, B], device)
+        func = tvm.build(s, [A, W, B], target)
         func(a, w, b)
         tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5)
 
-    for device in ["llvm", "cuda"]:
-        check_device(device)
-
+    for target, dev in tvm.testing.enabled_targets():
+        check_device(target, dev)
 
 @tvm.testing.uses_gpu
 def test_conv2d_nhwc():
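
The rewritten loop relies on tvm.testing.enabled_targets(), which
yields (target, device) pairs only for backends enabled in the current
build and environment; on an AArch64 host the "llvm" entry therefore
runs the Arm CPU schedule natively instead of being skipped by the old
hardcoded ["llvm", "cuda"] list. A minimal usage sketch:

    import tvm.testing

    # Prints, e.g., "llvm cpu(0)" on a CPU-only host, plus "cuda gpu(0)"
    # when a CUDA build and device are available.
    for target, dev in tvm.testing.enabled_targets():
        print(target, dev)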
