From ef19b35e577b52c11de3dd2cb47a7f927917791a Mon Sep 17 00:00:00 2001
From: Giuseppe Rossini
Date: Fri, 10 Jul 2020 14:54:30 +0100
Subject: [PATCH] Fix conv2d_gemm after target structure update

After the target structure changed in this RFC:
https://discuss.tvm.ai/t/rfc-tvm-target-specification/6844/42
the conv2d optimizations were broken for the following reasons:

- the "target" attribute is now called "mtriple" (this changes how we
  test whether the architecture is AArch64)
- when we invoke "clang.create_llvm" we still need to specify the
  "--target" option (set to aarch64-linux-gnu)

This submission fixes both issues.

Change-Id: I04c597b91ca5800ddf4471255e2a358c60bc048e
---
 python/tvm/relay/qnn/op/legalizations.py   |  2 +-
 topi/python/topi/arm_cpu/conv2d_gemm.py    |  2 +-
 topi/python/topi/arm_cpu/tensor_intrin.py  |  2 +-
 topi/tests/python/test_topi_conv2d_int8.py | 64 ++++++++++++++++++++++
 4 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/python/tvm/relay/qnn/op/legalizations.py b/python/tvm/relay/qnn/op/legalizations.py
index 4d515dea329f..af5072ef74cd 100644
--- a/python/tvm/relay/qnn/op/legalizations.py
+++ b/python/tvm/relay/qnn/op/legalizations.py
@@ -239,7 +239,7 @@ def is_fast_int8_on_arm():
 def is_aarch64_arm():
     """ Checks whether we are compiling for an AArch64 target. """
     target = tvm.target.Target.current(allow_none=False)
-    return 'aarch64' in target.attrs.get("target", "")
+    return 'aarch64' in target.attrs.get("mtriple", "")
 
 ########################
 # ARM CPU legalizations.
diff --git a/topi/python/topi/arm_cpu/conv2d_gemm.py b/topi/python/topi/arm_cpu/conv2d_gemm.py
index 68161c32a0fa..63d96bb44d92 100644
--- a/topi/python/topi/arm_cpu/conv2d_gemm.py
+++ b/topi/python/topi/arm_cpu/conv2d_gemm.py
@@ -27,7 +27,7 @@ def is_aarch64_arm():
     """ Checks whether we are compiling for an AArch64 target.
""" target = tvm.target.Target.current(allow_none=False) - return 'aarch64' in target.attrs.get("target", "") + return 'aarch64' in target.attrs.get("mtriple", "") # Compute function diff --git a/topi/python/topi/arm_cpu/tensor_intrin.py b/topi/python/topi/arm_cpu/tensor_intrin.py index d8d9481c2a32..dfa2f05e7960 100644 --- a/topi/python/topi/arm_cpu/tensor_intrin.py +++ b/topi/python/topi/arm_cpu/tensor_intrin.py @@ -267,7 +267,7 @@ def gemv_quantized_impl(M, N, data_type='uint8'): ll_path = temp.relpath("temp.ll") # Create LLVM ir from c source code ll_code = clang.create_llvm(cc_code, - options=["-mtriple=aarch64-linux-gnu -mattr=+neon"], + options=["--target=aarch64-linux-gnu -mattr=+neon"], output=ll_path) return ll_code diff --git a/topi/tests/python/test_topi_conv2d_int8.py b/topi/tests/python/test_topi_conv2d_int8.py index edf4267ddaee..5659147f8c41 100644 --- a/topi/tests/python/test_topi_conv2d_int8.py +++ b/topi/tests/python/test_topi_conv2d_int8.py @@ -26,9 +26,70 @@ from tvm.contrib.pickle_memoize import memoize from topi.nn.util import get_pad_tuple from topi.util import get_const_tuple +from topi.arm_cpu.conv2d_gemm import is_aarch64_arm from common import get_all_backend, Int8Fallback +def compile_conv2d_NHWC_gemm_int8_arm(batch, in_channel, in_size, num_filter, kernel, stride, padding, + dilation=1, add_bias=False, add_relu=False): + pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(padding, (kernel, kernel)) + padding_sum = pad_top + pad_left + pad_bottom + pad_right + print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d)" % (batch, in_channel, in_size, num_filter, + kernel, stride, padding_sum, dilation)) + + in_height = in_width = in_size + A = te.placeholder((batch, in_height, in_width, in_channel), name='A', dtype='int8') + W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W', dtype='int8') + bias = te.placeholder((num_filter,), name='bias', dtype='int8') + dtype = 'int32' + device = "llvm --device arm_cpu --mtriple aarch64-linux-gnu" + + ctx = tvm.context(device, 0) + if not ctx.exist: + print("Skip because %s is not enabled" % device) + return + print("Compiling on arm AArch64 target: %s" % device) + with tvm.target.create(device): + assert is_aarch64_arm(), "AArch64 target not recognized" + + C = topi.arm_cpu.compute_conv2d_NHWC_quantized(A, W, (stride, stride), padding, + (dilation, dilation), dtype) + if add_bias: + C = topi.add(C, bias) + if add_relu: + C = topi.nn.relu(C) + s = topi.arm_cpu.schedule_conv2d_NHWC_quantized([C]) + + if add_bias: + tvm.build(s, [A, W, bias, C], device, + name="relu_%d_%d_%d_%d_%d_%d_%d_%d" % (batch, + in_channel, + in_size, + num_filter, + kernel, + stride, + padding_sum, + dilation)) + func = tvm.build(s, [A, W, bias, C], device, + name="relu_%d_%d_%d_%d_%d_%d_%d_%d" % (batch, + in_channel, + in_size, + num_filter, + kernel, + stride, + padding_sum, + dilation)) + else: + func = tvm.build(s, [A, W, C], device, + name="relu_%d_%d_%d_%d_%d_%d_%d_%d" % (batch, + in_channel, + in_size, + num_filter, + kernel, + stride, + padding_sum, + dilation)) + def verify_conv2d_NHWC_gemm_int8(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1, add_bias=False, add_relu=False): pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(padding, (kernel, kernel)) @@ -409,6 +470,9 @@ def test_conv2d_nhwc(): verify_conv2d_NHWC_gemm_int8(1, 2048, 8, 448, 1, 1, 'SAME', add_bias=True, add_relu=True) verify_conv2d_NHWC_gemm_int8(1, 2048, 8, 192, 1, 1, 'SAME', add_bias=True) + # Let's also verify that it compiles 
+    compile_conv2d_NHWC_gemm_int8_arm(1, 3, 299, 32, 3, 2, 'SAME')
+
 
 
 if __name__ == "__main__":
     test_conv2d_nchw()
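
For reference, the mtriple-based check that this patch restores can be exercised on its own. The following sketch is not part of the diff above; it only uses calls that already appear in the patch (tvm.target.create, tvm.target.Target.current and the "mtriple" attribute), the target string mirrors the one in compile_conv2d_NHWC_gemm_int8_arm, and the local variable name is illustrative.

    # Minimal sketch: under the new target specification the triple is exposed
    # as the "mtriple" attribute, so an AArch64 target is detected by
    # inspecting that attribute.
    import tvm

    # Same target string as the compile-only test added above.
    aarch64_device = "llvm --device arm_cpu --mtriple aarch64-linux-gnu"

    with tvm.target.create(aarch64_device):
        target = tvm.target.Target.current(allow_none=False)
        # Before the target refactor this attribute was called "target".
        assert 'aarch64' in target.attrs.get("mtriple", "")

The same attribute lookup is what is_aarch64_arm() now performs in both legalizations.py and conv2d_gemm.py.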