From 252c032edbc47bfbd04b5388c9d19895ea7f60ef Mon Sep 17 00:00:00 2001
From: Hua Jiang
Date: Sat, 14 Aug 2021 17:48:50 -0700
Subject: [PATCH] [VTA] Make vta graph_pack compatible with latest TVM, and
 bring back object detection tutorials. (#8731)

* [VTA] Make vta graph_pack compatible with latest TVM, and bring back
  object detection tutorials.

* remove deploy_detection.py.

* move out deploy_detection.py from legacy folder.

* fix build error.
---
 vta/python/vta/top/graphpack.py               | 33 +++++++++++++++----
 .../frontend/{legacy => }/deploy_detection.py |  7 ++--
 2 files changed, 30 insertions(+), 10 deletions(-)
 rename vta/tutorials/frontend/{legacy => }/deploy_detection.py (99%)

diff --git a/vta/python/vta/top/graphpack.py b/vta/python/vta/top/graphpack.py
index a982b88b75e8..f15e4922b4a8 100644
--- a/vta/python/vta/top/graphpack.py
+++ b/vta/python/vta/top/graphpack.py
@@ -56,13 +56,24 @@ def _pack_batch_channel(data, dshape, bfactor, cfactor):
     return data
 
 
-def _unpack_batch_channel(data, old_shape):
+def _unpack_batch_channel(data, old_shape, unpack_transpose=False):
     """Unpack the data channel dimension."""
-    data = op.transpose(data, axes=(0, 4, 1, 5, 2, 3))
+    if unpack_transpose:
+        data = op.transpose(data, axes=(0, 4, 1, 5, 2, 3))
     data = op.reshape(data, newshape=old_shape)
     return data
 
 
+def _channel_const_match(channel_length, cfactor_out):
+    """Round the channel constant up if the value is not divisible by cfactor_out."""
+    diff = int(channel_length) % cfactor_out
+    if diff != 0:
+        diff = cfactor_out - diff
+        channel_length = channel_length + diff
+
+    return diff, channel_length
+
+
 def _const_shape_match(data, dshape, cfactor_out):
     """Pad the constant if the shape[0] not divisible by cfactor_out."""
     assert len(dshape) == 3
@@ -299,6 +310,7 @@ def __init__(self, bfactor, cfactor, weight_bits):
         self.upsampling = op.op.get("nn.upsampling")
         self.reshape = op.op.get("reshape")
         self.number_of_conv2d = 0
+        self.unpack_transpose = True
         super().__init__()
 
     def visit_call(self, call):
@@ -319,7 +331,7 @@ def visit_call(self, call):
                 self.start_pack = False
                 data = args[0]
                 data_shape = _get_tensor_shape(call.args[0])
-                return _unpack_batch_channel(data, data_shape)
+                return _unpack_batch_channel(data, data_shape, self.unpack_transpose)
         if self.start_pack:
             # Operator cases
             if call.op == self.conv2d and odtype == "int32":
@@ -429,12 +441,12 @@ def visit_call(self, call):
                 if len(pad_width) == 6:
                     pass
                 elif len(pad_width) == 4:
-                    (data,) = args
+                    (data, pad_value) = args
                     new_pad_width = []
                     new_pad_width.extend(pad_width)
                     for _ in range(2):
                         new_pad_width.append([0, 0])
-                    return op.nn.pad(data, pad_value=call.attrs.pad_value, pad_width=new_pad_width)
+                    return op.nn.pad(data, pad_value=pad_value, pad_width=new_pad_width)
             elif call.op == self.upsampling:
                 (data,) = args
                 scale_h = call.attrs.scale_h
@@ -445,8 +457,17 @@ def visit_call(self, call):
                 return op.nn.upsampling(data, scale_h, scale_w, data_layout, method, align_corners)
             elif call.op == self.reshape and len(input_types[0].shape) == 4:
                 (data,) = args
+                self.unpack_transpose = False
                 data = op.transpose(data, axes=(0, 4, 1, 5, 2, 3))
-                return op.reshape(data, [int(x) for x in input_types[0].shape])
+                new_shape = [int(x) for x in input_types[0].shape]
+                # Check if the reshape matches the shape after padding
+                pad, new_shape[1] = _channel_const_match(new_shape[1], self.cfactor)
+                data = op.reshape(data, new_shape)
+                # remove pad data
+                if pad != 0:
+                    new_pad_width = [[0, 0], [0, -pad], [0, 0], [0, 0]]
+                    data = op.nn.pad(data, pad_width=new_pad_width)
+                return data
 
         return relay.Call(self.visit(call.op), args, call.attrs)
 
diff --git a/vta/tutorials/frontend/legacy/deploy_detection.py b/vta/tutorials/frontend/deploy_detection.py
similarity index 99%
rename from vta/tutorials/frontend/legacy/deploy_detection.py
rename to vta/tutorials/frontend/deploy_detection.py
index 1d78786848e7..771801851a48 100644
--- a/vta/tutorials/frontend/legacy/deploy_detection.py
+++ b/vta/tutorials/frontend/deploy_detection.py
@@ -177,9 +177,9 @@
 # Get execution context from remote
 ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)
 
-####################################
+#####################################
 # Build the inference graph executor.
-# ----------------------------------
+# -----------------------------------
 # Using Darknet library load downloaded vision model and compile with Relay.
 # The compilation steps are:
 #
@@ -191,7 +191,6 @@
 # 5. Perform relay build to object file.
 # 6. Load the object file onto remote (FPGA device).
 # 7. Generate graph executor, `m`.
-#
 
 # Load pre-configured AutoTVM schedules
 with autotvm.tophub.context(target):
@@ -212,7 +211,7 @@
 # Note: We set opt_level to 3 in order to fold batch norm
 with tvm.transform.PassContext(opt_level=3):
     with relay.quantize.qconfig(
-        global_scale=33.0,
+        global_scale=23.0,
        skip_conv_layers=[0],
         store_lowbit_output=True,
         round_for_shift=True,
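
A minimal standalone sketch (illustrative only, not part of the commit) of what the new _channel_const_match helper computes and how the graph_pack change uses its result; the block factor 16 and the channel count 255 below are assumed example values, not taken from the patch:

# Sketch: round a channel count up to a multiple of cfactor_out,
# mirroring the helper added to graphpack.py above.
def _channel_const_match(channel_length, cfactor_out):
    diff = int(channel_length) % cfactor_out
    if diff != 0:
        diff = cfactor_out - diff
        channel_length = channel_length + diff
    return diff, channel_length

# Example with an assumed VTA channel block factor of 16:
pad, new_len = _channel_const_match(255, 16)
print(pad, new_len)  # -> 1 256
# graph_pack reshapes to the rounded length (256) and, when pad != 0, later
# strips the extra channels with a negative pad width: [[0, 0], [0, -pad], [0, 0], [0, 0]].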