From 252c032edbc47bfbd04b5388c9d19895ea7f60ef Mon Sep 17 00:00:00 2001
From: Hua Jiang
Date: Sat, 14 Aug 2021 17:48:50 -0700
Subject: [PATCH] [VTA] Make vta graph_pack compatible with latest TVM, and
 bring back object detection tutorials. (#8731)

* [VTA] Make vta graph_pack compatible with latest TVM, and bring back
  object detection tutorials.

* remove deploy_detection.py.

* move out deploy_detection.py from legacy folder.

* fix build error.
---
 vta/python/vta/top/graphpack.py               | 33 +++++++++++++++----
 .../frontend/{legacy => }/deploy_detection.py |  7 ++--
 2 files changed, 30 insertions(+), 10 deletions(-)
 rename vta/tutorials/frontend/{legacy => }/deploy_detection.py (99%)

diff --git a/vta/python/vta/top/graphpack.py b/vta/python/vta/top/graphpack.py
index a982b88b75e8..f15e4922b4a8 100644
--- a/vta/python/vta/top/graphpack.py
+++ b/vta/python/vta/top/graphpack.py
@@ -56,13 +56,24 @@ def _pack_batch_channel(data, dshape, bfactor, cfactor):
     return data
 
 
-def _unpack_batch_channel(data, old_shape):
+def _unpack_batch_channel(data, old_shape, unpack_transpose=False):
     """Unpack the data channel dimension."""
-    data = op.transpose(data, axes=(0, 4, 1, 5, 2, 3))
+    if unpack_transpose:
+        data = op.transpose(data, axes=(0, 4, 1, 5, 2, 3))
     data = op.reshape(data, newshape=old_shape)
     return data
 
 
+def _channel_const_match(channel_length, cfactor_out):
+    """Round the channel constant up if the value is not divisible by cfactor_out."""
+    diff = int(channel_length) % cfactor_out
+    if diff != 0:
+        diff = cfactor_out - diff
+        channel_length = channel_length + diff
+
+    return diff, channel_length
+
+
 def _const_shape_match(data, dshape, cfactor_out):
     """Pad the constant if the shape[0] not divisible by cfactor_out."""
     assert len(dshape) == 3
@@ -299,6 +310,7 @@ def __init__(self, bfactor, cfactor, weight_bits):
         self.upsampling = op.op.get("nn.upsampling")
         self.reshape = op.op.get("reshape")
         self.number_of_conv2d = 0
+        self.unpack_transpose = True
         super().__init__()
 
     def visit_call(self, call):
@@ -319,7 +331,7 @@ def visit_call(self, call):
                 self.start_pack = False
                 data = args[0]
                 data_shape = _get_tensor_shape(call.args[0])
-                return _unpack_batch_channel(data, data_shape)
+                return _unpack_batch_channel(data, data_shape, self.unpack_transpose)
         if self.start_pack:
             # Operator cases
             if call.op == self.conv2d and odtype == "int32":
@@ -429,12 +441,12 @@ def visit_call(self, call):
                 if len(pad_width) == 6:
                     pass
                 elif len(pad_width) == 4:
-                    (data,) = args
+                    (data, pad_value) = args
                     new_pad_width = []
                     new_pad_width.extend(pad_width)
                     for _ in range(2):
                         new_pad_width.append([0, 0])
-                    return op.nn.pad(data, pad_value=call.attrs.pad_value, pad_width=new_pad_width)
+                    return op.nn.pad(data, pad_value=pad_value, pad_width=new_pad_width)
             elif call.op == self.upsampling:
                 (data,) = args
                 scale_h = call.attrs.scale_h
@@ -445,8 +457,17 @@ def visit_call(self, call):
                 return op.nn.upsampling(data, scale_h, scale_w, data_layout, method, align_corners)
             elif call.op == self.reshape and len(input_types[0].shape) == 4:
                 (data,) = args
+                self.unpack_transpose = False
                 data = op.transpose(data, axes=(0, 4, 1, 5, 2, 3))
-                return op.reshape(data, [int(x) for x in input_types[0].shape])
+                new_shape = [int(x) for x in input_types[0].shape]
+                # Check if the reshape matches the shape after padding
+                pad, new_shape[1] = _channel_const_match(new_shape[1], self.cfactor)
+                data = op.reshape(data, new_shape)
+                # remove pad data
+                if pad != 0:
+                    new_pad_width = [[0, 0], [0, -pad], [0, 0], [0, 0]]
+                    data = op.nn.pad(data, pad_width=new_pad_width)
+                return data
 
         return relay.Call(self.visit(call.op), args, call.attrs)
 
diff --git a/vta/tutorials/frontend/legacy/deploy_detection.py b/vta/tutorials/frontend/deploy_detection.py
similarity index 99%
rename from vta/tutorials/frontend/legacy/deploy_detection.py
rename to vta/tutorials/frontend/deploy_detection.py
index 1d78786848e7..771801851a48 100644
--- a/vta/tutorials/frontend/legacy/deploy_detection.py
+++ b/vta/tutorials/frontend/deploy_detection.py
@@ -177,9 +177,9 @@
 # Get execution context from remote
 ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)
 
-####################################
+#####################################
 # Build the inference graph executor.
-# ----------------------------------
+# -----------------------------------
 # Using Darknet library load downloaded vision model and compile with Relay.
 # The compilation steps are:
 #
@@ -191,7 +191,6 @@
 # 5. Perform relay build to object file.
 # 6. Load the object file onto remote (FPGA device).
 # 7. Generate graph executor, `m`.
-#
 
 # Load pre-configured AutoTVM schedules
 with autotvm.tophub.context(target):
@@ -212,7 +211,7 @@
 # Note: We set opt_level to 3 in order to fold batch norm
 with tvm.transform.PassContext(opt_level=3):
     with relay.quantize.qconfig(
-        global_scale=33.0,
+        global_scale=23.0,
        skip_conv_layers=[0],
         store_lowbit_output=True,
         round_for_shift=True,
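
A minimal standalone sketch (illustrative only, not part of the commit) of what the new _channel_const_match helper computes and how the graph_pack change uses its result; the block factor 16 and the channel count 255 below are assumed example values, not taken from the patch:

# Sketch: round a channel count up to a multiple of cfactor_out,
# mirroring the helper added to graphpack.py above.
def _channel_const_match(channel_length, cfactor_out):
    diff = int(channel_length) % cfactor_out
    if diff != 0:
        diff = cfactor_out - diff
        channel_length = channel_length + diff
    return diff, channel_length

# Example with an assumed VTA channel block factor of 16:
pad, new_len = _channel_const_match(255, 16)
print(pad, new_len)  # -> 1 256
# graph_pack reshapes to the rounded length (256) and, when pad != 0, later
# strips the extra channels with a negative pad width: [[0, 0], [0, -pad], [0, 0], [0, 0]].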