From 00214cd8dabb4e78f88de011a1540994890be5e7 Mon Sep 17 00:00:00 2001 From: Anirudh Sundar Date: Mon, 13 Sep 2021 19:57:14 +0530 Subject: [PATCH 1/4] [ONNX] QLinearAveragePool and QLinearGlobalAveragePool contrib op --- python/tvm/relay/frontend/onnx.py | 63 ++++++++- tests/python/frontend/onnx/test_forward.py | 146 +++++++++++++++++++++ 2 files changed, 207 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 48089d164a2f..ab2fcec85176 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -269,9 +269,16 @@ class Pool(OnnxOpConverter): """A helper class for pool op converters.""" name = "" + is_quant = False @classmethod def _impl_v1(cls, inputs, attr, params): + if cls.is_quant: + x_scale = get_scalar(inputs[1], params) + x_zero_point = get_scalar(inputs[2], params, dtype="int32") + y_scale = fold_constant(get_scalar(inputs[3], params)) + y_zero_point = get_scalar(inputs[4], params, dtype="int32") + data = inputs[0] input_shape = infer_shape(data) input_dtype = infer_type(data).checked_type.dtype @@ -321,7 +328,7 @@ def _impl_v1(cls, inputs, attr, params): else: attr["layout"] = onnx_default_layout(dims=(len(input_shape) - 2), op_name=cls.name) - return AttrCvt( + attr_cvt = AttrCvt( op_name=dimension_picker(cls.name), transforms={ "kernel_shape": "pool_size", @@ -330,7 +337,16 @@ def _impl_v1(cls, inputs, attr, params): }, ignores=["storage_order"], custom_check=dimension_constraint(), - )([data], attr, params) + ) + # Onnxruntime doesn't actually do this op in integer, they dequantize to fp32 + # and then requantize afer + # https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md#com.microsoft.QLinearAveragePool + if cls.is_quant: + input = _qnn.op.dequantize(data, x_scale, x_zero_point) + out = attr_cvt([input], attr, params) + return _qnn.op.quantize(out, y_scale, y_zero_point, out_dtype=input_dtype) + else: + return attr_cvt([data], 
attr, params) class Absolute(Unary): @@ -351,6 +367,13 @@ class AveragePool(Pool): name = "avg_pool" +class QLinearAveragePool(Pool): + """Operator converter for QLinearAveragePool from Microsoft onnxruntime contrib opset.""" + + name = "avg_pool" + is_quant = True + + class BatchNorm(OnnxOpConverter): """Operator converter for BatchNorm.""" @@ -654,6 +677,40 @@ def _impl_v1(cls, inputs, attr, params): ) +class QLinearGlobalAveragePool(OnnxOpConverter): + "Operator converter for QLinearGlobalAveragePool from Microsoft onnxruntime contrib opset." + + @classmethod + def _impl_v1(cls, inputs, attr, params): + rank = len(infer_shape(inputs[0])) + + x_scale = get_scalar(inputs[1], params) + x_zero_point = get_scalar(inputs[2], params, dtype="int32") + y_scale = fold_constant(get_scalar(inputs[3], params)) + y_zero_point = get_scalar(inputs[4], params, dtype="int32") + + input_dtype = infer_type(inputs[0]).checked_type.dtype + + # Onnxruntime documentation does not mention that this global avg_pool should follow the + # sequence dequantize -> float op -> quantize, but that is how QLinearAveragePool is done. + # + # This op also follows the same pattern since qnn op is not available right now. + # It can be modified once qnn support for GlobalAveragePool is added + x = _qnn.op.dequantize(inputs[0], x_scale, x_zero_point) + if rank == 3: + out = _op.nn.global_avg_pool1d(x) + elif rank == 4: + out = _op.nn.global_avg_pool2d(x) + elif rank == 5: + out = _op.nn.global_avg_pool3d(x) + else: + raise NotImplementedError( + "Global average pooling is only implemented for 1D, 2D, and 3D kernels, got %dD." 
+ % (rank - 2), + ) + return _qnn.op.quantize(out, y_scale, y_zero_point, out_dtype=input_dtype) + + class GlobalMaxPool(OnnxOpConverter): """Operator converter for GlobalMaxPool""" @@ -3794,12 +3851,14 @@ def _get_convert_map(opset): "Xor": Renamer("logical_xor"), # defs/nn "AveragePool": AveragePool.get_converter(opset), + "QLinearAveragePool": QLinearAveragePool.get_converter(opset), "LpPool": LpPool.get_converter(opset), "MaxPool": MaxPool.get_converter(opset), "MaxUnpool": MaxUnpool.get_converter(opset), "Conv": Conv.get_converter(opset), "ConvTranspose": ConvTranspose.get_converter(opset), "GlobalAveragePool": GlobalAveragePool.get_converter(opset), + "QLinearGlobalAveragePool": QLinearGlobalAveragePool.get_converter(opset), "GlobalMaxPool": GlobalMaxPool.get_converter(opset), "BatchNormalization": BatchNorm.get_converter(opset), "InstanceNormalization": InstanceNorm.get_converter(opset), diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index d9f2e97f8247..b13aeb6592fd 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -3056,6 +3056,152 @@ def verify_global_pooling(x_shape, mode): verify_global_pooling([4, 1, 2, 6, 4], mode) +@tvm.testing.parametrize_targets +def test_qlinear_average_pool(target, dev): + def verify_qlinear_average_pool( + x_shape, kernel_shape, strides, pads, out_shape, auto_pad="NOTSET" + ): + input_nodes = [ + helper.make_tensor_value_info("X", TensorProto.FLOAT, list(x_shape)), + ] + + output_nodes = [ + helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(out_shape)), + ] + + input_names = ["X"] + + node = helper.make_node( + "AveragePool", + inputs=input_names, + outputs=["Y"], + kernel_shape=kernel_shape, + strides=strides, + ) + + if pads is None: + pad_attr = helper.make_attribute("auto_pad", auto_pad) + else: + pad_attr = helper.make_attribute("pads", pads) + node.attribute.append(pad_attr) + + graph = helper.make_graph( 
+ [node], + "qlinear_average_pool_test", + inputs=input_nodes, + outputs=output_nodes, + ) + + model = helper.make_model(graph, producer_name="qlinear_average_pool_Test") + quantize_and_verify_with_ort(model, input_names, [x_shape], target, dev) + + # Pool1D + verify_qlinear_average_pool( + x_shape=[1, 1, 32], + kernel_shape=[3], + strides=[1], + pads=[1, 1], + out_shape=[1, 1, 32], + ) + # Pool2D + verify_qlinear_average_pool( + x_shape=[1, 1, 32, 32], + kernel_shape=[3, 3], + strides=[1, 1], + pads=[1, 1, 1, 1], + out_shape=[1, 1, 32, 32], + ) + + # Pool1D with stride + verify_qlinear_average_pool( + x_shape=[1, 1, 32], + kernel_shape=[3], + strides=[2], + pads=[1, 1], + out_shape=[1, 1, 16], + ) + # Pool2D with stride + verify_qlinear_average_pool( + x_shape=[1, 1, 32, 32], + kernel_shape=[3, 3], + strides=[2, 2], + pads=[1, 1, 1, 1], + out_shape=[1, 1, 16, 16], + ) + + # Pool1D with stride and autopadding + verify_qlinear_average_pool( + x_shape=[1, 1, 32], + kernel_shape=[3], + strides=[2], + pads=None, + out_shape=[1, 1, 16], + auto_pad="SAME_UPPER", + ) + # Pool2D with stride and autopadding + verify_qlinear_average_pool( + x_shape=[1, 1, 32, 32], + kernel_shape=[3, 3], + strides=[2, 2], + pads=None, + out_shape=[1, 1, 16, 16], + auto_pad="SAME_UPPER", + ) + + # Pool3D with stride + verify_qlinear_average_pool( + x_shape=[1, 1, 32, 32, 32], + kernel_shape=[3, 3, 3], + strides=[2, 2, 2], + pads=[1, 1, 1, 1, 1, 1], + out_shape=[1, 1, 16, 16, 16], + ) + + # Pool3D with stride and autopadding + verify_qlinear_average_pool( + x_shape=[1, 1, 32, 32, 32], + kernel_shape=[3, 3, 3], + strides=[2, 2, 2], + pads=None, + out_shape=[1, 1, 16, 16, 16], + auto_pad="SAME_UPPER", + ) + + +@tvm.testing.parametrize_targets +def test_qlinear_global_average_pool(target, dev): + def verify_qlinear_global_average_pool(x_shape): + out_shape = x_shape[:2] + [1] * (len(x_shape) - 2) + + node_type = "GlobalAveragePool" + + input_names = ["X"] + + pool_node = 
helper.make_node(node_type, inputs=input_names, outputs=["Y"]) + + graph = helper.make_graph( + [pool_node], + "qlinear_global_average_pool_test", + inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, list(x_shape))], + outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(out_shape))], + ) + + model = helper.make_model(graph, producer_name="qlinear_global_average_pool_test") + quantize_and_verify_with_ort(model, input_names, [x_shape], target, dev) + + # 1D Pooling (NCW) + verify_qlinear_global_average_pool([1, 8, 8]) + verify_qlinear_global_average_pool([4, 1, 4]) + + # 2D Pooling (NCHW) + verify_qlinear_global_average_pool([1, 8, 8, 8]) + verify_qlinear_global_average_pool([4, 1, 6, 4]) + + # 3D Pooling (NCDHW) + verify_qlinear_global_average_pool([1, 8, 6, 8, 8]) + verify_qlinear_global_average_pool([4, 1, 2, 6, 4]) + + @tvm.testing.parametrize_targets def test_mod(target, dev): def verify_mod(x_shape, y_shape, fmod, out_shape, dtype="float32"): From 5b7ab848093ab57238fc6ac73d091d2d63e298d5 Mon Sep 17 00:00:00 2001 From: Anirudh Sundar Date: Wed, 15 Sep 2021 21:03:17 +0530 Subject: [PATCH 2/4] Fix linter error for variable name and else after return --- python/tvm/relay/frontend/onnx.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index ab2fcec85176..1401c4fb5884 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -339,14 +339,13 @@ def _impl_v1(cls, inputs, attr, params): custom_check=dimension_constraint(), ) # Onnxruntime doesn't actually do this op in integer, they dequantize to fp32 - # and then requantize afer + # and then requantize afer (according to documentation below) # https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md#com.microsoft.QLinearAveragePool if cls.is_quant: - input = _qnn.op.dequantize(data, x_scale, x_zero_point) - out = attr_cvt([input], attr, params) 
+ float_node = _qnn.op.dequantize(data, x_scale, x_zero_point) + out = attr_cvt([float_node], attr, params) return _qnn.op.quantize(out, y_scale, y_zero_point, out_dtype=input_dtype) - else: - return attr_cvt([data], attr, params) + return attr_cvt([data], attr, params) class Absolute(Unary): From e3f90c9d6db23b2e1e3b304bb1480f58962edf0a Mon Sep 17 00:00:00 2001 From: Anirudh Sundar Date: Thu, 16 Sep 2021 17:01:03 +0530 Subject: [PATCH 3/4] Separate quantized avg_pool impl and add TODO for global_avg_pool --- python/tvm/relay/frontend/onnx.py | 65 ++++++++++++++++++------------- 1 file changed, 38 insertions(+), 27 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 1401c4fb5884..cfd31cb03001 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -269,15 +269,15 @@ class Pool(OnnxOpConverter): """A helper class for pool op converters.""" name = "" - is_quant = False @classmethod def _impl_v1(cls, inputs, attr, params): - if cls.is_quant: - x_scale = get_scalar(inputs[1], params) - x_zero_point = get_scalar(inputs[2], params, dtype="int32") - y_scale = fold_constant(get_scalar(inputs[3], params)) - y_zero_point = get_scalar(inputs[4], params, dtype="int32") + attr_cvt, data = cls._run_calculation(inputs, attr, params) + return attr_cvt([data], attr, params) + + @classmethod + def _run_calculation(cls, inputs, attr, params): + """Helper method to return the processed input data and AttrCvt object""" data = inputs[0] input_shape = infer_shape(data) @@ -328,24 +328,19 @@ def _impl_v1(cls, inputs, attr, params): else: attr["layout"] = onnx_default_layout(dims=(len(input_shape) - 2), op_name=cls.name) - attr_cvt = AttrCvt( - op_name=dimension_picker(cls.name), - transforms={ - "kernel_shape": "pool_size", - "pads": ("padding", 0), - "dilations": ("dilation", 1), - }, - ignores=["storage_order"], - custom_check=dimension_constraint(), + return ( + AttrCvt( + 
op_name=dimension_picker(cls.name), + transforms={ + "kernel_shape": "pool_size", + "pads": ("padding", 0), + "dilations": ("dilation", 1), + }, + ignores=["storage_order"], + custom_check=dimension_constraint(), + ), + data, ) - # Onnxruntime doesn't actually do this op in integer, they dequantize to fp32 - # and then requantize afer (according to documentation below) - # https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md#com.microsoft.QLinearAveragePool - if cls.is_quant: - float_node = _qnn.op.dequantize(data, x_scale, x_zero_point) - out = attr_cvt([float_node], attr, params) - return _qnn.op.quantize(out, y_scale, y_zero_point, out_dtype=input_dtype) - return attr_cvt([data], attr, params) class Absolute(Unary): @@ -370,7 +365,23 @@ class QLinearAveragePool(Pool): """Operator converter for QLinearAveragePool from Microsoft onnxruntime contrib opset.""" name = "avg_pool" - is_quant = True + + @classmethod + def _impl_v1(cls, inputs, attr, params): + x_scale = get_scalar(inputs[1], params) + x_zero_point = get_scalar(inputs[2], params, dtype="int32") + y_scale = fold_constant(get_scalar(inputs[3], params)) + y_zero_point = get_scalar(inputs[4], params, dtype="int32") + + attr_cvt, data = cls._run_calculation(inputs, attr, params) + + input_dtype = infer_type(data).checked_type.dtype + # Onnxruntime doesn't actually do this op in integer, they dequantize to fp32 + # and then requantize after (according to documentation below) + # https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md#com.microsoft.QLinearAveragePool + float_node = _qnn.op.dequantize(data, x_scale, x_zero_point) + out = attr_cvt([float_node], attr, params) + return _qnn.op.quantize(out, y_scale, y_zero_point, out_dtype=input_dtype) class BatchNorm(OnnxOpConverter): @@ -694,7 +705,7 @@ def _impl_v1(cls, inputs, attr, params): # sequence dequantize -> float op -> quantize, but that is how QLinearAveragePool is done. 
# # This op also follows the same pattern since qnn op is not available right now. - # It can be modified once qnn support for GlobalAveragePool is added + # TODO: Generate QNN op to perform quantized operation instead of dequant -> op -> float x = _qnn.op.dequantize(inputs[0], x_scale, x_zero_point) if rank == 3: out = _op.nn.global_avg_pool1d(x) @@ -3850,14 +3861,12 @@ def _get_convert_map(opset): "Xor": Renamer("logical_xor"), # defs/nn "AveragePool": AveragePool.get_converter(opset), - "QLinearAveragePool": QLinearAveragePool.get_converter(opset), "LpPool": LpPool.get_converter(opset), "MaxPool": MaxPool.get_converter(opset), "MaxUnpool": MaxUnpool.get_converter(opset), "Conv": Conv.get_converter(opset), "ConvTranspose": ConvTranspose.get_converter(opset), "GlobalAveragePool": GlobalAveragePool.get_converter(opset), - "QLinearGlobalAveragePool": QLinearGlobalAveragePool.get_converter(opset), "GlobalMaxPool": GlobalMaxPool.get_converter(opset), "BatchNormalization": BatchNorm.get_converter(opset), "InstanceNormalization": InstanceNorm.get_converter(opset), @@ -3937,6 +3946,8 @@ def _get_convert_map(opset): "QLinearAdd": QLinearAdd.get_converter(opset), "QLinearMul": QLinearMul.get_converter(opset), "ConvInteger": ConvInteger.get_converter(opset), + "QLinearAveragePool": QLinearAveragePool.get_converter(opset), + "QLinearGlobalAveragePool": QLinearGlobalAveragePool.get_converter(opset), # Random number generation. 
"RandomUniform": RandomUniform.get_converter(opset), # Loss functions / training From d240901c6df213ce2c2c69ecf49656e1c854ba53 Mon Sep 17 00:00:00 2001 From: Anirudh Sundar Date: Thu, 16 Sep 2021 22:32:55 +0530 Subject: [PATCH 4/4] Fix comment typo --- python/tvm/relay/frontend/onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index cfd31cb03001..b5a3c43f6bfb 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -705,7 +705,7 @@ def _impl_v1(cls, inputs, attr, params): # sequence dequantize -> float op -> quantize, but that is how QLinearAveragePool is done. # # This op also follows the same pattern since qnn op is not available right now. - # TODO: Generate QNN op to perform quantized operation instead of dequant -> op -> float + # TODO: Generate QNN op to perform quantized operation instead of dequant -> op -> quant x = _qnn.op.dequantize(inputs[0], x_scale, x_zero_point) if rank == 3: out = _op.nn.global_avg_pool1d(x)