From de2df87dc8ead1ac5e8605ea96366b2679c98c3b Mon Sep 17 00:00:00 2001
From: Matthew Brookhart
Date: Tue, 23 Aug 2022 13:41:02 -0600
Subject: [PATCH 1/3] fix a rounding error with avg pooling in fq2i

---
 .../relay/transform/fake_quantization_to_integer.py |  9 +++------
 .../relay/test_pass_fake_quantization_to_integer.py | 12 ++++++------
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index 58dcc3477f6a..3ef886823f22 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -117,8 +117,7 @@ def adaptive_avgpool1d(expr, type_map):
     arg = relay.op.cast(arg, "int32")
     output_size = expr.attrs.output_size
     out = relay.op.nn.adaptive_avg_pool1d(arg, output_size)
-    out = relay.op.cast(out, t.dtype)
-    return [out, t]
+    return [out, TensorAffineType(t.scale, t.zero_point, "int32", t.axis)]


 @register_fake_quantization_to_integer("nn.avg_pool2d")
@@ -128,8 +127,7 @@ def avgpool2d(expr, type_map):
     t = type_map[arg]
     arg = relay.op.cast(arg, "int32")
     out = relay.op.nn.avg_pool2d(arg, **expr.attrs)
-    out = relay.op.cast(out, t.dtype)
-    return [out, t]
+    return [out, TensorAffineType(t.scale, t.zero_point, "int32", t.axis)]


 @register_fake_quantization_to_integer("nn.global_avg_pool2d")
@@ -139,8 +137,7 @@ def global_avgpool2d(expr, type_map):
     t = type_map[arg]
     arg = relay.op.cast(arg, "int32")
     out = relay.op.nn.global_avg_pool2d(arg)
-    out = relay.op.cast(out, t.dtype)
-    return [out, t]
+    return [out, TensorAffineType(t.scale, t.zero_point, "int32", t.axis)]


 @register_fake_quantization_to_integer("broadcast_to")
diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py
index cdf5fd42a138..2d112b9057a9 100644
--- a/tests/python/relay/test_pass_fake_quantization_to_integer.py
+++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py
@@ -284,11 +284,11 @@ def test_fake_quantize_adaptive_avgpool1d(output_size):
     zero = relay.const(0)
     x = relay.qnn.op.dequantize(x, relay.const(2.0), zero)
     op = relay.op.nn.adaptive_avg_pool1d(x, output_size)
-    op = relay.qnn.op.quantize(op, relay.const(2.0), zero)
+    op = relay.qnn.op.quantize(op, relay.const(0.5), relay.const(10))

     x_np = np.random.randint(-128, 127, size=[1, 128, 768], dtype="int8")

-    compare_fq_to_int(op, [x_np], True)
+    compare_fq_to_int(op, [x_np])


 def test_fake_quantize_avgpool():
@@ -297,11 +297,11 @@ def test_fake_quantize_avgpool():
     zero = relay.const(0)
     x = relay.qnn.op.dequantize(x, relay.const(2.0), zero)
     op = relay.op.nn.avg_pool2d(x, [3, 3])
-    op = relay.qnn.op.quantize(op, relay.const(2.0), zero)
+    op = relay.qnn.op.quantize(op, relay.const(0.5), relay.const(10))

     x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8")

-    compare_fq_to_int(op, [x_np], True)
+    compare_fq_to_int(op, [x_np])


 def test_fake_quantize_global_avg_pool():
@@ -310,11 +310,11 @@ def test_fake_quantize_global_avg_pool():
     zero = relay.const(0)
     x = relay.qnn.op.dequantize(x, relay.const(2.0), zero)
     op = relay.op.nn.global_avg_pool2d(x)
-    op = relay.qnn.op.quantize(op, relay.const(2.0), zero)
+    op = relay.qnn.op.quantize(op, relay.const(0.5), relay.const(10))

     x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8")

-    compare_fq_to_int(op, [x_np], True)
+    compare_fq_to_int(op, [x_np])


 class TestUnaryQNNOp:
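Reviewer note, illustrative only and not part of the patch: the sketch below is plain NumPy, with scales, zero points, and values mirroring the updated tests (input scale 2.0, output scale 0.5 with zero point 10). It shows the rounding error targeted by this series: averaging at the coarse input scale and snapping the result back to the narrow input dtype cannot hit the ideal output value, which is why the rewrites above stop casting back to t.dtype and keep the pooled result in int32.

import numpy as np

# Two int8 inputs quantized with scale 2.0, zero point 0 (real values 6.0 and 8.0).
q_in = np.array([3, 4], dtype=np.int8)
in_scale, in_zp = 2.0, 0
out_scale, out_zp = 0.5, 10

true_mean = (in_scale * (q_in.astype(np.int32) - in_zp)).mean()  # 7.0
ideal_q_out = int(np.round(true_mean / out_scale + out_zp))      # 24

# Averaging at the coarse input scale forces the true mean of 3.5 onto an integer step...
avg_at_in_scale = q_in.astype(np.int32).sum() // q_in.size       # 3
# ...so a later requantize to the finer output scale cannot recover the lost half step.
requantized_later = int(np.round((avg_at_in_scale - in_zp) * in_scale / out_scale + out_zp))

print(ideal_q_out, requantized_later)  # 24 vs. 22

Whichever way the integer average rounds 3.5, the requantized result lands on 22 or 26, never on the ideal 24.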
From a77a18e9eab4f2c1e18cae6d78fb90c9df03bec5 Mon Sep 17 00:00:00 2001
From: Matthew Brookhart
Date: Wed, 24 Aug 2022 12:16:53 -0600
Subject: [PATCH 2/3] fix

---
 .../transform/fake_quantization_to_integer.py | 55 +++++++++++++++++--
 .../test_pass_fake_quantization_to_integer.py | 15 ++---
 2 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index 3ef886823f22..6f7ea8c4e603 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -114,10 +114,24 @@ def adaptive_avgpool1d(expr, type_map):
     """Rewrite an adaptive avgpool op"""
     arg = expr.args[0]
     t = type_map[arg]
-    arg = relay.op.cast(arg, "int32")
+    out_t = type_map[expr]
+    if not (
+        approx_equal(t.scale, out_t.scale)
+        and approx_equal(t.zero_point, out_t.zero_point)
+        and tvm.ir.structural_equal(t.dtype, out_t.dtype)
+    ):
+        arg = relay.qnn.op.requantize(
+            arg,
+            t.scale,
+            t.zero_point,
+            out_t.scale,
+            out_t.zero_point,
+            out_dtype="int32",
+            axis=t.axis,
+        )
     output_size = expr.attrs.output_size
     out = relay.op.nn.adaptive_avg_pool1d(arg, output_size)
-    return [out, TensorAffineType(t.scale, t.zero_point, "int32", t.axis)]
+    return [out, TensorAffineType(out_t.scale, out_t.zero_point, "int32", out_t.axis)]


 @register_fake_quantization_to_integer("nn.avg_pool2d")
@@ -125,9 +139,23 @@ def avgpool2d(expr, type_map):
     """Rewrite a avgpool op"""
     arg = expr.args[0]
     t = type_map[arg]
-    arg = relay.op.cast(arg, "int32")
+    out_t = type_map[expr]
+    if not (
+        approx_equal(t.scale, out_t.scale)
+        and approx_equal(t.zero_point, out_t.zero_point)
+        and tvm.ir.structural_equal(t.dtype, out_t.dtype)
+    ):
+        arg = relay.qnn.op.requantize(
+            arg,
+            t.scale,
+            t.zero_point,
+            out_t.scale,
+            out_t.zero_point,
+            out_dtype="int32",
+            axis=t.axis,
+        )
     out = relay.op.nn.avg_pool2d(arg, **expr.attrs)
-    return [out, TensorAffineType(t.scale, t.zero_point, "int32", t.axis)]
+    return [out, TensorAffineType(out_t.scale, out_t.zero_point, "int32", out_t.axis)]


 @register_fake_quantization_to_integer("nn.global_avg_pool2d")
@@ -135,9 +163,24 @@ def global_avgpool2d(expr, type_map):
     """Rewrite a global_avgpool op"""
     arg = expr.args[0]
     t = type_map[arg]
-    arg = relay.op.cast(arg, "int32")
+    out_t = type_map[expr]
+    out_t = type_map[expr]
+    if not (
+        approx_equal(t.scale, out_t.scale)
+        and approx_equal(t.zero_point, out_t.zero_point)
+        and tvm.ir.structural_equal(t.dtype, out_t.dtype)
+    ):
+        arg = relay.qnn.op.requantize(
+            arg,
+            t.scale,
+            t.zero_point,
+            out_t.scale,
+            out_t.zero_point,
+            out_dtype="int32",
+            axis=t.axis,
+        )
     out = relay.op.nn.global_avg_pool2d(arg)
-    return [out, TensorAffineType(t.scale, t.zero_point, "int32", t.axis)]
+    return [out, TensorAffineType(out_t.scale, out_t.zero_point, "int32", out_t.axis)]


 @register_fake_quantization_to_integer("broadcast_to")
diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py
index 2d112b9057a9..a63d82e68750 100644
--- a/tests/python/relay/test_pass_fake_quantization_to_integer.py
+++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py
@@ -281,40 +281,37 @@ def test_fake_quantize_maxpool():

 def test_fake_quantize_adaptive_avgpool1d(output_size):
     x = relay.var("x", shape=[1, 128, 768], dtype="int8")
-    zero = relay.const(0)
-    x = relay.qnn.op.dequantize(x, relay.const(2.0), zero)
+    x = relay.qnn.op.dequantize(x, relay.const(2.0), relay.const(-12))
     op = relay.op.nn.adaptive_avg_pool1d(x, output_size)
     op = relay.qnn.op.quantize(op, relay.const(0.5), relay.const(10))

     x_np = np.random.randint(-128, 127, size=[1, 128, 768], dtype="int8")

-    compare_fq_to_int(op, [x_np])
+    compare_fq_to_int(op, [x_np], True)


 def test_fake_quantize_avgpool():
     x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8")
-    zero = relay.const(0)
-    x = relay.qnn.op.dequantize(x, relay.const(2.0), zero)
+    x = relay.qnn.op.dequantize(x, relay.const(2.0), relay.const(-12))
     op = relay.op.nn.avg_pool2d(x, [3, 3])
     op = relay.qnn.op.quantize(op, relay.const(0.5), relay.const(10))

     x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8")

-    compare_fq_to_int(op, [x_np])
+    compare_fq_to_int(op, [x_np], True)


 def test_fake_quantize_global_avg_pool():
     x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8")
-    zero = relay.const(0)
-    x = relay.qnn.op.dequantize(x, relay.const(2.0), zero)
+    x = relay.qnn.op.dequantize(x, relay.const(2.0), relay.const(-12))
     op = relay.op.nn.global_avg_pool2d(x)
     op = relay.qnn.op.quantize(op, relay.const(0.5), relay.const(10))

     x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8")

-    compare_fq_to_int(op, [x_np])
+    compare_fq_to_int(op, [x_np], True)


 class TestUnaryQNNOp:
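Continuing the NumPy sketch above (illustrative only, not part of the patch, with a simplified model of qnn.requantize that ignores its exact rounding mode): requantizing the input to the output affine type before pooling, which is what the relay.qnn.op.requantize inserted by this patch does when the input and output types differ, performs the integer average at the finer output resolution and recovers the ideal quantized mean.

import numpy as np

q_in = np.array([3, 4], dtype=np.int8)
in_scale, in_zp = 2.0, 0
out_scale, out_zp = 0.5, 10

# Move each element to the output affine type first, in an int32 working dtype.
q_requant = np.round(
    (q_in.astype(np.int32) - in_zp) * in_scale / out_scale + out_zp
).astype(np.int32)                                     # [22, 26]

avg_at_out_scale = q_requant.sum() // q_requant.size   # (22 + 26) // 2 == 24, the ideal value
print(avg_at_out_scale)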
From cac072a84e4babb89513198da0c1140572ec1db4 Mon Sep 17 00:00:00 2001
From: Matthew Brookhart
Date: Wed, 24 Aug 2022 12:31:40 -0600
Subject: [PATCH 3/3] else for case when types match

---
 python/tvm/relay/transform/fake_quantization_to_integer.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index 6f7ea8c4e603..bb874c131cd8 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -129,6 +129,8 @@ def adaptive_avgpool1d(expr, type_map):
             out_dtype="int32",
             axis=t.axis,
         )
+    else:
+        arg = relay.op.cast(arg, "int32")
     output_size = expr.attrs.output_size
     out = relay.op.nn.adaptive_avg_pool1d(arg, output_size)
     return [out, TensorAffineType(out_t.scale, out_t.zero_point, "int32", out_t.axis)]
@@ -154,6 +156,8 @@ def avgpool2d(expr, type_map):
             out_dtype="int32",
             axis=t.axis,
         )
+    else:
+        arg = relay.op.cast(arg, "int32")
     out = relay.op.nn.avg_pool2d(arg, **expr.attrs)
     return [out, TensorAffineType(out_t.scale, out_t.zero_point, "int32", out_t.axis)]

@@ -179,6 +183,8 @@ def global_avgpool2d(expr, type_map):
             out_dtype="int32",
             axis=t.axis,
         )
+    else:
+        arg = relay.op.cast(arg, "int32")
     out = relay.op.nn.global_avg_pool2d(arg)
     return [out, TensorAffineType(out_t.scale, out_t.zero_point, "int32", out_t.axis)]
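Usage sketch (not part of the series; assumes a TVM build that includes these patches): the rewritten pooling ops can be exercised by running the FakeQuantizationToInteger pass over a graph shaped like the updated avg_pool2d test. When the input and output quantization parameters differ, the pooling should then run on integer data behind a qnn.requantize of its input; when they match, patch 3's else branch falls back to a plain int32 cast.

import tvm
from tvm import relay

# Fake-quantized graph shaped like the updated test_fake_quantize_avgpool:
# dequantize -> avg_pool2d -> quantize, with different input and output qparams.
x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8")
deq = relay.qnn.op.dequantize(x, relay.const(2.0), relay.const(-12))
pooled = relay.op.nn.avg_pool2d(deq, [3, 3])
quant = relay.qnn.op.quantize(pooled, relay.const(0.5), relay.const(10))

mod = tvm.IRModule.from_expr(quant)
mod = relay.transform.InferType()(mod)
mod = relay.transform.FakeQuantizationToInteger()(mod)
print(mod)  # the avg_pool2d should now appear in the integer domain rather than on dequantized floats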