From 7a5535c5c5377b36865db9edeea51d15c8c6702e Mon Sep 17 00:00:00 2001
From: electriclilies
Date: Wed, 4 Nov 2020 09:56:48 -0800
Subject: [PATCH 01/11] add dynamic dequantize

---
 src/relay/qnn/op/dequantize.cc            | 23 +++++++++++------------
 tests/python/frontend/pytorch/qnn_test.py | 20 ++++++++++++++++++++
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/src/relay/qnn/op/dequantize.cc b/src/relay/qnn/op/dequantize.cc
index 2fe075c7e64b..724441e0c523 100644
--- a/src/relay/qnn/op/dequantize.cc
+++ b/src/relay/qnn/op/dequantize.cc
@@ -79,20 +79,27 @@ Expr MakeDequantize(Expr data, Expr input_scale, Expr input_zero_point, int axis
 }
 
 Expr DequantizeLower(const Expr& input_tensor, const Expr& input_scale,
-                     const Expr& input_zero_point, const Array<IndexExpr>& input_shape,
+                     const Expr& input_zero_point, const Array<tvm::relay::Type>& types,
                      const DequantizeAttrs* attrs) {
   const auto axis = attrs->axis;
 
+  ICHECK_EQ(types.size(), 4);
+  auto in_type = types[0];
+  auto in_tensor_type = in_type.as<TensorTypeNode>();
+  ICHECK(in_tensor_type != nullptr) << "Type information missing"
+                                    << " Please run infer_type pass.";
+  Array<IndexExpr> input_shape = in_tensor_type->shape;
+
   size_t n_dim = input_shape.size();
 
   // Expand scale and zero point if the input tensor is channel quantized
   auto expanded_input_scale = input_scale;
-  if (!IsConstScalar(input_scale)) {
+  if (!IsConstScalar(input_scale) && !IsScalarType(types[1])) {
     expanded_input_scale = ExpandBiasToMatchAxis(input_scale, n_dim, {axis});
   }
 
   auto expanded_input_zero_point = input_zero_point;
-  if (!IsConstScalar(input_zero_point)) {
+  if (!IsConstScalar(input_zero_point) && !IsScalarType(types[2])) {
     expanded_input_zero_point = ExpandBiasToMatchAxis(input_zero_point, n_dim, {axis});
   }
 
@@ -113,15 +120,7 @@ Expr DequantizeQnnCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   const auto* dequantize_attrs = attrs.as<DequantizeAttrs>();
   ICHECK(dequantize_attrs != nullptr);
 
-  // Find input shape.
-  ICHECK_EQ(types.size(), 4);
-  auto in_type = types[0];
-  auto in_tensor_type = in_type.as<TensorTypeNode>();
-  ICHECK(in_tensor_type != nullptr) << "Type information missing."
-                                    << " Please run infer_type pass.";
-  Array<IndexExpr> input_shape = in_tensor_type->shape;
-
-  return DequantizeLower(data, input_scale, input_zero_point, input_shape, dequantize_attrs);
+  return DequantizeLower(data, input_scale, input_zero_point, types, dequantize_attrs);
 }
 
 RELAY_REGISTER_OP("qnn.dequantize")
diff --git a/tests/python/frontend/pytorch/qnn_test.py b/tests/python/frontend/pytorch/qnn_test.py
index 9781eb5d57c4..f514832d86a5 100644
--- a/tests/python/frontend/pytorch/qnn_test.py
+++ b/tests/python/frontend/pytorch/qnn_test.py
@@ -29,6 +29,7 @@
 import tvm
 import tvm.testing
 from tvm import relay
+from tvm.relay.testing import run_infer_type
 from tvm.relay.frontend.pytorch_utils import is_version_greater_than
 from tvm.contrib.download import download_testdata
 
@@ -505,6 +506,25 @@ def test_serialized_modules():
     match_ratio = num_identical / float(np.prod(tvm_result.shape))
     assert match_ratio > 0.90
 
+def test_dequantize_dynamic_unit():
+    data = relay.var('data', shape=(1, 2, 3, 4), dtype='int8')
+    scale = relay.var('scale', shape=(), dtype='float32')
+    zp = relay.var('zp', shape=(), dtype='int32')
+
+    deq_data = relay.qnn.op.dequantize(data, scale * scale, zp + zp)
+    tt = run_infer_type(deq_data)
+
+    assert tt.checked_type == relay.TensorType((1, 2, 3, 4), "float32")
+
+def test_quantize_dynamic_unit():
+    data = relay.var('data', shape=(1, 2, 3, 4), dtype='float32')
+    scale = relay.var('scale', shape=(), dtype='float32')
+    zp = relay.var('zp', shape=(), dtype='int32')
+
+    q_data = relay.qnn.op.quantize(data, scale * scale, zp + zp)
+    tt = run_infer_type(q_data)
+
+    assert tt.checked_type == relay.TensorType((1, 2, 3, 4), "int8")
 
 def test_quantize_dynamic():
     # A wrapper is required for quantize_dynamic to work correctly
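[Review note] The core change in patch 01: DequantizeLower now receives the full checked-type array instead of a pre-extracted static shape, and the expansion of scale/zero point along `axis` is skipped whenever the type checker reports them as scalar -- even when they are runtime expressions rather than constants. That is what lets dequantize lower with non-constant quantization parameters. A minimal sketch of the two call shapes the new checks distinguish (variable names here are illustrative, not from the patch):

    import numpy as np
    from tvm import relay

    x = relay.var("x", shape=(8, 4), dtype="int8")

    # dynamic per-tensor case: scale/zp are scalar-typed graph inputs
    s = relay.var("s", shape=(), dtype="float32")
    z = relay.var("z", shape=(), dtype="int32")
    per_tensor = relay.qnn.op.dequantize(x, s, z)

    # per-channel case: constant 1-D scale/zp, expanded internally to match axis 0
    ch_s = relay.const(np.ones(8, dtype="float32"))
    ch_z = relay.const(np.zeros(8, dtype="int32"))
    per_channel = relay.qnn.op.dequantize(x, ch_s, ch_z, axis=0)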
- << " Please run infer_type pass."; - Array input_shape = in_tensor_type->shape; - - return DequantizeLower(data, input_scale, input_zero_point, input_shape, dequantize_attrs); + return DequantizeLower(data, input_scale, input_zero_point, types, dequantize_attrs); } RELAY_REGISTER_OP("qnn.dequantize") diff --git a/tests/python/frontend/pytorch/qnn_test.py b/tests/python/frontend/pytorch/qnn_test.py index 9781eb5d57c4..f514832d86a5 100644 --- a/tests/python/frontend/pytorch/qnn_test.py +++ b/tests/python/frontend/pytorch/qnn_test.py @@ -29,6 +29,7 @@ import tvm import tvm.testing from tvm import relay +from tvm.relay.testing import run_infer_type from tvm.relay.frontend.pytorch_utils import is_version_greater_than from tvm.contrib.download import download_testdata @@ -505,6 +506,25 @@ def test_serialized_modules(): match_ratio = num_identical / float(np.prod(tvm_result.shape)) assert match_ratio > 0.90 +def test_dequantize_dynamic_unit(): + data = relay.var('data', shape=(1, 2, 3, 4), dtype='int8') + scale = relay.var('scale', shape=(), dtype='float32') + zp = relay.var('zp', shape=(), dtype='int32') + + deq_data = relay.qnn.op.dequantize(data, scale * scale, zp + zp) + tt = run_infer_type(deq_data) + + assert tt.checked_type == relay.TensorType((1, 2, 3, 4), "float32") + +def test_quantize_dynamic_unit(): + data = relay.var('data', shape=(1, 2, 3, 4), dtype='float32') + scale = relay.var('scale', shape=(), dtype='float32') + zp = relay.var('zp', shape=(), dtype='int32') + + q_data = relay.qnn.op.quantize(data, scale * scale, zp + zp) + tt = run_infer_type(q_data) + + assert tt.checked_type == relay.TensorType((1, 2, 3, 4), "int8") def test_quantize_dynamic(): # A wrapper is required for quantize_dynamic to work correctly From ef30a980f27308545f827b96ee97ccd355ec7ec1 Mon Sep 17 00:00:00 2001 From: electriclilies Date: Wed, 4 Nov 2020 10:33:43 -0800 Subject: [PATCH 02/11] register quantize and dequantize as opaque --- python/tvm/relay/qnn/op/qnn.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py index 3f23d6895b43..0efdf8a55177 100644 --- a/python/tvm/relay/qnn/op/qnn.py +++ b/python/tvm/relay/qnn/op/qnn.py @@ -21,7 +21,8 @@ from tvm.relay.expr import Tuple, TupleWrapper from tvm.relay.op.nn.utils import get_pad_tuple2d from . import _make - +from ... 
From 3cd5b4fb661a9f20267be9412d1beaba118c411f Mon Sep 17 00:00:00 2001
From: electriclilies
Date: Wed, 4 Nov 2020 10:51:43 -0800
Subject: [PATCH 03/11] make tests better

---
 tests/python/frontend/pytorch/qnn_test.py | 56 +++++++++++++++++++----
 1 file changed, 46 insertions(+), 10 deletions(-)

diff --git a/tests/python/frontend/pytorch/qnn_test.py b/tests/python/frontend/pytorch/qnn_test.py
index f514832d86a5..c7b079177315 100644
--- a/tests/python/frontend/pytorch/qnn_test.py
+++ b/tests/python/frontend/pytorch/qnn_test.py
@@ -29,6 +29,8 @@
 import tvm
 import tvm.testing
 from tvm import relay
+from tvm.contrib import graph_runtime
+from tvm.relay import create_executor
 from tvm.relay.testing import run_infer_type
 from tvm.relay.frontend.pytorch_utils import is_version_greater_than
 from tvm.contrib.download import download_testdata
@@ -507,24 +509,54 @@ def test_serialized_modules():
     assert match_ratio > 0.90
 
 def test_dequantize_dynamic_unit():
-    data = relay.var('data', shape=(1, 2, 3, 4), dtype='int8')
-    scale = relay.var('scale', shape=(), dtype='float32')
-    zp = relay.var('zp', shape=(), dtype='int32')
+    x = relay.var('x', shape=(1, 2, 3, 4), dtype='int8')
+    scale_var = relay.var('scale', shape=(), dtype='float32')
+    zp_var = relay.var('zp', shape=(), dtype='int32')
 
-    deq_data = relay.qnn.op.dequantize(data, scale * scale, zp + zp)
-    tt = run_infer_type(deq_data)
+    deq_x = relay.qnn.op.dequantize(x, scale_var * scale_var, zp_var + zp_var)
+    tt = run_infer_type(deq_x)
 
     assert tt.checked_type == relay.TensorType((1, 2, 3, 4), "float32")
+    func = relay.Function([x, scale_var, zp_var], deq_x)
+    data = np.random.uniform(size=(1, 2, 3, 4)).astype('int8')
+    scale = np.array(1).astype('float32')
+    zp = np.array(0).astype('int32')
+
+    mod = tvm.ir.IRModule.from_expr(func)
+
+    for target, ctx in tvm.testing.enabled_targets():
+        #TODO: (electriclilies) enable AlterOpLayout when it is fixed
+        with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]):
+            lib = relay.build(mod, target=target)
+
+        module = graph_runtime.GraphModule(lib["default"](ctx))
+        module.set_input(**{'x': data, 'scale': scale, 'zp': zp})
+        module.run()
 
 def test_quantize_dynamic_unit():
-    data = relay.var('data', shape=(1, 2, 3, 4), dtype='float32')
-    scale = relay.var('scale', shape=(), dtype='float32')
-    zp = relay.var('zp', shape=(), dtype='int32')
+    x = relay.var('x', shape=(1, 2, 3, 4), dtype='float32')
+    scale_var = relay.var('scale', shape=(), dtype='float32')
+    zp_var = relay.var('zp', shape=(), dtype='int32')
 
-    q_data = relay.qnn.op.quantize(data, scale * scale, zp + zp)
-    tt = run_infer_type(q_data)
+    q_x = relay.qnn.op.quantize(x, scale_var * scale_var, zp_var + zp_var)
+    tt = run_infer_type(q_x)
 
     assert tt.checked_type == relay.TensorType((1, 2, 3, 4), "int8")
+    func = relay.Function([x, scale_var, zp_var], q_x)
+    data = np.random.uniform(size=(1, 2, 3, 4)).astype('float32')
+    scale = np.array(1).astype('float32')
+    zp = np.array(0).astype('int32')
+
+    mod = tvm.ir.IRModule.from_expr(func)
+
+    for target, ctx in tvm.testing.enabled_targets():
+        #TODO: (electriclilies) enable AlterOpLayout when it is fixed
+        with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]):
+            lib = relay.build(mod, target=target)
+
+        module = graph_runtime.GraphModule(lib["default"](ctx))
+        module.set_input(**{'x': data, 'scale': scale, 'zp': zp})
+        module.run()
 
 def test_quantize_dynamic():
     # A wrapper is required for quantize_dynamic to work correctly
@@ -566,3 +598,7 @@ def forward(self, inp):
     # Outputs from v1.6 seem reliable. TVM's outputs are always the same
     if is_version_greater_than("1.5.1"):
         tvm.testing.assert_allclose(tvm_result, pt_result, rtol=1e-4, atol=1e-4)
+
+if __name__ == '__main__':
+    test_dequantize_dynamic_unit()
+    test_quantize_dynamic_unit()
\ No newline at end of file
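[Review note] The unit tests in patch 03 verify that type inference, compilation, and execution succeed, but not output values. For reference, the affine-quantization math the two ops implement, in plain numpy (independent of TVM):

    import numpy as np

    q = np.array([-128, 0, 127], dtype="int8")
    scale, zp = np.float32(0.5), np.int32(1)
    real = scale * (q.astype("int32") - zp)  # dequantize: r = s * (q - z)
    q2 = np.clip(np.round(real / scale) + zp, -128, 127).astype("int8")  # quantize
    assert (q2 == q).all()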
From f96398d501de74c3a91e24a34e35f0d62e98b378 Mon Sep 17 00:00:00 2001
From: electriclilies
Date: Wed, 4 Nov 2020 11:03:18 -0800
Subject: [PATCH 04/11] black

---
 tests/python/frontend/pytorch/qnn_test.py | 48 ++++++++++++-----------
 1 file changed, 26 insertions(+), 22 deletions(-)

diff --git a/tests/python/frontend/pytorch/qnn_test.py b/tests/python/frontend/pytorch/qnn_test.py
index c7b079177315..c372939f3ed6 100644
--- a/tests/python/frontend/pytorch/qnn_test.py
+++ b/tests/python/frontend/pytorch/qnn_test.py
@@ -508,56 +508,59 @@ def test_serialized_modules():
     match_ratio = num_identical / float(np.prod(tvm_result.shape))
     assert match_ratio > 0.90
 
+
 def test_dequantize_dynamic_unit():
-    x = relay.var('x', shape=(1, 2, 3, 4), dtype='int8')
-    scale_var = relay.var('scale', shape=(), dtype='float32')
-    zp_var = relay.var('zp', shape=(), dtype='int32')
+    x = relay.var("x", shape=(1, 2, 3, 4), dtype="int8")
+    scale_var = relay.var("scale", shape=(), dtype="float32")
+    zp_var = relay.var("zp", shape=(), dtype="int32")
 
     deq_x = relay.qnn.op.dequantize(x, scale_var * scale_var, zp_var + zp_var)
     tt = run_infer_type(deq_x)
 
     assert tt.checked_type == relay.TensorType((1, 2, 3, 4), "float32")
     func = relay.Function([x, scale_var, zp_var], deq_x)
-    data = np.random.uniform(size=(1, 2, 3, 4)).astype('int8')
-    scale = np.array(1).astype('float32')
-    zp = np.array(0).astype('int32')
+    data = np.random.uniform(size=(1, 2, 3, 4)).astype("int8")
+    scale = np.array(1).astype("float32")
+    zp = np.array(0).astype("int32")
 
     mod = tvm.ir.IRModule.from_expr(func)
 
     for target, ctx in tvm.testing.enabled_targets():
-        #TODO: (electriclilies) enable AlterOpLayout when it is fixed
+        # TODO: (electriclilies) enable AlterOpLayout when it is fixed
        with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]):
             lib = relay.build(mod, target=target)
-
+
         module = graph_runtime.GraphModule(lib["default"](ctx))
-        module.set_input(**{'x': data, 'scale': scale, 'zp': zp})
+        module.set_input(**{"x": data, "scale": scale, "zp": zp})
         module.run()
-
+
+
 def test_quantize_dynamic_unit():
-    x = relay.var('x', shape=(1, 2, 3, 4), dtype='float32')
-    scale_var = relay.var('scale', shape=(), dtype='float32')
-    zp_var = relay.var('zp', shape=(), dtype='int32')
+    x = relay.var("x", shape=(1, 2, 3, 4), dtype="float32")
+    scale_var = relay.var("scale", shape=(), dtype="float32")
+    zp_var = relay.var("zp", shape=(), dtype="int32")
 
     q_x = relay.qnn.op.quantize(x, scale_var * scale_var, zp_var + zp_var)
     tt = run_infer_type(q_x)
-
+
     assert tt.checked_type == relay.TensorType((1, 2, 3, 4), "int8")
     func = relay.Function([x, scale_var, zp_var], q_x)
-    data = np.random.uniform(size=(1, 2, 3, 4)).astype('float32')
-    scale = np.array(1).astype('float32')
-    zp = np.array(0).astype('int32')
+    data = np.random.uniform(size=(1, 2, 3, 4)).astype("float32")
+    scale = np.array(1).astype("float32")
+    zp = np.array(0).astype("int32")
 
     mod = tvm.ir.IRModule.from_expr(func)
 
     for target, ctx in tvm.testing.enabled_targets():
-        #TODO: (electriclilies) enable AlterOpLayout when it is fixed
+        # TODO: (electriclilies) enable AlterOpLayout when it is fixed
         with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]):
             lib = relay.build(mod, target=target)
-
+
         module = graph_runtime.GraphModule(lib["default"](ctx))
-        module.set_input(**{'x': data, 'scale': scale, 'zp': zp})
+        module.set_input(**{"x": data, "scale": scale, "zp": zp})
         module.run()
 
+
 def test_quantize_dynamic():
     # A wrapper is required for quantize_dynamic to work correctly
@@ -599,6 +602,7 @@ def forward(self, inp):
     if is_version_greater_than("1.5.1"):
         tvm.testing.assert_allclose(tvm_result, pt_result, rtol=1e-4, atol=1e-4)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     test_dequantize_dynamic_unit()
-    test_quantize_dynamic_unit()
\ No newline at end of file
+    test_quantize_dynamic_unit()
From b0e7ceca49cd585a2669592e4eaf9e35c9375081 Mon Sep 17 00:00:00 2001
From: electriclilies
Date: Wed, 4 Nov 2020 11:07:25 -0800
Subject: [PATCH 05/11] remove main fn

---
 tests/python/frontend/pytorch/qnn_test.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tests/python/frontend/pytorch/qnn_test.py b/tests/python/frontend/pytorch/qnn_test.py
index c372939f3ed6..ca35e4c9b5e4 100644
--- a/tests/python/frontend/pytorch/qnn_test.py
+++ b/tests/python/frontend/pytorch/qnn_test.py
@@ -601,8 +601,3 @@ def forward(self, inp):
     # Outputs from v1.6 seem reliable. TVM's outputs are always the same
     if is_version_greater_than("1.5.1"):
         tvm.testing.assert_allclose(tvm_result, pt_result, rtol=1e-4, atol=1e-4)
-
-
-if __name__ == "__main__":
-    test_dequantize_dynamic_unit()
-    test_quantize_dynamic_unit()

From 95cdd5a4defbd7bc32db9d02f77f1878650b91f5 Mon Sep 17 00:00:00 2001
From: electriclilies
Date: Wed, 4 Nov 2020 11:10:26 -0800
Subject: [PATCH 06/11] fix black again

---
 python/tvm/relay/qnn/op/qnn.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py
index 0efdf8a55177..9a8f22bfb9bc 100644
--- a/python/tvm/relay/qnn/op/qnn.py
+++ b/python/tvm/relay/qnn/op/qnn.py
@@ -24,6 +24,7 @@
 from ... import op as reg
 from ...op import OpPattern
 
+
 def requantize(
     data,
     input_scale,
@@ -498,6 +499,7 @@ def subtract(
         output_zero_point,
     )
 
+
 # register fuse pattern for qnn ops
 reg.register_pattern("qnn.quantize", OpPattern.OPAQUE)
-reg.register_pattern("qnn.dequantize", OpPattern.OPAQUE)
\ No newline at end of file
+reg.register_pattern("qnn.dequantize", OpPattern.OPAQUE)
From b7d2163044a8c983901ad49bfa58c103c4c2f05e Mon Sep 17 00:00:00 2001
From: electriclilies
Date: Thu, 5 Nov 2020 08:28:22 -0800
Subject: [PATCH 07/11] move tests

---
 tests/python/frontend/pytorch/qnn_test.py    | 53 --------------------
 tests/python/relay/test_op_qnn_dequantize.py | 27 ++++++++++
 tests/python/relay/test_op_qnn_quantize.py   | 28 +++++++++++
 3 files changed, 55 insertions(+), 53 deletions(-)

diff --git a/tests/python/frontend/pytorch/qnn_test.py b/tests/python/frontend/pytorch/qnn_test.py
index ca35e4c9b5e4..c0ee7c54b18a 100644
--- a/tests/python/frontend/pytorch/qnn_test.py
+++ b/tests/python/frontend/pytorch/qnn_test.py
@@ -31,7 +31,6 @@ import tvm
 import tvm.testing
 from tvm import relay
 from tvm.contrib import graph_runtime
 from tvm.relay import create_executor
-from tvm.relay.testing import run_infer_type
 from tvm.relay.frontend.pytorch_utils import is_version_greater_than
 from tvm.contrib.download import download_testdata
@@ -509,58 +508,6 @@ def test_serialized_modules():
     assert match_ratio > 0.90
 
-
-def test_dequantize_dynamic_unit():
-    x = relay.var("x", shape=(1, 2, 3, 4), dtype="int8")
-    scale_var = relay.var("scale", shape=(), dtype="float32")
-    zp_var = relay.var("zp", shape=(), dtype="int32")
-
-    deq_x = relay.qnn.op.dequantize(x, scale_var * scale_var, zp_var + zp_var)
-    tt = run_infer_type(deq_x)
-
-    assert tt.checked_type == relay.TensorType((1, 2, 3, 4), "float32")
-    func = relay.Function([x, scale_var, zp_var], deq_x)
-    data = np.random.uniform(size=(1, 2, 3, 4)).astype("int8")
-    scale = np.array(1).astype("float32")
-    zp = np.array(0).astype("int32")
-
-    mod = tvm.ir.IRModule.from_expr(func)
-
-    for target, ctx in tvm.testing.enabled_targets():
-        # TODO: (electriclilies) enable AlterOpLayout when it is fixed
-        with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]):
-            lib = relay.build(mod, target=target)
-
-        module = graph_runtime.GraphModule(lib["default"](ctx))
-        module.set_input(**{"x": data, "scale": scale, "zp": zp})
-        module.run()
-
-
-def test_quantize_dynamic_unit():
-    x = relay.var("x", shape=(1, 2, 3, 4), dtype="float32")
-    scale_var = relay.var("scale", shape=(), dtype="float32")
-    zp_var = relay.var("zp", shape=(), dtype="int32")
-
-    q_x = relay.qnn.op.quantize(x, scale_var * scale_var, zp_var + zp_var)
-    tt = run_infer_type(q_x)
-
-    assert tt.checked_type == relay.TensorType((1, 2, 3, 4), "int8")
-    func = relay.Function([x, scale_var, zp_var], q_x)
-    data = np.random.uniform(size=(1, 2, 3, 4)).astype("float32")
-    scale = np.array(1).astype("float32")
-    zp = np.array(0).astype("int32")
-
-    mod = tvm.ir.IRModule.from_expr(func)
-
-    for target, ctx in tvm.testing.enabled_targets():
-        # TODO: (electriclilies) enable AlterOpLayout when it is fixed
-        with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]):
-            lib = relay.build(mod, target=target)
-
-        module = graph_runtime.GraphModule(lib["default"](ctx))
-        module.set_input(**{"x": data, "scale": scale, "zp": zp})
-        module.run()
-
 
 def test_quantize_dynamic():
     # A wrapper is required for quantize_dynamic to work correctly
     class LinearWrapper(nn.Module):
diff --git a/tests/python/relay/test_op_qnn_dequantize.py b/tests/python/relay/test_op_qnn_dequantize.py
index e1416622c236..e1b3c8ed2038 100644
--- a/tests/python/relay/test_op_qnn_dequantize.py
+++ b/tests/python/relay/test_op_qnn_dequantize.py
@@ -118,9 +118,36 @@ def test_channelwise_axis_0():
     )
 
 
+def test_dynamic_dequantize():
+    x = relay.var("x", shape=(1, 2, 3, 4), dtype="int8")
+    scale_var = relay.var("scale", shape=(), dtype="float32")
+    zp_var = relay.var("zp", shape=(), dtype="int32")
+
+    deq_x = relay.qnn.op.dequantize(x, scale_var * scale_var, zp_var + zp_var)
+    tt = run_infer_type(deq_x)
+
+    assert tt.checked_type == relay.TensorType((1, 2, 3, 4), "float32")
+    func = relay.Function([x, scale_var, zp_var], deq_x)
+    data = np.random.uniform(size=(1, 2, 3, 4)).astype("int8")
+    scale = np.array(1).astype("float32")
+    zp = np.array(0).astype("int32")
+
+    mod = tvm.ir.IRModule.from_expr(func)
+
+    for target, ctx in tvm.testing.enabled_targets():
+        # TODO: (electriclilies) enable AlterOpLayout when it is fixed
+        with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]):
+            lib = relay.build(mod, target=target)
+
+        module = graph_runtime.GraphModule(lib["default"](ctx))
+        module.set_input(**{"x": data, "scale": scale, "zp": zp})
+        module.run()
+
+
 if __name__ == "__main__":
     test_uint8_to_float32()
     test_int8_to_float32()
     test_int32_to_float32()
     test_channelwise_axis_1()
     test_channelwise_axis_0()
+    test_dynamic_dequantize()
diff --git a/tests/python/relay/test_op_qnn_quantize.py b/tests/python/relay/test_op_qnn_quantize.py
index a22c25f5b97f..2ef298679904 100644
--- a/tests/python/relay/test_op_qnn_quantize.py
+++ b/tests/python/relay/test_op_qnn_quantize.py
@@ -20,6 +20,7 @@
 import numpy as np
 from tvm import relay
 from tvm.contrib import graph_runtime
+from tvm.relay.testing import run_infer_type
 
 
 def quantize_test_driver(in_dtype, quant_args, axis, out_dtype, in_data, verify_output_data):
@@ -133,8 +134,35 @@ def test_channelwise_axis_1():
     )
 
 
+def test_dynamic_quantize():
+    x = relay.var("x", shape=(1, 2, 3, 4), dtype="float32")
+    scale_var = relay.var("scale", shape=(), dtype="float32")
+    zp_var = relay.var("zp", shape=(), dtype="int32")
+
+    q_x = relay.qnn.op.quantize(x, scale_var * scale_var, zp_var + zp_var)
+    tt = run_infer_type(q_x)
+
+    assert tt.checked_type == relay.TensorType((1, 2, 3, 4), "int8")
+    func = relay.Function([x, scale_var, zp_var], q_x)
+    data = np.random.uniform(size=(1, 2, 3, 4)).astype("float32")
+    scale = np.array(1).astype("float32")
+    zp = np.array(0).astype("int32")
+
+    mod = tvm.ir.IRModule.from_expr(func)
+
+    for target, ctx in tvm.testing.enabled_targets():
+        # TODO: (electriclilies) enable AlterOpLayout when it is fixed
+        with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]):
+            lib = relay.build(mod, target=target)
+
+        module = graph_runtime.GraphModule(lib["default"](ctx))
+        module.set_input(**{"x": data, "scale": scale, "zp": zp})
+        module.run()
+
+
 if __name__ == "__main__":
     test_float32_to_uint8()
     test_float32_to_int8()
     test_channelwise_axis_0()
     test_channelwise_axis_1()
+    test_dynamic_quantize()
From fe1150c460d8983185a7025803d18d95c567d837 Mon Sep 17 00:00:00 2001
From: electriclilies
Date: Thu, 5 Nov 2020 08:31:05 -0800
Subject: [PATCH 08/11] fix import

---
 tests/python/frontend/pytorch/qnn_test.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/python/frontend/pytorch/qnn_test.py b/tests/python/frontend/pytorch/qnn_test.py
index c0ee7c54b18a..a3e1c903353d 100644
--- a/tests/python/frontend/pytorch/qnn_test.py
+++ b/tests/python/frontend/pytorch/qnn_test.py
@@ -30,7 +30,6 @@
 import tvm.testing
 from tvm import relay
 from tvm.contrib import graph_runtime
-from tvm.relay import create_executor
 from tvm.relay.frontend.pytorch_utils import is_version_greater_than
 from tvm.contrib.download import download_testdata
 
From e54bc6de38ac5e2fb2c0780fa2dad53efb87deb1 Mon Sep 17 00:00:00 2001
From: electriclilies
Date: Thu, 5 Nov 2020 08:32:25 -0800
Subject: [PATCH 09/11] fix import again

---
 tests/python/frontend/pytorch/qnn_test.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/python/frontend/pytorch/qnn_test.py b/tests/python/frontend/pytorch/qnn_test.py
index a3e1c903353d..9781eb5d57c4 100644
--- a/tests/python/frontend/pytorch/qnn_test.py
+++ b/tests/python/frontend/pytorch/qnn_test.py
@@ -29,7 +29,6 @@
 import tvm
 import tvm.testing
 from tvm import relay
-from tvm.contrib import graph_runtime
 from tvm.relay.frontend.pytorch_utils import is_version_greater_than
 from tvm.contrib.download import download_testdata

From e2fc37bad91d0183433900ae4c941f1896b4862f Mon Sep 17 00:00:00 2001
From: electriclilies
Date: Thu, 5 Nov 2020 08:44:39 -0800
Subject: [PATCH 10/11] try again

From 4e70bb28e96f7d0f0b2b0a614bed8f9b943f4ca1 Mon Sep 17 00:00:00 2001
From: electriclilies
Date: Thu, 5 Nov 2020 09:29:47 -0800
Subject: [PATCH 11/11] fix import

---
 tests/python/relay/test_op_qnn_dequantize.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/python/relay/test_op_qnn_dequantize.py b/tests/python/relay/test_op_qnn_dequantize.py
index e1b3c8ed2038..e7fb161a13cb 100644
--- a/tests/python/relay/test_op_qnn_dequantize.py
+++ b/tests/python/relay/test_op_qnn_dequantize.py
@@ -20,6 +20,7 @@
 import numpy as np
 from tvm import relay
 from tvm.contrib import graph_runtime
+from tvm.relay.testing import run_infer_type
 
 
 def dequantize_test_driver(in_dtype, quant_args, in_data, verify_output_data, axis):
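[Review note] Net effect of the series: modules whose quantization parameters are graph inputs now build and run end to end. A round-trip sketch in the style of the tests above (assumes an LLVM-enabled build of this TVM version; names are illustrative):

    import numpy as np
    import tvm
    from tvm import relay
    from tvm.contrib import graph_runtime

    x = relay.var("x", shape=(1, 4), dtype="float32")
    s = relay.var("s", shape=(), dtype="float32")
    z = relay.var("z", shape=(), dtype="int32")
    # quantize then dequantize with the same runtime scale/zero point
    qdq = relay.qnn.op.dequantize(relay.qnn.op.quantize(x, s, z), s, z)
    mod = tvm.ir.IRModule.from_expr(relay.Function([x, s, z], qdq))

    with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]):
        lib = relay.build(mod, target="llvm")

    m = graph_runtime.GraphModule(lib["default"](tvm.cpu(0)))
    m.set_input(x=np.random.rand(1, 4).astype("float32"), s=np.float32(0.1), z=np.int32(0))
    m.run()
    out = m.get_output(0).asnumpy()  # recovers the input up to quantization error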