From cbdd8fa1ee158cd5d32eba17150b4cdbb36f389a Mon Sep 17 00:00:00 2001
From: mbrookhart
Date: Wed, 9 Dec 2020 16:40:42 -0700
Subject: [PATCH 1/3] check for incomplete types in QNN Relation functions

---
 src/relay/qnn/op/concatenate.cc           | 34 +++++++++++++++++++----
 src/relay/qnn/op/convolution.cc           |  5 ++++
 src/relay/qnn/op/convolution_transpose.cc |  6 ++++
 src/relay/qnn/op/dense.cc                 |  6 ++++
 src/relay/qnn/op/op_common.h              |  5 ++++
 src/relay/qnn/op/requantize.cc            |  8 ++++++
 6 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/src/relay/qnn/op/concatenate.cc b/src/relay/qnn/op/concatenate.cc
index 7a716a1ec498..f4b0c56fdbf7 100644
--- a/src/relay/qnn/op/concatenate.cc
+++ b/src/relay/qnn/op/concatenate.cc
@@ -40,27 +40,49 @@ bool QnnConcatenateRel(const Array<Type>& types, int num_inputs, const Attrs& at
                        const TypeReporter& reporter) {
   ICHECK_EQ(types.size(), 6);
 
+  if (types[0].as<IncompleteTypeNode>()) {
+    return false;
+  }
   // Check the scale and zero point types
   const auto* input_scales_tuple = types[1].as<TupleTypeNode>();
   if (input_scales_tuple == nullptr) {
-    throw Error(ErrorBuilder()
-                << "qnn concatenate requires a tuple of scales as the second argument, found "
-                << PrettyPrint(types[1]));
+    if (types[1].as<IncompleteTypeNode>()) {
+      return false;
+    } else {
+      throw Error(ErrorBuilder()
+                  << "qnn concatenate requires a tuple of scales as the second argument, found "
+                  << PrettyPrint(types[1]));
+    }
   }
   for (const auto& input_scale : input_scales_tuple->fields) {
+    if (input_scale.as<IncompleteTypeNode>()) {
+      return false;
+    }
     ICHECK(IsScalarType(input_scale, DataType::Float(32)));  // input_scales[idx]
   }
 
   const auto* input_zero_points_tuple = types[2].as<TupleTypeNode>();
   if (input_zero_points_tuple == nullptr) {
-    throw Error(ErrorBuilder()
-                << "qnn concatenate requires a tuple of zero_points as the third argument, found "
-                << PrettyPrint(types[2]));
+    if (types[2].as<IncompleteTypeNode>()) {
+      return false;
+    } else {
+      throw Error(ErrorBuilder()
+                  << "qnn concatenate requires a tuple of zero_points as the third argument, found "
+                  << PrettyPrint(types[2]));
+    }
   }
   for (const auto& input_zero_point : input_zero_points_tuple->fields) {
+    if (input_zero_point.as<IncompleteTypeNode>()) {
+      return false;
+    }
     ICHECK(IsScalarType(input_zero_point, DataType::Int(32)));  // input_zero_points[idx]
   }
 
+  for (size_t i = 3; i < 5; ++i) {
+    if (types[i].as<IncompleteTypeNode>()) {
+      return false;
+    }
+  }
   ICHECK(IsScalarType(types[3], DataType::Float(32)));  // output_scale
   ICHECK(IsScalarType(types[4], DataType::Int(32)));    // output_zero_point
 
diff --git a/src/relay/qnn/op/convolution.cc b/src/relay/qnn/op/convolution.cc
index a9f2f361f2b3..3615f4c44b12 100644
--- a/src/relay/qnn/op/convolution.cc
+++ b/src/relay/qnn/op/convolution.cc
@@ -56,6 +56,11 @@ bool QnnConv2DRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
       << "Expected qnn conv2d type(int32, int16) for output but was " << param->out_dtype;
   ICHECK(param->out_dtype.bits() > 0) << "Output dtype bits should be greater than 0.";
 
+  for (size_t i = 2; i < 5; ++i) {
+    if (types[i].as<IncompleteTypeNode>()) {
+      return false;
+    }
+  }
   // Check the types of scale and zero points.
   ICHECK(IsScalarType(types[2], DataType::Int(32)));    // input_zero_point
   ICHECK(IsScalarType(types[3], DataType::Int(32)));    // kernel_zero_point
diff --git a/src/relay/qnn/op/convolution_transpose.cc b/src/relay/qnn/op/convolution_transpose.cc
index c7515b5904f1..4c4492df0f24 100644
--- a/src/relay/qnn/op/convolution_transpose.cc
+++ b/src/relay/qnn/op/convolution_transpose.cc
@@ -96,6 +96,12 @@ bool QnnConv2DTransposeRel(const Array<Type>& types, int num_inputs, const Attrs
   ICHECK(param->out_dtype.bits() > 0) << "Output dtype bits should be greater than 0.";
 
   // Check the types of scale and zero points.
+  for (size_t i = 2; i < 5; ++i) {
+    if (types[i].as<IncompleteTypeNode>()) {
+      return false;
+    }
+  }
+
   ICHECK(IsScalarType(types[2], DataType::Int(32)));    // input_zero_point
   ICHECK(IsScalarType(types[3], DataType::Int(32)));    // kernel_zero_point
   ICHECK(IsScalarType(types[4], DataType::Float(32)));  // input_scale
diff --git a/src/relay/qnn/op/dense.cc b/src/relay/qnn/op/dense.cc
index 3602995b8f16..221dc9c27969 100644
--- a/src/relay/qnn/op/dense.cc
+++ b/src/relay/qnn/op/dense.cc
@@ -53,6 +53,12 @@ bool QnnDenseRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
       << "Expected quantized dense type(int32) for output but was " << param->out_dtype;
 
   // Check the types of scale and zero points.
+  for (size_t i = 2; i < 5; ++i) {
+    if (types[i].as<IncompleteTypeNode>()) {
+      return false;
+    }
+  }
+
   ICHECK(IsScalarType(types[2], DataType::Int(32)));    // input_zero_point
   ICHECK(IsScalarType(types[3], DataType::Int(32)));    // kernel_zero_point
   ICHECK(IsScalarType(types[4], DataType::Float(32)));  // input_scale
diff --git a/src/relay/qnn/op/op_common.h b/src/relay/qnn/op/op_common.h
index 330802c4c9b1..1a719bf69c11 100644
--- a/src/relay/qnn/op/op_common.h
+++ b/src/relay/qnn/op/op_common.h
@@ -171,6 +171,11 @@ static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, con
   ICHECK_EQ(types.size(), kNumQnnBinaryOpArgTypes);
 
   // Check the scale and zero point types
+  for (size_t i = 0; i < 8; ++i) {
+    if (types[i].as<IncompleteTypeNode>()) {
+      return false;
+    }
+  }
   ICHECK(IsScalarType(types[2], DataType::Float(32)));  // lhs_scale
   ICHECK(IsScalarType(types[3], DataType::Int(32)));    // lhs_zero_point
   ICHECK(IsScalarType(types[4], DataType::Float(32)));  // rhs_scale
diff --git a/src/relay/qnn/op/requantize.cc b/src/relay/qnn/op/requantize.cc
index 8e9b31e6fc39..e038ff37df77 100644
--- a/src/relay/qnn/op/requantize.cc
+++ b/src/relay/qnn/op/requantize.cc
@@ -263,6 +263,14 @@ bool RequantizeRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
     return false;
   }
 
+  if (types[0].as<IncompleteTypeNode>()) {
+    return false;
+  }
+  for (size_t i = 3; i < 5; ++i) {
+    if (types[i].as<IncompleteTypeNode>()) {
+      return false;
+    }
+  }
   const auto in_dtype = data->dtype;
   ICHECK(in_dtype == DataType::Int(8) || in_dtype == DataType::UInt(8) ||
          in_dtype == DataType::Int(32))
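
Note on the guard used throughout this patch: an IncompleteType placeholder means the
type solver has not yet inferred that argument (as happens for the parameters of a
MergeComposite-produced composite function), so the relation returns false to defer to a
later solver iteration instead of tripping an ICHECK on the placeholder. A minimal sketch
of that shape, assuming a hypothetical single-input relation ExampleRel (the name and
argument layout are invented for illustration, not taken from the patch):

    // Sketch only: ExampleRel is hypothetical; the guard mirrors this patch.
    // Expected Types: data, scale, out_type
    bool ExampleRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
                    const TypeReporter& reporter) {
      ICHECK_EQ(types.size(), 3);
      for (size_t i = 0; i < 2; ++i) {
        // An IncompleteType means this argument is not solved yet; return false
        // so the solver revisits the relation once more types are known.
        if (types[i].as<IncompleteTypeNode>()) {
          return false;
        }
      }
      ICHECK(IsScalarType(types[1], DataType::Float(32)));  // scale
      reporter->Assign(types[2], types[0]);  // out_type matches the data type
      return true;
    }
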
From 303469fd14023058bf40950bb306cabb2c226606 Mon Sep 17 00:00:00 2001
From: mbrookhart
Date: Wed, 9 Dec 2020 16:44:09 -0700
Subject: [PATCH 2/3] add regression test from #7067

---
 tests/python/frontend/pytorch/qnn_test.py | 41 +++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/tests/python/frontend/pytorch/qnn_test.py b/tests/python/frontend/pytorch/qnn_test.py
index 9781eb5d57c4..4b7395922efb 100644
--- a/tests/python/frontend/pytorch/qnn_test.py
+++ b/tests/python/frontend/pytorch/qnn_test.py
@@ -32,6 +32,10 @@
 from tvm.relay.frontend.pytorch_utils import is_version_greater_than
 from tvm.contrib.download import download_testdata
 
+from tvm.relay.dataflow_pattern import wildcard, is_op
+from tvm.relay.op.contrib.register import register_pattern_table
+from tvm.relay.op.contrib.register import get_pattern_table
+
 
 def torch_version_check():
     from packaging import version
@@ -39,10 +43,47 @@ def torch_version_check():
     return version.parse(torch.__version__) > version.parse("1.4.0")
 
 
+def make_qnn_add_pattern():
+    lhs = wildcard()
+    rhs = wildcard()
+    lhs_scale = wildcard()
+    lhs_zero_point = wildcard()
+    rhs_scale = wildcard()
+    rhs_zero_point = wildcard()
+    output_scale = wildcard()
+    output_zero_point = wildcard()
+    qadd = is_op("qnn.add")(
+        lhs,
+        rhs,
+        lhs_scale,
+        lhs_zero_point,
+        rhs_scale,
+        rhs_zero_point,
+        output_scale,
+        output_zero_point,
+    )
+    return qadd.optional(is_op("clip"))
+
+
+@register_pattern_table("test_table")
+def pattern_table():
+    return [
+        ("qnn_add", make_qnn_add_pattern()),
+    ]
+
+
 def get_tvm_runtime(script_module, input_name, ishape):
     input_shapes = [(input_name, ishape)]
     mod, params = relay.frontend.from_pytorch(script_module, input_shapes)
 
+    pattern_table = get_pattern_table("test_table")
+    with tvm.transform.PassContext(opt_level=3):
+        pass_list = [
+            tvm.relay.transform.SimplifyInference(),
+            tvm.relay.transform.MergeComposite(pattern_table),
+        ]
+        composite_partition = tvm.transform.Sequential(pass_list)
+        partitioned = composite_partition(mod)
+
     with tvm.transform.PassContext(opt_level=3):
         # test on only cpu for now, torch cannot run quant models on cuda
From b49016c748544e73364b7d31a2704076de020dc5 Mon Sep 17 00:00:00 2001
From: mbrookhart
Date: Wed, 9 Dec 2020 20:17:43 -0700
Subject: [PATCH 3/3] respond to review comments

---
 src/relay/qnn/op/concatenate.cc           |  2 ++
 src/relay/qnn/op/convolution.cc           | 10 ++++++----
 src/relay/qnn/op/convolution_transpose.cc |  7 ++++---
 src/relay/qnn/op/dense.cc                 | 11 ++++++-----
 src/relay/qnn/op/op_common.h              | 10 +++++++++-
 src/relay/qnn/op/requantize.cc            |  5 ++---
 6 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/src/relay/qnn/op/concatenate.cc b/src/relay/qnn/op/concatenate.cc
index f4b0c56fdbf7..59a519d66436 100644
--- a/src/relay/qnn/op/concatenate.cc
+++ b/src/relay/qnn/op/concatenate.cc
@@ -38,6 +38,8 @@ namespace qnn {
 
 bool QnnConcatenateRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
                        const TypeReporter& reporter) {
+  // Expected Types: data, input_scales, input_zero_points, output_scale, output_zero_point,
+  // out_type
   ICHECK_EQ(types.size(), 6);
 
   if (types[0].as<IncompleteTypeNode>()) {
diff --git a/src/relay/qnn/op/convolution.cc b/src/relay/qnn/op/convolution.cc
index 3615f4c44b12..21335ec2fb34 100644
--- a/src/relay/qnn/op/convolution.cc
+++ b/src/relay/qnn/op/convolution.cc
@@ -42,6 +42,8 @@ namespace qnn {
 
 bool QnnConv2DRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
                   const TypeReporter& reporter) {
+  // Expected Types: data, weight, input_zero_point, weight_zero_point, input_scale, weight_scale,
+  // out_type
   ICHECK_EQ(types.size(), 7);
   const auto* data = types[0].as<TensorTypeNode>();
   const auto* weight = types[1].as<TensorTypeNode>();
@@ -56,20 +58,20 @@ bool QnnConv2DRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
       << "Expected qnn conv2d type(int32, int16) for output but was " << param->out_dtype;
   ICHECK(param->out_dtype.bits() > 0) << "Output dtype bits should be greater than 0.";
 
+  // Check the types of scale and zero points.
   for (size_t i = 2; i < 5; ++i) {
     if (types[i].as<IncompleteTypeNode>()) {
       return false;
     }
   }
-  // Check the types of scale and zero points.
   ICHECK(IsScalarType(types[2], DataType::Int(32)));    // input_zero_point
-  ICHECK(IsScalarType(types[3], DataType::Int(32)));    // kernel_zero_point
+  ICHECK(IsScalarType(types[3], DataType::Int(32)));    // weight_zero_point
   ICHECK(IsScalarType(types[4], DataType::Float(32)));  // input_scale
 
   // Kernel scale can be a vector of length output_channels or a scalar.
   if (param->groups == 1) {
     size_t axis = param->kernel_layout.operator std::string().find('O');
     ICHECK(axis != std::string::npos) << "Kernel layout attribute is not defined";
-    AssignType(types[5], DataType::Float(32), weight->shape[axis], reporter);  // kernel scale
+    AssignType(types[5], DataType::Float(32), weight->shape[axis], reporter);  // weight_scale
   } else {
     // Here, total number of output channels depend on depth multiplier.
     size_t o_axis = param->kernel_layout.operator std::string().find('O');
@@ -77,7 +79,7 @@ bool QnnConv2DRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
     ICHECK(o_axis != std::string::npos || i_axis != std::string::npos)
         << "Kernel layout attribute is not defined";
     AssignType(types[5], DataType::Float(32), weight->shape[i_axis] * weight->shape[o_axis],
-               reporter);  // kernel scale
+               reporter);  // weight_scale
   }
 
   // Collect the input tensor and output tensor devoid of scale and zero points to reuse Relay
diff --git a/src/relay/qnn/op/convolution_transpose.cc b/src/relay/qnn/op/convolution_transpose.cc
index 4c4492df0f24..bde398df5e33 100644
--- a/src/relay/qnn/op/convolution_transpose.cc
+++ b/src/relay/qnn/op/convolution_transpose.cc
@@ -81,6 +81,8 @@ Array<Array<Layout>> QnnConvTransposeInferCorrectLayout(
 
 bool QnnConv2DTransposeRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
                            const TypeReporter& reporter) {
+  // Expected Types: data, weight, input_zero_point, weight_zero_point, input_scale, weight_scale,
+  // out_type
   ICHECK_EQ(types.size(), 7);
   const auto* data = types[0].as<TensorTypeNode>();
   const auto* weight = types[1].as<TensorTypeNode>();
@@ -101,15 +103,14 @@ bool QnnConv2DTransposeRel(const Array<Type>& types, int num_inputs, const Attrs
       return false;
     }
   }
-
   ICHECK(IsScalarType(types[2], DataType::Int(32)));    // input_zero_point
-  ICHECK(IsScalarType(types[3], DataType::Int(32)));    // kernel_zero_point
+  ICHECK(IsScalarType(types[3], DataType::Int(32)));    // weight_zero_point
   ICHECK(IsScalarType(types[4], DataType::Float(32)));  // input_scale
 
   // Kernel scale can be a vector of length output_channels or a scalar.
   if (param->groups == 1) {
     size_t axis = param->kernel_layout.find('O');
     ICHECK(axis != std::string::npos) << "Kernel layout attribute is not defined";
-    AssignType(types[5], DataType::Float(32), weight->shape[axis], reporter);  // kernel scale
+    AssignType(types[5], DataType::Float(32), weight->shape[axis], reporter);  // weight_scale
   } else {
     // Here, total number of output channels depend on depth multiplier.
     size_t o_axis = param->kernel_layout.find('O');
diff --git a/src/relay/qnn/op/dense.cc b/src/relay/qnn/op/dense.cc
index 221dc9c27969..6284524bff27 100644
--- a/src/relay/qnn/op/dense.cc
+++ b/src/relay/qnn/op/dense.cc
@@ -39,6 +39,8 @@ namespace qnn {
 
 bool QnnDenseRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
                  const TypeReporter& reporter) {
+  // Expected Types: data, weight, input_zero_point, weight_zero_point, input_scale, weight_scale,
+  // out_type
   ICHECK_EQ(types.size(), 7);
   const auto* data = types[0].as<TensorTypeNode>();
   const auto* weight = types[1].as<TensorTypeNode>();
@@ -58,11 +60,10 @@ bool QnnDenseRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
       return false;
     }
   }
-
-  ICHECK(IsScalarType(types[2], DataType::Int(32)));    // input_zero_point
-  ICHECK(IsScalarType(types[3], DataType::Int(32)));    // kernel_zero_point
-  ICHECK(IsScalarType(types[4], DataType::Float(32)));  // input_scale
-  AssignType(types[5], DataType::Float(32), param->units, reporter);
+  ICHECK(IsScalarType(types[2], DataType::Int(32)));                  // input_zero_point
+  ICHECK(IsScalarType(types[3], DataType::Int(32)));                  // weight_zero_point
+  ICHECK(IsScalarType(types[4], DataType::Float(32)));                // input_scale
+  AssignType(types[5], DataType::Float(32), param->units, reporter);  // weight_scale
 
   ICHECK(param->out_dtype.bits() > 0) << "Output dtype bits should be greater than 0.";
diff --git a/src/relay/qnn/op/op_common.h b/src/relay/qnn/op/op_common.h
index 1a719bf69c11..0f77db4f501a 100644
--- a/src/relay/qnn/op/op_common.h
+++ b/src/relay/qnn/op/op_common.h
@@ -168,10 +168,18 @@ inline Array<Array<Layout> > QnnBinaryBroadcastLayout(const Attrs& attrs,
 
 static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
                                    const TypeReporter& reporter) {
+  // Expected Types: lhs, rhs, lhs_scale, lhs_zero_point, rhs_scale, rhs_zero_point, output_scale,
+  // output_zero_point, out_type
   ICHECK_EQ(types.size(), kNumQnnBinaryOpArgTypes);
 
+  // Check the lhs and rhs types
+  for (size_t i = 0; i < 2; ++i) {
+    if (types[i].as<IncompleteTypeNode>()) {
+      return false;
+    }
+  }
   // Check the scale and zero point types
-  for (size_t i = 0; i < 8; ++i) {
+  for (size_t i = 2; i < 8; ++i) {
     if (types[i].as<IncompleteTypeNode>()) {
       return false;
     }
diff --git a/src/relay/qnn/op/requantize.cc b/src/relay/qnn/op/requantize.cc
index e038ff37df77..2ae879595659 100644
--- a/src/relay/qnn/op/requantize.cc
+++ b/src/relay/qnn/op/requantize.cc
@@ -256,6 +256,7 @@ Expr RequantizeQnnCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
  */
 bool RequantizeRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
                    const TypeReporter& reporter) {
+  // Expected Types: data, input_scale, input_zero_point, output_scale, output_zero_point, output
   ICHECK_EQ(types.size(), 6);
   const auto* data = types[0].as<TensorTypeNode>();
 
@@ -263,9 +264,7 @@ bool RequantizeRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
     return false;
   }
 
-  if (types[0].as<IncompleteTypeNode>()) {
-    return false;
-  }
+  // Check the scale and zero point types
   for (size_t i = 3; i < 5; ++i) {
     if (types[i].as<IncompleteTypeNode>()) {
       return false;
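
Why the guards must run before the scalar checks: IsScalarType, the qnn helper invoked in
all of these relations, hard-asserts that its argument is a concrete rank-0 TensorType, so
handing it an IncompleteType placeholder aborts type inference with an ICHECK failure (the
crash the regression test above reproduces) rather than letting the solver iterate. A rough
paraphrase of the helper, assuming its body matches the contemporary src/relay/qnn/utils.h
(not copied verbatim; messages and details may differ):

    // Paraphrased sketch of qnn's IsScalarType; exact body may differ.
    static inline bool IsScalarType(const Type& expr_type, const DataType& dtype) {
      const auto* tensor_type = expr_type.as<TensorTypeNode>();
      // An IncompleteType is not a TensorTypeNode, so this ICHECK would fire;
      // hence each relation now returns false on IncompleteTypeNode first.
      ICHECK(tensor_type) << "Only tensor type can be checked for scalar values.";
      ICHECK_EQ(tensor_type->shape.size(), 0);  // a scalar is a rank-0 tensor
      ICHECK(tensor_type->dtype == dtype) << "Expected dtype " << dtype;
      return true;
    }
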