Retain qnn input kernel scales #4292

Merged: 14 commits, Nov 16, 2019
14 changes: 14 additions & 0 deletions include/tvm/relay/qnn/attrs.h
@@ -135,6 +135,10 @@ struct QnnConv2DAttrs : public tvm::AttrsNode<QnnConv2DAttrs> {
// Quantization related attributes.
int32_t input_zero_point;
int32_t kernel_zero_point;
// The input tensor scale and kernel tensor scale are stored
// so that later Relay passes can access this information easily.
double input_scale;
double kernel_scale;

TVM_DECLARE_ATTRS(QnnConv2DAttrs, "relay.attrs.QnnConv2DAttrs") {
TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1}))
@@ -177,6 +181,10 @@ struct QnnConv2DAttrs : public tvm::AttrsNode<QnnConv2DAttrs> {
.describe("The zero point of the input tensor.");
TVM_ATTR_FIELD(kernel_zero_point)
.describe("The zero point of the kernel tensor.");
TVM_ATTR_FIELD(input_scale)
.describe("The quantization scale for the input tensor.");
u99127 marked this conversation as resolved.
Show resolved Hide resolved
TVM_ATTR_FIELD(kernel_scale)
.describe("The quantization scale for the weight tensor.");
}
};

@@ -212,6 +220,8 @@ struct QnnDenseAttrs : public tvm::AttrsNode<QnnDenseAttrs> {
// Quantization related attributes.
int32_t input_zero_point;
int32_t kernel_zero_point;
double input_scale;
double kernel_scale;

TVM_DECLARE_ATTRS(QnnDenseAttrs, "relay.attrs.QnnDenseAttrs") {
TVM_ATTR_FIELD(units)
@@ -222,6 +232,10 @@ struct QnnDenseAttrs : public tvm::AttrsNode<QnnDenseAttrs> {
    .describe("The zero point of the input tensor.");
TVM_ATTR_FIELD(kernel_zero_point)
    .describe("The zero point of the kernel tensor.");
TVM_ATTR_FIELD(input_scale)
    .describe("The input tensor scale.");
TVM_ATTR_FIELD(kernel_scale)
    .describe("The kernel tensor scale.");
}
};

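The point of retaining the two scales is that passes running on the Relay graph can read them straight off the call attributes. A minimal sketch of such access, assuming a hand-rolled helper (collect_qnn_scales is hypothetical, not part of this PR):

    from tvm import relay

    def collect_qnn_scales(func):
        """Record the retained scales of every qnn.conv2d / qnn.dense call."""
        scales = []

        def visit(expr):
            if isinstance(expr, relay.expr.Call):
                op_name = getattr(expr.op, "name", None)
                if op_name in ("qnn.conv2d", "qnn.dense"):
                    scales.append((op_name,
                                   float(expr.attrs.input_scale),
                                   float(expr.attrs.kernel_scale)))

        relay.analysis.post_order_visit(func, visit)
        return scales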
6 changes: 6 additions & 0 deletions python/tvm/relay/frontend/tflite.py
@@ -730,9 +730,13 @@ def convert_fully_connected(self, op):
weight_expr = self.exp_tab.new_const(weight_value, dtype=weight_tensor_type_str)

if input_tensor.qnn_params:
    input_scale = input_tensor.qnn_params['scale']
    kernel_scale = weight_tensor.qnn_params['scale']
    out = _qnn.op.dense(in_expr, weight_expr,
                        input_zero_point=input_tensor.qnn_params['zero_point'],
                        kernel_zero_point=weight_tensor.qnn_params['zero_point'],
                        input_scale=input_scale,
                        kernel_scale=kernel_scale,
                        out_dtype='int32')
else:
    out = _op.nn.dense(in_expr, weight_expr)
@@ -936,6 +940,8 @@ def convert_conv(self, op, conv_type):
    qnn_conv2d_params['input_zero_point'] = input_tensor.qnn_params['zero_point']
    qnn_conv2d_params['kernel_zero_point'] = weight_tensor.qnn_params['zero_point']
    qnn_conv2d_params['out_dtype'] = 'int32'
    qnn_conv2d_params['input_scale'] = input_tensor.qnn_params['scale']
    qnn_conv2d_params['kernel_scale'] = weight_tensor.qnn_params['scale']
    out = _qnn.op.conv2d(in_expr, weight_expr, **qnn_conv2d_params)
else:
    out = _op.nn.conv2d(in_expr, weight_expr, **params)
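For context, qnn_params is populated when each TFLite tensor is read: the flatbuffer's per-tensor quantization block supplies scale and zero_point, and the hunks above forward both instead of discarding the scales. A toy reconstruction of the fully-connected path (shapes and quantization numbers are made up for illustration):

    from tvm import relay

    # Hypothetical per-tensor quantization info, as the frontend stores it.
    input_qnn = {'scale': 0.0235, 'zero_point': 128}
    weight_qnn = {'scale': 0.0036, 'zero_point': 122}

    data = relay.var("data", shape=(1, 1024), dtype="uint8")
    weight = relay.var("weight", shape=(10, 1024), dtype="uint8")

    out = relay.qnn.op.dense(data, weight,
                             input_zero_point=input_qnn['zero_point'],
                             kernel_zero_point=weight_qnn['zero_point'],
                             input_scale=input_qnn['scale'],
                             kernel_scale=weight_qnn['scale'],
                             units=10,
                             out_dtype='int32')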
2 changes: 2 additions & 0 deletions python/tvm/relay/qnn/op/legalizations.py
@@ -88,6 +88,8 @@ def helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay_op):
new_attrs = {k: attrs[k] for k in attrs.keys()}
del new_attrs['kernel_zero_point']
del new_attrs['input_zero_point']
del new_attrs['input_scale']
del new_attrs['kernel_scale']
return relay_op(shift_data, shift_kernel, **new_attrs)
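
The two new del statements exist because this helper rewrites a qnn op into plain nn ops for targets without fast int8 support, and the nn ops reject attributes they do not declare, so every qnn-only attribute has to be stripped first. A standalone sketch of the pattern (to_float_op is hypothetical; shift_data and shift_kernel stand in for the zero-point-subtracted tensors built earlier in the helper):

    def to_float_op(shift_data, shift_kernel, attrs, relay_op):
        """Dispatch to e.g. relay.nn.conv2d after dropping qnn-only attrs."""
        qnn_only = ('input_zero_point', 'kernel_zero_point',
                    'input_scale', 'kernel_scale')
        new_attrs = {k: attrs[k] for k in attrs.keys() if k not in qnn_only}
        return relay_op(shift_data, shift_kernel, **new_attrs)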

# Helper function to change dtypes to uint8 x int8. Intel VNNI instructions prefer this setting.
30 changes: 29 additions & 1 deletion python/tvm/relay/qnn/op/qnn.py
@@ -189,6 +189,8 @@ def conv2d(data,
           kernel,
           input_zero_point,
           kernel_zero_point,
           input_scale,
           kernel_scale,
           strides=(1, 1),
           padding=(0, 0),
           dilation=(1, 1),
@@ -219,6 +221,16 @@
input_zero_point: int
    The zero point of the data distribution.

input_scale: float
    The scale for the input tensor. It is stored here purely for
    convenience; see the fuller commentary under kernel_scale.

kernel_scale: float
    The scale for the weight tensor. It is stored so that passes running
    on the Relay graph can access it; the value is no longer needed once
    qnn.conv2d has been lowered to the same sequence of steps as nn.conv2d.
    See also input_scale in Requantize.

kernel_zero_point: int
    The zero point of the quantized_kernel distribution.

@@ -260,6 +272,7 @@ def conv2d(data,

return _make.conv2d(data, kernel,
                    input_zero_point, kernel_zero_point,
                    input_scale, kernel_scale,
                    strides, padding, dilation,
                    groups, channels, kernel_size,
                    data_layout, kernel_layout, out_layout, out_dtype)
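
Putting the updated signature together, a small end-to-end example of constructing a quantized convolution with the retained scales (shapes and scale/zero-point values are invented for illustration):

    from tvm import relay

    data = relay.var("data", shape=(1, 3, 8, 8), dtype="uint8")
    kernel = relay.var("kernel", shape=(4, 3, 3, 3), dtype="uint8")

    conv = relay.qnn.op.conv2d(data, kernel,
                               input_zero_point=128, kernel_zero_point=127,
                               input_scale=0.078, kernel_scale=0.012,
                               kernel_size=(3, 3), channels=4,
                               out_dtype="int32")

    # The scales now ride along on the call attributes.
    print(conv.attrs.input_scale, conv.attrs.kernel_scale)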
@@ -317,6 +330,8 @@ def dense(data,
          weight,
          input_zero_point,
          kernel_zero_point,
          input_scale,
          kernel_scale,
          units=None,
          out_dtype="int32"):
"""Qnn Dense operator.
@@ -332,6 +347,17 @@
    The quantized input data to the operator.
weight : tvm.relay.Expr
    The quantized weight expressions.
input_zero_point: int
    The input zero point.
kernel_zero_point: int
    The kernel zero point.
input_scale: float
    The scale for the input tensor.
kernel_scale: float
    The scale for the weight tensor. It is stored so that passes running
    on the Relay graph can access it; the value is no longer needed once
    qnn.dense has been lowered to the same sequence of steps as nn.dense.
    See also input_scale in Requantize.
units : int, optional
    Number of hidden units of the dense transformation.
out_dtype : str, optional
@@ -345,9 +371,11 @@

return _make.dense(data,
                   weight,
                   input_zero_point,
                   kernel_zero_point,
                   input_scale,
                   kernel_scale,
                   units,
                   out_dtype)


5 changes: 4 additions & 1 deletion src/relay/qnn/op/convolution.cc
@@ -440,7 +440,8 @@ Expr QnnConv2DCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
// Positional relay function to create quantized conv2d operator
// used by frontend FFI.
Expr MakeQnnConv2D(Expr data, Expr weight, int32_t input_zero_point, int32_t kernel_zero_point,
                   double input_scale, double kernel_scale, Array<IndexExpr> strides,
                   Array<IndexExpr> padding, Array<IndexExpr> dilation,
                   int groups, IndexExpr channels, Array<IndexExpr> kernel_size,
                   std::string data_layout, std::string kernel_layout, std::string out_layout,
                   DataType out_dtype) {
@@ -457,6 +458,8 @@ Expr MakeQnnConv2D(Expr data, Expr weight, int32_t input_zero_point, int32_t kernel_zero_point,
attrs->out_dtype = std::move(out_dtype);
attrs->input_zero_point = std::move(input_zero_point);
attrs->kernel_zero_point = std::move(kernel_zero_point);
attrs->input_scale = std::move(input_scale);
attrs->kernel_scale = std::move(kernel_scale);
static const Op& op = Op::Get("qnn.conv2d");
return CallNode::make(op, {data, weight}, Attrs(attrs), {});
}
8 changes: 6 additions & 2 deletions src/relay/qnn/op/dense.cc
@@ -57,13 +57,17 @@ bool QnnDenseRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
}

// Positional relay function to create quantized dense operator used by frontend FFI.
Expr MakeQuantizedDense(Expr data, Expr weight, int32_t input_zero_point,
                        int32_t kernel_zero_point, double input_scale,
                        double kernel_scale, IndexExpr units,
                        DataType out_dtype) {
auto attrs = make_node<QnnDenseAttrs>();
attrs->units = std::move(units);
attrs->out_dtype = out_dtype;
attrs->input_zero_point = input_zero_point;
attrs->kernel_zero_point = kernel_zero_point;
attrs->input_scale = input_scale;
attrs->kernel_scale = kernel_scale;
static const Op& op = Op::Get("qnn.dense");
return CallNode::make(op, {data, weight}, Attrs(attrs), {});
}
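
A detail that ties the Python and C++ hunks together: _make.conv2d and _make.dense are positional FFI bindings, so the argument order in qnn.py must match the C++ Make functions exactly; that is why units now comes after kernel_scale on both sides of the dense binding. The correspondence for dense, annotated for this writeup (the comments are added here, not in the source):

    # python/tvm/relay/qnn/op/qnn.py      src/relay/qnn/op/dense.cc
    _make.dense(data,               # <-> Expr data
                weight,             # <-> Expr weight
                input_zero_point,   # <-> int32_t input_zero_point
                kernel_zero_point,  # <-> int32_t kernel_zero_point
                input_scale,        # <-> double input_scale
                kernel_scale,       # <-> double kernel_scale
                units,              # <-> IndexExpr units
                out_dtype)          # <-> DataType out_dtype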