[Relay] [Quantization] WIP - Prototyping requantize op.

anijain2305 · Jul 8, 2019 · 5485b58 · 5485b58
1 parent 8d9e317
commit 5485b58
Show file tree

Hide file tree

Showing 4 changed files with 396 additions and 0 deletions.
diff --git a/include/tvm/relay/attrs/qnn.h b/include/tvm/relay/attrs/qnn.h
@@ -30,7 +30,31 @@
 namespace tvm {
 namespace relay {
 
+/*! \brief Attributes of the requantize operator. */
+struct RequantizeAttrs : public tvm::AttrsNode<RequantizeAttrs> {
+  // Quantization parameters of the input tensor.
+  double input_scale;
+  int32_t input_zero_point;
+  // Quantization parameters of the output tensor.
+  double output_scale;
+  int32_t output_zero_point;
+  // When true, integer computation is used to handle the output scale.
+  bool use_int_compute;
+  // Output data type; defaults to NullValue<DataType>() when not given.
+  DataType out_dtype;
 
+  TVM_DECLARE_ATTRS(RequantizeAttrs, "relay.attrs.RequantizeAttrs") {
+    TVM_ATTR_FIELD(input_zero_point).describe("The zero point of the input tensor.");
+    TVM_ATTR_FIELD(output_zero_point).describe("The zero point of the output tensor.");
+    TVM_ATTR_FIELD(input_scale).describe("The scale of the input tensor.");
+    TVM_ATTR_FIELD(output_scale).describe("The scale of the output tensor.");
+    TVM_ATTR_FIELD(use_int_compute).set_default(false)
+        .describe("When true, the integer computation is used to handle output scale");
+    TVM_ATTR_FIELD(out_dtype)
+        .set_default(NullValue<DataType>())
+        .describe("Output data type, set to explicit type under mixed precision setting");
+  }
+};
 
 }  // namespace relay
 }  // namespace tvm

diff --git a/python/tvm/relay/op/qnn/qnn.py b/python/tvm/relay/op/qnn/qnn.py
@@ -19,3 +19,49 @@
 from __future__ import absolute_import as _abs
 from . import _make
 
+
+def requantize(input_data, input_zero_point, input_scale, output_zero_point,
+               output_scale, out_dtype="int32", use_int_compute=False):
+    r"""Requantize operator.
+
+    The requantize operator converts one quantized tensor to another quantized
+    tensor. For the output tensor, we are provided with output scale and zero
+    point. The computation looks like this
+
+    Q_output = zp_output + (scale_input)/(scale_output) * (Q_input - zp_input)
+
+    The above computation can be done in floating point as the scales are
+    floating point values. Alternatively, we can approximate floating point
+    with fixed point computation. This is controlled by use_int_compute.
+
+    Parameters
+    ----------
+    input_data : tvm.relay.Expr
+        The quantized input tensor to the operator.
+
+    input_zero_point : int
+        The zero point of the input_data distribution.
+
+    input_scale : float
+        The float scalar to scale the quantized input_data values back to FP32.
+
+    output_zero_point : int
+        The zero point of the quantized output distribution.
+
+    output_scale : float
+        The float scalar to scale the quantized output values back to FP32.
+
+    out_dtype : str, optional
+        Specifies the data type of the requantized output.
+
+    use_int_compute : bool, optional
+        Use fully integer computation for requantizing.
+
+    Returns
+    -------
+    result : tvm.relay.Expr
+        The computed result.
+    """
+    return _make.requantize(input_data, input_zero_point, input_scale,
+                            output_zero_point, output_scale, out_dtype,
+                            use_int_compute)
diff --git a/src/relay/op/nn/requantize.cc b/src/relay/op/nn/requantize.cc
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2018 by Contributors
+ * \file requantize.cc
+ * \brief QNN requantize operator
+ */
+
+#include <tvm/relay/op_attr_types.h>
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/attrs/qnn.h>
+#include <tvm/relay/quantize_util.h>
+
+namespace tvm {
+namespace relay {
+
+TVM_REGISTER_NODE_TYPE(RequantizeAttrs);
+
+
+/*!
+ * \brief Type relation of requantize: output has the input shape and dtype attrs->out_dtype.
+ * \return false when types[0] is not (yet) a TensorType, true once the output is assigned.
+ */
+bool RequantizeRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+                   const TypeReporter& reporter) {
+  CHECK_EQ(types.size(), 2);
+  const auto* data = types[0].as<TensorTypeNode>();
+  if (data == nullptr) return false;  // as<>() failed: nothing to dereference yet
+  CHECK(is_valid_quantized_op_input_type(QuantizeOpType::Requantize, data->dtype))
+    << "Input type should be a quantized type (u)int8 or (u)int16 but was " <<  data->dtype;
+  const RequantizeAttrs* param = attrs.as<RequantizeAttrs>();
+  CHECK(param != nullptr) << "Requantize expects RequantizeAttrs";
+  reporter->Assign(types[1], TensorTypeNode::make(data->shape, param->out_dtype));
+  return true;
+}
+
+// Positional relay function that builds a call to the "qnn.requantize" operator
+// from its input expression and quantization parameters; it is exposed to the
+// frontend through the FFI.
+Expr MakeRequantize(Expr data,
+                    int32_t input_zero_point, double input_scale,
+                    int32_t output_zero_point, double output_scale,
+                    DataType out_dtype, bool use_int_compute) {
+  auto attrs = make_node<RequantizeAttrs>();
+  // The scalar parameters are copied directly; std::move has no effect on them.
+  attrs->input_zero_point = input_zero_point;
+  attrs->input_scale = input_scale;
+  attrs->output_zero_point = output_zero_point;
+  attrs->output_scale = output_scale;
+  attrs->use_int_compute = use_int_compute;
+  attrs->out_dtype = std::move(out_dtype);
+  // Function-local static: the operator lookup happens once, on the first call.
+  static const Op& op = Op::Get("qnn.requantize");
+  return CallNode::make(op, {data}, Attrs(attrs), {});
+}
+
+RELAY_REGISTER_OP("qnn.requantize")
+.describe(R"code(Requantize operator.
+
+Computes Q_output = zp_output + (scale_input / scale_output) * (Q_input - zp_input).
+)code" TVM_ADD_FILELINE)
+.set_attrs_type_key("relay.attrs.RequantizeAttrs")
+.set_num_inputs(1)
+.add_argument("data", "Tensor", "The quantized input tensor.")
+.set_support_level(10)
+.add_type_rel("Requantize", RequantizeRel);
+
+// Registers MakeRequantize under the API name used by the frontend FFI.
+TVM_REGISTER_API("relay.op.qnn._make.requantize").set_body_typed(MakeRequantize);
+
+}  // namespace relay
+}  // namespace tvm