[BYOC][ACL] Support asymmetric per-layer quantized operators (apache#…

…6109) * [BYOC][ACL] Support asymmetric per-layer quantization Adds support for asymmetric per-layer quantization in the ACL runtime. This includes support for qnn.conv2d, nn.maxpool2d and reshape. Reflected these changes in codegen and runtime tests. Change-Id: I8f610bd37af1e3740fd48c2d502bcc4727d9d712 * Address comments Change-Id: I4f9e3e7dbf6053066927cf07c4c19ecc88572e9d * Fix tutorial Change-Id: I4371e9d97a120fb7776db40ffcde60f46927af4d * Improve test infrastructure * Doc-string for generate trials * Output params on error Change-Id: Ib2e2b1fcdf05cdc77f7f4fb4b46395f28c129957
trevor-m · Sep 2, 2020 · e0ca076 · e0ca076
1 parent 0629939
commit e0ca076
Show file tree

Hide file tree

Showing 12 changed files with 865 additions and 268 deletions.
diff --git a/docs/deploy/arm_compute_lib.rst b/docs/deploy/arm_compute_lib.rst
@@ -121,6 +121,33 @@ networks refer to the tests: `tests/python/contrib/test_arm_compute_lib`. Here y
 `infrastructure.py` to use the remote device you have setup.
 
 
+Operator support
+----------------
++---------------+--------------------------------------------------------------------------+
+| Relay Node    | Remarks                                                                  |
++===============+==========================================================================+
+| nn.conv2d     | fp32:                                                                    |
+|               |   Simple: nn.conv2d                                                      |
+|               |   Composite: nn.pad?, nn.conv2d, nn.bias_add?, nn.relu?                  |
+|               |                                                                          |
+|               | (only groups = 1 supported)                                              |
++---------------+--------------------------------------------------------------------------+
+| qnn.conv2d    | uint8:                                                                   |
+|               |   Composite: nn.pad?, qnn.conv2d, nn.bias_add?, nn.relu?, qnn.requantize |
+|               |                                                                          |
+|               | (only groups = 1 supported)                                              |
++---------------+--------------------------------------------------------------------------+
+| nn.max_pool2d | fp32, uint8                                                              |
++---------------+--------------------------------------------------------------------------+
+| reshape       | fp32, uint8                                                              |
++---------------+--------------------------------------------------------------------------+
+
+.. note::
+    A composite operator is a series of operators that map to a single Arm Compute Library operator. You can view this
+    as being a single fused operator from the viewpoint of Arm Compute Library. '?' denotes an optional operator in
+    the series of operators that make up a composite operator.
+
+
 Adding a new operator
 ---------------------
 Adding a new operator requires changes to a series of places. This section will give a hint on

diff --git a/python/tvm/relay/op/contrib/arm_compute_lib.py b/python/tvm/relay/op/contrib/arm_compute_lib.py
@@ -81,14 +81,41 @@ def conv_pattern():
         pattern = pattern.optional(is_op('nn.relu'))
         return pattern
 
+    def qnn_conv_pattern():
+        """Create a quantized convolution pattern.
+
+        Returns
+        -------
+        pattern : dataflow_pattern.AltPattern
+            Denotes the convolution pattern.
+        """
+        pattern = is_op('nn.pad')(wildcard()) | wildcard()
+        pattern = is_op('qnn.conv2d')(
+            pattern, is_constant(), is_constant(), is_constant(), is_constant(), is_constant())
+        pattern = pattern.optional(lambda x: is_op('nn.bias_add')(x, is_constant()))
+        pattern = pattern.optional(is_op('nn.relu'))
+        pattern = is_op('qnn.requantize')(
+            pattern, wildcard(), wildcard(), is_constant(), is_constant())
+        return pattern
+
     def check_conv(extract):
         """Check conv pattern is supported by ACL."""
         call = extract
         while call.op.name != "nn.conv2d":
             call = call.args[0]
         return conv2d(call.attrs, call.args)
 
-    return [('arm_compute_lib.conv2d', conv_pattern(), check_conv)]
+    def check_qnn_conv(extract):
+        """Check qnn conv pattern is supported by ACL."""
+        if extract.attrs.out_dtype != "uint8":
+            return False
+        call = extract
+        while call.op.name != "qnn.conv2d":
+            call = call.args[0]
+        return qnn_conv2d(call.attrs, call.args)
+
+    return [('arm_compute_lib.conv2d', conv_pattern(), check_conv),
+            ('arm_compute_lib.qnn_conv2d', qnn_conv_pattern(), check_qnn_conv)]
 
 
 def _register_external_op_helper(op_name, supported=True):
@@ -115,7 +142,24 @@ def conv2d(attrs, args):
     if len(data_typ.shape) != 4 or data_typ.shape[0] != 1 or data_typ.dtype != "float32":
         return False
     kernel_typ = args[1].checked_type
-    if kernel_typ.dtype != "float32":
+    if len(kernel_typ.shape) != 4 or kernel_typ.dtype != "float32":
+        return False
+    return True
+
+
+def qnn_conv2d(attrs, args):
+    """Check if the external ACL codegen for qnn.conv2d should be used."""
+    if attrs.groups != 1:
+        return False
+    if attrs.data_layout != "NHWC":
+        return False
+    if attrs.out_dtype != "int32" and attrs.out_dtype != "":
+        return False
+    data_typ = args[0].checked_type
+    if len(data_typ.shape) != 4 or data_typ.shape[0] != 1 or data_typ.dtype != "uint8":
+        return False
+    kernel_typ = args[1].checked_type
+    if len(kernel_typ.shape) != 4 or kernel_typ.dtype != "uint8":
         return False
     return True
 
@@ -126,6 +170,6 @@ def max_pool2d(attrs, args):
     if attrs.layout != "NHWC":
         return False
     typ = args[0].checked_type
-    if typ.dtype != "float32":
+    if typ.dtype not in ["float32", "uint8"]:
         return False
     return True
diff --git a/python/tvm/relay/qnn/op/layout_conversions.py b/python/tvm/relay/qnn/op/layout_conversions.py
@@ -20,6 +20,8 @@
 
 from tvm.relay.op import op as reg
 
+from ...op.strategy.generic import is_depthwise_conv2d
+
 
 @reg.register_convert_op_layout("qnn.conv2d")
 def convert_qnn_conv2d(attrs, inputs, tinfos, desired_layouts):
@@ -51,11 +53,20 @@ def convert_qnn_conv2d(attrs, inputs, tinfos, desired_layouts):
     new_attrs = dict(attrs)
     new_attrs['data_layout'] = desired_data_layout
 
+    if desired_kernel_layout != "default":
+        new_attrs['kernel_layout'] = desired_kernel_layout
+        return relay.qnn.op.conv2d(*inputs, **new_attrs)
+
     if desired_data_layout == 'NCHW':
-        if desired_kernel_layout != "default":
-            new_attrs['kernel_layout'] = desired_kernel_layout
+        new_attrs['kernel_layout'] = 'OIHW'
+        return relay.qnn.op.conv2d(*inputs, **new_attrs)
+    if desired_data_layout == 'NHWC':
+        # Check for depthwise convolution.
+        if is_depthwise_conv2d(inputs[0].shape, attrs['data_layout'], inputs[1].shape,
+                               attrs['kernel_layout'], attrs['groups']):
+            new_attrs['kernel_layout'] = 'HWOI'
         else:
-            new_attrs['kernel_layout'] = 'OIHW'
+            new_attrs['kernel_layout'] = 'HWIO'
         return relay.qnn.op.conv2d(*inputs, **new_attrs)
 
     raise ValueError('Layout %s is not yet supported' % desired_data_layout)
diff --git a/src/relay/backend/contrib/arm_compute_lib/codegen.cc b/src/relay/backend/contrib/arm_compute_lib/codegen.cc
@@ -49,6 +49,18 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
  public:
   ACLJSONSerializer(const std::string& symbol, const Expr& expr) : JSONSerializer(symbol, expr) {}
 
+  /*!
+   * \brief A series of operators that form a composite
+   * convolution. Supports both nn.conv2d and qnn.conv2d.
+   */
+  struct CompositeConvNode {
+    const CallNode* pad = nullptr;
+    const CallNode* conv = nullptr;
+    const CallNode* bias = nullptr;
+    const CallNode* activation = nullptr;
+    const CallNode* requantize = nullptr;
+  };
+
   /*!
    * \brief Visit call nodes and generate appropriate JSON node.
    *
@@ -68,7 +80,7 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
     CHECK(comp.defined()) << "Arm Compute Library JSON runtime only supports composite functions.";
     const std::string name = comp.value();
     std::shared_ptr<JSONGraphNode> json_node;
-    if (name == "arm_compute_lib.conv2d") {
+    if (name == "arm_compute_lib.conv2d" || name == "arm_compute_lib.qnn_conv2d") {
       json_node = CreateCompositeConvJSONNode(cn);
     } else {
       LOG(FATAL) << "Unrecognized Arm Compute Library pattern: " << name;
@@ -78,57 +90,86 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
 
  private:
   /*!
-   * \brief Create a JSON representation of a composite convolution.
+   * \brief Extract convolution nodes from a composite function.
    *
-   * \param call The call to be represented.
-   * \return A JSON representation of a specific operator.
+   * \param cn The call node of the composite function.
+   * \return Extracted composite convolution nodes.
    */
-  std::shared_ptr<JSONGraphNode> CreateCompositeConvJSONNode(const CallNode* cn) {
-    const std::string name = "nn.conv2d";
-    const CallNode* pad = nullptr;
-    const CallNode* conv = nullptr;
-    const CallNode* bias = nullptr;
-    bool has_activation = false;
-
-    // Unpack composite function
+  static CompositeConvNode UnpackCompositeConvolution(const CallNode* cn) {
+    CompositeConvNode nodes{};
     const auto* fn = cn->op.as<FunctionNode>();
     CHECK(fn);
+
+    // Traverse composite convolution function from child to parent
     const auto* current_call = fn->body.as<CallNode>();
+    if (backend::IsOp(current_call, "qnn.requantize")) {
+      nodes.requantize = current_call;
+      current_call = current_call->args[0].as<CallNode>();
+    }
     if (backend::IsOp(current_call, "nn.relu")) {
-      has_activation = true;
+      nodes.activation = current_call;
       current_call = current_call->args[0].as<CallNode>();
     }
     if (backend::IsOp(current_call, "nn.bias_add")) {
-      bias = current_call;
+      nodes.bias = current_call;
       current_call = current_call->args[0].as<CallNode>();
     }
-    CHECK(backend::IsOp(current_call, "nn.conv2d"));
-    conv = current_call;
+    // Enforce a convolution node exists at this point during traversal
+    if (nodes.requantize) {
+      CHECK(backend::IsOp(current_call, "qnn.conv2d"));
+    } else {
+      CHECK(backend::IsOp(current_call, "nn.conv2d"));
+    }
+    nodes.conv = current_call;
     if (!current_call->args.empty() && current_call->args[0]->IsInstance<CallNode>()) {
       current_call = current_call->args[0].as<CallNode>();
       if (backend::IsOp(current_call, "nn.pad")) {
-        pad = current_call;
+        nodes.pad = current_call;
       }
     }
+    return nodes;
+  }
+
+  /*!
+   * \brief Create a JSON representation of a composite convolution.
+   *
+   * \param cn The call to be represented.
+   * \return A JSON representation of a specific operator.
+   */
+  std::shared_ptr<JSONGraphNode> CreateCompositeConvJSONNode(const CallNode* cn) {
+    CompositeConvNode nodes = UnpackCompositeConvolution(cn);
+    std::string name = "nn.conv2d";
 
-    const auto* conv_attr = conv->attrs.as<Conv2DAttrs>();
+    const auto* conv_attr = nodes.conv->attrs.as<Conv2DAttrs>();
     CHECK(conv_attr);
     CHECK(conv_attr->kernel_layout == "OHWI")
         << "Kernel layout must be OHWI, has the module been pre-processed correctly?";
 
+    // Inputs must be added in the same order they appear in the relay graph.
     std::vector<JSONGraphNodeEntry> inputs;
     inputs.push_back(VisitExpr(cn->args[0])[0]);
-    inputs.push_back(VisitExpr(conv->args[1])[0]);
-    if (bias) {
-      inputs.push_back(VisitExpr(bias->args[1])[0]);
+    inputs.push_back(VisitExpr(nodes.conv->args[1])[0]);
+    if (nodes.requantize) {
+      name = "qnn.conv2d";
+      inputs.push_back(VisitExpr(nodes.conv->args[2])[0]);  // input zero-point
+      inputs.push_back(VisitExpr(nodes.conv->args[3])[0]);  // kernel zero-point
+      inputs.push_back(VisitExpr(nodes.conv->args[4])[0]);  // input scale
+      inputs.push_back(VisitExpr(nodes.conv->args[5])[0]);  // kernel scale
+    }
+    if (nodes.bias) {
+      inputs.push_back(VisitExpr(nodes.bias->args[1])[0]);
+    }
+    if (nodes.requantize) {
+      inputs.push_back(VisitExpr(nodes.requantize->args[3])[0]);  // output scale
+      inputs.push_back(VisitExpr(nodes.requantize->args[4])[0]);  // output zero-point
     }
 
     auto json_node = std::make_shared<JSONGraphNode>(name, "kernel", inputs, 1);
-    SetCallNodeAttribute(json_node, conv);
+    SetCallNodeAttribute(json_node, nodes.conv);
 
     // Override attributes
-    if (pad) {
-      const auto* pad_attr = pad->attrs.as<PadAttrs>();
+    if (nodes.pad) {
+      const auto* pad_attr = nodes.pad->attrs.as<PadAttrs>();
       CHECK(pad_attr);
       auto p = pad_attr->pad_width;
       // Convert to TVM layout for now, conversion to ACL layout takes place in runtime.
@@ -141,7 +182,7 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
       padding_attr.emplace_back(padding);
       json_node->SetAttr("padding", padding_attr);
     }
-    if (has_activation) {
+    if (nodes.activation) {
       std::vector<std::string> activation_type = {"relu"};
       std::vector<dmlc::any> act_attr;
       act_attr.emplace_back(activation_type);
@@ -161,7 +202,8 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
  */
 IRModule PreProcessModule(const IRModule& mod) {
   IRModule preprocessed_module;
-  tvm::Map<String, Array<String>> desired_layouts = {{"nn.conv2d", {"NHWC", "OHWI"}}};
+  tvm::Map<String, Array<String>> desired_layouts = {{"nn.conv2d", {"NHWC", "OHWI"}},
+                                                     {"qnn.conv2d", {"NHWC", "OHWI"}}};
   preprocessed_module = transform::ConvertLayout(desired_layouts)(mod);
   preprocessed_module = transform::FoldConstant()(preprocessed_module);
   return preprocessed_module;