[BYOC][ACL] Support asymmetric per-layer quantized operators #6109

Merged · 4 commits · Jul 29, 2020
docs/deploy/arm_compute_lib.rst (27 additions, 0 deletions)
@@ -121,6 +121,33 @@ networks refer to the tests: `tests/python/contrib/test_arm_compute_lib`. Here y
`infrastructure.py` to use the remote device you have set up.


Operator support
----------------
+---------------+--------------------------------------------------------------------------+
| Relay Node    | Remarks                                                                  |
+===============+==========================================================================+
| nn.conv2d     | fp32:                                                                    |
|               |   Simple: nn.conv2d                                                      |
|               |   Composite: nn.pad?, nn.conv2d, nn.bias_add?, nn.relu?                  |
|               |                                                                          |
|               | (only groups = 1 supported)                                              |
+---------------+--------------------------------------------------------------------------+
| qnn.conv2d    | uint8:                                                                   |
|               |   Composite: nn.pad?, qnn.conv2d, nn.bias_add?, nn.relu?, qnn.requantize |
|               |                                                                          |
|               | (only groups = 1 supported)                                              |
+---------------+--------------------------------------------------------------------------+
| nn.max_pool2d | fp32, uint8                                                              |
+---------------+--------------------------------------------------------------------------+
| reshape       | fp32, uint8                                                              |
+---------------+--------------------------------------------------------------------------+

.. note::
    A composite operator is a series of operators that map to a single Arm Compute Library operator. You can view
    this as a single fused operator from the viewpoint of Arm Compute Library. '?' denotes an optional operator in
    the series of operators that make up a composite operator.
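
For example, once a pre-quantized module has been partitioned for Arm Compute Library, quantized convolutions
matching the `qnn.conv2d` composite above are offloaded automatically. A minimal sketch (not part of this patch),
assuming a pre-quantized Relay module `module`:

.. code:: python

    from tvm.relay.op.contrib.arm_compute_lib import partition_for_arm_compute_lib

    # Merge composite patterns (e.g. pad + qnn.conv2d + bias_add + relu + requantize),
    # annotate them for ACL and partition them out of the main module.
    module = partition_for_arm_compute_lib(module)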


Adding a new operator
---------------------
Adding a new operator requires changes to a series of places. This section will give a hint on
python/tvm/relay/op/contrib/arm_compute_lib.py (47 additions, 3 deletions)
@@ -81,14 +81,41 @@ def conv_pattern():
        pattern = pattern.optional(is_op('nn.relu'))
        return pattern

    def qnn_conv_pattern():
        """Create a quantized convolution pattern.

        Returns
        -------
        pattern : dataflow_pattern.AltPattern
            Denotes the convolution pattern.
        """
        pattern = is_op('nn.pad')(wildcard()) | wildcard()
        pattern = is_op('qnn.conv2d')(
            pattern, is_constant(), is_constant(), is_constant(), is_constant(), is_constant())
        pattern = pattern.optional(lambda x: is_op('nn.bias_add')(x, is_constant()))
        pattern = pattern.optional(is_op('nn.relu'))
        pattern = is_op('qnn.requantize')(
            pattern, wildcard(), wildcard(), is_constant(), is_constant())
        return pattern

    def check_conv(extract):
        """Check conv pattern is supported by ACL."""
        call = extract
        while call.op.name != "nn.conv2d":
            call = call.args[0]
        return conv2d(call.attrs, call.args)

    return [('arm_compute_lib.conv2d', conv_pattern(), check_conv)]
    def check_qnn_conv(extract):
        """Check qnn conv pattern is supported by ACL."""
        if extract.attrs.out_dtype != "uint8":
            return False
        call = extract
        while call.op.name != "qnn.conv2d":
            call = call.args[0]
        return qnn_conv2d(call.attrs, call.args)

    return [('arm_compute_lib.conv2d', conv_pattern(), check_conv),
            ('arm_compute_lib.qnn_conv2d', qnn_conv_pattern(), check_qnn_conv)]
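
As a rough editorial sketch (not part of this diff), the pattern table returned above is consumed by the BYOC
partitioning flow roughly as follows, assuming a Relay module `mod` and that the enclosing pattern-table function
is importable as `arm_compute_lib_pattern_table`:

    import tvm
    from tvm import relay
    from tvm.relay.op.contrib.arm_compute_lib import arm_compute_lib_pattern_table

    # Fuse the composite patterns, annotate the fused functions for ACL and
    # partition them into separate functions for the ACL codegen.
    seq = tvm.transform.Sequential([
        relay.transform.MergeComposite(arm_compute_lib_pattern_table()),
        relay.transform.AnnotateTarget("arm_compute_lib"),
        relay.transform.PartitionGraph(),
    ])
    mod = seq(mod)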


def _register_external_op_helper(op_name, supported=True):
@@ -115,7 +142,24 @@ def conv2d(attrs, args):
    if len(data_typ.shape) != 4 or data_typ.shape[0] != 1 or data_typ.dtype != "float32":
        return False
    kernel_typ = args[1].checked_type
    if kernel_typ.dtype != "float32":
    if len(kernel_typ.shape) != 4 or kernel_typ.dtype != "float32":
        return False
    return True


def qnn_conv2d(attrs, args):
    """Check if the external ACL codegen for qnn.conv2d should be used."""
    if attrs.groups != 1:
        return False
    if attrs.data_layout != "NHWC":
        return False
    if attrs.out_dtype != "int32" and attrs.out_dtype != "":
        return False
    data_typ = args[0].checked_type
    if len(data_typ.shape) != 4 or data_typ.shape[0] != 1 or data_typ.dtype != "uint8":
        return False
    kernel_typ = args[1].checked_type
    if len(kernel_typ.shape) != 4 or kernel_typ.dtype != "uint8":
        return False
    return True
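
For illustration only (editorial sketch, not part of this diff), a quantized convolution accepted by the check
above, with asymmetric (non-zero) zero points, could be built roughly as follows:

    import numpy as np
    from tvm import relay

    data = relay.var("data", shape=(1, 14, 14, 32), dtype="uint8")    # NHWC, batch size 1
    kernel = relay.const(np.zeros((3, 3, 32, 16), dtype="uint8"))     # HWIO weights
    conv = relay.qnn.op.conv2d(
        data, kernel,
        input_zero_point=relay.const(122, "int32"),
        kernel_zero_point=relay.const(90, "int32"),
        input_scale=relay.const(0.15, "float32"),
        kernel_scale=relay.const(0.07, "float32"),
        kernel_size=(3, 3), channels=16,
        data_layout="NHWC", kernel_layout="HWIO",
        out_dtype="int32")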

@@ -126,6 +170,6 @@ def max_pool2d(attrs, args):
    if attrs.layout != "NHWC":
        return False
    typ = args[0].checked_type
    if typ.dtype != "float32":
    if typ.dtype not in ["float32", "uint8"]:
        return False
    return True
python/tvm/relay/qnn/op/layout_conversions.py (14 additions, 3 deletions)
@@ -20,6 +20,8 @@

from tvm.relay.op import op as reg

from ...op.strategy.generic import is_depthwise_conv2d


@reg.register_convert_op_layout("qnn.conv2d")
def convert_qnn_conv2d(attrs, inputs, tinfos, desired_layouts):
@@ -51,11 +53,20 @@ def convert_qnn_conv2d(attrs, inputs, tinfos, desired_layouts):
    new_attrs = dict(attrs)
    new_attrs['data_layout'] = desired_data_layout

    if desired_kernel_layout != "default":
        new_attrs['kernel_layout'] = desired_kernel_layout
        return relay.qnn.op.conv2d(*inputs, **new_attrs)

    if desired_data_layout == 'NCHW':
        if desired_kernel_layout != "default":
            new_attrs['kernel_layout'] = desired_kernel_layout
        new_attrs['kernel_layout'] = 'OIHW'
        return relay.qnn.op.conv2d(*inputs, **new_attrs)
    if desired_data_layout == 'NHWC':
        # Check for depthwise convolution.
        if is_depthwise_conv2d(inputs[0].shape, attrs['data_layout'], inputs[1].shape,
                               attrs['kernel_layout'], attrs['groups']):
            new_attrs['kernel_layout'] = 'HWOI'
        else:
            new_attrs['kernel_layout'] = 'OIHW'
            new_attrs['kernel_layout'] = 'HWIO'
        return relay.qnn.op.conv2d(*inputs, **new_attrs)

    raise ValueError('Layout %s is not yet supported' % desired_data_layout)
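
Editorial sketch (not part of this diff): the NHWC branch above is exercised when converting the layout of a
quantized model, for example (assuming a Relay module `mod`):

    import tvm
    from tvm import relay

    # "default" for the kernel layout lets the code above choose HWIO, or HWOI
    # for depthwise convolutions.
    desired_layouts = {"qnn.conv2d": ["NHWC", "default"]}
    with tvm.transform.PassContext(opt_level=3):
        mod = relay.transform.ConvertLayout(desired_layouts)(mod)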
src/relay/backend/contrib/arm_compute_lib/codegen.cc (68 additions, 26 deletions)
@@ -49,6 +49,18 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
public:
ACLJSONSerializer(const std::string& symbol, const Expr& expr) : JSONSerializer(symbol, expr) {}

/*!
* \brief A series of operators that form a composite
* convolution. Supports both nn.conv2d and qnn.conv2d.
*/
struct CompositeConvNode {
const CallNode* pad = nullptr;
const CallNode* conv = nullptr;
const CallNode* bias = nullptr;
const CallNode* activation = nullptr;
const CallNode* requantize = nullptr;
};

/*!
* \brief Visit call nodes and generate appropriate JSON node.
*
@@ -68,7 +80,7 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
CHECK(comp.defined()) << "Arm Compute Library JSON runtime only supports composite functions.";
const std::string name = comp.value();
std::shared_ptr<JSONGraphNode> json_node;
if (name == "arm_compute_lib.conv2d") {
if (name == "arm_compute_lib.conv2d" || name == "arm_compute_lib.qnn_conv2d") {
json_node = CreateCompositeConvJSONNode(cn);
} else {
LOG(FATAL) << "Unrecognized Arm Compute Library pattern: " << name;
@@ -78,57 +90,86 @@

private:
/*!
* \brief Create a JSON representation of a composite convolution.
* \brief Extract convolution nodes from a composite function.
*
* \param call The call to be represented.
* \return A JSON representation of a specific operator.
* \param cn The call node of the composite function.
* \return Extracted composite convolution nodes.
*/
std::shared_ptr<JSONGraphNode> CreateCompositeConvJSONNode(const CallNode* cn) {
const std::string name = "nn.conv2d";
const CallNode* pad = nullptr;
const CallNode* conv = nullptr;
const CallNode* bias = nullptr;
bool has_activation = false;

// Unpack composite function
static CompositeConvNode UnpackCompositeConvolution(const CallNode* cn) {
CompositeConvNode nodes{};
const auto* fn = cn->op.as<FunctionNode>();
CHECK(fn);

// Traverse composite convolution function from child to parent
const auto* current_call = fn->body.as<CallNode>();
if (backend::IsOp(current_call, "qnn.requantize")) {
nodes.requantize = current_call;
current_call = current_call->args[0].as<CallNode>();
}
if (backend::IsOp(current_call, "nn.relu")) {
has_activation = true;
nodes.activation = current_call;
current_call = current_call->args[0].as<CallNode>();
}
if (backend::IsOp(current_call, "nn.bias_add")) {
bias = current_call;
nodes.bias = current_call;
current_call = current_call->args[0].as<CallNode>();
}
CHECK(backend::IsOp(current_call, "nn.conv2d"));
conv = current_call;
// Enforce a convolution node exists at this point during traversal
if (nodes.requantize) {
CHECK(backend::IsOp(current_call, "qnn.conv2d"));
} else {
CHECK(backend::IsOp(current_call, "nn.conv2d"));
}
nodes.conv = current_call;
if (!current_call->args.empty() && current_call->args[0]->IsInstance<CallNode>()) {
current_call = current_call->args[0].as<CallNode>();
if (backend::IsOp(current_call, "nn.pad")) {
pad = current_call;
nodes.pad = current_call;
}
}
return nodes;
}

/*!
* \brief Create a JSON representation of a composite convolution.
*
* \param cn The call to be represented.
* \return A JSON representation of a specific operator.
*/
std::shared_ptr<JSONGraphNode> CreateCompositeConvJSONNode(const CallNode* cn) {
CompositeConvNode nodes = UnpackCompositeConvolution(cn);
std::string name = "nn.conv2d";

const auto* conv_attr = conv->attrs.as<Conv2DAttrs>();
const auto* conv_attr = nodes.conv->attrs.as<Conv2DAttrs>();
CHECK(conv_attr);
CHECK(conv_attr->kernel_layout == "OHWI")
<< "Kernel layout must be OHWI, has the module been pre-processed correctly?";

// Inputs must be added in the same order they appear in the relay graph.
std::vector<JSONGraphNodeEntry> inputs;
inputs.push_back(VisitExpr(cn->args[0])[0]);
inputs.push_back(VisitExpr(conv->args[1])[0]);
if (bias) {
inputs.push_back(VisitExpr(bias->args[1])[0]);
inputs.push_back(VisitExpr(nodes.conv->args[1])[0]);
if (nodes.requantize) {
name = "qnn.conv2d";
inputs.push_back(VisitExpr(nodes.conv->args[2])[0]); // input zero-point
inputs.push_back(VisitExpr(nodes.conv->args[3])[0]); // kernel zero-point
inputs.push_back(VisitExpr(nodes.conv->args[4])[0]); // input scale
inputs.push_back(VisitExpr(nodes.conv->args[5])[0]); // kernel scale
}
if (nodes.bias) {
inputs.push_back(VisitExpr(nodes.bias->args[1])[0]);
}
if (nodes.requantize) {
inputs.push_back(VisitExpr(nodes.requantize->args[3])[0]); // output scale
inputs.push_back(VisitExpr(nodes.requantize->args[4])[0]); // output zero-point
}

auto json_node = std::make_shared<JSONGraphNode>(name, "kernel", inputs, 1);
SetCallNodeAttribute(json_node, conv);
SetCallNodeAttribute(json_node, nodes.conv);

// Override attributes
if (pad) {
const auto* pad_attr = pad->attrs.as<PadAttrs>();
if (nodes.pad) {
const auto* pad_attr = nodes.pad->attrs.as<PadAttrs>();
CHECK(pad_attr);
auto p = pad_attr->pad_width;
// Convert to TVM layout for now, conversion to ACL layout takes place in runtime.
@@ -141,7 +182,7 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
padding_attr.emplace_back(padding);
json_node->SetAttr("padding", padding_attr);
}
if (has_activation) {
if (nodes.activation) {
std::vector<std::string> activation_type = {"relu"};
std::vector<dmlc::any> act_attr;
act_attr.emplace_back(activation_type);
@@ -161,7 +202,8 @@
*/
IRModule PreProcessModule(const IRModule& mod) {
IRModule preprocessed_module;
tvm::Map<String, Array<String>> desired_layouts = {{"nn.conv2d", {"NHWC", "OHWI"}}};
tvm::Map<String, Array<String>> desired_layouts = {{"nn.conv2d", {"NHWC", "OHWI"}},
{"qnn.conv2d", {"NHWC", "OHWI"}}};
preprocessed_module = transform::ConvertLayout(desired_layouts)(mod);
preprocessed_module = transform::FoldConstant()(preprocessed_module);
return preprocessed_module;
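
Editorial sketch (not part of this diff): the serializer and `PreProcessModule` above run during the normal build
once a module has been partitioned for ACL, for example:

    import tvm
    from tvm import relay
    from tvm.relay.op.contrib.arm_compute_lib import partition_for_arm_compute_lib

    mod = partition_for_arm_compute_lib(mod)  # assumes a quantized Relay module `mod`
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target="llvm -mtriple=aarch64-linux-gnu -mattr=+neon")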