
[QNN] Add - Refactoring to C++
anijain2305 committed Aug 22, 2019
1 parent b818fd5 commit d85a4aa
Showing 11 changed files with 792 additions and 125 deletions.
58 changes: 58 additions & 0 deletions include/tvm/relay/qnn/attrs.h
@@ -97,6 +97,64 @@ struct DequantizeAttrs : public tvm::AttrsNode<DequantizeAttrs> {
  }
};

/*! \brief Attributes used in QNN concatenate operators */
struct QnnConcatenateAttrs : public tvm::AttrsNode<QnnConcatenateAttrs> {
  Array<tvm::Expr> input_scales;
  Array<tvm::Expr> input_zero_points;
  double output_scale;
  int32_t output_zero_point;
  int axis;

  TVM_DECLARE_ATTRS(QnnConcatenateAttrs, "relay.attrs.QnnConcatenateAttrs") {
    TVM_ATTR_FIELD(input_scales)
        .describe("The list of scales of input quantized tensors.");

    TVM_ATTR_FIELD(input_zero_points)
        .describe("The list of zero points of input quantized tensors.");

    TVM_ATTR_FIELD(output_zero_point)
        .describe("The zero_point for the output tensor.");

    TVM_ATTR_FIELD(output_scale)
        .describe("The scale for the output tensor.");

    TVM_ATTR_FIELD(axis)
        .describe("The axis at which the input arrays are concatenated. "
                  "Should lie in range `[-ndim, ndim)`.")
        .set_default(0);
  }
};  // struct QnnConcatenateAttrs

/*! \brief Attribute for QNN binary operator */
struct QnnBinaryOpAttrs : public tvm::AttrsNode<QnnBinaryOpAttrs> {
  int32_t lhs_zero_point;
  double lhs_scale;
  int32_t rhs_zero_point;
  double rhs_scale;
  int32_t output_zero_point;
  double output_scale;

  TVM_DECLARE_ATTRS(QnnBinaryOpAttrs, "relay.attrs.QnnBinaryOpAttrs") {
    TVM_ATTR_FIELD(lhs_zero_point)
        .describe("The zero_point for the lhs input tensor of this op.");

    TVM_ATTR_FIELD(lhs_scale)
        .describe("The scale for the lhs input tensor of this op.");

    TVM_ATTR_FIELD(rhs_zero_point)
        .describe("The zero_point for the rhs input tensor of this op.");

    TVM_ATTR_FIELD(rhs_scale)
        .describe("The scale for the rhs input tensor of this op.");

    TVM_ATTR_FIELD(output_zero_point)
        .describe("The zero_point for the activation of this op.");

    TVM_ATTR_FIELD(output_scale)
        .describe("The scale for the activation of this op.");
  }
};

} // namespace qnn
} // namespace relay
} // namespace tvm
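
The attributes above all parameterize the usual affine-quantization convention, real_value = scale * (quantized_value - zero_point). As intuition for why qnn.concatenate carries per-input scales and zero points, here is a minimal NumPy sketch (illustrative only, not part of this commit; requantize_np is a hypothetical helper) of the requantize step needed when an input's params differ from the output's:

import numpy as np

def requantize_np(q, in_scale, in_zp, out_scale, out_zp, dtype=np.uint8):
    # Dequantize to real values, then re-express them in the output params.
    real = in_scale * (q.astype(np.float64) - in_zp)
    q_new = np.round(real / out_scale) + out_zp
    info = np.iinfo(dtype)
    return np.clip(q_new, info.min, info.max).astype(dtype)  # saturate

# Two uint8 tensors with different quantization params are brought into the
# output params (scale=0.5, zero_point=0) before being concatenated.
a = np.array([10, 20, 30], dtype=np.uint8)  # scale=0.5, zero_point=0
b = np.array([5, 15, 25], dtype=np.uint8)   # scale=1.0, zero_point=5
out = np.concatenate([requantize_np(a, 0.5, 0, 0.5, 0),
                      requantize_np(b, 1.0, 5, 0.5, 0)])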
96 changes: 56 additions & 40 deletions python/tvm/relay/qnn/op/qnn.py
@@ -18,7 +18,8 @@
"""QNN dialect operators."""

from __future__ import absolute_import as _abs
from tvm import relay
from tvm.expr import FloatImm, IntImm
from tvm.relay.expr import Tuple
from . import _make

def requantize(data,
@@ -134,6 +135,8 @@ def dequantize(data,
    return _make.dequantize(data,
                            input_scale,
                            input_zero_point)


def concatenate(data,
                input_scales,
                input_zero_points,
@@ -169,42 +172,55 @@ def concatenate(data,
"""

    data = list(data)
    requantized_exprs = list(data)

    # Find the dtype of the input expr. This is required for the requantize op. Since this is
    # a concatenate op, the dtype of the input is the same as the dtype of the output.
    mod = relay.Module.from_expr(data[0])
    mod = relay.transform.InferType()(mod)
    entry = mod["main"]
    data0 = entry if isinstance(data[0], relay.Function) else entry.body
    in_dtype = data0.checked_type.dtype

    # First check if all the input qnn params match. If yes, we can call concatenate first,
    # followed by a requantize.
    if all(scale == input_scales[0] for scale in input_scales)\
            and all(zero_point == input_zero_points[0] for zero_point in input_zero_points):
        out = relay.concatenate(tuple(data), axis)
        input_scale = input_scales[0]
        input_zero_point = input_zero_points[0]
        if input_scale != output_scale or input_zero_point != output_zero_point:
            out = requantize(data=out,
                             input_scale=input_scales[0],
                             input_zero_point=input_zero_points[0],
                             output_scale=output_scale,
                             output_zero_point=output_zero_point,
                             out_dtype=in_dtype)
        return out

    # If the output qnn params do not match the input qnn params, we can call requantize on the
    # input exprs first, followed by a concatenate on the requantized input exprs.
    for idx, quantized_expr in enumerate(data):
        input_scale = input_scales[idx]
        input_zero_point = input_zero_points[idx]
        if input_scale != output_scale or input_zero_point != output_zero_point:
            requantized_exprs[idx] = requantize(data=quantized_expr,
                                                input_scale=input_scale,
                                                input_zero_point=input_zero_point,
                                                output_scale=output_scale,
                                                output_zero_point=output_zero_point,
                                                out_dtype=in_dtype)
    return relay.concatenate(tuple(requantized_exprs), axis)
    if not data:
        raise ValueError("relay.concatenate requires data to be non-empty.")
    if not isinstance(axis, int):
        raise ValueError("For now, we only support integer axis")

    return _make.concatenate(Tuple(data),
                             [FloatImm("float64", x) for x in input_scales],
                             [IntImm("int32", x) for x in input_zero_points],
                             output_scale,
                             output_zero_point,
                             axis)
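
For reference, a hypothetical call to the refactored Python API (variable names are illustrative; the signature is the one defined above):

from tvm import relay
from tvm.relay import qnn

x = relay.var("x", shape=(1, 3), dtype="uint8")
y = relay.var("y", shape=(1, 3), dtype="uint8")
out = qnn.op.concatenate((x, y),
                         input_scales=(0.5, 1.0),
                         input_zero_points=(0, 5),
                         output_scale=0.5,
                         output_zero_point=0,
                         axis=1)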

def add(lhs, rhs, lhs_scale, lhs_zero_point, rhs_scale, rhs_zero_point, output_scale,
        output_zero_point):
    """Quantized addition with numpy-style broadcasting.

    Parameters
    ----------
    lhs : relay.Expr
        The left hand side quantized input data.
    rhs : relay.Expr
        The right hand side quantized input data.
    lhs_scale: float
        The scale of the lhs quantized expr.
    lhs_zero_point: int
        The zero point of lhs quantized expr.
    rhs_scale: float
        The scale of the rhs quantized expr.
    rhs_zero_point: int
        The zero point of rhs quantized expr.
    output_scale: float
        The scale of the output quantized expr.
    output_zero_point: int
        The zero point of output quantized expr.

    Returns
    -------
    result : relay.Expr
        The computed result.
    """
    return _make.add(lhs, rhs,
                     lhs_scale, lhs_zero_point,
                     rhs_scale, rhs_zero_point,
                     output_scale, output_zero_point)
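
A hypothetical usage sketch for qnn.add, assuming the scalar scale/zero-point signature introduced in this commit (variable names are illustrative; later APIs may differ):

from tvm import relay
from tvm.relay import qnn

a = relay.var("a", shape=(4,), dtype="uint8")
b = relay.var("b", shape=(4,), dtype="uint8")
z = qnn.op.add(a, b,
               lhs_scale=0.25, lhs_zero_point=0,
               rhs_scale=0.25, rhs_zero_point=0,
               output_scale=0.5, output_zero_point=0)
func = relay.Function([a, b], z)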
83 changes: 2 additions & 81 deletions src/relay/op/tensor/transform.cc
@@ -37,6 +37,7 @@
#include "../op_common.h"
#include "../../../arithmetic/compute_expr.h"
#include "../../pass/alter_op_layout.h"
#include "transform.h"

namespace tvm {
namespace relay {
@@ -210,86 +211,6 @@ RELAY_REGISTER_OP("expand_dims")
// relay.concatenate
TVM_REGISTER_NODE_TYPE(ConcatenateAttrs);

bool ConcatenateRel(const Array<Type>& types,
                    int num_inputs,
                    const Attrs& attrs,
                    const TypeReporter& reporter) {
  // types: [data, result]
  CHECK_EQ(types.size(), 2);
  /* If we receive a tuple we can continue, if we receive
   * anything but an incomplete type we should signal an
   * error.
   */
  const auto* tensor_tuple = types[0].as<TupleTypeNode>();
  if (tensor_tuple == nullptr) {
    throw relay::Error(
        RELAY_ERROR(
            "concatenate requires a tuple of tensors as the first argument, found "
            << PrettyPrint(types[0])));
  } else if (types[0].as<IncompleteTypeNode>() != nullptr) {
    return false;
  }

  const auto* param = attrs.as<ConcatenateAttrs>();
  if (tensor_tuple->fields[0].as<IncompleteTypeNode>()) {
    return false;
  }
  const auto& first = Downcast<TensorType>(tensor_tuple->fields[0]);
  // Sanity check: ndim and dtype.
  const int ndim = static_cast<int>(first->shape.size());
  const DataType dtype = first->dtype;

  for (const Type& ele : tensor_tuple->fields) {
    if (ele.as<IncompleteTypeNode>()) {
      return false;
    }

    const auto& e = Downcast<TensorType>(ele);

    int e_ndim = static_cast<int>(e->shape.size());
    const DataType& e_dtype = e->dtype;
    if (e_ndim != ndim) {
      throw relay::Error("relay.concatenate requires all tensors have the same ndim");
    }
    if (e_dtype != dtype) {
      throw relay::Error("relay.concatenate requires all tensors have the same dtype");
    }
  }
  // Sanity check: axis
  int axis = param->axis;
  if (!(-ndim <= axis && axis < ndim)) {
    throw relay::Error(RELAY_ERROR(
        "concatenate only accepts `axis` in [-ndim, ndim)" <<
        ", but got axis = " << axis <<
        ", and ndim = " << ndim));
  }
  axis = axis < 0 ? ndim + axis : axis;
  // Calculate shape
  std::vector<IndexExpr> oshape(first->shape.begin(), first->shape.end());
  IndexExpr& concat_dim = oshape[axis];
  bool has_any = false;
  if (concat_dim.as<Any>()) {
    has_any = true;
  } else {
    for (int i = 1; i < static_cast<int>(tensor_tuple->fields.size()); ++i) {
      const auto& e = Downcast<TensorType>(tensor_tuple->fields[i]);
      if (e->shape[axis].as<Any>()) {
        has_any = true;
        break;
      }
      concat_dim += e->shape[axis];
    }
  }

  if (has_any) {
    concat_dim = Any::make();
  }

  auto rtype = TensorTypeNode::make(oshape, dtype);
  reporter->Assign(types[1], rtype);
  return true;
}

Array<Tensor> ConcatenateCompute(const Attrs& attrs,
                                 const Array<Tensor>& inputs,
                                 const Type& out_type,
@@ -358,7 +279,7 @@ RELAY_REGISTER_OP("concatenate")
.set_num_inputs(1)
.add_argument("data", "Tensor", "The input list of tensors.")
.set_support_level(1)
.add_type_rel("Concatenate", ConcatenateRel)
.add_type_rel("Concatenate", ConcatenateRel<ConcatenateAttrs>)
.set_attr<FInferCorrectLayout>("FInferCorrectLayout", ConcatenateLayout)
.set_attr<FTVMCompute>("FTVMCompute", ConcatenateCompute)
.set_attr<TOpPattern>("TOpPattern", kInjective);
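
The ConcatenateRel body deleted above moves into transform.h as a template, so relay.concatenate and qnn.concatenate can share one type relation (ConcatenateRel<ConcatenateAttrs> here, presumably ConcatenateRel<QnnConcatenateAttrs> on the QNN side). A minimal Python sketch (illustrative, not TVM code) of the shape rule that relation enforces:

def concat_out_shape(shapes, axis):
    # All inputs must share ndim; the output sums the concat axis.
    ndim = len(shapes[0])
    assert all(len(s) == ndim for s in shapes), "same ndim required"
    assert -ndim <= axis < ndim, "axis must lie in [-ndim, ndim)"
    axis = axis % ndim  # normalize a negative axis
    out = list(shapes[0])
    out[axis] = sum(s[axis] for s in shapes)
    return tuple(out)

assert concat_out_shape([(2, 3), (2, 5)], axis=-1) == (2, 8)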
