Retain qnn input kernel scales #4292

Merged: 14 commits, Nov 16, 2019
14 changes: 14 additions & 0 deletions include/tvm/relay/qnn/attrs.h
@@ -135,6 +135,10 @@ struct QnnConv2DAttrs : public tvm::AttrsNode<QnnConv2DAttrs> {
// Quantization related attributes.
int32_t input_zero_point;
int32_t kernel_zero_point;
// The input tensor scale and kernel tensor scale are stored
// so that later Relay passes can access this information easily.
double input_scale;
double kernel_scale;

TVM_DECLARE_ATTRS(QnnConv2DAttrs, "relay.attrs.QnnConv2DAttrs") {
TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1}))
@@ -177,6 +181,10 @@ struct QnnConv2DAttrs : public tvm::AttrsNode<QnnConv2DAttrs> {
.describe("The zero point of the input tensor.");
TVM_ATTR_FIELD(kernel_zero_point)
.describe("The zero point of the kernel tensor.");
TVM_ATTR_FIELD(input_scale)
.describe("The quantization scale for the input tensor.");
u99127 marked this conversation as resolved.
Show resolved Hide resolved
TVM_ATTR_FIELD(kernel_scale)
.describe("The quantization scale for the weight tensor.");
}
};

@@ -212,6 +220,8 @@ struct QnnDenseAttrs : public tvm::AttrsNode<QnnDenseAttrs> {
// Quantization related attributes.
int32_t input_zero_point;
int32_t kernel_zero_point;
double input_scale;
double kernel_scale;

TVM_DECLARE_ATTRS(QnnDenseAttrs, "relay.attrs.QnnDenseAttrs") {
TVM_ATTR_FIELD(units)
@@ -222,6 +232,10 @@ struct QnnDenseAttrs : public tvm::AttrsNode<QnnDenseAttrs> {
    .describe("The zero point of the input tensor.");
TVM_ATTR_FIELD(kernel_zero_point)
    .describe("The zero point of the kernel tensor.");
TVM_ATTR_FIELD(input_scale)
    .describe("The input tensor scale.");
TVM_ATTR_FIELD(kernel_scale)
    .describe("The kernel tensor scale.");
}
};

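The point of retaining the two scales is that passes running on the Relay graph can read them straight off the call attributes. A minimal sketch of such access, assuming a hand-rolled helper (collect_qnn_scales is hypothetical, not part of this PR):

    from tvm import relay

    def collect_qnn_scales(func):
        """Record the retained scales of every qnn.conv2d / qnn.dense call."""
        scales = []

        def visit(expr):
            if isinstance(expr, relay.expr.Call):
                op_name = getattr(expr.op, "name", None)
                if op_name in ("qnn.conv2d", "qnn.dense"):
                    scales.append((op_name,
                                   float(expr.attrs.input_scale),
                                   float(expr.attrs.kernel_scale)))

        relay.analysis.post_order_visit(func, visit)
        return scales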
6 changes: 6 additions & 0 deletions python/tvm/relay/frontend/tflite.py
@@ -730,9 +730,13 @@ def convert_fully_connected(self, op):
weight_expr = self.exp_tab.new_const(weight_value, dtype=weight_tensor_type_str)

if input_tensor.qnn_params:
    input_scale = input_tensor.qnn_params['scale']
    kernel_scale = weight_tensor.qnn_params['scale']
    out = _qnn.op.dense(in_expr, weight_expr,
                        input_zero_point=input_tensor.qnn_params['zero_point'],
                        kernel_zero_point=weight_tensor.qnn_params['zero_point'],
                        input_scale=input_scale,
                        kernel_scale=kernel_scale,
                        out_dtype='int32')
else:
    out = _op.nn.dense(in_expr, weight_expr)
@@ -936,6 +940,8 @@ def convert_conv(self, op, conv_type):
    qnn_conv2d_params['input_zero_point'] = input_tensor.qnn_params['zero_point']
    qnn_conv2d_params['kernel_zero_point'] = weight_tensor.qnn_params['zero_point']
    qnn_conv2d_params['out_dtype'] = 'int32'
    qnn_conv2d_params['input_scale'] = input_tensor.qnn_params['scale']
    qnn_conv2d_params['kernel_scale'] = weight_tensor.qnn_params['scale']
    out = _qnn.op.conv2d(in_expr, weight_expr, **qnn_conv2d_params)
else:
    out = _op.nn.conv2d(in_expr, weight_expr, **params)
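For context, qnn_params is populated when each TFLite tensor is read: the flatbuffer's per-tensor quantization block supplies scale and zero_point, and the hunks above forward both instead of discarding the scales. A toy reconstruction of the fully-connected path (shapes and quantization numbers are made up for illustration):

    from tvm import relay

    # Hypothetical per-tensor quantization info, as the frontend stores it.
    input_qnn = {'scale': 0.0235, 'zero_point': 128}
    weight_qnn = {'scale': 0.0036, 'zero_point': 122}

    data = relay.var("data", shape=(1, 1024), dtype="uint8")
    weight = relay.var("weight", shape=(10, 1024), dtype="uint8")

    out = relay.qnn.op.dense(data, weight,
                             input_zero_point=input_qnn['zero_point'],
                             kernel_zero_point=weight_qnn['zero_point'],
                             input_scale=input_qnn['scale'],
                             kernel_scale=weight_qnn['scale'],
                             units=10,
                             out_dtype='int32')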
2 changes: 2 additions & 0 deletions python/tvm/relay/qnn/op/legalizations.py
@@ -88,6 +88,8 @@ def helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay_op):
new_attrs = {k: attrs[k] for k in attrs.keys()}
del new_attrs['kernel_zero_point']
del new_attrs['input_zero_point']
del new_attrs['input_scale']
del new_attrs['kernel_scale']
return relay_op(shift_data, shift_kernel, **new_attrs)
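
The two new del statements exist because this helper rewrites a qnn op into plain nn ops for targets without fast int8 support, and the nn ops reject attributes they do not declare, so every qnn-only attribute has to be stripped first. A standalone sketch of the pattern (to_float_op is hypothetical; shift_data and shift_kernel stand in for the zero-point-subtracted tensors built earlier in the helper):

    def to_float_op(shift_data, shift_kernel, attrs, relay_op):
        """Dispatch to e.g. relay.nn.conv2d after dropping qnn-only attrs."""
        qnn_only = ('input_zero_point', 'kernel_zero_point',
                    'input_scale', 'kernel_scale')
        new_attrs = {k: attrs[k] for k in attrs.keys() if k not in qnn_only}
        return relay_op(shift_data, shift_kernel, **new_attrs)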

# Helper function to change dtypes to uint8 x int8. Intel VNNI instructions prefer this setting.
30 changes: 29 additions & 1 deletion python/tvm/relay/qnn/op/qnn.py
@@ -189,6 +189,8 @@ def conv2d(data,
           kernel,
           input_zero_point,
           kernel_zero_point,
           input_scale,
           kernel_scale,
           strides=(1, 1),
           padding=(0, 0),
           dilation=(1, 1),
@@ -219,6 +221,16 @@
input_zero_point: int
    The zero point of the data distribution.

input_scale: float
    The scale for the input tensor. It is stored here purely for
    convenience; see the fuller commentary under kernel_scale.

kernel_scale: float
    The scale for the weight tensor. It is stored so that passes running
    on the Relay graph can access it; the value is no longer needed once
    qnn.conv2d has been lowered to the same sequence of steps as nn.conv2d.
    See also input_scale in Requantize.

kernel_zero_point: int
    The zero point of the quantized_kernel distribution.

@@ -260,6 +272,7 @@ def conv2d(data,

return _make.conv2d(data, kernel,
                    input_zero_point, kernel_zero_point,
                    input_scale, kernel_scale,
                    strides, padding, dilation,
                    groups, channels, kernel_size,
                    data_layout, kernel_layout, out_layout, out_dtype)
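
Putting the updated signature together, a small end-to-end example of constructing a quantized convolution with the retained scales (shapes and scale/zero-point values are invented for illustration):

    from tvm import relay

    data = relay.var("data", shape=(1, 3, 8, 8), dtype="uint8")
    kernel = relay.var("kernel", shape=(4, 3, 3, 3), dtype="uint8")

    conv = relay.qnn.op.conv2d(data, kernel,
                               input_zero_point=128, kernel_zero_point=127,
                               input_scale=0.078, kernel_scale=0.012,
                               kernel_size=(3, 3), channels=4,
                               out_dtype="int32")

    # The scales now ride along on the call attributes.
    print(conv.attrs.input_scale, conv.attrs.kernel_scale)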
@@ -317,6 +330,8 @@ def dense(data,
          weight,
          input_zero_point,
          kernel_zero_point,
          input_scale,
          kernel_scale,
          units=None,
          out_dtype="int32"):
"""Qnn Dense operator.
@@ -332,6 +347,17 @@
    The quantized input data to the operator.
weight : tvm.relay.Expr
    The quantized weight expressions.
input_zero_point: int
    The input zero point.
kernel_zero_point: int
    The kernel zero point.
input_scale: float
    The scale for the input tensor.
kernel_scale: float
    The scale for the weight tensor. It is stored so that passes running
    on the Relay graph can access it; the value is no longer needed once
    qnn.dense has been lowered to the same sequence of steps as nn.dense.
    See also input_scale in Requantize.
units : int, optional
    Number of hidden units of the dense transformation.
out_dtype : str, optional
@@ -345,9 +371,11 @@

return _make.dense(data,
                   weight,
                   input_zero_point,
                   kernel_zero_point,
                   input_scale,
                   kernel_scale,
                   units,
                   out_dtype)


5 changes: 4 additions & 1 deletion src/relay/qnn/op/convolution.cc
@@ -440,7 +440,8 @@ Expr QnnConv2DCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
// Positional relay function to create quantized conv2d operator
// used by frontend FFI.
Expr MakeQnnConv2D(Expr data, Expr weight, int32_t input_zero_point, int32_t kernel_zero_point,
                   double input_scale, double kernel_scale, Array<IndexExpr> strides,
                   Array<IndexExpr> padding, Array<IndexExpr> dilation,
                   int groups, IndexExpr channels, Array<IndexExpr> kernel_size,
                   std::string data_layout, std::string kernel_layout, std::string out_layout,
                   DataType out_dtype) {
@@ -457,6 +458,8 @@ Expr MakeQnnConv2D(Expr data, Expr weight, int32_t input_zero_point, int32_t kernel_zero_point,
attrs->out_dtype = std::move(out_dtype);
attrs->input_zero_point = std::move(input_zero_point);
attrs->kernel_zero_point = std::move(kernel_zero_point);
attrs->input_scale = std::move(input_scale);
attrs->kernel_scale = std::move(kernel_scale);
static const Op& op = Op::Get("qnn.conv2d");
return CallNode::make(op, {data, weight}, Attrs(attrs), {});
}
8 changes: 6 additions & 2 deletions src/relay/qnn/op/dense.cc
@@ -57,13 +57,17 @@ bool QnnDenseRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
}

// Positional relay function to create quantized dense operator used by frontend FFI.
Expr MakeQuantizedDense(Expr data, Expr weight, int32_t input_zero_point,
                        int32_t kernel_zero_point, double input_scale,
                        double kernel_scale, IndexExpr units,
                        DataType out_dtype) {
auto attrs = make_node<QnnDenseAttrs>();
attrs->units = std::move(units);
attrs->out_dtype = out_dtype;
attrs->input_zero_point = input_zero_point;
attrs->kernel_zero_point = kernel_zero_point;
attrs->input_scale = input_scale;
attrs->kernel_scale = kernel_scale;
static const Op& op = Op::Get("qnn.dense");
return CallNode::make(op, {data, weight}, Attrs(attrs), {});
}
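
A detail that ties the Python and C++ hunks together: _make.conv2d and _make.dense are positional FFI bindings, so the argument order in qnn.py must match the C++ Make functions exactly; that is why units now comes after kernel_scale on both sides of the dense binding. The correspondence for dense, annotated for this writeup (the comments are added here, not in the source):

    # python/tvm/relay/qnn/op/qnn.py      src/relay/qnn/op/dense.cc
    _make.dense(data,               # <-> Expr data
                weight,             # <-> Expr weight
                input_zero_point,   # <-> int32_t input_zero_point
                kernel_zero_point,  # <-> int32_t kernel_zero_point
                input_scale,        # <-> double input_scale
                kernel_scale,       # <-> double kernel_scale
                units,              # <-> IndexExpr units
                out_dtype)          # <-> DataType out_dtype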