[QNN] Add operator.

anijain2305 committed Aug 9, 2019
1 parent 3ac27fc commit c4c4d90
Showing 2 changed files with 206 additions and 0 deletions.
python/tvm/relay/qnn/op/qnn.py (102 additions, 0 deletions)
@@ -18,6 +18,7 @@
"""QNN dialect operators."""

from __future__ import absolute_import as _abs
from tvm import relay
from . import _make

def requantize(data,
@@ -72,3 +73,104 @@ def requantize(data,
                            output_zero_point,
                            rounding,
                            out_dtype)


def add(lhs, rhs, lhs_scale, lhs_zero_point, rhs_scale, rhs_zero_point, output_scale,
        output_zero_point):
"""Quantized addition with numpy-style broadcasting.
Parameters
----------
lhs : relay.Expr
The left hand side quantized input data.
rhs : relay.Expr
The right hand side quantized input data.
lhs_scale: float
The scale of the lhs quantized expr.
lhs_zero_point: int
The zero point of lhs quantized expr.
rhs_scale: float
The scale of the rhs quantized expr.
rhs_zero_point: int
The zero point of rhs quantized expr.
output_scale: float
The scale of the output quantized expr.
output_zero_point: int
The zero point of output quantized expr.
Returns
-------
result : relay.Expr
The computed result.
"""

    # Find the dtype of the input expr. This is required for the requantize op. Since this is
    # an add op, the dtype of the output is the same as the dtype of the inputs.
    data0 = relay.transform.infer_type(lhs)
    in_dtype = data0.checked_type.dtype

    # First, check if the qnn params of lhs and rhs match. If they do, we can avoid requantizing
    # each input separately by calling add first and then requantizing once. The whole process
    # can be represented as follows
    #
    #   scale_c * (Q_c - zp_c) = scale * (Q_a - zp) + scale * (Q_b - zp)
    #   scale_c * (Q_c - zp_c) = scale * (Q_a + Q_b - zp - zp)
    #
    # The RHS looks like a quantized tensor (Q_a + Q_b) with scale = scale and zero_point = zp + zp.
    # This can be handled by first subtracting one zero point, which restores zero_point = zp, and
    # then requantizing with the output qnn params.
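    # For example (illustrative numbers, not from the tests below): with scale = 0.1 and
    # zp = 5, Q_a = 20 and Q_b = 30 represent 1.5 and 2.5. Their sum, 4.0, is represented by
    # Q_a + Q_b - zp = 45, since 0.1 * (45 - 5) = 4.0, so the intermediate tensor really does
    # carry scale = 0.1 and zero point = 5 after the single subtraction.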

    if lhs_scale == rhs_scale and lhs_zero_point == rhs_zero_point:
        out = relay.add(lhs, rhs)
        out = relay.subtract(out, relay.const(lhs_zero_point, dtype=in_dtype))
        if lhs_scale != output_scale or lhs_zero_point != output_zero_point:
            out = requantize(data=out,
                             input_scale=lhs_scale,
                             input_zero_point=lhs_zero_point,
                             output_scale=output_scale,
                             output_zero_point=output_zero_point,
                             out_dtype=in_dtype)
        return out

    # Since the input qnn params can be different from the output qnn params, we first
    # requantize the input tensors to the output qnn params. Then we call relay.add on the
    # requantized inputs. This addition brings in the output zero point twice, so we subtract
    # it once afterwards. The whole process can be represented by the following equations
    #
    #   scale_c * (Q_c - zp_c) = scale_a * (Q_a - zp_a) + scale_b * (Q_b - zp_b)
    #
    # After requantizing Q_a and Q_b, the equation becomes
    #
    #   scale_c * (Q_c - zp_c) = scale_c * (Q_a' - zp_c) + scale_c * (Q_b' - zp_c)
    #   scale_c * (Q_c - zp_c) = scale_c * (Q_a' + Q_b' - zp_c - zp_c)
    #
    # Comparing the LHS and RHS gives
    #
    #   Q_c = Q_a' + Q_b' - zp_c
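    # For example (illustrative numbers): take scale_a = 0.2, zp_a = 10, Q_a = 20 (value 2.0)
    # and scale_b = 0.1, zp_b = 0, Q_b = 30 (value 3.0), with output scale_c = 0.5, zp_c = 3.
    # Requantizing gives Q_a' = 0.2/0.5 * (20 - 10) + 3 = 7 and Q_b' = 0.1/0.5 * (30 - 0) + 3 = 9,
    # so Q_c = 7 + 9 - 3 = 13, which indeed represents 0.5 * (13 - 3) = 5.0 = 2.0 + 3.0.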

    requantized_lhs = lhs
    if lhs_scale != output_scale or lhs_zero_point != output_zero_point:
        requantized_lhs = requantize(data=lhs,
                                     input_scale=lhs_scale,
                                     input_zero_point=lhs_zero_point,
                                     output_scale=output_scale,
                                     output_zero_point=output_zero_point,
                                     out_dtype=in_dtype)

    requantized_rhs = rhs
    if rhs_scale != output_scale or rhs_zero_point != output_zero_point:
        requantized_rhs = requantize(data=rhs,
                                     input_scale=rhs_scale,
                                     input_zero_point=rhs_zero_point,
                                     output_scale=output_scale,
                                     output_zero_point=output_zero_point,
                                     out_dtype=in_dtype)

    out = relay.add(requantized_lhs, requantized_rhs)
    out = relay.subtract(out, relay.const(output_zero_point, dtype=in_dtype))
    return out
tests/python/relay/test_qnn_add.py (104 additions, 0 deletions)
@@ -0,0 +1,104 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import tvm
import numpy as np
from tvm import relay

def test_tflite_same_io_qnn_params():
    data_dtype = 'uint8'

    x = relay.var("x", shape=(1, 4), dtype=data_dtype)
    y = relay.var("y", shape=(1, 4), dtype=data_dtype)
    z = relay.qnn.op.add(lhs=x, rhs=y,
                         lhs_scale=0.00784314,
                         lhs_zero_point=127,
                         rhs_scale=0.00784314,
                         rhs_zero_point=127,
                         output_scale=0.00784314,
                         output_zero_point=127)

    func = relay.Function([x, y], z)
    mod = relay.Module.from_expr(func)
    mod = relay.transform.Legalize()(mod)
    func = mod["main"]

    x_datas = [np.array((140, 153, 165, 178)).reshape((1, 4)),
               np.array((25, 153, 178, 216)).reshape((1, 4)),
               np.array((25, 153, 216, 165)).reshape((1, 4))]
    y_datas = [np.array((204, 178, 165, 140)).reshape((1, 4)),
               np.array((204, 178, 191, 25)).reshape((1, 4)),
               np.array((204, 178, 25, 191)).reshape((1, 4))]
    golden_outputs = [np.array((217, 204, 203, 191)).reshape((1, 4)),
                      np.array((102, 204, 242, 114)).reshape((1, 4)),
                      np.array((102, 204, 114, 229)).reshape((1, 4))]

    for i in range(3):
        x_data = x_datas[i]
        y_data = y_datas[i]
        golden_output = golden_outputs[i]

        intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
        op_res = intrp.evaluate(func)(x_data, y_data)
        np.testing.assert_equal(op_res.asnumpy(), golden_output)


def test_tflite_different_io_qnn_params():
    data_dtype = 'uint8'

    x = relay.var("x", shape=(1, 4), dtype=data_dtype)
    y = relay.var("y", shape=(1, 4), dtype=data_dtype)
    z = relay.qnn.op.add(lhs=x, rhs=y,
                         lhs_scale=0.0156863,
                         lhs_zero_point=127,
                         rhs_scale=0.0117647,
                         rhs_zero_point=85,
                         output_scale=0.0235294,
                         output_zero_point=128)

    func = relay.Function([x, y], z)
    mod = relay.Module.from_expr(func)
    mod = relay.transform.Legalize()(mod)
    func = mod["main"]

    x_datas = [np.array((76, 140, 153, 172)).reshape((1, 4)),
               np.array((133, 140, 146, 153)).reshape((1, 4)),
               np.array((76, 140, 172, 146)).reshape((1, 4))]
    y_datas = [np.array((136, 119, 128, 17)).reshape((1, 4)),
               np.array((136, 119, 111, 94)).reshape((1, 4)),
               np.array((136, 119, 17, 128)).reshape((1, 4))]
    golden_outputs = [np.array((120, 154, 167, 124)).reshape((1, 4)),
                      np.array((158, 154, 154, 150)).reshape((1, 4)),
                      np.array((120, 154, 124, 163)).reshape((1, 4))]

    for i in range(3):
        x_data = x_datas[i]
        y_data = y_datas[i]
        golden_output = golden_outputs[i]

        intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
        op_res = intrp.evaluate(func)(x_data, y_data)
        np.testing.assert_equal(op_res.asnumpy(), golden_output)


if __name__ == '__main__':
    test_tflite_same_io_qnn_params()
    test_tflite_different_io_qnn_params()
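
As a sanity check on the golden vectors above, a small NumPy reference (an editor's sketch, not part of this commit; the ref_requantize and ref_qnn_add helpers are hypothetical) that mimics the requantize-then-add lowering reproduces them, assuming plain float64 arithmetic with round-half-up stands in for TVM's fixed-point requantize on these particular inputs.

import numpy as np

def ref_requantize(q, in_scale, in_zp, out_scale, out_zp):
    # Rescale the zero-centered value into the output scale; round half up
    # (assumed to match the requantize rounding for these inputs).
    real = in_scale / out_scale * (q.astype('float64') - in_zp)
    return np.floor(real + 0.5) + out_zp

def ref_qnn_add(x, y, lhs_scale, lhs_zp, rhs_scale, rhs_zp, out_scale, out_zp):
    # Requantize each operand to the output qnn params, add, and subtract
    # the doubly-counted output zero point, as in qnn.add above.
    qa = ref_requantize(x, lhs_scale, lhs_zp, out_scale, out_zp)
    qb = ref_requantize(y, rhs_scale, rhs_zp, out_scale, out_zp)
    return np.clip(qa + qb - out_zp, 0, 255).astype('uint8')

# First golden row of test_tflite_different_io_qnn_params:
x = np.array((76, 140, 153, 172)).reshape((1, 4))
y = np.array((136, 119, 128, 17)).reshape((1, 4))
out = ref_qnn_add(x, y, 0.0156863, 127, 0.0117647, 85, 0.0235294, 128)
assert (out == np.array((120, 154, 167, 124))).all()

Note that rounding each operand separately matters here: dequantizing both inputs to float, adding, and quantizing once gives 119 instead of 120 for the first element, so the reference must follow the per-operand requantize structure of qnn.add.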
