Skip to content

Commit

Permalink
add symmetric quant in softmax (microsoft#14640)
Browse files Browse the repository at this point in the history
### Description

microsoft#14626


### Motivation and Context

microsoft#14626
  • Loading branch information
chenfucn authored and preetha-intel committed Feb 15, 2023
1 parent 56f89dd commit d7ccad2
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 8 deletions.
5 changes: 5 additions & 0 deletions onnxruntime/python/tools/quantization/operators/softmax.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ def quantize(self):
if self.quantizer.activation_qType == onnx.onnx_pb.TensorProto.UINT8:
out_scale = 1 / 256.0
out_zero_point = 0
elif self.quantizer.is_activation_symmetric:
# results are all greater than or equal to 0, so we can only use
# half of the range
out_scale = 1 / 127.0
out_zero_point = 0
else:
out_scale = 1 / 256.0
out_zero_point = -128
Expand Down
41 changes: 33 additions & 8 deletions onnxruntime/test/python/quantization/test_op_softmax.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
# --------------------------------------------------------------------------

import unittest
from pathlib import Path

import numpy as np
import onnx
from onnx import TensorProto, helper
from onnx import TensorProto, helper, numpy_helper
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type

from onnxruntime.quantization import QuantFormat, QuantType, quantize_static
Expand Down Expand Up @@ -148,13 +149,33 @@ def quantize_softmax_test(self, activation_type, weight_type, extra_options={}):
weight_type=weight_type,
extra_options=extra_options,
)
qdqnode_counts = {
"Conv": 1,
"QuantizeLinear": 3,
"DequantizeLinear": 4,
"Softmax": 1,
}
check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)

result_model = onnx.load(Path(model_q8_qdq_path))
qnode_cnt = 0
dqnode_cnt = 0
softmax_cnt = 0
qnode_zeropoints = []
for node in result_model.graph.node:
if node.op_type == "QuantizeLinear":
qnode_cnt += 1
qnode_zeropoints.append(node.input[2])
elif node.op_type == "DequantizeLinear":
dqnode_cnt += 1
elif node.op_type == "Softmax":
softmax_cnt += 1
self.assertEqual(3, qnode_cnt, "Expected 3 QuantizeLinear nodes, found {}".format(qnode_cnt))
self.assertEqual(4, dqnode_cnt, "Expected 4 DequantizeLinear nodes, found {}".format(dqnode_cnt))
self.assertEqual(1, softmax_cnt, "Expected 1 Softmax node, found {}".format(softmax_cnt))
if extra_options.get("ActivationSymmetric", False):
for tensor in result_model.graph.initializer:
if tensor.name in qnode_zeropoints:
np_value = numpy_helper.to_array(tensor)
self.assertEqual(
0,
np_value,
"QuantizeLinear node zero point value must be 0, found {} instead!".format(np_value),
)

qnode_io_qtypes = {
"QuantizeLinear": [
["i", 2, activation_proto_qtype],
Expand All @@ -169,6 +190,10 @@ def test_quantize_softmax(self):
self.quantize_softmax_test(QuantType.QUInt8, QuantType.QUInt8)

def test_quantize_softmax_s8s8(self):
self.quantize_softmax_test(
QuantType.QInt8,
QuantType.QInt8,
)
self.quantize_softmax_test(
QuantType.QInt8,
QuantType.QInt8,
Expand Down

0 comments on commit d7ccad2

Please sign in to comment.