[ONNX] QLinearAveragePool and QLinearGlobalAveragePool contrib op #9017
Changes from all commits: 00214cd, 5b7ab84, e3f90c9, d240901
```diff
@@ -272,6 +272,13 @@ class Pool(OnnxOpConverter):
     @classmethod
     def _impl_v1(cls, inputs, attr, params):
+        attr_cvt, data = cls._run_calculation(inputs, attr, params)
+        return attr_cvt([data], attr, params)
+
+    @classmethod
+    def _run_calculation(cls, inputs, attr, params):
+        """Helper method to return the processed input data and AttrCvt object"""
+
         data = inputs[0]
         input_shape = infer_shape(data)
         input_dtype = infer_type(data).checked_type.dtype
```
```diff
@@ -321,16 +328,19 @@ def _impl_v1(cls, inputs, attr, params):
         else:
             attr["layout"] = onnx_default_layout(dims=(len(input_shape) - 2), op_name=cls.name)

-        return AttrCvt(
-            op_name=dimension_picker(cls.name),
-            transforms={
-                "kernel_shape": "pool_size",
-                "pads": ("padding", 0),
-                "dilations": ("dilation", 1),
-            },
-            ignores=["storage_order"],
-            custom_check=dimension_constraint(),
-        )([data], attr, params)
+        return (
+            AttrCvt(
+                op_name=dimension_picker(cls.name),
+                transforms={
+                    "kernel_shape": "pool_size",
+                    "pads": ("padding", 0),
+                    "dilations": ("dilation", 1),
+                },
+                ignores=["storage_order"],
+                custom_check=dimension_constraint(),
+            ),
+            data,
+        )


 class Absolute(Unary):
```
```diff
@@ -351,6 +361,29 @@ class AveragePool(Pool):
     name = "avg_pool"


+class QLinearAveragePool(Pool):
+    """Operator converter for QLinearAveragePool from Microsoft onnxruntime contrib opset."""
+
+    name = "avg_pool"
+
+    @classmethod
+    def _impl_v1(cls, inputs, attr, params):
+        x_scale = get_scalar(inputs[1], params)
+        x_zero_point = get_scalar(inputs[2], params, dtype="int32")
+        y_scale = fold_constant(get_scalar(inputs[3], params))
+        y_zero_point = get_scalar(inputs[4], params, dtype="int32")
+
+        attr_cvt, data = cls._run_calculation(inputs, attr, params)
+
+        input_dtype = infer_type(data).checked_type.dtype
+        # Onnxruntime doesn't actually do this op in integer, they dequantize to fp32
+        # and then requantize after (according to documentation below)
+        # https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md#com.microsoft.QLinearAveragePool
+        float_node = _qnn.op.dequantize(data, x_scale, x_zero_point)
+        out = attr_cvt([float_node], attr, params)
+        return _qnn.op.quantize(out, y_scale, y_zero_point, out_dtype=input_dtype)
+
+
 class BatchNorm(OnnxOpConverter):
     """Operator converter for BatchNorm."""
```
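The dequantize -> float op -> quantize pattern above follows the standard affine quantization formulas: real = (q - zero_point) * scale on the way in, and q = round(real / scale) + zero_point on the way out. A minimal NumPy sketch of what the emitted graph computes (toy scales, zero points, and int8 data; the helper names are illustrative, not TVM API):

```python
import numpy as np

def dequantize(q, scale, zero_point):
    # Affine dequantization: real = (quantized - zero_point) * scale
    return (q.astype(np.int32) - zero_point) * scale

def quantize(x, scale, zero_point, dtype=np.int8):
    # Affine quantization: quantized = round(real / scale) + zero_point,
    # clipped to the output dtype's range.
    info = np.iinfo(dtype)
    q = np.round(x / scale) + zero_point
    return np.clip(q, info.min, info.max).astype(dtype)

# dq -> avg_pool -> q over a toy 1x1x4 int8 tensor (kernel=2, stride=2)
q_in = np.array([[[-128, 0, 64, 127]]], dtype=np.int8)
x = dequantize(q_in, scale=0.1, zero_point=0)       # [[-12.8, 0.0, 6.4, 12.7]]
pooled = (x[..., 0::2] + x[..., 1::2]) / 2.0        # average adjacent pairs
print(quantize(pooled, scale=0.1, zero_point=0))    # -> [[[-64  96]]]
```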
```diff
@@ -654,6 +687,40 @@ def _impl_v1(cls, inputs, attr, params):
         )


+class QLinearGlobalAveragePool(OnnxOpConverter):
+    "Operator converter for QLinearGlobalAveragePool from Microsoft onnxruntime contrib opset."
+
+    @classmethod
+    def _impl_v1(cls, inputs, attr, params):
+        rank = len(infer_shape(inputs[0]))
+
+        x_scale = get_scalar(inputs[1], params)
+        x_zero_point = get_scalar(inputs[2], params, dtype="int32")
+        y_scale = fold_constant(get_scalar(inputs[3], params))
+        y_zero_point = get_scalar(inputs[4], params, dtype="int32")
+
+        input_dtype = infer_type(inputs[0]).checked_type.dtype
+
+        # Onnxruntime documentation does not mention that this global avg_pool should follow the
+        # sequence dequantize -> float op -> quantize, but that is how QLinearAveragePool is done.
+        #
+        # This op also follows the same pattern since qnn op is not available right now.
+        # TODO: Generate QNN op to perform quantized operation instead of dequant -> op -> quant
+        x = _qnn.op.dequantize(inputs[0], x_scale, x_zero_point)
+        if rank == 3:
+            out = _op.nn.global_avg_pool1d(x)
+        elif rank == 4:
+            out = _op.nn.global_avg_pool2d(x)
+        elif rank == 5:
+            out = _op.nn.global_avg_pool3d(x)
+        else:
+            raise NotImplementedError(
+                "Global average pooling is only implemented for 1D, 2D, and 3D kernels, got %dD."
+                % (rank - 2),
+            )
+        return _qnn.op.quantize(out, y_scale, y_zero_point, out_dtype=input_dtype)
+
+
 class GlobalMaxPool(OnnxOpConverter):
     """Operator converter for GlobalMaxPool"""
```

> I'm fine with this for now but this should be a TODO since I believe the actual implementation does not dq -> pool -> q

> Added the TODO, thanks.

> I have AvgPool in FakeQuantizationToInteger already, we can use that to convert to integer if needed. I haven't done GlobalAvgPool yet.
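For reference, the rank dispatch above maps NCW/NCHW/NCDHW inputs to a mean over all spatial axes with singleton spatial dims kept. A quick NumPy sketch of the rank-4 (global_avg_pool2d) case:

```python
import numpy as np

x = np.random.rand(1, 8, 32, 32).astype("float32")  # NCHW, rank 4
out = x.mean(axis=(2, 3), keepdims=True)             # average over H and W
assert out.shape == (1, 8, 1, 1)                     # i.e. x_shape[:2] + [1, 1]
```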
```diff
@@ -3879,6 +3946,8 @@ def _get_convert_map(opset):
         "QLinearAdd": QLinearAdd.get_converter(opset),
         "QLinearMul": QLinearMul.get_converter(opset),
         "ConvInteger": ConvInteger.get_converter(opset),
+        "QLinearAveragePool": QLinearAveragePool.get_converter(opset),
+        "QLinearGlobalAveragePool": QLinearGlobalAveragePool.get_converter(opset),
         # Random number generation.
         "RandomUniform": RandomUniform.get_converter(opset),
         # Loss functions / training
```
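With these two entries registered, a quantized model containing the contrib ops imports like any other ONNX model. A sketch (the model path and input shape are hypothetical; `relay.frontend.from_onnx` is the standard TVM import entry point):

```python
import onnx
from tvm import relay

# Hypothetical quantized model containing QLinearAveragePool / QLinearGlobalAveragePool.
onnx_model = onnx.load("model.quant.onnx")
mod, params = relay.frontend.from_onnx(onnx_model, shape={"X": (1, 1, 32, 32)})
# Per the converters above, the contrib ops are lowered to
# qnn.dequantize -> nn.avg_pool2d -> qnn.quantize.
print(mod)
```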
```diff
@@ -3056,6 +3056,152 @@ def verify_global_pooling(x_shape, mode):
         verify_global_pooling([4, 1, 2, 6, 4], mode)


+@tvm.testing.parametrize_targets
+def test_qlinear_average_pool(target, dev):
+    def verify_qlinear_average_pool(
+        x_shape, kernel_shape, strides, pads, out_shape, auto_pad="NOTSET"
+    ):
+        input_nodes = [
+            helper.make_tensor_value_info("X", TensorProto.FLOAT, list(x_shape)),
+        ]
+
+        output_nodes = [
+            helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(out_shape)),
+        ]
+
+        input_names = ["X"]
+
+        node = helper.make_node(
+            "AveragePool",
+            inputs=input_names,
+            outputs=["Y"],
+            kernel_shape=kernel_shape,
+            strides=strides,
+        )
```

> Should these be QLinear Nodes?

> Actually it should be the non-quantized names, since we're only creating the floating point ops. When we quantize the graph, it gets converted to QLinearAveragePool. I printed the node that gets generated after quantization to verify, and I got the below output:

> Ah I see, I did not notice that we run a quantization routine first
```diff
+        if pads is None:
+            pad_attr = helper.make_attribute("auto_pad", auto_pad)
+        else:
+            pad_attr = helper.make_attribute("pads", pads)
+        node.attribute.append(pad_attr)
+
+        graph = helper.make_graph(
+            [node],
+            "qlinear_average_pool_test",
+            inputs=input_nodes,
+            outputs=output_nodes,
+        )
+
+        model = helper.make_model(graph, producer_name="qlinear_average_pool_Test")
+        quantize_and_verify_with_ort(model, input_names, [x_shape], target, dev)
+
+    # Pool1D
+    verify_qlinear_average_pool(
+        x_shape=[1, 1, 32],
+        kernel_shape=[3],
+        strides=[1],
+        pads=[1, 1],
+        out_shape=[1, 1, 32],
+    )
+    # Pool2D
+    verify_qlinear_average_pool(
+        x_shape=[1, 1, 32, 32],
+        kernel_shape=[3, 3],
+        strides=[1, 1],
+        pads=[1, 1, 1, 1],
+        out_shape=[1, 1, 32, 32],
+    )
+
+    # Pool1D with stride
+    verify_qlinear_average_pool(
+        x_shape=[1, 1, 32],
+        kernel_shape=[3],
+        strides=[2],
+        pads=[1, 1],
+        out_shape=[1, 1, 16],
+    )
+    # Pool2D with stride
+    verify_qlinear_average_pool(
+        x_shape=[1, 1, 32, 32],
+        kernel_shape=[3, 3],
+        strides=[2, 2],
+        pads=[1, 1, 1, 1],
+        out_shape=[1, 1, 16, 16],
+    )
+
+    # Pool1D with stride and autopadding
+    verify_qlinear_average_pool(
+        x_shape=[1, 1, 32],
+        kernel_shape=[3],
+        strides=[2],
+        pads=None,
+        out_shape=[1, 1, 16],
+        auto_pad="SAME_UPPER",
+    )
+    # Pool2D with stride and autopadding
+    verify_qlinear_average_pool(
+        x_shape=[1, 1, 32, 32],
+        kernel_shape=[3, 3],
+        strides=[2, 2],
+        pads=None,
+        out_shape=[1, 1, 16, 16],
+        auto_pad="SAME_UPPER",
+    )
+
+    # Pool3D with stride
+    verify_qlinear_average_pool(
+        x_shape=[1, 1, 32, 32, 32],
+        kernel_shape=[3, 3, 3],
+        strides=[2, 2, 2],
+        pads=[1, 1, 1, 1, 1, 1],
+        out_shape=[1, 1, 16, 16, 16],
+    )
+
+    # Pool3D with stride and autopadding
+    verify_qlinear_average_pool(
+        x_shape=[1, 1, 32, 32, 32],
+        kernel_shape=[3, 3, 3],
+        strides=[2, 2, 2],
+        pads=None,
+        out_shape=[1, 1, 16, 16, 16],
+        auto_pad="SAME_UPPER",
+    )
```
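Here `quantize_and_verify_with_ort` is the test helper that performs the quantization step discussed in the review thread above. A rough sketch of that step using onnxruntime's public quantization API (the data-reader class and file names are illustrative, not the helper's actual implementation):

```python
import numpy as np
from onnxruntime.quantization import CalibrationDataReader, QuantFormat, quantize_static

class RandomCalibrationReader(CalibrationDataReader):
    """Feeds a few random calibration batches for the model's single input "X"."""

    def __init__(self, input_name, shape, batches=4):
        self._batches = iter(
            [{input_name: np.random.rand(*shape).astype("float32")} for _ in range(batches)]
        )

    def get_next(self):
        # Return the next feed dict, or None when calibration data is exhausted.
        return next(self._batches, None)

# Statically quantize the float AveragePool model built above; with the QOperator
# format the float node is replaced by the QLinearAveragePool contrib op.
quantize_static(
    "avg_pool.onnx",        # hypothetical path to the float model
    "avg_pool.quant.onnx",
    RandomCalibrationReader("X", [1, 1, 32, 32]),
    quant_format=QuantFormat.QOperator,
)
```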
```diff
+@tvm.testing.parametrize_targets
+def test_qlinear_global_average_pool(target, dev):
+    def verify_qlinear_global_average_pool(x_shape):
+        out_shape = x_shape[:2] + [1] * (len(x_shape) - 2)
+
+        node_type = "GlobalAveragePool"
+
+        input_names = ["X"]
+
+        pool_node = helper.make_node(node_type, inputs=input_names, outputs=["Y"])
```

> Should these be QLinear Nodes?

> It should be the floating point op name as mentioned in the comment above.

```diff
+        graph = helper.make_graph(
+            [pool_node],
+            "qlinear_global_average_pool_test",
+            inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, list(x_shape))],
+            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(out_shape))],
+        )
+
+        model = helper.make_model(graph, producer_name="qlinear_global_average_pool_test")
+        quantize_and_verify_with_ort(model, input_names, [x_shape], target, dev)
+
+    # 1D Pooling (NCW)
+    verify_qlinear_global_average_pool([1, 8, 8])
+    verify_qlinear_global_average_pool([4, 1, 4])
+
+    # 2D Pooling (NCHW)
+    verify_qlinear_global_average_pool([1, 8, 8, 8])
+    verify_qlinear_global_average_pool([4, 1, 6, 4])
+
+    # 3D Pooling (NCDHW)
+    verify_qlinear_global_average_pool([1, 8, 6, 8, 8])
+    verify_qlinear_global_average_pool([4, 1, 2, 6, 4])
+
+
 @tvm.testing.parametrize_targets
 def test_mod(target, dev):
     def verify_mod(x_shape, y_shape, fmod, out_shape, dtype="float32"):
```
> I think composition rather than subclassing would be a cleaner solution. Right now all the code to handle quantization + non-quantization is in the same place, which makes it a bit harder to read. Please separate it. You can do something like refactor the Pool impl to a new class method like `_run_calculation(...)` and call it from QLinearAveragePool.

> I've refactored the code to separate out the quantized and non-quantized ops in a way that doesn't touch any of the non-quantized op classes. Let me know if this is close to what you had in mind, thanks.

> Yeah this is fine