[ONNX] QLinearAveragePool and QLinearGlobalAveragePool contrib op #9017

Merged · 4 commits · Sep 16, 2021
89 changes: 79 additions & 10 deletions python/tvm/relay/frontend/onnx.py
@@ -272,6 +272,13 @@ class Pool(OnnxOpConverter):

@classmethod
def _impl_v1(cls, inputs, attr, params):
attr_cvt, data = cls._run_calculation(inputs, attr, params)
return attr_cvt([data], attr, params)

@classmethod
def _run_calculation(cls, inputs, attr, params):
"""Helper method to return the processed input data and AttrCvt object"""

data = inputs[0]
input_shape = infer_shape(data)
input_dtype = infer_type(data).checked_type.dtype
@@ -321,16 +328,19 @@ def _impl_v1(cls, inputs, attr, params):
else:
attr["layout"] = onnx_default_layout(dims=(len(input_shape) - 2), op_name=cls.name)

return AttrCvt(
op_name=dimension_picker(cls.name),
transforms={
"kernel_shape": "pool_size",
"pads": ("padding", 0),
"dilations": ("dilation", 1),
},
ignores=["storage_order"],
custom_check=dimension_constraint(),
)([data], attr, params)
return (
AttrCvt(
op_name=dimension_picker(cls.name),
transforms={
"kernel_shape": "pool_size",
"pads": ("padding", 0),
"dilations": ("dilation", 1),
},
ignores=["storage_order"],
custom_check=dimension_constraint(),
),
data,
)


class Absolute(Unary):
@@ -351,6 +361,29 @@ class AveragePool(Pool):
name = "avg_pool"


class QLinearAveragePool(Pool):
Contributor: I think composition rather than subclassing would be a cleaner solution. Right now all the code to handle the quantized and non-quantized cases is in the same place, which makes it a bit harder to read. Please separate it. You can do something like refactor the Pool impl into a new class method like _run_calculation(...) and call it from QLinearAveragePool.

Contributor Author: I've refactored the code to separate out the quantized and non-quantized ops in a way that doesn't touch any of the non-quantized op classes. Let me know if this is close to what you had in mind, thanks.

Contributor: Yeah, this is fine.

"""Operator converter for QLinearAveragePool from Microsoft onnxruntime contrib opset."""

name = "avg_pool"

@classmethod
def _impl_v1(cls, inputs, attr, params):
x_scale = get_scalar(inputs[1], params)
x_zero_point = get_scalar(inputs[2], params, dtype="int32")
y_scale = fold_constant(get_scalar(inputs[3], params))
y_zero_point = get_scalar(inputs[4], params, dtype="int32")

attr_cvt, data = cls._run_calculation(inputs, attr, params)

input_dtype = infer_type(data).checked_type.dtype
# Onnxruntime doesn't actually do this op in integer; it dequantizes to fp32
# and then requantizes after (according to the documentation below)
# https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md#com.microsoft.QLinearAveragePool
float_node = _qnn.op.dequantize(data, x_scale, x_zero_point)
out = attr_cvt([float_node], attr, params)
return _qnn.op.quantize(out, y_scale, y_zero_point, out_dtype=input_dtype)
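
The dequantize -> float op -> quantize sequence above is easy to check by hand. Here is a minimal NumPy sketch of the same affine quantization arithmetic that _qnn.op.dequantize and _qnn.op.quantize perform; the scale and zero-point values are made up for illustration:

import numpy as np

# Hypothetical uint8 quantization parameters.
x_scale, x_zero_point = 0.02, 128
y_scale, y_zero_point = 0.02, 128

x_q = np.array([[120, 130], [140, 150]], dtype=np.uint8)

# Dequantize: real = scale * (quantized - zero_point)
x_f = x_scale * (x_q.astype(np.float32) - x_zero_point)

# The float op; here a 2x2 average over the whole array for simplicity.
out_f = x_f.mean()

# Requantize: quantized = round(real / scale) + zero_point, clipped to the dtype range.
out_q = np.clip(np.round(out_f / y_scale) + y_zero_point, 0, 255).astype(np.uint8)
assert out_q == 135  # the average of 120, 130, 140, 150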


class BatchNorm(OnnxOpConverter):
"""Operator converter for BatchNorm."""

@@ -654,6 +687,40 @@ def _impl_v1(cls, inputs, attr, params):
)


class QLinearGlobalAveragePool(OnnxOpConverter):
"Operator converter for QLinearGlobalAveragePool from Microsoft onnxruntime contrib opset."

@classmethod
def _impl_v1(cls, inputs, attr, params):
rank = len(infer_shape(inputs[0]))

x_scale = get_scalar(inputs[1], params)
x_zero_point = get_scalar(inputs[2], params, dtype="int32")
y_scale = fold_constant(get_scalar(inputs[3], params))
y_zero_point = get_scalar(inputs[4], params, dtype="int32")

input_dtype = infer_type(inputs[0]).checked_type.dtype

Contributor: I'm fine with this for now, but this should be a TODO, since I believe the actual onnxruntime implementation does not do dequantize -> pool -> quantize:
https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/core/mlas/lib/qlgavgpool.cpp

Contributor Author: Added the TODO, thanks.

Contributor: I have AvgPool in FakeQuantizationToInteger already; we can use that to convert to integer if needed. I haven't done GlobalAvgPool yet.

# Onnxruntime documentation does not mention that this global avg_pool should follow the
# sequence dequantize -> float op -> quantize, but that is how QLinearAveragePool is done.
#
# This op also follows the same pattern since the qnn op is not available right now.
# TODO: Generate QNN op to perform quantized operation instead of dequant -> op -> quant
x = _qnn.op.dequantize(inputs[0], x_scale, x_zero_point)
if rank == 3:
out = _op.nn.global_avg_pool1d(x)
elif rank == 4:
out = _op.nn.global_avg_pool2d(x)
elif rank == 5:
out = _op.nn.global_avg_pool3d(x)
else:
raise NotImplementedError(
"Global average pooling is only implemented for 1D, 2D, and 3D kernels, got %dD."
% (rank - 2),
)
return _qnn.op.quantize(out, y_scale, y_zero_point, out_dtype=input_dtype)
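
For reference, an integer-domain version of this op, which is the direction the TODO points to, can be sketched in NumPy as below. This is an illustrative outline assuming uint8 NCHW tensors, not the PR's implementation; a real kernel would also replace the float multiplier with a fixed-point one:

import numpy as np

def qlinear_global_avg_pool_nchw(x_q, x_scale, x_zp, y_scale, y_zp):
    """Quantized global average pool without a dequantized intermediate tensor."""
    n = x_q.shape[2] * x_q.shape[3]
    # Accumulate in int32 and subtract the input zero point once per element.
    acc = x_q.astype(np.int32).sum(axis=(2, 3), keepdims=True) - n * x_zp
    # Fold both scales and the element count into a single rescale factor.
    multiplier = x_scale / (y_scale * n)
    out = np.round(acc * multiplier) + y_zp
    return np.clip(out, 0, 255).astype(np.uint8)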


class GlobalMaxPool(OnnxOpConverter):
"""Operator converter for GlobalMaxPool"""

@@ -3879,6 +3946,8 @@ def _get_convert_map(opset):
"QLinearAdd": QLinearAdd.get_converter(opset),
"QLinearMul": QLinearMul.get_converter(opset),
"ConvInteger": ConvInteger.get_converter(opset),
"QLinearAveragePool": QLinearAveragePool.get_converter(opset),
"QLinearGlobalAveragePool": QLinearGlobalAveragePool.get_converter(opset),
# Random number generation.
"RandomUniform": RandomUniform.get_converter(opset),
# Loss functions / training
146 changes: 146 additions & 0 deletions tests/python/frontend/onnx/test_forward.py
@@ -3056,6 +3056,152 @@ def verify_global_pooling(x_shape, mode):
verify_global_pooling([4, 1, 2, 6, 4], mode)


@tvm.testing.parametrize_targets
def test_qlinear_average_pool(target, dev):
def verify_qlinear_average_pool(
x_shape, kernel_shape, strides, pads, out_shape, auto_pad="NOTSET"
):
input_nodes = [
helper.make_tensor_value_info("X", TensorProto.FLOAT, list(x_shape)),
]

output_nodes = [
helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(out_shape)),
]

input_names = ["X"]

node = helper.make_node(
"AveragePool",
inputs=input_names,
outputs=["Y"],
kernel_shape=kernel_shape,
strides=strides,
)

Contributor: Should these be QLinear nodes?

Contributor Author: Actually, it should be the non-quantized names, since we're only creating the floating-point ops. When we quantize the graph, it gets converted to QLinearAveragePool. I printed the node that gets generated after quantization to verify, and I got the output below:

node {
  input: "X_quantized"
  input: "X_scale"
  input: "X_zero_point"
  input: "Y_scale"
  input: "Y_zero_point"
  output: "Y_quantized"
  op_type: "QLinearAveragePool"
  attribute {
    name: "auto_pad"
    s: "SAME_UPPER"
    type: STRING
  }
  attribute {
    name: "kernel_shape"
    ints: 3
    ints: 3
    ints: 3
    type: INTS
  }
  attribute {
    name: "strides"
    ints: 2
    ints: 2
    ints: 2
    type: INTS
  }
  domain: "com.microsoft"
}

Contributor: Ah, I see. I did not notice that we run a quantization routine first.

if pads is None:
pad_attr = helper.make_attribute("auto_pad", auto_pad)
else:
pad_attr = helper.make_attribute("pads", pads)
node.attribute.append(pad_attr)

graph = helper.make_graph(
[node],
"qlinear_average_pool_test",
inputs=input_nodes,
outputs=output_nodes,
)

model = helper.make_model(graph, producer_name="qlinear_average_pool_Test")
quantize_and_verify_with_ort(model, input_names, [x_shape], target, dev)
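
quantize_and_verify_with_ort is the existing test helper that statically quantizes the float model and then compares TVM's output against onnxruntime's. A rough sketch of what such a helper plausibly looks like, assuming onnxruntime's quantize_static API with a random-data calibration reader; the names and structure here are illustrative, not the helper's actual code:

import numpy as np
from onnxruntime.quantization import CalibrationDataReader, quantize_static

class RandomCalibrationReader(CalibrationDataReader):
    """Feeds a few batches of random inputs for static-quantization calibration."""

    def __init__(self, input_names, input_shapes, batches=4):
        self._batches = iter(
            {n: np.random.rand(*s).astype("float32") for n, s in zip(input_names, input_shapes)}
            for _ in range(batches)
        )

    def get_next(self):
        return next(self._batches, None)

# quantize_static rewrites float ops into their QLinear* contrib counterparts:
# quantize_static("float_model.onnx", "quant_model.onnx",
#                 RandomCalibrationReader(input_names, input_shapes))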

# Pool1D
verify_qlinear_average_pool(
x_shape=[1, 1, 32],
kernel_shape=[3],
strides=[1],
pads=[1, 1],
out_shape=[1, 1, 32],
)
# Pool2D
verify_qlinear_average_pool(
x_shape=[1, 1, 32, 32],
kernel_shape=[3, 3],
strides=[1, 1],
pads=[1, 1, 1, 1],
out_shape=[1, 1, 32, 32],
)

# Pool1D with stride
verify_qlinear_average_pool(
x_shape=[1, 1, 32],
kernel_shape=[3],
strides=[2],
pads=[1, 1],
out_shape=[1, 1, 16],
)
# Pool2D with stride
verify_qlinear_average_pool(
x_shape=[1, 1, 32, 32],
kernel_shape=[3, 3],
strides=[2, 2],
pads=[1, 1, 1, 1],
out_shape=[1, 1, 16, 16],
)

# Pool1D with stride and autopadding
verify_qlinear_average_pool(
x_shape=[1, 1, 32],
kernel_shape=[3],
strides=[2],
pads=None,
out_shape=[1, 1, 16],
auto_pad="SAME_UPPER",
)
# Pool2D with stride and autopadding
verify_qlinear_average_pool(
x_shape=[1, 1, 32, 32],
kernel_shape=[3, 3],
strides=[2, 2],
pads=None,
out_shape=[1, 1, 16, 16],
auto_pad="SAME_UPPER",
)

# Pool3D with stride
verify_qlinear_average_pool(
x_shape=[1, 1, 32, 32, 32],
kernel_shape=[3, 3, 3],
strides=[2, 2, 2],
pads=[1, 1, 1, 1, 1, 1],
out_shape=[1, 1, 16, 16, 16],
)

# Pool3D with stride and autopadding
verify_qlinear_average_pool(
x_shape=[1, 1, 32, 32, 32],
kernel_shape=[3, 3, 3],
strides=[2, 2, 2],
pads=None,
out_shape=[1, 1, 16, 16, 16],
auto_pad="SAME_UPPER",
)


@tvm.testing.parametrize_targets
def test_qlinear_global_average_pool(target, dev):
def verify_qlinear_global_average_pool(x_shape):
out_shape = x_shape[:2] + [1] * (len(x_shape) - 2)

node_type = "GlobalAveragePool"

input_names = ["X"]

pool_node = helper.make_node(node_type, inputs=input_names, outputs=["Y"])
Contributor: Should these be QLinear nodes?

Contributor Author: It should be the floating-point op name, as mentioned in the comment above.

graph = helper.make_graph(
[pool_node],
"qlinear_global_average_pool_test",
inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, list(x_shape))],
outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(out_shape))],
)

model = helper.make_model(graph, producer_name="qlinear_global_average_pool_test")
quantize_and_verify_with_ort(model, input_names, [x_shape], target, dev)

# 1D Pooling (NCW)
verify_qlinear_global_average_pool([1, 8, 8])
verify_qlinear_global_average_pool([4, 1, 4])

# 2D Pooling (NCHW)
verify_qlinear_global_average_pool([1, 8, 8, 8])
verify_qlinear_global_average_pool([4, 1, 6, 4])

# 3D Pooling (NCDHW)
verify_qlinear_global_average_pool([1, 8, 6, 8, 8])
verify_qlinear_global_average_pool([4, 1, 2, 6, 4])
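
End to end, the converters added in this PR are reached through the standard TVM import path. A minimal usage sketch using the standard relay API; the model file name and input shape are placeholders:

import onnx
import tvm
from tvm import relay

# A statically quantized model containing the com.microsoft QLinear contrib ops.
model = onnx.load("quant_model.onnx")
mod, params = relay.frontend.from_onnx(model, shape={"X": (1, 8, 8, 8)})

# The QLinear pooling ops are lowered through qnn.dequantize -> nn.*_pool -> qnn.quantize.
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target="llvm", params=params)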


@tvm.testing.parametrize_targets
def test_mod(target, dev):
def verify_mod(x_shape, y_shape, fmod, out_shape, dtype="float32"):