[microNPU] Add hardware constraints for binary elementwise #13772
Does not fuse min and max operations with requantize when the scales differ, as this is not supported on the NPU. Because of this hardware constraint we cannot perform a min or max operation fused with requantize when the scales are different (see the NPU_SET_OFM_SCALE register description: https://developer.arm.com/documentation/102420/0200/Programmers-model/Command-stream/cmd1-commands-). min/max operations with matching scales are offloaded to the NPU as ethosu_binary_elementwise; min/max operations with different scales are offloaded to the NPU as ethosu_binary_elementwise + ethosu_identity.
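To make the constraint concrete, the decision comes down to comparing the quantization parameters of both inputs and the output. The snippet below is only an illustrative sketch of that check, not the actual `MinParams`/`MaxParams` validation code in TVM; `QuantParams` and `can_fuse_requantize` are hypothetical names used here for illustration.

```python
from dataclasses import dataclass


@dataclass
class QuantParams:
    """Per-tensor quantization parameters (hypothetical container for this sketch)."""
    scale: float
    zero_point: int


def can_fuse_requantize(ifm: QuantParams, ifm2: QuantParams, ofm: QuantParams) -> bool:
    """Return True when MIN/MAX + requantize could be offloaded as a single
    ethosu_binary_elementwise op, i.e. when no real rescale is needed."""
    # With differing scales the NPU would have to apply OFM scaling on top of the
    # MIN/MAX result, which the command stream does not support for these ops,
    # so the requantize has to stay as a separate ethosu_identity.
    return (
        ifm.scale == ifm2.scale == ofm.scale
        and ifm.zero_point == ifm2.zero_point == ofm.zero_point
    )


# Matching scales -> single fused op; differing scales -> elementwise + identity.
fused = can_fuse_requantize(QuantParams(0.5, 3), QuantParams(0.5, 3), QuantParams(0.5, 3))
split = can_fuse_requantize(QuantParams(0.5, 3), QuantParams(0.5, 3), QuantParams(0.25, 0))
print(fused, split)  # True False
```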
@@ -53,6 +53,13 @@ def partition_ethosu_by_table(mod, pattern_table):
     return mod
 
 
+def relu_n1_to_1(x):
+    """
+    The specific pattern will be replaced into RELU_N1_TO_1 by tflite.
+    """
+    return tf.math.maximum(-1.0, tf.math.minimum(x, 1.0))
+
+
 def test_split_indices_legalize():
     def create_graph(axis):
         x = relay.var("x", shape=(1, 50, 50, 3))
@@ -881,7 +888,7 @@ def verify(ext_func):
         ([1, 4, 4], [4, 1], False),
     ],
 )
-@pytest.mark.parametrize("activation_function", ["NONE", "RELU"])
+@pytest.mark.parametrize("activation_function", [None, tf.nn.relu, tf.nn.relu6, relu_n1_to_1])
 def test_tflite_binary_elemwise_legalize(
     operator_type,
     ifm_shape,
@@ -906,8 +913,8 @@ def tf_function(self, x, y):
                     op = tf.math.minimum(x, y)
                 elif operator_type == "MAX":
                     op = tf.math.maximum(x, y)
-                if activation_function == "RELU":
-                    op = tf.nn.relu(op)
+                if activation_function:
+                    op = activation_function(op)
                 return op
 
         model = Model()
@@ -938,9 +945,13 @@ def verify(ext_func):
         op = ext_func.body
 
         has_reshaped_output = False
+        has_separate_requantize = False
         shapes_padded = [[1] * (4 - len(s)) + s for s in shapes]
         out_padded = [1] * (4 - len(out_shape)) + out_shape
-        if op.op.name != "contrib.ethosu.binary_elementwise":
+        if op.op.name == "contrib.ethosu.identity":
+            op = op.args[0]
+            has_separate_requantize = True
+        if op.op.name == "reshape":
             has_reshaped_output = True
             op = op.args[0]
 
@@ -951,20 +962,30 @@ def verify(ext_func):
         assert op.checked_type.dtype == dtype
         assert op.attrs.operator_type == operator_type
         assert op.attrs.reversed_operands == reversed_operands
-        if activation_function == "RELU":
+        if activation_function != None:
            assert str(op.attrs.activation) == "CLIP"
 
            if operator_type in ["MIN", "MAX"]:
-                # MIN and MAX with an activation must have a requantize operation
-                # baked into the output. To check the extra requantize node was
-                # picked up by the pattern, we can make sure the quantization
-                # information is not default.
-                assert float(op.attrs.ifm_scale) != 1.0
-                assert int(op.attrs.ifm_zero_point) != 0
-                assert float(op.attrs.ifm2_scale) != 1.0
-                assert int(op.attrs.ifm2_zero_point) != 0
-                assert float(op.attrs.ofm_scale) != 1.0
-                assert int(op.attrs.ofm_zero_point) != 0
+                if has_separate_requantize:
+                    # In case when requantize cannot be fused with MIN/MAX + CLIP due to hardware constraints
+                    # there should be default quantization values since requantize is separate operation.
+                    assert float(op.attrs.ifm_scale) == 1.0
+                    assert int(op.attrs.ifm_zero_point) == 0
+                    assert float(op.attrs.ifm2_scale) == 1.0
+                    assert int(op.attrs.ifm2_zero_point) == 0
+                    assert float(op.attrs.ofm_scale) == 1.0
+                    assert int(op.attrs.ofm_zero_point) == 0
+                else:
+                    # MIN and MAX with an activation must have a requantize operation
+                    # baked into the output. To check the extra requantize node was
+                    # picked up by the pattern, we can make sure the quantization
+                    # information is not default.
+                    assert float(op.attrs.ifm_scale) != 1.0
+                    assert int(op.attrs.ifm_zero_point) != 0
+                    assert float(op.attrs.ifm2_scale) != 1.0
+                    assert int(op.attrs.ifm2_zero_point) != 0
+                    assert float(op.attrs.ofm_scale) != 1.0
+                    assert int(op.attrs.ofm_zero_point) != 0
Comment on lines +969 to +988:

Do both of these blocks get run? It looks like we are using the same method of generating the representative dataset (which will determine the qnn params) for all the tests, so I suspect we will always create IFMs with differing qnn params and therefore test only one of the patterns here.

Reply: Yes, both of these blocks get run. The first block is run for cases with the MAX operation and the relu_n1_to_1 activation, for example test_tflite_binary_elemwise_legalize[relu_n1_to_1-ifm_shape0-ifm2_shape0-False-MAX]; in these cases the scales are different, in the others they are the same.

Reply: Ok cool, thanks for clarifying :)
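For context on the qnn params discussed above: in these tests the quantization parameters come from TFLite post-training quantization driven by a representative dataset, so the activation choice (e.g. relu_n1_to_1 clamping the output to [-1, 1]) changes the observed output range and therefore the OFM scale. Below is a generic sketch of that conversion flow, not the exact helper from the TVM test suite; the `Model` class, shapes, and dataset size are placeholders.

```python
import numpy as np
import tensorflow as tf


class Model(tf.Module):
    @tf.function
    def tf_function(self, x, y):
        # MAX followed by the RELU_N1_TO_1 pattern clamps the output to [-1, 1],
        # which can give the OFM a different scale from the IFMs.
        return tf.math.maximum(-1.0, tf.math.minimum(tf.math.maximum(x, y), 1.0))


def representative_dataset():
    # The value ranges seen here determine the quantization scales and zero points.
    for _ in range(100):
        yield [
            np.random.rand(1, 2, 3, 4).astype(np.float32),
            np.random.rand(1, 2, 3, 4).astype(np.float32),
        ]


model = Model()
concrete_func = model.tf_function.get_concrete_function(
    tf.TensorSpec([1, 2, 3, 4], tf.float32), tf.TensorSpec([1, 2, 3, 4], tf.float32)
)
converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func], model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
tflite_model = converter.convert()  # int8 model whose qnn params depend on the ranges above
```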
 
         if has_reshaped_output:
             assert list(ext_func.body.checked_type.shape) == out_shape
@@ -997,22 +1018,42 @@ def verify(ext_func):
            ),
        ]
    elif operator_type == "MIN":
-        rewriter = legalize.MinRewriter()
+        rewriter = [legalize.MinRewriter(), legalize.RequantizeRewriter()]
        pattern_table = [
+            (
+                ethosu.MinParams.composite_name,
+                ethosu.minimum_clip_requantize_pattern(),
+                lambda pat: ethosu.MinParams(pat).is_valid(),
+            ),
            (
                ethosu.MinParams.composite_name,
                ethosu.minimum_pattern(),
                lambda pat: ethosu.MinParams(pat).is_valid(),
            ),
+            (
+                ethosu.RequantizeParams.composite_name,
+                ethosu.requantize_pattern(),
+                lambda pat: ethosu.RequantizeParams(pat).is_valid(),
+            ),
        ]
    elif operator_type == "MAX":
-        rewriter = legalize.MaxRewriter()
+        rewriter = [legalize.MaxRewriter(), legalize.RequantizeRewriter()]
        pattern_table = [
+            (
+                ethosu.MaxParams.composite_name,
+                ethosu.maximum_clip_requantize_pattern(),
+                lambda pat: ethosu.MaxParams(pat).is_valid(),
+            ),
            (
                ethosu.MaxParams.composite_name,
                ethosu.maximum_pattern(),
                lambda pat: ethosu.MaxParams(pat).is_valid(),
            ),
+            (
+                ethosu.RequantizeParams.composite_name,
+                ethosu.requantize_pattern(),
+                lambda pat: ethosu.RequantizeParams(pat).is_valid(),
+            ),
        ]
 
    tflite_graph = create_tflite_graph()
Comment:

Since this test looks at the graph entirely at the Relay level, where all the RELUs are just Relay clip operations, I don't think we benefit much from an extra 70 tests with just different min and max attributes to clip.

Reply: I agree with this, I will add a separate test for the case with the MAX operation and the relu_n1_to_1 activation.
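For reference, one possible shape of such a dedicated test, reusing the existing parametrized test body rather than the full activation cross product. The test name and the chosen shapes are hypothetical, the call assumes the existing test's parameter names, and this is not necessarily how the follow-up was actually implemented:

```python
import pytest


@pytest.mark.parametrize(
    "ifm_shape, ifm2_shape, reversed_operands",
    [([1, 2, 3, 4], [1, 2, 3, 4], False)],
)
def test_tflite_max_relu_n1_to_1_legalize(ifm_shape, ifm2_shape, reversed_operands):
    # MAX + relu_n1_to_1 is the combination that ends up with differing scales
    # (see the review discussion above), so this exercises the separate-requantize
    # branch of verify() without rerunning the whole activation cross product.
    test_tflite_binary_elemwise_legalize(
        operator_type="MAX",
        ifm_shape=ifm_shape,
        ifm2_shape=ifm2_shape,
        reversed_operands=reversed_operands,
        activation_function=relu_n1_to_1,
    )
```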