[microNPU][ETHOSU] Channel pad offloaded to NPU #14765
@@ -1447,6 +1447,84 @@ def callback(
        )


class ChannelPadRewriter(DFPatternCallback):
"""Convert ethos-u.pad2d composite function to the Relay concatenate operation""" | ||||||||

    def __init__(self):
        super().__init__(require_type=True)
        self.pattern = (
            wildcard().has_attr({"Composite": ethosu_patterns.ChannelPadParams.composite_name})
        )(wildcard())

    def callback(
        self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
    ) -> tvm.relay.Expr:
        params = ethosu_patterns.ChannelPadParams(post.op.body)
        params.ifm.tensor = post.args[0]

        concat_args = list()
        # Activations requiring LUT is currently not supported, so setting it to an empty list
Review comment: I know every operator here has this copy-pasted legacy comment, but let's remove it... Firstly, LUT-based activations are supported, and secondly, it could leave an impression that implementing something like fused pad + sigmoid is a TODO.

Reply: Got it, thank you.
        lut = relay.const([], dtype="int8")
        # pad channels before
        if params.ch_padding[0] > 0:
            shape1 = list(params.ifm.shape)
            shape1[3] = params.ch_padding[0].value
            pad_channels = relay.Constant(
                tvm.nd.array(
                    np.full(
                        shape=shape1,
                        fill_value=int(params.ifm.q_params.zero_point),
                        dtype=params.ifm.dtype,
                    )
                )
            )
            identity1 = ethosu_ops.ethosu_identity(
                ifm=pad_channels,
                lut=lut,
                ifm_scale=float(params.ifm.q_params.scale_f32),
                ifm_zero_point=int(params.ifm.q_params.zero_point),
                ofm_scale=float(params.ofm.q_params.scale_f32),
                ofm_zero_point=int(params.ofm.q_params.zero_point),
            )
            concat_args.append(identity1)

        identity2 = ethosu_ops.ethosu_identity(
            ifm=params.ifm.tensor,
            lut=lut,
            ifm_scale=float(params.ifm.q_params.scale_f32),
            ifm_zero_point=int(params.ifm.q_params.zero_point),
            ofm_scale=float(params.ofm.q_params.scale_f32),
            ofm_zero_point=int(params.ofm.q_params.zero_point),
        )
        concat_args.append(identity2)

        # pad channels after
        if params.ch_padding[1] > 0:
            shape3 = list(params.ifm.shape)
            shape3[3] = params.ch_padding[1].value
            pad_channels3 = relay.Constant(
                tvm.nd.array(
                    np.full(
                        shape=shape3,
                        fill_value=int(params.ifm.q_params.zero_point),
                        dtype=params.ifm.dtype,
                    )
                )
            )
            identity3 = ethosu_ops.ethosu_identity(
                ifm=pad_channels3,
                lut=lut,
                ifm_scale=float(params.ifm.q_params.scale_f32),
                ifm_zero_point=int(params.ifm.q_params.zero_point),
                ofm_scale=float(params.ofm.q_params.scale_f32),
                ofm_zero_point=int(params.ofm.q_params.zero_point),
            )
            concat_args.append(identity3)

        axis = 3
        return relay.op.concatenate(relay.Tuple(concat_args), axis=axis)
Review comment: Since axis is not used elsewhere, maybe just pass axis=3 directly to the concatenate call (suggested change).
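For intuition about why this rewrite is sound, here is a NumPy-only sketch (not part of the PR; the shape, zero point and padding amounts are assumed values) showing that channel-only padding of an NHWC tensor is exactly concatenation with zero-point-filled blocks along axis 3, which is what the emitted identity + concatenate sequence computes:

```python
# Illustrative sketch only: verify in NumPy that channel-only padding of an
# NHWC tensor equals concatenating zero-point-filled blocks on axis 3.
import numpy as np

ifm = np.random.randint(-128, 128, size=(1, 8, 8, 3)).astype("int8")
zero_point = 11               # assumed quantization zero point
pad_before, pad_after = 2, 1  # channels added before / after

# Reference semantics of nn.pad restricted to the channel dimension
padded = np.pad(
    ifm,
    pad_width=((0, 0), (0, 0), (0, 0), (pad_before, pad_after)),
    mode="constant",
    constant_values=zero_point,
)

# What the rewriter emits: constant blocks (passed through identity ops)
# concatenated with the input feature map along axis 3
before = np.full((1, 8, 8, pad_before), zero_point, dtype="int8")
after = np.full((1, 8, 8, pad_after), zero_point, dtype="int8")
assert np.array_equal(padded, np.concatenate([before, ifm, after], axis=3))
```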

@util.create_npu_function_pass(opt_level=1)
class LegalizeEthosU:
    """This is the pass to call graph-rewrites to perform graph transformation

@@ -1461,6 +1539,7 @@ def transform_npu_function(self, _, func: relay.Function) -> relay.Function:
        rewriters = [
            PartitionedSplitRewriter(),
            SplitRewriter(),
            ChannelPadRewriter(),
            Conv2DRewriter(),
            Conv2DTransposeRewriter(),
            DepthwiseConv2DRewriter(),
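For context, DFPatternCallback rewriters such as ChannelPadRewriter are applied with tvm.relay.dataflow_pattern.rewrite. Below is a minimal sketch of how the rewriter list could be driven; the actual LegalizeEthosU pass wraps this logic in util.create_npu_function_pass rather than exposing a free function, so treat this as illustration only:

```python
# Hedged sketch: applying a list of DFPatternCallback rewriters to a Relay
# function. The helper name is made up; the real pass infrastructure differs.
from tvm.relay.dataflow_pattern import rewrite

def apply_rewriters(func, rewriters):
    # Each rewriter replaces its matched composite functions with NPU ops;
    # the function is rewritten in sequence, one callback at a time.
    for rewriter in rewriters:
        func = rewrite(rewriter, func)
    return func

# Example: func = apply_rewriters(func, [ChannelPadRewriter()])
```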
@@ -1940,32 +1940,32 @@ class PadParams:
    padding_bounds = [31, 31, 32, 32]

    def __init__(self, func_body: Call):
-        from tvm.relay.backend.contrib.ethosu.util import QPad2DArgs
+        from tvm.relay.backend.contrib.ethosu.util import QPadArgs

        # there is no 'layout' attribute in nn.pad
        layout = "NHWC"
        self.ifm = TensorParams(
-            tensor=func_body.args[QPad2DArgs.IFM.value],
+            tensor=func_body.args[QPadArgs.IFM.value],
            layout=layout,
            scale=tvm.relay.Constant(tvm.nd.array(np.array(1.0, dtype="float32"))),
-            zero_point=func_body.args[QPad2DArgs.IFM_ZERO_POINT.value],
+            zero_point=func_body.args[QPadArgs.IFM_ZERO_POINT.value],
        )

        self.padding = self.extract_padding(func_body)
        self.ofm = TensorParams(
            tensor=func_body,
            layout=layout,
            scale=tvm.relay.Constant(tvm.nd.array(np.array(1.0, dtype="float32"))),
-            zero_point=func_body.args[QPad2DArgs.IFM_ZERO_POINT.value],
+            zero_point=func_body.args[QPadArgs.IFM_ZERO_POINT.value],
        )

    @staticmethod
    def extract_padding(
        padding: relay.Call,
    ) -> Optional[Tuple[int, int, int, int]]:
        """
-        Here we check whether a separate padding operation can be rewritten
-        as NPU depthwise convolution. If the padding specified by the
+        Here we check whether a separate spatial-dimension padding operation can be
+        rewritten as NPU depthwise convolution. If the padding specified by the
        separate nn.pad operation is not supported by NPU depthwise convolution,
        None will be returned. This will cause the nn.pad not to be offloaded to NPU.
        """
@@ -2000,6 +2000,79 @@ def is_valid(self):
        return True


class ChannelPadParams:
    """
    This class will parse a call to an ethos-u.channel-pad composite function
    and extract the parameter information.
    """

    composite_name = "ethos-u.channel-pad"
    # The ethos-u.channel-pad composite function will be transformed
    # to the Relay concatenate operation.

    def __init__(self, func_body: Call):
        from tvm.relay.backend.contrib.ethosu.util import QPadArgs

        # there is no 'layout' attribute in nn.pad
        layout = "NHWC"
        self.ifm = TensorParams(
            tensor=func_body.args[QPadArgs.IFM.value],
            layout=layout,
            scale=tvm.relay.Constant(tvm.nd.array(np.array(1.0, dtype="float32"))),
            zero_point=func_body.args[QPadArgs.IFM_ZERO_POINT.value],
        )

        self.ch_padding = self.extract_ch_padding(func_body)
        self.ofm = TensorParams(
            tensor=func_body,
            layout=layout,
            scale=tvm.relay.Constant(tvm.nd.array(np.array(1.0, dtype="float32"))),
            zero_point=func_body.args[QPadArgs.IFM_ZERO_POINT.value],
        )

    @staticmethod
    def extract_ch_padding(
        padding: relay.Call,
    ) -> Optional[Tuple[int, int]]:
        """
        Here we check whether a separate channel-dimension padding operation can be
        rewritten as the Relay concatenate operation. If the padding specified by the
        separate nn.pad operation is not supported by the NPU, None will be returned.
        This will cause the nn.pad not to be offloaded to the NPU.
        """
        pad_width = padding.attrs["pad_width"]
        if len(pad_width) != 4:
            return None
        if (
            list(pad_width[0]) != [0, 0]
            or list(pad_width[1]) != [0, 0]
            or list(pad_width[2]) != [0, 0]
        ):
            return None
        return [
            pad_width[3][0],
            pad_width[3][1],
        ]

Review comment (on the pad_width restrictions above): Are there networks that have paddings in height, width and channels? If so, it would be possible to remove the width and height restrictions and add width- and height-padding processing to the legalization using depthwise convolution, as is done for pad2d.

Reply: Yes, you are right, spatial and channel pad can of course occur in neural networks. This is a separate task; we discussed that it will be useful in the future. We plan to address it when we have time or when a network with such padding appears.
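As an illustration of the check above, here is a standalone sketch of the channel-padding extraction, using plain Python lists in place of the Relay attribute arrays; the helper name is made up for the example and is not TVM API:

```python
# Standalone sketch of extract_ch_padding's pad_width check (illustration only).
from typing import List, Optional, Tuple

def extract_channel_padding(pad_width: List[List[int]]) -> Optional[Tuple[int, int]]:
    """Return (pad_before, pad_after) for the channel axis of an NHWC pad,
    or None if the batch or spatial axes are padded."""
    if len(pad_width) != 4:
        return None
    if any(list(p) != [0, 0] for p in pad_width[:3]):
        return None
    return pad_width[3][0], pad_width[3][1]

print(extract_channel_padding([[0, 0], [0, 0], [0, 0], [2, 1]]))  # (2, 1): offloadable
print(extract_channel_padding([[0, 0], [1, 1], [1, 1], [0, 0]]))  # None: spatial pad, handled by PadParams
```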

    def is_valid(self):
        """
        This function checks whether pad has compatible attributes
        with the Relay concatenate operation
        """
        tensor_params = [self.ifm, self.ofm]
        if not check_valid_dtypes(tensor_params, supported_dtypes=[np.uint8, np.int8]):
            return False
        if self.ifm.dtype != self.ofm.dtype:
            return False
        if not check_batch_size(self.ifm):
            return False
        if not self.ch_padding:
            return False
        if not check_dimensions(self.ifm) or not check_dimensions(self.ofm):
            return False
        return True


def pad_pattern():
    """Create pattern for pad"""
    pattern = is_op("nn.pad")(wildcard(), is_constant())
@@ -2066,6 +2139,11 @@ def softmax_pattern() -> tvm.relay.dataflow_pattern.DFPattern:
@register_pattern_table("ethos-u")
def pattern_table() -> List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Callable]]:
    return [
        (
            ChannelPadParams.composite_name,
            pad_pattern(),
            lambda pat: ChannelPadParams(pat).is_valid(),
        ),
        (
            QnnConv2DParams.composite_name,
            qnn_conv2d_pattern(),
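To see how this new table entry gets exercised, the pad pattern can be matched against a hand-built nn.pad call with channel-only padding. The sketch below is illustrative and untested; the shapes, dtype and padding values are assumptions, and the exact nn.pad argument convention (pad_value passed as a constant expression) may vary between TVM versions:

```python
# Hedged sketch: matching the channel-pad pattern with the Relay dataflow
# pattern matcher, outside of the full partitioning flow.
from tvm import relay
from tvm.relay.dataflow_pattern import is_constant, is_op, wildcard

pattern = is_op("nn.pad")(wildcard(), is_constant())  # same structure as pad_pattern()

x = relay.var("x", shape=(1, 8, 8, 3), dtype="int8")
call = relay.nn.pad(
    x,
    pad_width=((0, 0), (0, 0), (0, 0), (2, 1)),  # pad channels only
    pad_value=relay.const(11, "int8"),            # zero point as a constant argument
)
print(pattern.match(call))  # expected: True; ChannelPadParams(...).is_valid() then decides offload
```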