Skip to content

Commit

Permalink
Address comments from Ashutosh
Browse files Browse the repository at this point in the history
  • Loading branch information
guberti committed Sep 5, 2022
1 parent 0e385da commit d5d4caa
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 9 deletions.
10 changes: 5 additions & 5 deletions python/tvm/relay/op/strategy/arm_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,19 +237,19 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
)

# Optimized special case depthwiseConv2D operation. Requires a 3x3 kernel, a
# NHWC layout, a HWOI kernel layout (which we would ideally rearrange), no dilation,
# "SAME" padding, int8 inputs and outputs, the same number of input and output
# channels, and for that channel count to be divisible by 4.
#
# Additional work could remove some of these restrictions.
# NHWC layout, a HWOI kernel layout (which we rearrange), no dilation, int8 inputs,
# int32 output, the same number of input and output channels, and for that channel
# count to be divisible by 4. Additional work could remove these restrictions.

elif (
target.features.has_dsp
and kernel.shape[0] == kernel.shape[1] == 3
and dilation_w == dilation_h == 1
and kernel.shape[3] == 1 # channel_multiplier == 1
and data.dtype == "int8"
and out_type.dtype == "int32"
and data.shape[3] % 4 == 0
and (padding != "SAME" or data.shape[1] % stride_h == data.shape[2] % stride_w == 0)
):
strategy.add_implementation(
wrap_compute_conv2d(topi.arm_cpu.depthwise_conv2d_nhwc_dsp),
Expand Down
5 changes: 3 additions & 2 deletions python/tvm/topi/arm_cpu/mprofile/dsp/depthwise_conv2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,9 @@ def depthwise_conv2d_nhwc_dsp_compute(_cfg, data, kernel, strides, padding, dila
output_h = height // stride_h
output_w = width // stride_w

# Note - this padding behavior is DIFFERENT from Tensorflow, which pads the top left if
# stride > 1. Need to investigate and decide which behavior we want.
# This padding behavior is consistent with other TVM depthwise_conv2d schedules. However, it
# differs from TensorFlow, which only pads the bottom right if stride > 1. This probably
# brings down accuracy slightly for models imported from TFLite.
pad_down = 1 if stride_h == 1 else 0
pad_right = 1 if stride_w == 1 else 0

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def intrin_func(ins, outs):
builder.emit(
tir.call_extern(
"int32",
f"kernel_convolve_{tensor_w}_{channels}_{kernel_h}_{kernel_w}_{suffix}",
f"kernel_convolve_w{tensor_w}_c{channels}_kh{kernel_h}_kw{kernel_w}_{suffix}",
outs[0].access_ptr("w"),
ins[0].access_ptr("r"),
ins[1].access_ptr("r"),
Expand Down Expand Up @@ -131,7 +131,7 @@ def quad_channel_convolve_impl(tensor_w, channels, kernel_h, kernel_w, suffix):
#ifdef __cplusplus
extern "C"
#endif
int32_t kernel_convolve_{tensor_w}_{channels}_{kernel_h}_{kernel_w}_{suffix}(
int32_t kernel_convolve_w{tensor_w}_c{channels}_kh{kernel_h}_kw{kernel_w}_{suffix}(
uint32_t *out,
uint32_t *tensor,
uint32_t *packed_kernel) {{
Expand Down

0 comments on commit d5d4caa

Please sign in to comment.