From 3946cf548511995dcbb68163de09c2d4e2d92f3a Mon Sep 17 00:00:00 2001 From: Jack Frankland Date: Thu, 6 Apr 2023 14:59:51 +0100 Subject: [PATCH] [TOPI] Vectorize depthwise conv2d output operator Depthwise Conv2D operations may consist of a convolution + an output operator e.g. Relu. This commit will: * Apply vectorization across the inner channel loop when there is an output operator. * Remove some unused variables in `schedule_depthwise_conv2d_nhwc`. * Limit the loop splitting to 8 elements in the inner loop. --- python/tvm/topi/arm_cpu/depthwise_conv2d.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/python/tvm/topi/arm_cpu/depthwise_conv2d.py b/python/tvm/topi/arm_cpu/depthwise_conv2d.py index b6c15a30c037..59660e6bb90c 100644 --- a/python/tvm/topi/arm_cpu/depthwise_conv2d.py +++ b/python/tvm/topi/arm_cpu/depthwise_conv2d.py @@ -292,13 +292,13 @@ def schedule_depthwise_conv2d_nhwc(cfg, outs): out = outs[0] ##### space definition begin ##### - n, h, w, c = s[out].op.axis + _, h, w, c = s[out].op.axis # Split the number of input/output channels - cfg.define_split("tile_c", c, num_outputs=2) + cfg.define_split("tile_c", c, num_outputs=2, filter=lambda entry: entry.size[1] <= 8) # Split the height of the convolution - _, hi = cfg.define_split("tile_h", h, num_outputs=2) + cfg.define_split("tile_h", h, num_outputs=2) # Split the width of the convolution - _, wi = cfg.define_split("tile_w", w, num_outputs=2) + cfg.define_split("tile_w", w, num_outputs=2) # Additional out (e.g., requantization, bias addition, etc..) # 0: locate the output on the second last axis of the main compuation # 1: locate the output closest to the main computation @@ -394,7 +394,8 @@ def schedule_conv_out(out): ci_outer, ci_inner = s[out].split(ci, 4) s[out].vectorize(ci_inner) s[out].unroll(ci_outer) - + else: + s[out].vectorize(ci) fused_n_ho = s[out].fuse(n, ho) return hi, wi, fused_n_ho