
[Relay, TOPI] Refactor Adaptive pool and add 3d support (#5049)
* add stub for nd impl

* refactored indices compute

* refactored divide step

* remove unused variables, add doc

* fix lint

* add relay op def

* add python registration

* refactor topi test

* update relay tests, but test result is weird

* workaround for weird bug

* add relay adaptive pool 3d test

* add topi tests

* update doc for 3d

* typo fix

* fix lint

* add more tests including NDHWC
masahi authored Mar 15, 2020
1 parent b38c65c commit 7c5ff50
Showing 11 changed files with 528 additions and 97 deletions.
15 changes: 15 additions & 0 deletions include/tvm/relay/attrs/nn.h
@@ -528,6 +528,21 @@ struct AdaptivePool2DAttrs : public tvm::AttrsNode<AdaptivePool2DAttrs> {
}
};

struct AdaptivePool3DAttrs : public tvm::AttrsNode<AdaptivePool3DAttrs> {
Array<IndexExpr> output_size;
std::string layout;

TVM_DECLARE_ATTRS(AdaptivePool3DAttrs, "relay.attrs.AdaptivePool3DAttrs") {
TVM_ATTR_FIELD(output_size).set_default(Array<IndexExpr>({}))
.describe("Output depth, height and width.");
TVM_ATTR_FIELD(layout).set_default("NCDHW")
.describe("Dimension ordering of data and weight. Can be 'NCDHW', 'NDHWC', etc."
"'N', 'C', 'D', 'H', 'W' stands for batch, channel, depth, height, and width"
"dimensions respectively. Convolution is applied on 'D', 'H' and"
"'W' dimensions.");
}
};


/*! \brief Attributes for 1D max pool operator */
struct MaxPool1DAttrs : public tvm::AttrsNode<MaxPool1DAttrs> {
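The output_size attribute drives the shape resolution used by the type relation added in pooling.cc below: an empty array keeps the input depth, height and width; a single element is broadcast to all three spatial axes; three elements map to (D, H, W). A minimal Python sketch of that rule (the helper name resolve_output_size is hypothetical, for illustration only):

def resolve_output_size(in_dhw, output_size):
    """Mirror the output_size resolution rule of AdaptivePool3DRel."""
    if len(output_size) == 0:      # keep the input (depth, height, width)
        return tuple(in_dhw)
    if len(output_size) == 1:      # single int broadcasts to a cube
        return (output_size[0],) * 3
    assert len(output_size) == 3, "output_size can have up to 3 elements"
    return tuple(output_size)

assert resolve_output_size((8, 16, 16), []) == (8, 16, 16)
assert resolve_output_size((8, 16, 16), [4]) == (4, 4, 4)
assert resolve_output_size((8, 16, 16), [2, 4, 4]) == (2, 4, 4)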
10 changes: 10 additions & 0 deletions python/tvm/relay/op/nn/_nn.py
@@ -257,6 +257,16 @@ def legalize_conv2d_transpose(attrs, inputs, types):
reg.register_pattern("nn.adaptive_avg_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE)


# adaptive_max_pool3d
reg.register_schedule("nn.adaptive_max_pool3d", strategy.schedule_adaptive_pool)
reg.register_pattern("nn.adaptive_max_pool3d", OpPattern.OUT_ELEMWISE_FUSABLE)


# adaptive_avg_pool3d
reg.register_schedule("nn.adaptive_avg_pool3d", strategy.schedule_adaptive_pool)
reg.register_pattern("nn.adaptive_avg_pool3d", OpPattern.OUT_ELEMWISE_FUSABLE)


# leaky_relu
reg.register_broadcast_schedule("nn.leaky_relu")
reg.register_pattern("nn.leaky_relu", OpPattern.ELEMWISE)
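These registrations reuse the generic adaptive-pool schedule from the op strategy, and OUT_ELEMWISE_FUSABLE lets elementwise consumers fuse into the pooling output. A hedged usage sketch, assuming a TVM build that includes this commit:

import tvm
from tvm import relay

x = relay.var("x", shape=(1, 16, 8, 32, 32), dtype="float32")
# adaptive_avg_pool3d with output_size=(1, 1, 1) is global average pooling;
# the relu that follows can fuse into the pooling output (OUT_ELEMWISE_FUSABLE).
y = relay.nn.relu(relay.nn.adaptive_avg_pool3d(x, output_size=(1, 1, 1)))
mod = tvm.IRModule.from_expr(relay.Function([x], y))
lib = relay.build(mod, target="llvm")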
92 changes: 92 additions & 0 deletions python/tvm/relay/op/nn/nn.py
@@ -2371,3 +2371,95 @@ def adaptive_avg_pool2d(data,
"""
output_size = output_size or []
return _make.adaptive_avg_pool2d(data, output_size, layout)


def adaptive_max_pool3d(data,
output_size=None,
layout="NCDHW"):
r"""3D adaptive max pooling operator. This operator is experimental.
This operator takes data as input and does 3D max value calculation
across each window represented by DxWxH.
In the default case, where the data_layout is `NCDHW`
a data Tensor with shape `(batch_size, in_channels, depth, height, width)`,
to produce an output Tensor with shape
(batch_size, in_channels, output_depth, output_height, output_width).
The pooling kernel and stride sizes are automatically chosen for
desired output sizes.
For output_size:
If this argument is not provided, input depth, height and width will be used
as output depth, height and width.
If a single integer is provided for output_size, the output size is
(N x C x output_size x output_size x output_size) for any input (NCDHW).
If a tuple of integers (depth, height, width) is provided for output_size,
the output size is (N x C x depth x height x width) for any input (NCDHW).
Parameters
----------
data : tvm.relay.Expr
The input data to the operator.
output_size : tuple of int, optional
Output depth, height and width.
layout : str, optional
Layout of the input.
Returns
-------
result : tvm.relay.Expr
The computed result.
"""
output_size = output_size or []
return _make.adaptive_max_pool3d(data, output_size, layout)


def adaptive_avg_pool3d(data,
output_size=None,
layout="NCDHW"):
r"""3D adaptive avg pooling operator. This operator is experimental.
This operator takes data as input and does 3D avg value calculation
across each window represented by DxWxH.
In the default case, where the data_layout is `NCDHW`
a data Tensor with shape `(batch_size, in_channels, depth, height, width)`,
to produce an output Tensor with shape
(batch_size, in_channels, output_depth, output_height, output_width).
The pooling kernel and stride sizes are automatically chosen for
desired output sizes.
For output_size:
If this argument is not provided, input depth, height and width will be used
as output depth, height and width.
If a single integer is provided for output_size, the output size is
(N x C x output_size x output_size x output_size) for any input (NCDHW).
If a tuple of integers (depth, height, width) is provided for output_size,
the output size is (N x C x depth x height x width) for any input (NCDHW).
Parameters
----------
data : tvm.relay.Expr
The input data to the operator.
output_size : tuple of int, optional
Output depth, height and width.
layout : str, optional
Layout of the input.
Returns
-------
result : tvm.relay.Expr
The computed result.
"""
output_size = output_size or []
return _make.adaptive_avg_pool3d(data, output_size, layout)
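To check the three output_size cases described in these docstrings, relay type inference can be run on a small program; a sketch, assuming a TVM build that includes this commit:

import tvm
from tvm import relay

def inferred_shape(output_size):
    x = relay.var("x", shape=(2, 3, 8, 16, 16), dtype="float32")
    y = relay.nn.adaptive_max_pool3d(x, output_size=output_size)
    mod = tvm.IRModule.from_expr(relay.Function([x], y))
    return relay.transform.InferType()(mod)["main"].body.checked_type.shape

print(inferred_shape(None))       # [2, 3, 8, 16, 16]: input D, H, W kept
print(inferred_shape((4,)))       # [2, 3, 4, 4, 4]:   single int broadcast
print(inferred_shape((2, 4, 8)))  # [2, 3, 2, 4, 8]:   explicit (D, H, W)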
175 changes: 174 additions & 1 deletion src/relay/op/nn/pooling.cc
@@ -537,7 +537,6 @@ RELAY_REGISTER_OP("nn.adaptive_avg_pool2d")
PoolInferCorrectLayout<AdaptivePool2DAttrs>)
.set_attr<FTVMCompute>("FTVMCompute", AdaptivePool2DCompute<topi::nn::kAvgPool>);


// relay.nn.adaptive_max_pool2d
Expr MakeAdaptiveMaxPool2D(Expr data,
Array<IndexExpr> output_size,
@@ -577,6 +576,180 @@ RELAY_REGISTER_OP("nn.adaptive_max_pool2d")
.set_attr<FTVMCompute>("FTVMCompute", AdaptivePool2DCompute<topi::nn::kMaxPool>);


TVM_REGISTER_NODE_TYPE(AdaptivePool3DAttrs);

bool AdaptivePool3DRel(const Array<Type>& types,
int num_inputs,
const Attrs& attrs,
const TypeReporter& reporter) {
CHECK_EQ(types.size(), 2);
const auto* data = types[0].as<TensorTypeNode>();
if (data == nullptr) { return false; }
const auto dshape = data->shape;
CHECK_GE(dshape.size(), 3U)
<< "Pool3D only support input >= 3-D: input must have depth, height and width";
const auto* param = attrs.as<AdaptivePool3DAttrs>();
CHECK(param != nullptr);

Layout layout(param->layout);
CHECK(layout.Contains(LayoutAxis::Get('D')) && layout.Contains(LayoutAxis::Get('H')) &&
layout.Contains(LayoutAxis::Get('W')) && !layout.Contains(LayoutAxis::Get('d')) &&
!layout.Contains(LayoutAxis::Get('h')) && !layout.Contains(LayoutAxis::Get('w')))
<< "Invalid layout " << layout
<< ". Pool3D layout must have D, H and W, which cannot be split";

const auto didx = layout.IndexOf(LayoutAxis::Get('D'));
const auto hidx = layout.IndexOf(LayoutAxis::Get('H'));
const auto widx = layout.IndexOf(LayoutAxis::Get('W'));
Array<IndexExpr> oshape(dshape);
auto output_size = param->output_size;
CHECK_LE(output_size.size(), 3U)
<< "output_size can have up to 3 elements.";
IndexExpr output_depth, output_height, output_width;
if (output_size.empty()) {
output_depth = dshape[didx];
output_height = dshape[hidx];
output_width = dshape[widx];
} else if (output_size.size() == 1) {
output_depth = output_size[0];
output_height = output_size[0];
output_width = output_size[0];
} else {
output_depth = output_size[0];
output_height = output_size[1];
output_width = output_size[2];
}

oshape.Set(didx, output_depth);
oshape.Set(hidx, output_height);
oshape.Set(widx, output_width);

// assign output type
reporter->Assign(types[1], TensorType(oshape, data->dtype));
return true;
}

template<topi::nn::PoolType mode>
Array<te::Tensor> AdaptivePool3DCompute(const Attrs& attrs,
const Array<te::Tensor>& inputs,
const Type& out_type) {
static const Layout kNCDHW("NCDHW");
const auto* param = attrs.as<AdaptivePool3DAttrs>();
CHECK(param != nullptr);
Layout layout(param->layout);
CHECK(BijectiveLayoutNode::make(layout, kNCDHW).defined())
<< "Adaptive pool3d currently only supports layouts that are convertible from NCDHW";
CHECK_EQ(layout.IndexOf(LayoutAxis::Get('d')), -1)
<< "Adaptive pool3d does not support input split on depth";
CHECK_EQ(layout.IndexOf(LayoutAxis::Get('h')), -1)
<< "Adaptive pool3d does not support input split on height";
CHECK_EQ(layout.IndexOf(LayoutAxis::Get('w')), -1)
<< "Adaptive pool3d does not support input split on width";

CHECK(inputs[0].ndim() == 5U || inputs[0].ndim() == 6U)
<< "Pool3D only support 5-D input (e.g., NCDHW)"
<< " or 6-D input (last dimension is a split of channel)";

auto output_size = param->output_size;
const auto didx = layout.IndexOf(LayoutAxis::Get('D'));
const auto hidx = layout.IndexOf(LayoutAxis::Get('H'));
const auto widx = layout.IndexOf(LayoutAxis::Get('W'));
IndexExpr output_depth, output_height, output_width;
if (output_size.empty()) {
output_depth = inputs[0]->shape[didx];
output_height = inputs[0]->shape[hidx];
output_width = inputs[0]->shape[widx];
} else if (output_size.size() == 1) {
output_depth = output_size[0];
output_height = output_size[0];
output_width = output_size[0];
} else {
output_depth = output_size[0];
output_height = output_size[1];
output_width = output_size[2];
}

auto osize = Array<IndexExpr>{ output_depth, output_height, output_width };
return Array<te::Tensor> {
topi::nn::adaptive_pool3d(inputs[0], osize, mode, layout.name())
};
}

// relay.nn.adaptive_max_pool3d
Expr MakeAdaptiveMaxPool3D(Expr data,
Array<IndexExpr> output_size,
std::string layout) {
auto attrs = make_object<AdaptivePool3DAttrs>();
attrs->output_size = std::move(output_size);
attrs->layout = std::move(layout);
static const Op& op = Op::Get("nn.adaptive_max_pool3d");
return CallNode::make(op, {data}, Attrs(attrs), {});
}

TVM_REGISTER_GLOBAL("relay.op.nn._make.adaptive_max_pool3d")
.set_body_typed(MakeAdaptiveMaxPool3D);

RELAY_REGISTER_OP("nn.adaptive_max_pool3d")
.describe(R"code(Adaptive max pooling operation for 3D data.
- **data**: This depends on the `layout` parameter. Input is 5D array of shape
(batch_size, channels, depth, height, width) if `layout` is `NCDHW`.
- **output_size**: If this argument is not provided, input depth, height and width will be used
as output depth, height and width.
If a single integer is provided for output_size, the output size is
(N x C x output_size x output_size x output_size) for any input (NCDHW).
If a tuple of integers (depth, height, width) is provided for output_size,
the output size is (N x C x depth x height x width) for any input (NCDHW).
- **out**: This depends on the `layout` parameter. Output is 5D array of shape
(batch_size, channels, output_depth, output_height, output_width) if `layout` is `NCDHW`.
)code" TVM_ADD_FILELINE)
.set_attrs_type<AdaptivePool3DAttrs>()
.set_num_inputs(1)
.add_argument("data", "Tensor", "The input tensor.")
.set_support_level(10)
.add_type_rel("AdaptiveMaxPool3D", AdaptivePool3DRel)
.set_attr<FInferCorrectLayout>("FInferCorrectLayout",
PoolInferCorrectLayout<AdaptivePool3DAttrs>)
.set_attr<FTVMCompute>("FTVMCompute", AdaptivePool3DCompute<topi::nn::kMaxPool>);

// relay.nn.adaptive_avg_pool3d
Expr MakeAdaptiveAvgPool3D(Expr data,
Array<IndexExpr> output_size,
std::string layout) {
auto attrs = make_object<AdaptivePool3DAttrs>();
attrs->output_size = std::move(output_size);
attrs->layout = std::move(layout);
static const Op& op = Op::Get("nn.adaptive_avg_pool3d");
return CallNode::make(op, {data}, Attrs(attrs), {});
}

TVM_REGISTER_GLOBAL("relay.op.nn._make.adaptive_avg_pool3d")
.set_body_typed(MakeAdaptiveAvgPool3D);

RELAY_REGISTER_OP("nn.adaptive_avg_pool3d")
.describe(R"code(Adaptive avg pooling operation for 3D data.
- **data**: This depends on the `layout` parameter. Input is 5D array of shape
(batch_size, channels, depth, height, width) if `layout` is `NCDHW`.
- **output_size**: If this argument is not provided, input depth, height and width will be used
as output depth, height and width.
If a single integer is provided for output_size, the output size is
(N x C x output_size x output_size x output_size) for any input (NCDHW).
If a tuple of integers (depth, height, width) is provided for output_size,
the output size is (N x C x depth x height x width) for any input (NCDHW).
- **out**: This depends on the `layout` parameter. Output is 5D array of shape
(batch_size, channels, output_depth, output_height, output_width) if `layout` is `NCDHW`.
)code" TVM_ADD_FILELINE)
.set_attrs_type<AdaptivePool3DAttrs>()
.set_num_inputs(1)
.add_argument("data", "Tensor", "The input tensor.")
.set_support_level(10)
.add_type_rel("AdaptiveAvgPool3D", AdaptivePool3DRel)
.set_attr<FInferCorrectLayout>("FInferCorrectLayout",
PoolInferCorrectLayout<AdaptivePool3DAttrs>)
.set_attr<FTVMCompute>("FTVMCompute", AdaptivePool3DCompute<topi::nn::kAvgPool>);


bool Pool2DGradRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
const TypeReporter& reporter) {
CHECK_EQ(types.size(), 3);
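The compute itself is delegated to TOPI (topi::nn::adaptive_pool3d). Adaptive pooling is commonly defined by the per-output-index window bounds start = floor(i * in / out) and end = ceil((i + 1) * in / out); a NumPy reference sketch of 3D adaptive average pooling under that assumption (not the TOPI code itself), useful for checking results against the NCDHW op:

import numpy as np

def adaptive_avg_pool3d_ref(x, out_dhw):
    """Reference 3D adaptive average pooling for NCDHW input.

    Assumes the usual adaptive window rule:
    start = floor(i * in / out), end = ceil((i + 1) * in / out).
    """
    n, c, d, h, w = x.shape
    od, oh, ow = out_dhw
    out = np.empty((n, c, od, oh, ow), dtype=x.dtype)
    for i in range(od):
        d0, d1 = i * d // od, -((i + 1) * d // -od)  # floor, ceil
        for j in range(oh):
            h0, h1 = j * h // oh, -((j + 1) * h // -oh)
            for k in range(ow):
                w0, w1 = k * w // ow, -((k + 1) * w // -ow)
                out[:, :, i, j, k] = x[:, :, d0:d1, h0:h1, w0:w1].mean(axis=(2, 3, 4))
    return out

x = np.random.rand(1, 2, 8, 9, 10).astype("float32")
assert adaptive_avg_pool3d_ref(x, (8, 9, 10)).shape == (1, 2, 8, 9, 10)
assert np.allclose(adaptive_avg_pool3d_ref(x, (1, 1, 1))[0, 0, 0, 0, 0],
                   x[0, 0].mean())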
