
[Relay, TOPI] Refactor Adaptive pool and add 3d support (#5049)
* add stub for nd impl

* refactored indices compute

* refactored divide step

* remove unused variables, add doc

* fix lint

* add relay op def

* add python registration

* refactor topi test

* update relay tests, but test result is weird

* workaround for weird bug

* add relay adaptive pool 3d test

* add topi tests

* update doc for 3d

* typo fix

* fix lint

* add more tests including NDHWC
masahi authored Mar 15, 2020
1 parent b38c65c commit 7c5ff50
Showing 11 changed files with 528 additions and 97 deletions.
15 changes: 15 additions & 0 deletions include/tvm/relay/attrs/nn.h
@@ -528,6 +528,21 @@ struct AdaptivePool2DAttrs : public tvm::AttrsNode<AdaptivePool2DAttrs> {
}
};

struct AdaptivePool3DAttrs : public tvm::AttrsNode<AdaptivePool3DAttrs> {
Array<IndexExpr> output_size;
std::string layout;

TVM_DECLARE_ATTRS(AdaptivePool3DAttrs, "relay.attrs.AdaptivePool3DAttrs") {
TVM_ATTR_FIELD(output_size).set_default(Array<IndexExpr>({}))
.describe("Output depth, height and width.");
TVM_ATTR_FIELD(layout).set_default("NCDHW")
.describe("Dimension ordering of data and weight. Can be 'NCDHW', 'NDHWC', etc."
"'N', 'C', 'D', 'H', 'W' stands for batch, channel, depth, height, and width"
"dimensions respectively. Convolution is applied on 'D', 'H' and"
"'W' dimensions.");
}
};


/*! \brief Attributes for 1D max pool operator */
struct MaxPool1DAttrs : public tvm::AttrsNode<MaxPool1DAttrs> {
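The output_size attribute drives the shape resolution used by the type relation added in pooling.cc below: an empty array keeps the input depth, height and width; a single element is broadcast to all three spatial axes; three elements map to (D, H, W). A minimal Python sketch of that rule (the helper name resolve_output_size is hypothetical, for illustration only):

def resolve_output_size(in_dhw, output_size):
    """Mirror the output_size resolution rule of AdaptivePool3DRel."""
    if len(output_size) == 0:      # keep the input (depth, height, width)
        return tuple(in_dhw)
    if len(output_size) == 1:      # single int broadcasts to a cube
        return (output_size[0],) * 3
    assert len(output_size) == 3, "output_size can have up to 3 elements"
    return tuple(output_size)

assert resolve_output_size((8, 16, 16), []) == (8, 16, 16)
assert resolve_output_size((8, 16, 16), [4]) == (4, 4, 4)
assert resolve_output_size((8, 16, 16), [2, 4, 4]) == (2, 4, 4)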
10 changes: 10 additions & 0 deletions python/tvm/relay/op/nn/_nn.py
@@ -257,6 +257,16 @@ def legalize_conv2d_transpose(attrs, inputs, types):
reg.register_pattern("nn.adaptive_avg_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE)


# adaptive_max_pool3d
reg.register_schedule("nn.adaptive_max_pool3d", strategy.schedule_adaptive_pool)
reg.register_pattern("nn.adaptive_max_pool3d", OpPattern.OUT_ELEMWISE_FUSABLE)


# adaptive_avg_pool3d
reg.register_schedule("nn.adaptive_avg_pool3d", strategy.schedule_adaptive_pool)
reg.register_pattern("nn.adaptive_avg_pool3d", OpPattern.OUT_ELEMWISE_FUSABLE)


# leaky_relu
reg.register_broadcast_schedule("nn.leaky_relu")
reg.register_pattern("nn.leaky_relu", OpPattern.ELEMWISE)
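These registrations reuse the generic adaptive-pool schedule from the op strategy, and OUT_ELEMWISE_FUSABLE lets elementwise consumers fuse into the pooling output. A hedged usage sketch, assuming a TVM build that includes this commit:

import tvm
from tvm import relay

x = relay.var("x", shape=(1, 16, 8, 32, 32), dtype="float32")
# adaptive_avg_pool3d with output_size=(1, 1, 1) is global average pooling;
# the relu that follows can fuse into the pooling output (OUT_ELEMWISE_FUSABLE).
y = relay.nn.relu(relay.nn.adaptive_avg_pool3d(x, output_size=(1, 1, 1)))
mod = tvm.IRModule.from_expr(relay.Function([x], y))
lib = relay.build(mod, target="llvm")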
92 changes: 92 additions & 0 deletions python/tvm/relay/op/nn/nn.py
@@ -2371,3 +2371,95 @@ def adaptive_avg_pool2d(data,
"""
output_size = output_size or []
return _make.adaptive_avg_pool2d(data, output_size, layout)


def adaptive_max_pool3d(data,
output_size=None,
layout="NCDHW"):
r"""3D adaptive max pooling operator. This operator is experimental.
This operator takes data as input and does 3D max value calculation
across each window represented by DxWxH.
In the default case, where the data_layout is `NCDHW`
a data Tensor with shape `(batch_size, in_channels, depth, height, width)`,
to produce an output Tensor with shape
(batch_size, in_channels, output_depth, output_height, output_width).
The pooling kernel and stride sizes are automatically chosen for
desired output sizes.
For output_size:
If this argument is not provided, input depth, height and width will be used
as output depth, height and width.
If a single integer is provided for output_size, the output size is
(N x C x output_size x output_size x output_size) for any input (NCDHW).
If a tuple of integers (depth, height, width) is provided for output_size,
the output size is (N x C x depth x height x width) for any input (NCDHW).
Parameters
----------
data : tvm.relay.Expr
The input data to the operator.
output_size : tuple of int, optional
Output depth, height and width.
layout : str, optional
Layout of the input.
Returns
-------
result : tvm.relay.Expr
The computed result.
"""
output_size = output_size or []
return _make.adaptive_max_pool3d(data, output_size, layout)


def adaptive_avg_pool3d(data,
output_size=None,
layout="NCDHW"):
r"""3D adaptive avg pooling operator. This operator is experimental.
This operator takes data as input and does 3D avg value calculation
across each window represented by DxWxH.
In the default case, where the data_layout is `NCDHW`
a data Tensor with shape `(batch_size, in_channels, depth, height, width)`,
to produce an output Tensor with shape
(batch_size, in_channels, output_depth, output_height, output_width).
The pooling kernel and stride sizes are automatically chosen for
desired output sizes.
For output_size:
If this argument is not provided, input depth, height and width will be used
as output depth, height and width.
If a single integer is provided for output_size, the output size is
(N x C x output_size x output_size x output_size) for any input (NCDHW).
If a tuple of integers (depth, height, width) is provided for output_size,
the output size is (N x C x depth x height x width) for any input (NCDHW).
Parameters
----------
data : tvm.relay.Expr
The input data to the operator.
output_size : tuple of int, optional
Output depth, height and width.
layout : str, optional
Layout of the input.
Returns
-------
result : tvm.relay.Expr
The computed result.
"""
output_size = output_size or []
return _make.adaptive_avg_pool3d(data, output_size, layout)
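To check the three output_size cases described in these docstrings, relay type inference can be run on a small program; a sketch, assuming a TVM build that includes this commit:

import tvm
from tvm import relay

def inferred_shape(output_size):
    x = relay.var("x", shape=(2, 3, 8, 16, 16), dtype="float32")
    y = relay.nn.adaptive_max_pool3d(x, output_size=output_size)
    mod = tvm.IRModule.from_expr(relay.Function([x], y))
    return relay.transform.InferType()(mod)["main"].body.checked_type.shape

print(inferred_shape(None))       # [2, 3, 8, 16, 16]: input D, H, W kept
print(inferred_shape((4,)))       # [2, 3, 4, 4, 4]:   single int broadcast
print(inferred_shape((2, 4, 8)))  # [2, 3, 2, 4, 8]:   explicit (D, H, W)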
175 changes: 174 additions & 1 deletion src/relay/op/nn/pooling.cc
@@ -537,7 +537,6 @@ RELAY_REGISTER_OP("nn.adaptive_avg_pool2d")
PoolInferCorrectLayout<AdaptivePool2DAttrs>)
.set_attr<FTVMCompute>("FTVMCompute", AdaptivePool2DCompute<topi::nn::kAvgPool>);


// relay.nn.adaptive_max_pool2d
Expr MakeAdaptiveMaxPool2D(Expr data,
Array<IndexExpr> output_size,
@@ -577,6 +576,180 @@ RELAY_REGISTER_OP("nn.adaptive_max_pool2d")
.set_attr<FTVMCompute>("FTVMCompute", AdaptivePool2DCompute<topi::nn::kMaxPool>);


TVM_REGISTER_NODE_TYPE(AdaptivePool3DAttrs);

bool AdaptivePool3DRel(const Array<Type>& types,
int num_inputs,
const Attrs& attrs,
const TypeReporter& reporter) {
CHECK_EQ(types.size(), 2);
const auto* data = types[0].as<TensorTypeNode>();
if (data == nullptr) { return false; }
const auto dshape = data->shape;
CHECK_GE(dshape.size(), 3U)
<< "Pool3D only support input >= 3-D: input must have depth, height and width";
const auto* param = attrs.as<AdaptivePool3DAttrs>();
CHECK(param != nullptr);

Layout layout(param->layout);
CHECK(layout.Contains(LayoutAxis::Get('D')) && layout.Contains(LayoutAxis::Get('H')) &&
layout.Contains(LayoutAxis::Get('W')) && !layout.Contains(LayoutAxis::Get('d')) &&
!layout.Contains(LayoutAxis::Get('h')) && !layout.Contains(LayoutAxis::Get('w')))
<< "Invalid layout " << layout
<< ". Pool3D layout must have D, H and W, which cannot be split";

const auto didx = layout.IndexOf(LayoutAxis::Get('D'));
const auto hidx = layout.IndexOf(LayoutAxis::Get('H'));
const auto widx = layout.IndexOf(LayoutAxis::Get('W'));
Array<IndexExpr> oshape(dshape);
auto output_size = param->output_size;
CHECK_LE(output_size.size(), 3U)
<< "output_size can have up to 3 elements.";
IndexExpr output_depth, output_height, output_width;
if (output_size.empty()) {
output_depth = dshape[didx];
output_height = dshape[hidx];
output_width = dshape[widx];
} else if (output_size.size() == 1) {
output_depth = output_size[0];
output_height = output_size[0];
output_width = output_size[0];
} else {
output_depth = output_size[0];
output_height = output_size[1];
output_width = output_size[2];
}

oshape.Set(didx, output_depth);
oshape.Set(hidx, output_height);
oshape.Set(widx, output_width);

// assign output type
reporter->Assign(types[1], TensorType(oshape, data->dtype));
return true;
}

template<topi::nn::PoolType mode>
Array<te::Tensor> AdaptivePool3DCompute(const Attrs& attrs,
const Array<te::Tensor>& inputs,
const Type& out_type) {
static const Layout kNCDHW("NCDHW");
const auto* param = attrs.as<AdaptivePool3DAttrs>();
CHECK(param != nullptr);
Layout layout(param->layout);
CHECK(BijectiveLayoutNode::make(layout, kNCDHW).defined())
<< "Adaptive pool3d currently only supports layouts that are convertible from NCDHW";
CHECK_EQ(layout.IndexOf(LayoutAxis::Get('d')), -1)
<< "Adaptive pool3d does not support input split on depth";
CHECK_EQ(layout.IndexOf(LayoutAxis::Get('h')), -1)
<< "Adaptive pool3d does not support input split on height";
CHECK_EQ(layout.IndexOf(LayoutAxis::Get('w')), -1)
<< "Adaptive pool3d does not support input split on width";

CHECK(inputs[0].ndim() == 5U || inputs[0].ndim() == 6U)
<< "Pool3D only support 5-D input (e.g., NCDHW)"
<< " or 6-D input (last dimension is a split of channel)";

auto output_size = param->output_size;
const auto didx = layout.IndexOf(LayoutAxis::Get('D'));
const auto hidx = layout.IndexOf(LayoutAxis::Get('H'));
const auto widx = layout.IndexOf(LayoutAxis::Get('W'));
IndexExpr output_depth, output_height, output_width;
if (output_size.empty()) {
output_depth = inputs[0]->shape[didx];
output_height = inputs[0]->shape[hidx];
output_width = inputs[0]->shape[widx];
} else if (output_size.size() == 1) {
output_depth = output_size[0];
output_height = output_size[0];
output_width = output_size[0];
} else {
output_depth = output_size[0];
output_height = output_size[1];
output_width = output_size[2];
}

auto osize = Array<IndexExpr>{ output_depth, output_height, output_width };
return Array<te::Tensor> {
topi::nn::adaptive_pool3d(inputs[0], osize, mode, layout.name())
};
}

// relay.nn.adaptive_max_pool3d
Expr MakeAdaptiveMaxPool3D(Expr data,
Array<IndexExpr> output_size,
std::string layout) {
auto attrs = make_object<AdaptivePool3DAttrs>();
attrs->output_size = std::move(output_size);
attrs->layout = std::move(layout);
static const Op& op = Op::Get("nn.adaptive_max_pool3d");
return CallNode::make(op, {data}, Attrs(attrs), {});
}

TVM_REGISTER_GLOBAL("relay.op.nn._make.adaptive_max_pool3d")
.set_body_typed(MakeAdaptiveMaxPool3D);

RELAY_REGISTER_OP("nn.adaptive_max_pool3d")
.describe(R"code(Adaptive max pooling operation for 3D data.
- **data**: This depends on the `layout` parameter. Input is 5D array of shape
(batch_size, channels, depth, height, width) if `layout` is `NCDHW`.
- **output_size**: If this argument is not provided, input depth, height and width will be used
as output depth, height and width.
If a single integer is provided for output_size, the output size is
(N x C x output_size x output_size x output_size) for any input (NCDHW).
If a tuple of integers (depth, height, width) is provided for output_size,
the output size is (N x C x depth x height x width) for any input (NCDHW).
- **out**: This depends on the `layout` parameter. Output is 5D array of shape
(batch_size, channels, output_depth, output_height, output_width) if `layout` is `NCDHW`.
)code" TVM_ADD_FILELINE)
.set_attrs_type<AdaptivePool3DAttrs>()
.set_num_inputs(1)
.add_argument("data", "Tensor", "The input tensor.")
.set_support_level(10)
.add_type_rel("AdaptiveMaxPool3D", AdaptivePool3DRel)
.set_attr<FInferCorrectLayout>("FInferCorrectLayout",
PoolInferCorrectLayout<AdaptivePool3DAttrs>)
.set_attr<FTVMCompute>("FTVMCompute", AdaptivePool3DCompute<topi::nn::kMaxPool>);

// relay.nn.adaptive_avg_pool3d
Expr MakeAdaptiveAvgPool3D(Expr data,
Array<IndexExpr> output_size,
std::string layout) {
auto attrs = make_object<AdaptivePool3DAttrs>();
attrs->output_size = std::move(output_size);
attrs->layout = std::move(layout);
static const Op& op = Op::Get("nn.adaptive_avg_pool3d");
return CallNode::make(op, {data}, Attrs(attrs), {});
}

TVM_REGISTER_GLOBAL("relay.op.nn._make.adaptive_avg_pool3d")
.set_body_typed(MakeAdaptiveAvgPool3D);

RELAY_REGISTER_OP("nn.adaptive_avg_pool3d")
.describe(R"code(Adaptive avg pooling operation for 3D data.
- **data**: This depends on the `layout` parameter. Input is 5D array of shape
(batch_size, channels, depth, height, width) if `layout` is `NCDHW`.
- **output_size**: If this argument is not provided, input depth, height and width will be used
as output depth, height and width.
If a single integer is provided for output_size, the output size is
(N x C x output_size x output_size x output_size) for any input (NCDHW).
If a tuple of integers (depth, height, width) is provided for output_size,
the output size is (N x C x depth x height x width) for any input (NCDHW).
- **out**: This depends on the `layout` parameter. Output is 5D array of shape
(batch_size, channels, output_depth, output_height, output_width) if `layout` is `NCDHW`.
)code" TVM_ADD_FILELINE)
.set_attrs_type<AdaptivePool3DAttrs>()
.set_num_inputs(1)
.add_argument("data", "Tensor", "The input tensor.")
.set_support_level(10)
.add_type_rel("AdaptiveAvgPool3D", AdaptivePool3DRel)
.set_attr<FInferCorrectLayout>("FInferCorrectLayout",
PoolInferCorrectLayout<AdaptivePool3DAttrs>)
.set_attr<FTVMCompute>("FTVMCompute", AdaptivePool3DCompute<topi::nn::kAvgPool>);


bool Pool2DGradRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
const TypeReporter& reporter) {
CHECK_EQ(types.size(), 3);
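The compute itself is delegated to TOPI (topi::nn::adaptive_pool3d). Adaptive pooling is commonly defined by the per-output-index window bounds start = floor(i * in / out) and end = ceil((i + 1) * in / out); a NumPy reference sketch of 3D adaptive average pooling under that assumption (not the TOPI code itself), useful for checking results against the NCDHW op:

import numpy as np

def adaptive_avg_pool3d_ref(x, out_dhw):
    """Reference 3D adaptive average pooling for NCDHW input.

    Assumes the usual adaptive window rule:
    start = floor(i * in / out), end = ceil((i + 1) * in / out).
    """
    n, c, d, h, w = x.shape
    od, oh, ow = out_dhw
    out = np.empty((n, c, od, oh, ow), dtype=x.dtype)
    for i in range(od):
        d0, d1 = i * d // od, -((i + 1) * d // -od)  # floor, ceil
        for j in range(oh):
            h0, h1 = j * h // oh, -((j + 1) * h // -oh)
            for k in range(ow):
                w0, w1 = k * w // ow, -((k + 1) * w // -ow)
                out[:, :, i, j, k] = x[:, :, d0:d1, h0:h1, w0:w1].mean(axis=(2, 3, 4))
    return out

x = np.random.rand(1, 2, 8, 9, 10).astype("float32")
assert adaptive_avg_pool3d_ref(x, (8, 9, 10)).shape == (1, 2, 8, 9, 10)
assert np.allclose(adaptive_avg_pool3d_ref(x, (1, 1, 1))[0, 0, 0, 0, 0],
                   x[0, 0].mean())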
