PaddlePaddle · wanghaoshuang · Jan 23, 2018 · Oct 11, 2017 · Oct 12, 2017 · Oct 16, 2017
diff --git a/paddle/operators/im2sequence_op.cc b/paddle/operators/im2sequence_op.cc
@@ -0,0 +1,157 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/im2sequence_op.h"
+
+namespace paddle {
+namespace operators {
+
+// Forward operator of im2sequence. InferShape validates X and the window
+// attributes and sets Out to {N * out_h * out_w, C * kernel_h * kernel_w}.
+class Im2SequenceOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of Im2SequenceOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of Im2SequenceOp op should not be null.");
+    auto in_dim = ctx->GetInputDim("X");
+    PADDLE_ENFORCE_EQ(in_dim.size(), 4,
+                      "Input(X) format must be 4D tensor, eg., NCHW.");
+
+    auto kernels = ctx->Attrs().Get<std::vector<int>>("kernels");
+    auto strides = ctx->Attrs().Get<std::vector<int>>("strides");
+    auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
+    // The vectors are indexed below; reject wrongly sized ones up front.
+    PADDLE_ENFORCE_EQ(kernels.size(), 2UL, "Attr(kernels) needs 2 values.");
+    PADDLE_ENFORCE_EQ(strides.size(), 2UL, "Attr(strides) needs 2 values.");
+    PADDLE_ENFORCE_EQ(paddings.size(), 4UL, "Attr(paddings) needs 4 values.");
+    int output_height = OutputSize(in_dim[2], kernels[0], paddings[0],
+                                   paddings[2], strides[0]);
+    int output_width = OutputSize(in_dim[3], kernels[1], paddings[1],
+                                  paddings[3], strides[1]);
+    int batch_size = in_dim[0];
+    int img_channels = in_dim[1];
+    ctx->SetOutputDim("Out", {batch_size * output_height * output_width,
+                              img_channels * kernels[0] * kernels[1]});
+  }
+};
+
+// Declares the input, output, attributes and user-facing documentation of the
+// im2sequence op.
+class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  Im2SequenceOpMaker(OpProto* proto, OpAttrChecker* op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X",
+             "(Tensor) The input tensor has NCHW format. "
+             "N: batch size, C: channels, H: height, W: width.");
+    AddOutput("Out", "(LoDTensor) The output data of im2sequence op.");
+    AddAttr<std::vector<int>>("kernels",
+                              "(vector<int>), the "
+                              "kernels(kernel_height, kernel_width)");
+    AddAttr<std::vector<int>>("strides",
+                              "(vector<int> default:{1, 1}), the "
+                              "strides(h_stride, w_stride)")
+        .SetDefault({1, 1});
+    AddAttr<std::vector<int>>("paddings",
+                              "(vector<int> default:{0, 0, 0, 0}), the "
+                              "paddings(up_pad, left_pad, down_pad, right_pad)")
+        .SetDefault({0, 0, 0, 0});
+    // The size formulas in the text below mirror OutputSize() in the header.
+    AddComment(R"DOC(
+This op uses kernels to scan images and converts these images to sequences.
+After expanding, the number of time steps is output_height * output_width
+and the dimension of each time step is kernel_height * kernel_width * channels,
+in which:
+
+output_height =
+    1 + (padding_up + padding_down + img_height - kernel_height) /
+            stride_height;
+output_width =
+    1 + (padding_left + padding_right + img_width - kernel_width) /
+            stride_width;
+
+This op can be used after convolution neural network, and before recurrent neural network.
+
+Given:
+
+x = [[[[ 6.  2.  1.]
+       [ 8.  3.  5.]
+       [ 0.  2.  6.]]
+
+      [[ 2.  4.  4.]
+       [ 6.  3.  0.]
+       [ 6.  4.  7.]]]
+
+     [[[ 6.  7.  1.]
+       [ 5.  7.  9.]
+       [ 2.  4.  8.]]
+
+      [[ 1.  2.  1.]
+       [ 1.  3.  5.]
+       [ 9.  0.  8.]]]]
+x.dims = {2, 2, 3, 3}
+
+And:
+
+kernels = [2, 2]
+strides = [1, 1]
+paddings = [0, 0, 0, 0]
+
+Then:
+
+output.data = [[ 6.  2.  8.  3.  2.  4.  6.  3.]
+               [ 2.  1.  3.  5.  4.  4.  3.  0.]
+               [ 8.  3.  0.  2.  6.  3.  6.  4.]
+               [ 3.  5.  2.  6.  3.  0.  4.  7.]
+               [ 6.  7.  5.  7.  1.  2.  1.  3.]
+               [ 7.  1.  7.  9.  2.  1.  3.  5.]
+               [ 5.  7.  2.  4.  1.  3.  9.  0.]
+               [ 7.  9.  4.  8.  3.  5.  0.  8.]]
+output.dims = {8, 8}
+output.lod = [[0, 4, 8]]
+
+)DOC");
+  }
+};
+
+// Gradient operator of im2sequence: X@GRAD is given the same dims as X.
+class Im2SequenceGradOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+ protected:
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
+    const auto dout = framework::GradVarName("Out");
+    PADDLE_ENFORCE(ctx->HasInput(dout), "Input(Out@GRAD) shouldn't be null.");
+    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+// Registers the op pair and its CPU kernels (float only).
+namespace ops = paddle::operators;
+using CPUCtx = paddle::platform::CPUDeviceContext;
+
+REGISTER_OP(im2sequence, ops::Im2SequenceOp, ops::Im2SequenceOpMaker,
+            im2sequence_grad, ops::Im2SequenceGradOp);
+REGISTER_OP_CPU_KERNEL(im2sequence, ops::Im2SequenceKernel<CPUCtx, float>);
+REGISTER_OP_CPU_KERNEL(im2sequence_grad,
+                       ops::Im2SequenceGradKernel<CPUCtx, float>);
diff --git a/paddle/operators/im2sequence_op.cu b/paddle/operators/im2sequence_op.cu
@@ -0,0 +1,25 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#define EIGEN_USE_GPU
+#include "paddle/operators/im2sequence_op.h"
+
+// Registers the CUDA kernels (float only) of im2sequence and its gradient;
+// the kernel templates themselves come from im2sequence_op.h.
+namespace ops = paddle::operators;
+using CUDACtx = paddle::platform::CUDADeviceContext;
+
+REGISTER_OP_CUDA_KERNEL(im2sequence, ops::Im2SequenceKernel<CUDACtx, float>);
+REGISTER_OP_CUDA_KERNEL(im2sequence_grad,
+                        ops::Im2SequenceGradKernel<CUDACtx, float>);
diff --git a/paddle/operators/im2sequence_op.h b/paddle/operators/im2sequence_op.h
@@ -0,0 +1,135 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   You may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+
+#include "paddle/framework/data_layout.h"
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+#include "paddle/operators/math/im2col.h"
+#include "paddle/operators/math/math_function.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;        // shorthand for the kernels below
+using LoDTensor = framework::LoDTensor;  // type of the forward output "Out"
+
+// Number of window positions along one axis, using integer division:
+// (input_size + padding_0 + padding_1 - filter_size) / stride + 1.
+inline int OutputSize(int input_size, int filter_size, int padding_0,
+                      int padding_1, int stride) {
+  return (input_size + padding_0 + padding_1 - filter_size) / stride + 1;
+}
+
+// Forward kernel: runs Im2ColFunctor (kOCF) on every image of X and stores the
+// result in Out, then sets a one-level LoD with one sequence per image.
+template <typename DeviceContext, typename T>
+class Im2SequenceKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    const Tensor* in = ctx.Input<Tensor>("X");
+    LoDTensor* out = ctx.Output<LoDTensor>("Out");
+    out->mutable_data<T>(ctx.GetPlace());
+    // TODO(wanghaoshuang): Add layout checker after 'set_layout'
+    // being available for python API
+    // PADDLE_ENFORCE_EQ(in->layout(), framework::DataLayout::kNCHW,
+    //                  "Input(X) layout must be NCHW");
+    auto in_dim = in->dims();
+    int batch_size = in_dim[0];
+    int img_channels = in_dim[1];
+    int img_height = in_dim[2];
+    int img_width = in_dim[3];
+    auto kernels = ctx.Attr<std::vector<int>>("kernels");
+    auto strides = ctx.Attr<std::vector<int>>("strides");
+    auto paddings = ctx.Attr<std::vector<int>>("paddings");
+    int output_height = OutputSize(img_height, kernels[0], paddings[0],
+                                   paddings[2], strides[0]);
+    int output_width =
+        OutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]);
+    const std::vector<int> dilations({1, 1});  // op has no dilation attribute
+
+    // View Out as one row per image for slicing; dims are restored afterwards.
+    auto out_dims = out->dims();
+    out->Resize({batch_size, out->numel() / batch_size});
+    math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
+    auto& dev_ctx = ctx.template device_context<DeviceContext>();
+    for (int i = 0; i < batch_size; i++) {
+      const Tensor src =
+          in->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
+      Tensor dst = out->Slice(i, i + 1).Resize(
+          {output_height, output_width, img_channels, kernels[0], kernels[1]});
+      f(dev_ctx, src, dilations, strides, paddings, &dst);
+    }
+    out->Resize(out_dims);
+
+    // set lod information
+    // TODO(wanghaoshuang): Move this to InferShape
+    framework::LoD lod(1);
+    lod[0].reserve(batch_size + 1);
+    for (int i = 0, offset = 0; i < batch_size + 1; ++i) {
+      lod[0].push_back(offset);  // reserve() does not grow the level; append
+      offset += output_height * output_width;
+    }
+    out->set_lod(lod);
+  }
+};
+
+// Backward kernel: zero-fills X@GRAD, then runs Col2ImFunctor (kOCF) per image
+// to map the rows of Out@GRAD back onto it.
+template <typename DeviceContext, typename T>
+class Im2SequenceGradKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* in = ctx.Input<Tensor>("X");
+    // Work on a shallow copy (shares the buffer, like the Slice() copies below)
+    // so the input gradient's dims are never modified through a const_cast.
+    Tensor d_out = *ctx.Input<Tensor>(framework::GradVarName("Out"));
+    auto* d_x = ctx.Output<Tensor>(framework::GradVarName("X"));
+    d_x->mutable_data<T>(ctx.GetPlace());
+
+    auto x_v = framework::EigenVector<T>::Flatten(*d_x);
+    auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
+    x_v.device(place) = x_v.constant(0.0);
+
+    auto in_dim = in->dims();
+    int batch_size = in_dim[0];
+    int img_channels = in_dim[1];
+    int img_height = in_dim[2];
+    int img_width = in_dim[3];
+    auto kernels = ctx.Attr<std::vector<int>>("kernels");
+    auto strides = ctx.Attr<std::vector<int>>("strides");
+    auto paddings = ctx.Attr<std::vector<int>>("paddings");
+    int output_height = OutputSize(img_height, kernels[0], paddings[0],
+                                   paddings[2], strides[0]);
+    int output_width =
+        OutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]);
+    const std::vector<int> dilations({1, 1});
+
+    // One row per image, so each image's gradient can be sliced out.
+    d_out.Resize({batch_size, d_out.numel() / batch_size});
+    math::Col2ImFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
+    auto& dev_ctx = ctx.template device_context<DeviceContext>();
+    for (int i = 0; i < batch_size; i++) {
+      Tensor dst =
+          d_x->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
+      const Tensor src = d_out.Slice(i, i + 1).Resize(
+          {output_height, output_width, img_channels, kernels[0], kernels[1]});
+      f(dev_ctx, src, dilations, strides, paddings, &dst);
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle