Merged PR 1144: Implement transpose kernel

gramalingam · gramalingam · commit bb3737f5b8c0 · 2018-03-29T18:08:39.000Z
Implement transpose kernel Related work items: #147
diff --git a/lotus/core/providers/cpu/tensor/transpose.cc b/lotus/core/providers/cpu/tensor/transpose.cc
@@ -0,0 +1,83 @@
+#include "core/providers/cpu/tensor/transpose.h"
+
+namespace Lotus {
+
+/* A permutation [a,b,c,...] indicates that 
+   - The 0-th dimension of the output corresponds to the a-th dimension of input
+   - The 1-st dimension of the output corresponds to the b-th dimension of input
+   - The 2-nd dimension of the output corresponds to the c-th dimension of input
+   etc.
+   */
+
+// The following is a reference (unoptimized) implementation of Transpose.
+// TODO: Optimize the implementation to use memcpy for sub-blocks that can be so copied.
+
+// Float specialization of the Transpose kernel.
+//
+// Reads input tensor 0, permutes its dimensions according to perm_ (or the
+// reversed dimension order when no "perm" attribute was supplied) and writes
+// the result to output 0, one element at a time.
+//
+// Returns Status::OK() on completion. Throws via LOTUS_THROW when the
+// permutation length differs from the input rank, and via LOTUS_ENFORCE when a
+// computed input offset is not smaller than the output element count.
+template <>
+Status Transpose<float>::compute(OpKernelContext* ctx) const {
+  const Tensor& X = *ctx->input<Tensor>(0);
+  const TensorShape& input_shape = X.shape();
+  const std::vector<int64_t>& input_dims = input_shape.GetDims();
+  const size_t rank = input_dims.size();
+
+  // Determine permutation to use:
+  // If no permutation was specified in the attributes, the default is [rank-1, ..., 0]
+  std::vector<int64_t> default_perm;
+  if (!perm_specified_) {
+    default_perm.resize(rank);
+    for (size_t i = 0; i < rank; ++i)
+      default_perm[i] = static_cast<int64_t>(rank - i - 1);
+  }
+  const std::vector<int64_t>& perm = perm_specified_ ? perm_ : default_perm;
+
+  // The constructor can only verify that perm_ is a permutation of
+  // [0, perm_.size()); the input rank is first known here. Without this check a
+  // mismatched attribute would index perm or input_dims out of bounds below.
+  if (perm.size() != rank)
+    LOTUS_THROW("Attribute perm of Transpose has ", perm.size(),
+                " entries, but the input tensor has rank ", rank, ".");
+
+  // Determine shape of output, as well as stride to be used:
+  // stride[i] indicates the stride for the input-tensor dimension corresponding
+  // to the i-th dimension of the output
+  std::vector<int64_t> output_dims(rank);
+  std::vector<size_t> stride(rank);
+  for (size_t i = 0; i < rank; ++i) {
+    const size_t inpdim = static_cast<size_t>(perm[i]);
+    output_dims[i] = input_dims[inpdim];
+    if (inpdim + 1 < rank)
+      stride[i] = input_shape.SizeFromDimension(inpdim + 1);
+    else
+      stride[i] = 1;  // innermost input dimension
+  }
+
+  TensorShape output_shape{output_dims};
+  Tensor* Y = ctx->output(0, output_shape);
+  const float* Xdata = X.data<float>();
+  float* Ydata = Y->mutable_data<float>();
+  const size_t size = static_cast<size_t>(output_shape.Size());
+
+  // y_index is the multi-dimensional index of the output element being written;
+  // it is advanced like an odometer, last dimension fastest.
+  std::vector<int64_t> y_index(rank, 0);
+  for (size_t i = 0; i < size; ++i) {
+    // convert y_index into offset in X's data
+    size_t x_offset = 0;
+    for (size_t j = 0; j < rank; ++j)
+      x_offset += static_cast<size_t>(y_index[j]) * stride[j];
+
+    // copy (x_offset is unsigned, so only the upper bound needs checking)
+    LOTUS_ENFORCE(x_offset < size);
+    Ydata[i] = Xdata[x_offset];
+
+    // increment y_index, carrying into the next-outer dimension on wrap-around
+    for (size_t k = rank; k-- > 0;) {
+      if (++y_index[k] < output_dims[k]) break;
+      y_index[k] = 0;
+    }
+  }
+
+  return Status::OK();
+}
+
+// Registers Transpose<float> as the kernel for the "Transpose" operator in the
+// ONNX domain on the CPU execution provider, with type parameter "T"
+// constrained to float tensors.
+// NOTE(review): SinceVersion(1, 2) presumably denotes the operator-set version
+// range this kernel covers - confirm against the KernelDef documentation.
+REGISTER_KERNEL(KernelDef("Transpose")
+                    .Domain(LotusIR::kOnnxDomain)
+                    .SinceVersion(1, 2)
+                    .Provider(LotusIR::kCpuExecutionProvider)
+                    .TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
+                Transpose<float>);
+
+}  // namespace Lotus
diff --git a/lotus/core/providers/cpu/tensor/transpose.h b/lotus/core/providers/cpu/tensor/transpose.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include "gsl/gsl_util"
+#include "core/common/common.h"
+#include "core/framework/op_kernel.h"
+
+namespace Lotus {
+// Kernel that permutes the dimensions of its input tensor.
+//
+// The optional "perm" attribute is read once at construction time. When it is
+// present it must be a permutation of [0, perm.size()); otherwise the
+// constructor throws via LOTUS_THROW. compute() is defined per element type in
+// the corresponding .cc file.
+template <typename T>
+class Transpose final : public OpKernel {
+ public:
+  Transpose(const OpKernelInfo& info) : OpKernel{info}, perm_specified_(false) {
+    // If the attribute cannot be read, perm_specified_ stays false and
+    // compute() falls back to its default permutation.
+    if (!info.GetAttrs<int64_t>("perm", perm_).IsOK()) {
+      return;
+    }
+    perm_specified_ = true;
+
+    // Check that perm_ is a valid permutation of [0,rank-1]: every entry must
+    // lie in range and no entry may occur twice.
+    const size_t perm_len = perm_.size();
+    std::vector<bool> visited(perm_len, false);
+    for (const int64_t axis : perm_) {
+      const bool in_range = (0 <= axis) && (axis < gsl::narrow<int64_t>(perm_len));
+      if (!in_range) {
+        LOTUS_THROW("Attribute perm of Transpose has an invalid value. Value ", axis, " is outside range.");
+      }
+      if (visited[axis]) {
+        LOTUS_THROW("Attribute perm of Transpose has an invalid value. Value ", axis, " is repeated.");
+      }
+      visited[axis] = true;
+    }
+  }
+
+  Status compute(OpKernelContext* context) const override;
+
+ private:
+  bool perm_specified_;        // true once a "perm" attribute has been read successfully
+  std::vector<int64_t> perm_;  // the attribute's values; meaningful only if perm_specified_
+};
+}  // namespace Lotus
diff --git a/lotus/test/providers/cpu/tensor/transpose_test.cc b/lotus/test/providers/cpu/tensor/transpose_test.cc
@@ -0,0 +1,117 @@
+#include "core/providers/cpu/tensor/transpose.h"
+#include "gtest/gtest.h"
+#include "test/test_utils.h"
+
+namespace Lotus {
+namespace Test {
+
+// Shared driver for the Transpose tests.
+//
+// Builds a single-node test model with one float input "X" and one float
+// output "Y", optionally attaches the "perm" attribute, feeds the given input
+// and runs the kernel against the expected shape and values.
+//
+//   input_shape / input_vals       shape and flat contents of the input tensor
+//   p_perm                         permutation to set as "perm"; nullptr leaves
+//                                  the attribute unset
+//   expected_shape / expected_vals shape and flat contents expected in the output
+template <size_t count>
+void TransposeTest(std::vector<int64_t>& input_shape,
+                   std::vector<float>& input_vals,
+                   std::vector<int64_t>* p_perm,
+                   std::vector<int64_t> expected_shape,
+                   const float (&expected_vals)[count]) {
+  // Input and output share the same float tensor type description.
+  TypeProto float_tensor_type;
+  float_tensor_type.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT);
+
+  LotusIR::NodeArg x_arg("X", &float_tensor_type);
+  LotusIR::NodeArg y_arg("Y", &float_tensor_type);
+  std::vector<LotusIR::NodeArg*> node_inputs{&x_arg};
+  std::vector<LotusIR::NodeArg*> node_outputs{&y_arg};
+
+  TestModel model("TransposeTest", node_inputs, node_outputs);
+
+  // Only set the attribute when the caller supplied a permutation.
+  if (p_perm != nullptr) {
+    model.Node().AddAttribute("perm", *p_perm);
+  }
+
+  SimpleFloatTest<Transpose> runner(model);
+  runner.AddInput(input_shape, input_vals);
+  runner.AddOutput(expected_shape);
+  runner.Run(expected_shape, expected_vals);
+}
+
+// 2-D transpose without a "perm" attribute: a 2x3 input is expected to come
+// back as its 3x2 transpose.
+TEST(TransposeOpTest, TwoDimNoAttr) {
+  std::vector<int64_t> dims{2, 3};
+  std::vector<float> data{1.0f, 2.0f, 3.0f,
+                          4.0f, 5.0f, 6.0f};
+
+  std::vector<int64_t> transposed_dims{3, 2};
+  const float transposed[] = {1.0f, 4.0f,
+                              2.0f, 5.0f,
+                              3.0f, 6.0f};
+
+  // nullptr: leave the "perm" attribute unset.
+  TransposeTest(dims, data, nullptr, transposed_dims, transposed);
+}
+
+// 2-D transpose with an explicit "perm" attribute of {1, 0}: a 2x3 input is
+// expected to come back as its 3x2 transpose.
+TEST(TransposeOpTest, TwoDim) {
+  std::vector<int64_t> dims{2, 3};
+  std::vector<float> data{1.0f, 2.0f, 3.0f,
+                          4.0f, 5.0f, 6.0f};
+
+  std::vector<int64_t> swap_axes{1, 0};
+  std::vector<int64_t> transposed_dims{3, 2};
+  const float transposed[] = {1.0f, 4.0f,
+                              2.0f, 5.0f,
+                              3.0f, 6.0f};
+
+  TransposeTest(dims, data, &swap_axes, transposed_dims, transposed);
+}
+
+// Test 3 dimensional transpose, with permutation attribute specified.
+// perm {0, 2, 1} keeps the outermost dimension and swaps the two inner ones,
+// so each 2x3 slice of the input becomes its 3x2 transpose.
+TEST(TransposeOpTest, ThreeDim) {
+  std::vector<int64_t> input_shape({4, 2, 3});
+  std::vector<float> input_vals = {
+      1.0f, 2.0f, 3.0f,
+      4.0f, 5.0f, 6.0f,
+
+      1.1f, 2.1f, 3.1f,
+      4.1f, 5.1f, 6.1f,
+
+      1.2f, 2.2f, 3.2f,
+      4.2f, 5.2f, 6.2f,
+
+      1.3f, 2.3f, 3.3f,
+      4.3f, 5.3f, 6.3f};
+
+  std::vector<int64_t> perm = {0, 2, 1};
+  std::vector<int64_t> expected_shape({4, 3, 2});
+  float expected_vals[] = {
+      1.0f, 4.0f,
+      2.0f, 5.0f,
+      3.0f, 6.0f,
+
+      1.1f, 4.1f,
+      2.1f, 5.1f,
+      3.1f, 6.1f,
+
+      1.2f, 4.2f,
+      2.2f, 5.2f,
+      3.2f, 6.2f,
+
+      1.3f, 4.3f,
+      2.3f, 5.3f,
+      3.3f, 6.3f};
+
+  TransposeTest(input_shape, input_vals, &perm, expected_shape, expected_vals);
+}
+
+// Test 3 dimensional transpose with a cyclic permutation.
+// Every other permutation exercised in this file is its own inverse, so those
+// tests cannot tell "output dim i = input dim perm[i]" apart from the inverse
+// mapping. perm {1, 2, 0} is not self-inverse: Y[i][j][k] == X[k][i][j].
+TEST(TransposeOpTest, ThreeDimCyclicPerm) {
+  std::vector<int64_t> input_shape({2, 2, 3});
+  std::vector<float> input_vals = {
+      1.0f, 2.0f, 3.0f,
+      4.0f, 5.0f, 6.0f,
+
+      7.0f, 8.0f, 9.0f,
+      10.0f, 11.0f, 12.0f};
+
+  std::vector<int64_t> perm = {1, 2, 0};
+  std::vector<int64_t> expected_shape({2, 3, 2});
+  float expected_vals[] = {
+      1.0f, 7.0f,
+      2.0f, 8.0f,
+      3.0f, 9.0f,
+
+      4.0f, 10.0f,
+      5.0f, 11.0f,
+      6.0f, 12.0f};
+
+  TransposeTest(input_shape, input_vals, &perm, expected_shape, expected_vals);
+}
+
+}  // namespace Test
+}  // namespace Lotus