resolve conflicts

YuanRisheng · Oct 18, 2021 · e0710fd · e0710fd
2 parents e3e2b50 + 28a6374
commit e0710fd
Show file tree

Hide file tree

Showing 46 changed files with 176 additions and 98 deletions.
diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
@@ -390,7 +390,7 @@ cc_library(save_load_util SRCS save_load_util.cc DEPS tensor scope layer)
 cc_test(save_load_util_test SRCS save_load_util_test.cc DEPS save_load_util tensor scope layer)
 cc_library(generator SRCS generator.cc DEPS enforce place)
 
-cc_library(tcmpt_utils SRCS tcmpt_utils.cc DEPS lod_tensor selected_rows place tcmpt)
+cc_library(tcmpt_utils SRCS tcmpt_utils.cc DEPS lod_tensor selected_rows place tcmpt var_type_traits)
 
 # Get the current working branch
 execute_process(
@@ -454,3 +454,4 @@ if(WITH_TESTING AND TEST selected_rows_test)
 endif()
 
 cc_test(scope_guard_test SRCS scope_guard_test.cc)
+cc_test(tcmpt_utils_test SRCS tcmpt_utils_test.cc DEPS tcmpt_utils)
diff --git a/paddle/fluid/framework/tcmpt_utils.cc b/paddle/fluid/framework/tcmpt_utils.cc
@@ -77,20 +77,6 @@ std::shared_ptr<pt::DenseTensor> MakeTensorImpl<pt::DenseTensor>(
       pt::TransToPtDataLayout(tensor.layout()));
 }
 
-template <>
-void ShareTensorImpl<pt::DenseTensor>(pt::DenseTensor* tensor_impl,
-                                      LoDTensor* out) {
-  out->ResetHolderWithType(tensor_impl->allocation(),
-                           pt::TransToProtoVarType(tensor_impl->type()));
-}
-
-template <>
-void ShareTensorImpl<pt::DenseTensor>(pt::DenseTensor* tensor_impl,
-                                      Tensor* out) {
-  out->ResetHolderWithType(tensor_impl->allocation(),
-                           pt::TransToProtoVarType(tensor_impl->type()));
-}
-
 std::shared_ptr<pt::TensorInterface> InputVariableToPtTensor(
     const framework::Variable& variable, const pt::TensorArgDef& arg_def) {
   auto expected_place = pt::TransToFluidPlace(arg_def.backend);

diff --git a/paddle/fluid/framework/tcmpt_utils.h b/paddle/fluid/framework/tcmpt_utils.h
@@ -49,12 +49,6 @@ std::shared_ptr<PtTensorImplT> MakeTensorImpl(const Tensor& tensor,
                                               const platform::Place& place,
                                               proto::VarType::Type type);
 
-template <typename PtTensorImplT>
-void ShareTensorImpl(PtTensorImplT* tensor_impl, LoDTensor* out);
-
-template <typename PtTensorImplT>
-void ShareTensorImpl(PtTensorImplT* tensor_impl, Tensor* out);
-
 std::shared_ptr<pt::TensorInterface> InputVariableToPtTensor(
     const framework::Variable& variable, const pt::TensorArgDef& arg_def);
 std::shared_ptr<pt::TensorInterface> OutputVariableToPtTensor(

diff --git a/paddle/fluid/framework/tcmpt_utils_test.cc b/paddle/fluid/framework/tcmpt_utils_test.cc
@@ -0,0 +1,67 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/tcmpt_utils.h"
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/selected_rows.h"
+#include "paddle/fluid/framework/variable.h"
+
+namespace paddle {
+namespace framework {
+
+// Verifies that MakeTensorImpl<pt::DenseTensor> built from a two-element
+// float LoDTensor allocated on CPUPlace exposes the same element values and
+// reports the CPU backend and FLOAT32 data type.
+TEST(TcmptUtils, MakeTensor) {
+  // 1. create tensor
+  LoDTensor x;
+  x.Resize({2});
+  x.mutable_data<float>(platform::CPUPlace());
+  x.data<float>()[0] = 0.2;
+  x.data<float>()[1] = 0.5;
+
+  // 2. test API
+  auto dense_x = MakeTensorImpl<pt::DenseTensor>(x, x.place(), x.type());
+
+  // 3. check result
+  // The expected values go through the same double -> float conversion as the
+  // values written above, so exact equality is the intended comparison.
+  std::vector<float> expect_value = {0.2, 0.5};
+  ASSERT_EQ(dense_x->data<float>()[0], expect_value[0]);
+  ASSERT_EQ(dense_x->data<float>()[1], expect_value[1]);
+  ASSERT_EQ(dense_x->backend(), pt::Backend::kCPU);
+  ASSERT_EQ(dense_x->type(), pt::DataType::kFLOAT32);
+}
+
+// Verifies that InputVariableToPtTensor, given a Variable holding
+// SelectedRows, yields a tensor reporting the backend and data type requested
+// through the TensorArgDef.
+TEST(TcmptUtils, VarToPtTensor) {
+  // 1. create Variable: SelectedRows whose value is a 1x1 int tensor on CPU
+  Variable var;
+  auto rows = var.GetMutable<SelectedRows>();
+  Tensor* rows_value = rows->mutable_value();
+  auto* buffer = rows_value->mutable_data<int>(make_ddim({1, 1}),
+                                               paddle::platform::CPUPlace());
+  buffer[0] = 123;
+
+  // The requested backend is CUDA on CUDA/HIP builds and CPU otherwise.
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  pt::Backend expect_backend = pt::Backend::kCUDA;
+#else
+  pt::Backend expect_backend = pt::Backend::kCPU;
+#endif
+  pt::TensorArgDef arg_def(expect_backend, pt::DataLayout::kNCHW,
+                           pt::DataType::kINT32);
+
+  // 2. test API
+  auto pt_tensor = InputVariableToPtTensor(var, arg_def);
+
+  // 3. check result
+  ASSERT_EQ(pt_tensor->backend(), expect_backend);
+  ASSERT_EQ(pt_tensor->type(), pt::DataType::kINT32);
+}
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/tcmpt/CMakeLists.txt b/paddle/tcmpt/CMakeLists.txt
@@ -5,27 +5,8 @@ add_subdirectory(api)
 add_subdirectory(hapi)
 # tcmpt core components
 add_subdirectory(core)
-# tcmpt eigne functors, now paddle must compiled with eigen, but eigen just is
-# one backend dtype, we should support cropping it for lite
-add_subdirectory(eigen)
 # tcmpt kernels for diff device
-add_subdirectory(cpu)
-if(WITH_GPU OR WITH_ROCM)
-  # TODO(chenweihang): if hip can split from cuda impl, we should add hip dir
-  add_subdirectory(cuda)
-endif()
-# TODO(chenweihang): migrate MKLDNN Kernel in the second phase of the project
-if(WITH_MKLDNN)
-  add_subdirectory(mkldnn)
-endif()
-# TODO(chenweihang): migrate NPU Kernel in the second phase of the project
-if(WITH_ASCEND_CL)
-  add_subdirectory(npu)
-endif()
-# TODO(chenweihang): migrate XPU Kernel in the second phase of the project
-if(WITH_XPU)
-  add_subdirectory(xpu)
-endif()
+add_subdirectory(kernels)
 # tcmpt infershape
 add_subdirectory(infershape)
 # TODO(xingfeng): tcmpt inner module API designed by a high-performance team

diff --git a/paddle/tcmpt/api/include/creation.h b/paddle/tcmpt/api/include/creation.h
@@ -14,5 +14,5 @@
 
 #pragma once
 
-#include "paddle/tcmpt/cpu/creation.h"
-#include "paddle/tcmpt/cuda/creation.h"
+#include "paddle/tcmpt/kernels/cpu/creation.h"
+#include "paddle/tcmpt/kernels/cuda/creation.h"
diff --git a/paddle/tcmpt/api/include/linalg.h b/paddle/tcmpt/api/include/linalg.h
@@ -15,5 +15,5 @@
 #pragma once
 
 // See Note: [ How do we organize the kernel directory ]
-#include "paddle/tcmpt/cpu/linalg.h"
-#include "paddle/tcmpt/cuda/linalg.h"
+#include "paddle/tcmpt/kernels/cpu/linalg.h"
+#include "paddle/tcmpt/kernels/cuda/linalg.h"
diff --git a/paddle/tcmpt/api/include/manipulation.h b/paddle/tcmpt/api/include/manipulation.h
@@ -15,5 +15,5 @@
 #pragma once
 
 // See Note: [ How do we organize the kernel directory ]
-#include "paddle/tcmpt/cpu/manipulation.h"
-#include "paddle/tcmpt/cuda/manipulation.h"
+#include "paddle/tcmpt/kernels/cpu/manipulation.h"
+#include "paddle/tcmpt/kernels/cuda/manipulation.h"
diff --git a/paddle/tcmpt/api/include/math.h b/paddle/tcmpt/api/include/math.h
@@ -15,5 +15,5 @@ limitations under the License. */
 #pragma once
 
 // See Note: [ How do we organize the kernel directory ]
-#include "paddle/tcmpt/cpu/math.h"
-#include "paddle/tcmpt/cuda/math.h"
+#include "paddle/tcmpt/kernels/cpu/math.h"
+#include "paddle/tcmpt/kernels/cuda/math.h"
diff --git a/paddle/tcmpt/core/kernel_factory.h b/paddle/tcmpt/core/kernel_factory.h
@@ -26,6 +26,8 @@
 
 // See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/utils/flat_hash_map.h"
+#include "paddle/utils/small_vector.h"
 
 namespace pt {
 
@@ -209,25 +211,30 @@ class KernelArgsDef {
     attribute_defs_.emplace_back(AttributeArgDef(type_index));
   }
 
-  const std::vector<TensorArgDef>& input_defs() const { return input_defs_; }
+  const paddle::SmallVector<TensorArgDef>& input_defs() const {
+    return input_defs_;
+  }
 
-  const std::vector<TensorArgDef>& output_defs() const { return output_defs_; }
+  const paddle::SmallVector<TensorArgDef>& output_defs() const {
+    return output_defs_;
+  }
 
-  const std::vector<AttributeArgDef>& attribute_defs() const {
+  const paddle::SmallVector<AttributeArgDef>& attribute_defs() const {
     return attribute_defs_;
   }
 
-  std::vector<TensorArgDef>& input_defs() { return input_defs_; }
+  paddle::SmallVector<TensorArgDef>& input_defs() { return input_defs_; }
 
-  std::vector<TensorArgDef>& output_defs() { return output_defs_; }
+  paddle::SmallVector<TensorArgDef>& output_defs() { return output_defs_; }
 
-  std::vector<AttributeArgDef>& attribute_defs() { return attribute_defs_; }
+  paddle::SmallVector<AttributeArgDef>& attribute_defs() {
+    return attribute_defs_;
+  }
 
  private:
-  // TODO(chenweihang): replaced by paddle::small_vector
-  std::vector<TensorArgDef> input_defs_{{}};
-  std::vector<TensorArgDef> output_defs_{{}};
-  std::vector<AttributeArgDef> attribute_defs_{{}};
+  paddle::SmallVector<TensorArgDef> input_defs_{{}};
+  paddle::SmallVector<TensorArgDef> output_defs_{{}};
+  paddle::SmallVector<AttributeArgDef> attribute_defs_{{}};
 };
 
 class Kernel {
@@ -263,10 +270,10 @@ class Kernel {
 class KernelFactory {
  public:
   // replaced by paddle::flat_hash_map later
-  using KernelMap =
-      std::unordered_map<KernelName,
-                         std::unordered_map<KernelKey, Kernel, KernelKey::Hash>,
-                         KernelName::Hash>;
+  using KernelMap = paddle::flat_hash_map<
+      KernelName,
+      paddle::flat_hash_map<KernelKey, Kernel, KernelKey::Hash>,
+      KernelName::Hash>;
 
   static KernelFactory& Instance();
 

diff --git a/paddle/tcmpt/hapi/include/linalg.h b/paddle/tcmpt/hapi/include/linalg.h
@@ -21,5 +21,10 @@ namespace experimental {
 
 Tensor dot(const Tensor& x, const Tensor& y);
 
+// Declares the experimental high-level matmul API taking two input tensors
+// and one boolean flag per operand, returning a Tensor by value.
+// NOTE(review): presumably transpose_x / transpose_y request transposing the
+// corresponding operand before multiplication — the definition is not
+// visible here, confirm against the implementation.
+Tensor matmul(const Tensor& x,
+              const Tensor& y,
+              bool transpose_x,
+              bool transpose_y);
+
 }  // namespace experimental
 }  // namespace paddle
diff --git a/paddle/tcmpt/kernels/CMakeLists.txt b/paddle/tcmpt/kernels/CMakeLists.txt
@@ -0,0 +1,18 @@
+# tcmpt kernels for diff device
+add_subdirectory(cpu)
+if(WITH_GPU OR WITH_ROCM)
+  # TODO(chenweihang): if hip can split from cuda impl, we should add hip dir
+  add_subdirectory(cuda)
+endif()
+# TODO(chenweihang): migrate MKLDNN Kernel in the second phase of the project
+if(WITH_MKLDNN)
+  add_subdirectory(mkldnn)
+endif()
+# TODO(chenweihang): migrate NPU Kernel in the second phase of the project
+if(WITH_ASCEND_CL)
+  add_subdirectory(npu)
+endif()
+# TODO(chenweihang): migrate XPU Kernel in the second phase of the project
+if(WITH_XPU)
+  add_subdirectory(xpu)
+endif()
diff --git a/paddle/tcmpt/eigen/CMakeLists.txt → ...tcmpt/kernels/common/eigen/CMakeLists.txt b/paddle/tcmpt/eigen/CMakeLists.txt → ...tcmpt/kernels/common/eigen/CMakeLists.txt
diff --git a/paddle/tcmpt/eigen/common.h → paddle/tcmpt/kernels/common/eigen/common.h b/paddle/tcmpt/eigen/common.h → paddle/tcmpt/kernels/common/eigen/common.h
diff --git a/paddle/tcmpt/eigen/dot.h → paddle/tcmpt/kernels/common/eigen/dot.h b/paddle/tcmpt/eigen/dot.h → paddle/tcmpt/kernels/common/eigen/dot.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/tcmpt/core/dense_tensor.h"
-#include "paddle/tcmpt/eigen/common.h"
+#include "paddle/tcmpt/kernels/common/eigen/common.h"
 
 // See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/operators/eigen/eigen_function.h"

diff --git a/paddle/tcmpt/eigen/fill.h → paddle/tcmpt/kernels/common/eigen/fill.h b/paddle/tcmpt/eigen/fill.h → paddle/tcmpt/kernels/common/eigen/fill.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/tcmpt/core/dense_tensor.h"
-#include "paddle/tcmpt/eigen/common.h"
+#include "paddle/tcmpt/kernels/common/eigen/common.h"
 
 // See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/operators/eigen/eigen_function.h"

diff --git a/paddle/tcmpt/eigen/mean.h → paddle/tcmpt/kernels/common/eigen/mean.h b/paddle/tcmpt/eigen/mean.h → paddle/tcmpt/kernels/common/eigen/mean.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/tcmpt/core/dense_tensor.h"
-#include "paddle/tcmpt/eigen/common.h"
+#include "paddle/tcmpt/kernels/common/eigen/common.h"
 
 // See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/operators/eigen/eigen_function.h"

diff --git a/paddle/tcmpt/eigen/scale.h → paddle/tcmpt/kernels/common/eigen/scale.h b/paddle/tcmpt/eigen/scale.h → paddle/tcmpt/kernels/common/eigen/scale.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/tcmpt/core/dense_tensor.h"
-#include "paddle/tcmpt/eigen/common.h"
+#include "paddle/tcmpt/kernels/common/eigen/common.h"
 
 // See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/operators/eigen/eigen_function.h"

diff --git a/paddle/tcmpt/eigen/sign.h → paddle/tcmpt/kernels/common/eigen/sign.h b/paddle/tcmpt/eigen/sign.h → paddle/tcmpt/kernels/common/eigen/sign.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/tcmpt/core/dense_tensor.h"
-#include "paddle/tcmpt/eigen/common.h"
+#include "paddle/tcmpt/kernels/common/eigen/common.h"
 
 // See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/operators/eigen/eigen_function.h"

diff --git a/paddle/tcmpt/cpu/CMakeLists.txt → paddle/tcmpt/kernels/cpu/CMakeLists.txt b/paddle/tcmpt/cpu/CMakeLists.txt → paddle/tcmpt/kernels/cpu/CMakeLists.txt
@@ -1,5 +1,5 @@
 if(WIN32)
-    set(CURRENT_BINARY_DIR ${PADDLE_BINARY_DIR}/paddle/tcmpt/cpu)
+    set(CURRENT_BINARY_DIR ${PADDLE_BINARY_DIR}/paddle/tcmpt/kernels/cpu)
     kernel_instantiate(creation.cc)
     kernel_instantiate(math.cc)
     kernel_instantiate(linalg.cc)

diff --git a/paddle/tcmpt/cpu/creation.cc → paddle/tcmpt/kernels/cpu/creation.cc b/paddle/tcmpt/cpu/creation.cc → paddle/tcmpt/kernels/cpu/creation.cc
@@ -12,10 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/tcmpt/cpu/creation.h"
+#include "paddle/tcmpt/kernels/cpu/creation.h"
 
 #include "paddle/tcmpt/core/kernel_registry.h"
-#include "paddle/tcmpt/eigen/fill.h"
+#include "paddle/tcmpt/kernels/common/eigen/fill.h"
 
 namespace pt {
 

diff --git a/paddle/tcmpt/cpu/creation.h → paddle/tcmpt/kernels/cpu/creation.h b/paddle/tcmpt/cpu/creation.h → paddle/tcmpt/kernels/cpu/creation.h
diff --git a/paddle/tcmpt/cpu/linalg.cc → paddle/tcmpt/kernels/cpu/linalg.cc b/paddle/tcmpt/cpu/linalg.cc → paddle/tcmpt/kernels/cpu/linalg.cc
@@ -12,12 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/tcmpt/cpu/linalg.h"
+#include "paddle/tcmpt/kernels/cpu/linalg.h"
 
 #include "paddle/tcmpt/core/kernel_registry.h"
 
 // See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/platform/complex.h"
 
 namespace pt {
@@ -44,6 +45,14 @@ void Dot(const CPUContext& dev_ctx,
   }
 }
 
+// CPU matmul kernel entry point.
+// NOTE(review): the body is empty in this revision, so every argument is
+// ignored and `out` is left untouched. Presumably a placeholder until the
+// real implementation lands — confirm before calling or registering it.
+template <typename T>
+void matmul(const CPUContext& dev_ctx,
+            const DenseTensor& x,
+            const DenseTensor& y,
+            bool transpose_x,
+            bool transpose_y,
+            DenseTensor* out) {}
+
 }  // namespace pt
 
 PT_REGISTER_MODULE(LinalgCPU);

diff --git a/paddle/tcmpt/cpu/linalg.h → paddle/tcmpt/kernels/cpu/linalg.h b/paddle/tcmpt/cpu/linalg.h → paddle/tcmpt/kernels/cpu/linalg.h
@@ -29,4 +29,12 @@ void Dot(const CPUContext& dev_ctx,
          const DenseTensor& y,
          DenseTensor* out);
 
+// Declares the CPU matmul kernel template: takes the CPU device context, two
+// input DenseTensors, one boolean flag per operand, and an output
+// DenseTensor pointer.
+// NOTE(review): presumably the flags select transposition of x / y and the
+// result is written to `out` — confirm against the definition.
+template <typename T>
+void matmul(const CPUContext& dev_ctx,
+            const DenseTensor& x,
+            const DenseTensor& y,
+            bool transpose_x,
+            bool transpose_y,
+            DenseTensor* out);
+
 }  // namespace pt
diff --git a/paddle/tcmpt/cpu/manipulation.cc → paddle/tcmpt/kernels/cpu/manipulation.cc b/paddle/tcmpt/cpu/manipulation.cc → paddle/tcmpt/kernels/cpu/manipulation.cc
@@ -12,9 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/tcmpt/cpu/manipulation.h"
-#include "paddle/tcmpt/cpu/utils.h"
+#include "paddle/tcmpt/kernels/cpu/manipulation.h"
 #include "paddle/tcmpt/infershape/unary.h"
+#include "paddle/tcmpt/kernels/cpu/utils.h"
 
 namespace pt {
 

diff --git a/paddle/tcmpt/cpu/manipulation.h → paddle/tcmpt/kernels/cpu/manipulation.h b/paddle/tcmpt/cpu/manipulation.h → paddle/tcmpt/kernels/cpu/manipulation.h