remove mkldnn tensor & polish details

PaddlePaddle · Oct 14, 2021 · 3f5f789 · 3f5f789 · paddle-bot-old · Oct 15, 2021
1 parent 06789ba
commit 3f5f789
Show file tree

Hide file tree

Showing 23 changed files with 249 additions and 208 deletions.
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
@@ -117,7 +117,7 @@ function(find_fluid_modules TARGET_NAME)
 endfunction(find_fluid_modules)
 
 set_property(GLOBAL PROPERTY TCMPT_MODULES "")
-# find all top modules is used for paddle static library
+# find all tcmpt modules; this is used for the paddle static library
 # for building inference libs
 function(find_tcmpt_modules TARGET_NAME)
   get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)

diff --git a/cmake/tcmpt.cmake b/cmake/tcmpt.cmake
@@ -1,4 +1,10 @@
-# TODO(chenweihang): keep message comment for debuging, remove it if needless
+# `kernel_instantiate` function is used to declare the template instantiation of
+# the Kernel function generated through code analysis, only for windows
+# (because the windows platform msvc compiler cannot automatically instantiate
+# the template function through decltype)
+# TODO(chenweihang): keep message comment for debugging, it is still useful,
+# I will remove it if needless later
+
 function(kernel_instantiate TARGET)
     set(target_file ${CURRENT_BINARY_DIR}/${TARGET}.tmp CACHE INTERNAL "${CURRENT_BINARY_DIR}/${TARGET} file")
     set(target_file_final ${CURRENT_BINARY_DIR}/${TARGET})
@@ -36,7 +42,6 @@ function(kernel_instantiate TARGET)
     endforeach()
     # message(STATUS "INST CONTENT: ${instantiate_context}")
     file(APPEND ${target_file} "${instantiate_context}\n")
-    # copy_if_different(${target_file} ${target_file_final})
     string(REPLACE "." "_" cmd_name ${TARGET})
     # this is a dummy target for custom command, should always be run firstly to update ${target_file_final}
     # TODO(chenweihang): naming rule needs to be enhanced

diff --git a/paddle/fluid/framework/eigen.h b/paddle/fluid/framework/eigen.h
@@ -19,8 +19,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor.h"
 #include "unsupported/Eigen/CXX11/Tensor"
 
-#include "paddle/tcmpt/core/dense_tensor.h"
-
 namespace paddle {
 namespace framework {
 
@@ -69,28 +67,6 @@ struct EigenTensor {
   static ConstType From(const Tensor& tensor) {
     return From(tensor, tensor.dims_);
   }
-
-  // for pt::DenseTensor
-  static Type From(pt::DenseTensor& tensor, DDim dims) {  // NOLINT
-    // why tensor.data<T>() not work?
-    // return Type(const_cast<T*>(reinterpret_cast<const T*>(tensor.data())),
-    // EigenDim<D>::From(dims));
-    return Type(const_cast<T*>(tensor.data<T>()), EigenDim<D>::From(dims));
-  }
-
-  static Type From(pt::DenseTensor& tensor) {  // NOLINT
-    return From(tensor, tensor.dims());
-  }  // NOLINT
-
-  static ConstType From(const pt::DenseTensor& tensor, DDim dims) {
-    // return ConstType(reinterpret_cast<const T*>(tensor.data()),
-    // EigenDim<D>::From(dims));
-    return ConstType(tensor.data<T>(), EigenDim<D>::From(dims));
-  }
-
-  static ConstType From(const pt::DenseTensor& tensor) {
-    return From(tensor, tensor.dims());
-  }
 };
 
 template <typename T, int MajorType = Eigen::RowMajor,
@@ -133,17 +109,6 @@ struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> {
       const Tensor& tensor) {  // NOLINT
     return EigenVector::From(tensor, {product(tensor.dims_)});
   }
-
-  // for pt::DenseTensor
-  static typename EigenVector::Type Flatten(
-      pt::DenseTensor& tensor) {  // NOLINT
-    return EigenVector::From(tensor, {product(tensor.dims())});
-  }
-
-  static typename EigenVector::ConstType Flatten(
-      const pt::DenseTensor& tensor) {  // NOLINT
-    return EigenVector::From(tensor, {product(tensor.dims())});
-  }
 };
 
 template <typename T, int MajorType = Eigen::RowMajor,
@@ -160,15 +125,6 @@ struct EigenScalar {
   static ConstType From(const Tensor& tensor) {
     return ConstType(tensor.data<T>());
   }
-
-  // for pt::DenseTensor
-  static Type From(pt::DenseTensor& tensor) {  // NOLINT
-    return Type(const_cast<T*>(tensor.data<T>()));
-  }
-
-  static ConstType From(const pt::DenseTensor& tensor) {
-    return ConstType(tensor.data<T>());
-  }
 };
 
 // Define Tensor with 32-bit index.

diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc b/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc
@@ -21,7 +21,7 @@
 #include "paddle/fluid/framework/parallel_executor.h"
 #include "paddle/fluid/framework/program_desc.h"
 
-USE_NO_KERNEL_OP(scale);
+USE_OP(scale);
 USE_OP(elementwise_mul);
 USE_OP(elementwise_add);
 USE_OP(elementwise_add_grad);

diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
@@ -1155,7 +1155,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   // and RCOM backend, the XPU, NPU and MKLDNN will be supported in the second
   // phase
 
-  // VLOG(1) << "Pt KernelFactory: " << pt::KernelFactory::Instance();
   if (FLAGS_use_pt_kernel &&
       pt::KernelFactory::Instance().ContainsKernel(type_.c_str())) {
     if (pt_kernel_key_.get() == nullptr || pt_kernel_.get() == nullptr) {
@@ -1263,17 +1262,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   }
 }
 
-static bool ContainSelectedRows(const VariableValueMap& inputs) {
-  for (auto& var_pair : inputs) {
-    for (auto* var : var_pair.second) {
-      if (var->IsType<SelectedRows>()) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
 // TODO(chenweihang): now only check single var input
 static bool IsValidVar(const std::string& name,
                        const VariableValueMap& inputs) {
@@ -1303,9 +1291,6 @@ static pt::KernelName ConstructPtKernelName(const std::string& op_type,
                                             const VariableValueMap& inputs) {
   std::string overload_name;
   // TODO(chenweihang): adapt SelectedRows by xiaowei's design
-  // if (ContainSelectedRows(inputs)) {
-  //   overload_name = pt::kContainSelectedRowsSuffix;
-  // }
   if (ContainHostTensor(op_proto, inputs)) {
     if (overload_name != "") {
       overload_name += ".";

diff --git a/paddle/fluid/framework/tcmpt_utils.cc b/paddle/fluid/framework/tcmpt_utils.cc
@@ -13,18 +13,15 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/framework/tcmpt_utils.h"
+
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/selected_rows.h"
-
 #include "paddle/fluid/framework/variable.h"
-#include "paddle/tcmpt/api/include/core.h"
-#include "paddle/tcmpt/api/include/symbols.h"
 
 namespace paddle {
 namespace framework {
 
 // TODO(chenweihang, shixiaowei): adapt SelectedRows
-
 template <>
 std::shared_ptr<pt::DenseTensor> MakeTensorImpl<pt::DenseTensor, LoDTensor>(
     const LoDTensor& tensor, pt::Backend backend, pt::DataType dtype,
@@ -167,38 +164,5 @@ std::shared_ptr<pt::TensorInterface> OutputVariableToPtTensor(
   return nullptr;
 }
 
-/* For MKLDNNDenseTensor (move this part into a single file later) */
-#ifdef PADDLE_WITH_MKLDNN
-
-template <>
-std::shared_ptr<pt::MKLDNNDenseTensor> MakeTensorImpl<pt::MKLDNNDenseTensor>(
-    const Tensor& tensor, const platform::Place& place,
-    proto::VarType::Type type) {
-  auto holder = tensor.Holder();
-  auto tensor_impl = std::make_shared<pt::MKLDNNDenseTensor>(
-      pt::TensorMeta(tensor.dims(), pt::TransToPtBackend(place),
-                     pt::TransToPtDataType(type),
-                     pt::TransToPtLayout(tensor.layout()), tensor.offset()),
-      pt::TensorStatus());
-
-  if (holder != nullptr) {
-    tensor_impl->ShareAllocation(tensor.Holder());
-  } else {
-    VLOG(1) << "Old MKLDNN Tensor holder is nullptr.";
-  }
-
-  tensor_impl->set_format(tensor.format());
-  return tensor_impl;
-}
-
-template <>
-void ShareTensorImpl(pt::MKLDNNDenseTensor* tensor_impl, Tensor* out) {
-  out->ResetHolderWithType(tensor_impl->allocation(),
-                           pt::TransToProtoVarType(tensor_impl->type()));
-  out->set_format(tensor_impl->format());
-}
-
-#endif
-
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/type_defs.h b/paddle/fluid/framework/type_defs.h
@@ -33,7 +33,6 @@ class BlockDesc;
 class Variable;
 class InferNoNeedBufferVarsFN;
 
-// TODO(chenweihang): AttirbuteMap also need to be ordered
 // TODO(panyx0718): Replace vector with something like gtl::Vector.
 using VariableNameMap = std::map<std::string, std::vector<std::string>>;
 using VariableValueMap = std::map<std::string, std::vector<Variable*>>;
@@ -44,7 +43,6 @@ using Attribute = boost::variant<
     std::vector<std::string>, bool, std::vector<bool>, BlockDesc*, int64_t,
     std::vector<BlockDesc*>, std::vector<int64_t>, std::vector<double>>;
 
-// TODO(chenweihang): AttirbuteMap also need to be ordered
 using AttributeMap = std::unordered_map<std::string, Attribute>;
 
 #ifdef PADDLE_WITH_ASCEND_CL

diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc
@@ -137,18 +137,6 @@ static framework::VariableValueMap BuildInputMap(
   return inputs;
 }
 
-template <typename VarType>
-static bool ContainSelectedRows(const NameVarMap<VarType>& inputs) {
-  for (auto& var_pair : inputs) {
-    for (auto& var : var_pair.second) {
-      if (var->Var().template IsType<framework::SelectedRows>()) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
 // TODO(chenweihang): enhance rules, not all dispensable inputs
 // are host tensor, now only for scale kernel verify
 template <typename VarType>
@@ -169,9 +157,6 @@ static pt::KernelName ConstructPtKernelName(
     const NameVarMap<VarType>& inputs) {
   std::string overload_name;
   // TODO(chenweihang): adapt SelectedRows by xiaowei's design
-  // if (ContainSelectedRows<VarType>(inputs)) {
-  //   overload_name = pt::kContainSelectedRowsSuffix;
-  // }
   if (ContainHostTensor<VarType>(op_proto, inputs)) {
     if (overload_name != "") {
       overload_name += ".";

diff --git a/paddle/fluid/operators/pscore/heter_listen_and_server_test.cc b/paddle/fluid/operators/pscore/heter_listen_and_server_test.cc
@@ -32,7 +32,7 @@ using MultiVarMsg = ::paddle::distributed::MultiVariableMessage;
 using VarMsg = ::paddle::distributed::VariableMessage;
 DECLARE_double(eager_delete_tensor_gb);
 
-USE_NO_KERNEL_OP(scale);
+USE_OP(scale);
 USE_NO_KERNEL_OP(heter_listen_and_serv);
 
 framework::BlockDesc* AppendSendAndRecvBlock(framework::ProgramDesc* program) {

diff --git a/paddle/fluid/operators/pscore/heter_server_test.cc b/paddle/fluid/operators/pscore/heter_server_test.cc
@@ -29,7 +29,7 @@ namespace distributed = paddle::distributed;
 using MultiVarMsg = ::paddle::distributed::MultiVariableMessage;
 using VarMsg = ::paddle::distributed::VariableMessage;
 
-USE_NO_KERNEL_OP(scale);
+USE_OP(scale);
 
 std::shared_ptr<distributed::HeterServer> b_rpc_service;
 

diff --git a/paddle/fluid/operators/scale_op_xpu.cc b/paddle/fluid/operators/scale_op_xpu.cc
@@ -20,7 +20,6 @@ limitations under the License. */
 
 namespace paddle {
 namespace operators {
-
 template <typename DeviceContext, typename T>
 class ScaleXPUKernel : public framework::OpKernel<T> {
  public:

diff --git a/paddle/fluid/operators/sign_op.cc b/paddle/fluid/operators/sign_op.cc
@@ -12,9 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <memory>
-
 #include "paddle/fluid/operators/sign_op.h"
+#include <memory>
 #include "paddle/fluid/platform/float16.h"
 
 namespace paddle {

diff --git a/paddle/tcmpt/api/include/core.h b/paddle/tcmpt/api/include/core.h
@@ -19,5 +19,4 @@ limitations under the License. */
 #include "paddle/tcmpt/core/dense_tensor.h"
 #include "paddle/tcmpt/core/kernel_context.h"
 #include "paddle/tcmpt/core/kernel_factory.h"
-#include "paddle/tcmpt/core/mkldnn_dense_tensor.h"
 #include "paddle/tcmpt/core/scalar.h"
diff --git a/paddle/tcmpt/core/mkldnn_dense_tensor.h b/paddle/tcmpt/core/mkldnn_dense_tensor.h
diff --git a/paddle/tcmpt/cpu/CMakeLists.txt b/paddle/tcmpt/cpu/CMakeLists.txt
@@ -1,5 +1,6 @@
 if(WIN32)
     set(CURRENT_BINARY_DIR ${PADDLE_BINARY_DIR}/paddle/tcmpt/cpu)
+    kernel_instantiate(creation.cc)
     kernel_instantiate(math.cc)
     kernel_instantiate(linalg.cc)
 endif()

diff --git a/paddle/tcmpt/cuda/CMakeLists.txt b/paddle/tcmpt/cuda/CMakeLists.txt
@@ -1,5 +1,6 @@
 if(WIN32)
     set(CURRENT_BINARY_DIR ${PADDLE_BINARY_DIR}/paddle/tcmpt/cuda)
+    kernel_instantiate(creation.cu)
     kernel_instantiate(math.cu)
     kernel_instantiate(linalg.cu)
 endif()

diff --git a/paddle/tcmpt/cuda/linalg.cu b/paddle/tcmpt/cuda/linalg.cu
@@ -15,10 +15,9 @@
 #include "paddle/tcmpt/cuda/linalg.h"
 
 #include "paddle/tcmpt/core/kernel_registry.h"
+#include "paddle/tcmpt/eigen/dot.h"
 
 // See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/operators/eigen/eigen_function.h"
 #include "paddle/fluid/platform/complex.h"
 
 namespace pt {
@@ -28,22 +27,7 @@ void Dot(const CUDAContext& dev_ctx,
          const DenseTensor& x,
          const DenseTensor& y,
          DenseTensor* out) {
-  out->mutable_data();
-  if (1 == out->dims().size()) {
-    auto eigen_out = paddle::framework::EigenScalar<T>::From(*out);
-    auto eigen_x = paddle::framework::EigenVector<T>::Flatten(x);
-    auto eigen_y = paddle::framework::EigenVector<T>::Flatten(y);
-
-    auto& dev = *dev_ctx.eigen_device();
-    eigen_out.device(dev) = (eigen_x * eigen_y).sum();
-  } else {
-    auto eigen_out = paddle::framework::EigenMatrix<T>::From(*out);
-    auto eigen_x = paddle::framework::EigenMatrix<T>::From(x);
-    auto eigen_y = paddle::framework::EigenMatrix<T>::From(y);
-
-    auto& dev = *dev_ctx.eigen_device();
-    eigen_out.device(dev) = (eigen_x * eigen_y).sum(Eigen::DSizes<int, 1>(1));
-  }
+  eigen::Dot<CUDAContext, T>(dev_ctx, x, y, out);
 }
 
 }  // namespace pt