From dfdc9960e8e32878974824e2e4b0b097f062ad59 Mon Sep 17 00:00:00 2001
From: Zhanlue Yang
Date: Tue, 4 Jan 2022 12:06:28 +0800
Subject: [PATCH] [Unify Tensors PR #3]Port framework::Tensor members &
 interfaces to pten::DenseTensor, test=allcases (#38473)

* Added shared_ptr member & corresponding interfaces to Storage

* Removed original pten::Allocation from Storage and adjusted the
  interfaces accordingly

* Fixed issues with storage offset

* Used place to malloc allocation for TensorStorage

* [Unify Tensors PR #3]Ported framework::Tensor interfaces to
  pten::DenseTensor

* Fixed issues with place

* Added comments

* Moved mutable_data with stream argument to DenseTensor

* Added set_offset interface

* Fixed CI issues,test=allcases

* [Unify Tensors PR #4] Port LoDTensor interfaces to DenseTensor

* Reverted changes to pten_layout() interface

* Removed friend classes
---
 paddle/fluid/framework/CMakeLists.txt       |   8 +-
 paddle/fluid/framework/tensor.h             |   2 +
 paddle/fluid/framework/tensor_impl.h        |   6 +
 paddle/fluid/framework/tensor_util.cc       |  40 +-
 paddle/fluid/framework/tensor_util.h        |   6 +
 paddle/pten/core/CMakeLists.txt             |   7 +-
 paddle/pten/core/dense_tensor.cc            | 442 +++++++++++++++++++-
 paddle/pten/core/dense_tensor.h             | 211 +++++++++-
 paddle/pten/core/storage.h                  |  16 +-
 paddle/pten/core/utils/intrusive_ptr.h      |   5 +
 paddle/pten/tests/core/test_dense_tensor.cc |   3 +-
 11 files changed, 721 insertions(+), 25 deletions(-)

diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index df7e543d99c54..bd096f41ccc49 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -68,15 +68,15 @@ cc_test(data_type_test SRCS data_type_test.cc DEPS data_type place tensor)
 if(WITH_GPU)
   if (WIN32)
     windows_symbolic(tensor_util SRCS tensor_util.cu)
-    nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context)
+    nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context dense_tensor)
     add_dependencies(tensor tensor_util)
   else()
-    nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler)
+    nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler dense_tensor)
   endif(WIN32)
 elseif(WITH_ROCM)
-  hip_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler)
+  hip_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler dense_tensor)
 else()
-  cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context profiler)
+  cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context profiler dense_tensor)
 endif()

 cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index b7cc57d5e042e..7eebd97d06523 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -120,6 +120,8 @@ class Tensor {
   explicit Tensor(const proto::VarType::Type&);

   /*! Return a pointer to mutable memory block.
   */
+  const void* data() const;
+
   template <typename T>
   T* data();

diff --git a/paddle/fluid/framework/tensor_impl.h b/paddle/fluid/framework/tensor_impl.h
index 986551b935e88..a83b5d0662bb9 100644
--- a/paddle/fluid/framework/tensor_impl.h
+++ b/paddle/fluid/framework/tensor_impl.h
@@ -54,6 +54,12 @@ inline T* Tensor::data() {
                               offset_);
 }

+inline const void* Tensor::data() const {
+  check_memory_size();
+  return reinterpret_cast<const void*>(
+      reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
+}
+
 template <typename T>
 inline T* Tensor::mutable_data(const DDim& dims, const platform::Place& place,
                                size_t requested_size) {
diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc
index f0e5a447fd2da..6394e84c81a2b 100644
--- a/paddle/fluid/framework/tensor_util.cc
+++ b/paddle/fluid/framework/tensor_util.cc
@@ -23,6 +23,9 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/platform/complex.h"
 #include "paddle/fluid/platform/profiler.h"
+
+#include "paddle/pten/core/dense_tensor.h"
+
 #ifdef PADDLE_WITH_MKLDNN
 #include "dnnl_debug.h"  // NOLINT
 #endif
@@ -30,11 +33,12 @@ limitations under the License. */
 namespace paddle {
 namespace framework {

-void TensorCopy(const Tensor& src, const platform::Place& dst_place,
-                const platform::DeviceContext& ctx, Tensor* dst) {
+template <typename TENSOR>
+void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
+                    const platform::DeviceContext& ctx, TENSOR* dst) {
   if (&src == dst) {
     auto src_copy = src;
-    TensorCopy(src_copy, dst_place, ctx, dst);
+    TensorCopyImpl(src_copy, dst_place, ctx, dst);
     return;
   }

@@ -45,7 +49,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
   dst->Resize(src.dims());
   dst->set_layout(src.layout());
   auto src_place = src.place();
-  auto src_ptr = src.data<void>();
+  auto src_ptr = src.data();
 #ifdef PADDLE_WITH_MKLDNN
   dst->set_format(src.format());
   // oneDNN tensors due to padding may be of bigger size
@@ -389,8 +393,9 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
 #endif
 }

-void TensorCopy(const Tensor& src, const platform::Place& dst_place,
-                Tensor* dst) {
+template <typename TENSOR>
+void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
+                    TENSOR* dst) {
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
   const platform::DeviceContext* dev_ctx;
   if (platform::is_gpu_place(dst_place) || platform::is_npu_place(dst_place)) {
@@ -398,7 +403,24 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
   } else {
     dev_ctx = pool.Get(src.place());
   }
-  TensorCopy(src, dst_place, *dev_ctx, dst);
+  TensorCopyImpl(src, dst_place, *dev_ctx, dst);
+}
+
+void TensorCopy(const Tensor& src, const platform::Place& dst_place,
+                Tensor* dst) {
+  TensorCopyImpl(src, dst_place, dst);
+}
+void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
+                pten::DenseTensor* dst) {
+  TensorCopyImpl(src, dst_place, dst);
+}
+void TensorCopy(const Tensor& src, const platform::Place& dst_place,
+                const platform::DeviceContext& ctx, Tensor* dst) {
+  TensorCopyImpl(src, dst_place, ctx, dst);
+}
+void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
+                const platform::DeviceContext& ctx, pten::DenseTensor* dst) {
+  TensorCopyImpl(src, dst_place, ctx, dst);
 }

 void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
@@ -418,7 +440,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
   dst->set_format(src.format());
 #endif
   auto src_place = src.place();
-  auto src_ptr = src.data<void>();
+  auto src_ptr = src.data();
   auto dst_ptr = dst->mutable_data(dst_place, src.type());

   if (src_ptr == dst_ptr && src_place == dst_place) {
@@ -971,7 +993,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor,
   {  // the 3rd field, tensor data
     uint64_t size = tensor.numel() * framework::SizeOfType(tensor.type());

-    auto* data_ptr = tensor.data<void>();
+    auto* data_ptr = tensor.data();
     PADDLE_ENFORCE_LT(size, (std::numeric_limits<std::streamsize>::max)(),
                       platform::errors::ResourceExhausted(
                           "tensor size %d overflow when writing tensor", size));
diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h
index 575e2171652a2..46eba6a1e41bb 100644
--- a/paddle/fluid/framework/tensor_util.h
+++ b/paddle/fluid/framework/tensor_util.h
@@ -34,6 +34,8 @@ limitations under the License. */
 #include "paddle/fluid/platform/device/mlu/device_context.h"
 #endif

+#include "paddle/pten/core/dense_tensor.h"
+
 namespace paddle {
 namespace framework {

@@ -75,6 +77,8 @@ class Tensor;

 void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                 const platform::DeviceContext& ctx, Tensor* dst);
+void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
+                const platform::DeviceContext& ctx, pten::DenseTensor* dst);

 // NOTE(zcd): If the src.place() and dst_place are two different GPU,
 // the copy operation is carried out on the dst_place's stream. This is
@@ -85,6 +89,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
 // not completed.
 void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                 Tensor* dst);
+void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
+                pten::DenseTensor* dst);

 void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
                     Tensor* dst);
diff --git a/paddle/pten/core/CMakeLists.txt b/paddle/pten/core/CMakeLists.txt
index 28c522d7ea640..87c3612e35424 100644
--- a/paddle/pten/core/CMakeLists.txt
+++ b/paddle/pten/core/CMakeLists.txt
@@ -9,6 +9,11 @@ endif()
 cc_library(kernel_factory SRCS kernel_factory.cc DEPS enforce convert_utils)
 cc_library(kernel_context SRCS kernel_context.cc DEPS enforce pten_context)

 cc_library(tensor_base SRCS tensor_base.cc allocator.cc storage.cc DEPS enforce)
 cc_library(tensor_meta SRCS tensor_meta.cc DEPS enforce mixed_vector)
-cc_library(dense_tensor SRCS dense_tensor.cc DEPS tensor_meta tensor_base)
+cc_library(dense_tensor SRCS dense_tensor.cc DEPS convert_utils tensor_meta tensor_base)
+
+# Will be removed once we implement MKLDNN_Tensor
+if(WITH_MKLDNN)
+  add_dependencies(dense_tensor mkldnn)
+endif()
diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc
index 6fe85610612ac..d8d83c575c4cf 100644
--- a/paddle/pten/core/dense_tensor.cc
+++ b/paddle/pten/core/dense_tensor.cc
@@ -19,6 +19,9 @@ limitations under the License.
 */

 #include "paddle/fluid/platform/complex.h"
 #include "paddle/fluid/platform/float16.h"

+#include "paddle/pten/api/lib/utils/storage.h"
+#include "paddle/pten/core/convert_utils.h"
+
 namespace pten {

 DenseTensor::DenseTensor(const std::shared_ptr<Allocator>& a,
@@ -41,6 +44,12 @@ DenseTensor::DenseTensor(intrusive_ptr<Storage> storage, DenseTensorMeta&& meta)
 DenseTensor::DenseTensor(const DenseTensor& other)
     : meta_(other.meta()), storage_(copy_intrusive(other.storage_)) {}

+DenseTensor& DenseTensor::operator=(const DenseTensor& other) {
+  meta_ = other.meta();
+  storage_ = std::move(copy_intrusive(other.storage_));
+  return *this;
+}
+
 int64_t DenseTensor::numel() const {
   if (meta_.is_scalar) {
     return 1;
@@ -105,6 +114,7 @@ T* DenseTensor::mutable_data() {

 template <typename T>
 const T* DenseTensor::data() const {
+  check_memory_size();
   PADDLE_ENFORCE(
       (dtype() == paddle::experimental::CppTypeToDataType<T>::Type()),
       paddle::platform::errors::InvalidArgument(
@@ -113,7 +123,31 @@ const T* DenseTensor::data() const {
   return static_cast<const T*>(data());
 }

+template <typename T>
+T* DenseTensor::data() {
+  check_memory_size();
+  PADDLE_ENFORCE(
+      (dtype() == paddle::experimental::CppTypeToDataType<T>::Type()),
+      paddle::platform::errors::InvalidArgument(
+          "The type of data we are trying to retrieve does not match the "
+          "type of data currently contained in the container."));
+  PADDLE_ENFORCE_NOT_NULL(
+      storage_,
+      paddle::platform::errors::PreconditionNotMet(
+          "The storage must be valid when calling the data function."));
+  return reinterpret_cast<T*>(data());
+}
+
 const void* DenseTensor::data() const {
+  PADDLE_ENFORCE_NOT_NULL(
+      storage_,
+      paddle::platform::errors::PreconditionNotMet(
+          "The storage must be valid when calling the data function."));
+  return reinterpret_cast<const void*>(
+      reinterpret_cast<uintptr_t>(storage_->data()) + meta_.offset);
+}
+
+void* DenseTensor::data() {
   PADDLE_ENFORCE_NOT_NULL(
       storage_,
       paddle::platform::errors::PreconditionNotMet(
@@ -130,16 +164,30 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) {
   meta_ = std::move(meta);
 }

-void DenseTensor::Resize(const DDim& dims) {
+/* @jim19930609: This interface will be further modified until we finalize the
+   design for Allocator - Allocation.
+   For now, we have to temporarily accommodate two independent use cases:
+   1. Designed behaviour: DenseTensor constructed with its underlying storage_
+      initialized
+   2. Legacy behaviour (fluid): DenseTensor constructed using the default
+      constructor, where storage_ won't be initialized until the first
+      call to mutable_data(place)
+ */
+DenseTensor& DenseTensor::Resize(const DDim& dims) {
   meta_.dims = dims;
-  mutable_data();
+  if (storage_ != nullptr) {
+    mutable_data();
+  }
+  return *this;
 }

 void DenseTensor::ResetLoD(const LoD& lod) { meta_.lod = lod; }

-#define DATA_MEMBER_FUNC_INSTANTIATION(dtype) \
-  template dtype* DenseTensor::mutable_data(); \
-  template const dtype* DenseTensor::data() const;
+#define DATA_MEMBER_FUNC_INSTANTIATION(dtype)      \
+  template dtype* DenseTensor::mutable_data();     \
+  template const dtype* DenseTensor::data() const; \
+  template dtype* DenseTensor::data();

 DATA_MEMBER_FUNC_INSTANTIATION(bool);
 DATA_MEMBER_FUNC_INSTANTIATION(int8_t);
@@ -159,4 +207,388 @@ DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex128);

 #undef DATA_MEMBER_FUNC_INSTANTIATION

+/* --------------------------- */
+/*   From framework::Tensor    */
+/* --------------------------- */
+DenseTensor::DenseTensor() {
+  inplace_version_counter_ = std::make_shared<TensorInplaceVersion>(0);
+  meta_ = DenseTensorMeta();
+  meta_.dtype = paddle::experimental::DataType::FLOAT32;
+  meta_.offset = 0;
+}
+
+DenseTensor::DenseTensor(const paddle::framework::proto::VarType::Type& dtype) {
+  inplace_version_counter_ = std::make_shared<TensorInplaceVersion>(0);
+  meta_ = DenseTensorMeta();
+  meta_.dtype = TransToPtenDataType(dtype);
+  meta_.offset = 0;
+}
+
+DenseTensor& DenseTensor::ShareDataWith(const DenseTensor& src) {
+  src.check_memory_size();
+  *this = src;
+  return *this;
+}
+
+DenseTensor& DenseTensor::ShareInplaceVersionCounterWith(
+    const DenseTensor& src) {
+  PADDLE_ENFORCE_NOT_NULL(
+      inplace_version_counter_,
+      paddle::platform::errors::PreconditionNotMet(
+          "Tensor does not hold inplace_version_counter_."));
+
+  inplace_version_counter_ = src.inplace_version_counter_;
+  return *this;
+}
+
+size_t DenseTensor::memory_size() const {
+  if (storage_ == nullptr || storage_->data_shared() == nullptr) {
+    return 0UL;
+  }
+
+  return storage_->data_shared()->size() - meta_.offset;
+}
+
+void DenseTensor::check_memory_size() const {
+  PADDLE_ENFORCE_NOT_NULL(storage_,
+                          paddle::platform::errors::PreconditionNotMet(
+                              "Tensor holds no memory. "
+                              "Call Tensor::mutable_data firstly."));
+  PADDLE_ENFORCE_NOT_NULL(storage_->data_shared(),
+                          paddle::platform::errors::PreconditionNotMet(
+                              "Tensor holds no memory. "
+                              "Call Tensor::mutable_data firstly."));
+  size_t size = numel() * SizeOf(dtype());
+
+  PADDLE_ENFORCE_LE(
+      size,
+      memory_size(),
+      paddle::platform::errors::PreconditionNotMet(
+          "Tensor's dimension is out of bounds. "
+          "Tensor's dimension must be less than or equal to the size of its "
+          "memory. "
+          "But received Tensor's dimension is %d, memory's size is %d.",
+          size,
+          memory_size()));
+}
+
+const paddle::platform::Place& DenseTensor::place() const {
+  PADDLE_ENFORCE_NOT_NULL(
+      storage_,
+      paddle::platform::errors::PreconditionNotMet(
+          "Tensor not initialized yet when Tensor::place() is called."));
+  return storage_->place();
+}
+
+paddle::framework::proto::VarType::Type DenseTensor::type() const {
+  PADDLE_ENFORCE_NOT_NULL(
+      storage_,
+      paddle::platform::errors::PreconditionNotMet(
+          "Tensor not initialized yet when Tensor::type() is called."));
+  return TransToProtoVarType(meta_.dtype);
+}
+
+paddle::framework::proto::VarType::Type DenseTensor::saved_type() const {
+  return TransToProtoVarType(meta_.dtype);
+}
+
+void DenseTensor::set_layout(const paddle::framework::DataLayout layout) {
+  meta_.layout = layout;
+}
+
+void DenseTensor::ResetHolder(
+    const std::shared_ptr<paddle::memory::Allocation>& holder) {
+  PADDLE_ENFORCE_EQ(
+      meta_.offset,
+      0,
+      paddle::platform::errors::Fatal(
+          "Only a zero offset is supported when the holder is reset."));
+
+  if (storage_ == nullptr) {
+    PADDLE_THROW(
+        paddle::platform::errors::Fatal("storage_ has to be initialized before "
+                                        "calling the ResetHolder() interface."));
+  }
+
+  if (storage_->data_shared()) {
+    PADDLE_ENFORCE_LE(
+        numel() * SizeOf(dtype()) + meta_.offset,
+        storage_->data_shared()->size(),
+        paddle::platform::errors::InvalidArgument(
+            "The size of Holder is not enough to store the Tensor."));
+  }
+
+  storage_->set_data_shared(holder);
+}
+
+void DenseTensor::ResetHolderWithType(
+    const std::shared_ptr<paddle::memory::Allocation>& holder,
+    const paddle::framework::proto::VarType::Type& type) {
+  set_type(type);
+  ResetHolder(holder);
+}
+
+void DenseTensor::set_type(
+    const paddle::framework::proto::VarType::Type& type) {
+  meta_.dtype = TransToPtenDataType(type);
+}
+
+DenseTensor DenseTensor::Slice(int64_t begin_idx, int64_t end_idx) const {
+  check_memory_size();
+  PADDLE_ENFORCE_GE(begin_idx,
+                    0,
+                    paddle::platform::errors::OutOfRange(
+                        "The start row index must be greater than or equal "
+                        "to 0. But received the start index is %d.",
+                        begin_idx));
+  PADDLE_ENFORCE_LE(end_idx,
+                    meta_.dims[0],
+                    paddle::platform::errors::OutOfRange(
+                        "The end row index is out of bounds."));
+  PADDLE_ENFORCE_LT(
+      begin_idx,
+      end_idx,
+      paddle::platform::errors::InvalidArgument(
+          "The start row index must be less than the end row index. "
+ "But received the start index = %d, the end index = %d.", + begin_idx, + end_idx)); + + if (meta_.dims[0] == 1) { + return *this; + } else { + size_t base = numel() / meta_.dims[0]; + DenseTensor dst; + dst.storage_ = std::move(copy_intrusive(storage_)); + dst.meta_.layout = meta_.layout; + dst.meta_.dtype = meta_.dtype; + DDim dst_dims = meta_.dims; + dst_dims[0] = end_idx - begin_idx; + dst.Resize(dst_dims); + dst.meta_.offset = meta_.offset + begin_idx * base * SizeOf(dtype()); + return dst; + } +} + +std::vector DenseTensor::Split(int64_t split_size, + int64_t axis) const { + check_memory_size(); + + PADDLE_ENFORCE_GE(meta_.dims.size(), + 0, + paddle::platform::errors::OutOfRange( + "split expects at least a 1-dimensional tensor")); + + PADDLE_ENFORCE_GE( + split_size, + 0, + paddle::platform::errors::OutOfRange( + "split expects split_size be non-negative, but got split_size is %d", + split_size)); + + int64_t numel_size = meta_.dims[axis]; + + int64_t num_splits = 1; + if (split_size != 0) { + num_splits = + std::max((numel_size + split_size - 1) / split_size, 1); + } + + std::vector splits(num_splits); + int64_t last_split_size = split_size - (split_size * num_splits - numel_size); + + for (int64_t i = 0; i < num_splits; ++i) { + int64_t length = i < num_splits - 1 ? split_size : last_split_size; + splits[i] = Slice(i * split_size, i * split_size + length); + } + return splits; +} + +std::vector DenseTensor::Chunk(int64_t chunks, + int64_t axis) const { + check_memory_size(); + PADDLE_ENFORCE_GE(meta_.dims.size(), + 0, + paddle::platform::errors::OutOfRange( + "split expects at least a 1-dimensional tensor")); + PADDLE_ENFORCE_GE( + chunks, + 0, + paddle::platform::errors::OutOfRange( + "chunks expects to be greater than 0, but got chunks is %d", chunks)); + + int64_t numel_size = meta_.dims[axis]; + int64_t split_size = (numel_size + chunks - 1) / chunks; + return Split(split_size, axis); +} + +void* DenseTensor::mutable_data(const paddle::platform::Place& place, + paddle::framework::proto::VarType::Type type, + size_t requested_size) { + set_type(type); + PADDLE_ENFORCE_GE( + numel(), + 0, + paddle::platform::errors::PreconditionNotMet( + "The Tensor's element number must be equal or greater than zero. 
" + "The Tensor's shape is [", + dims(), + "] now")); + size_t size = numel() * SizeOf(dtype()); + if (requested_size && (requested_size > size)) { + size = requested_size; + } + + if (storage_ == nullptr) { + storage_ = make_intrusive(place); + } + + /* some versions of boost::variant don't have operator!= */ + if (storage_->data_shared() == nullptr || + !(storage_->data_shared()->place() == place) || + storage_->data_shared()->size() < size + meta_.offset) { + // Reset holder first before re-allocate to save memory + storage_->Clear(); + storage_->set_data_shared(paddle::memory::AllocShared(place, size)); + meta_.offset = 0; + } + return reinterpret_cast( + reinterpret_cast(storage_->data_shared()->ptr()) + + meta_.offset); +} + +void* DenseTensor::mutable_data(const paddle::platform::Place& place, + size_t requested_size) { + if (storage_ == nullptr) { + PADDLE_THROW(paddle::platform::errors::PreconditionNotMet( + "The tensor is not initialized.")); + } + + return mutable_data(place, type(), requested_size); +} + +void* DenseTensor::mutable_data(const paddle::platform::Place& place, + paddle::framework::proto::VarType::Type type, + const paddle::platform::Stream& stream) { + set_type(type); + PADDLE_ENFORCE_GE( + numel(), + 0, + paddle::platform::errors::PreconditionNotMet( + "The Tensor's element number must be equal or greater than zero. " + "The Tensor's shape is [", + dims(), + "] now")); + size_t size = numel() * SizeOf(dtype()); + + /* some versions of boost::variant don't have operator!= */ + if (storage_ == nullptr || storage_->data_shared() == nullptr || + !(storage_->data_shared()->place() == place) || + storage_->data_shared()->size() < size + meta_.offset || + !(paddle::platform::is_gpu_place(place) && + paddle::memory::InSameStream(storage_->data_shared(), stream))) { + storage_->Clear(); + storage_->set_data_shared(paddle::memory::AllocShared(place, size, stream)); + meta_.offset = 0; + } + return reinterpret_cast( + reinterpret_cast(storage_->data_shared()->ptr()) + + meta_.offset); +} + +/* @jim19930609: The following "mutable_data" only supports specific dtypes + defined in OpProto. This part need another clean up once the data type across + Fluid + and Pten get unified. 
+ */
+template <typename T>
+inline T* DenseTensor::mutable_data(const DDim& dims,
+                                    const paddle::platform::Place& place,
+                                    size_t requested_size) {
+  static_assert(std::is_pod<T>::value, "T must be POD");
+  Resize(dims);
+  return mutable_data<T>(place, requested_size);
+}
+
+template <typename T>
+inline T* DenseTensor::mutable_data(const paddle::platform::Place& place,
+                                    size_t requested_size) {
+  static_assert(std::is_pod<T>::value, "T must be POD");
+  return reinterpret_cast<T*>(mutable_data(
+      place, paddle::framework::DataTypeTrait<T>::DataType(), requested_size));
+}
+
+#define LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(dtype)    \
+  template dtype* DenseTensor::mutable_data(            \
+      const DDim& dims,                                 \
+      const paddle::platform::Place& place,             \
+      size_t requested_size);                           \
+  template dtype* DenseTensor::mutable_data(            \
+      const paddle::platform::Place& place, size_t requested_size);

+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(bool)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int8_t)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(uint8_t)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int16_t)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int64_t)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(float)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(double)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(::paddle::platform::bfloat16)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(::paddle::platform::float16)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex64)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex128)
+
+#undef LEGACY_DATA_MEMBER_FUNC_INSTANTIATION
+
+/* ------------------------------ */
+/*   From framework::LoDTensor    */
+/* ------------------------------ */
+
+DenseTensor::DenseTensor(const LoD& lod) : DenseTensor() { meta_.lod = lod; }
+
+void DenseTensor::set_lod(const LoD& lod) { meta_.lod = lod; }
+
+LoD* DenseTensor::mutable_lod() { return &meta_.lod; }
+
+std::pair<size_t, size_t> DenseTensor::lod_element(size_t level,
+                                                   size_t elem) const {
+  PADDLE_ENFORCE_LT(
+      level,
+      NumLevels(),
+      paddle::platform::errors::InvalidArgument(
+          "The input level of LoD is invalid, it should be less than LoD "
+          "size. The input level is %zu, the LoD size is %zu.",
+          level,
+          NumLevels()));
+
+  PADDLE_ENFORCE_LT(elem,
+                    NumElements(level),
+                    paddle::platform::errors::InvalidArgument(
+                        "The input element of LoD is invalid, it should be "
+                        "less than the number of elements in its level. "
+                        "The input element is %zu, the number of elements in "
+                        "its level is %zu.",
+                        elem,
+                        NumElements(level)));
+
+  return std::make_pair((meta_.lod)[level][elem], (meta_.lod)[level][elem + 1]);
+}
+
+size_t DenseTensor::NumLevels() const { return meta_.lod.size(); }
+
+size_t DenseTensor::NumElements(size_t level) const {
+  PADDLE_ENFORCE_LT(
+      level,
+      NumLevels(),
+      paddle::platform::errors::InvalidArgument(
+          "The input level of LoD is invalid, it should be less than LoD "
+          "size. The input level is %zu, the LoD size is %zu.",
+          level,
+          NumLevels()));
+
+  // the last offset is the end of the last element
+  return (meta_.lod)[level].size() - 1;
+}
+
 }  // namespace pten
diff --git a/paddle/pten/core/dense_tensor.h b/paddle/pten/core/dense_tensor.h
index ccbcf02ffe70a..eb149220f942d 100644
--- a/paddle/pten/core/dense_tensor.h
+++ b/paddle/pten/core/dense_tensor.h
@@ -14,15 +14,44 @@ limitations under the License.
 */

 #pragma once

+#include "paddle/fluid/framework/data_layout.h"
+#include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/platform/stream/stream.h"
+
 #include "paddle/pten/core/allocator.h"
 #include "paddle/pten/core/storage.h"
 #include "paddle/pten/core/tensor_base.h"
 #include "paddle/pten/core/tensor_meta.h"

+/* @jim19930609: Move to MKLDNN_Tensor in the future
+ */
+#ifdef PADDLE_WITH_MKLDNN
+#include "dnnl.hpp"
+#endif
+
 namespace pten {

 class CompatibleDenseTensorUtils;

+/* --------------------------- */
+/*   From framework::Tensor    */
+/* --------------------------- */
+/* Temporarily put TensorInplaceVersion inside DenseTensor.
+   Will move to AutogradMeta as soon as we switch to Eager Dygraph.
+ */
+class TensorInplaceVersion {
+ public:
+  explicit TensorInplaceVersion(uint32_t inplace_version = 0)
+      : inplace_version_(inplace_version) {}
+  bool IsUnique() const { return inplace_version_ == 0; }
+  void Bump() { ++inplace_version_; }
+  uint32_t CurrentVersion() const { return inplace_version_; }
+  void SetInplaceVersionToZero() { inplace_version_ = 0; }
+
+ private:
+  uint32_t inplace_version_;
+};
+
 /// \brief The Dense tensor store values in a contiguous sequential block
 /// of memory where all values are represented. Tensors or multi-dimensional
 /// arrays are used in math operators.
@@ -56,7 +85,7 @@ class DenseTensor : public TensorBase,
   /// \brief Because dense tensor is a kind of container, we give a default
   /// constructor to use for stl container. But the dense tensor created with
   /// the default constructor is not practical.
-  DenseTensor() = default;
+  // DenseTensor() = default;

   /// \brief Because dense tensor is a resource handle, we provide a default
   /// move constructor to support move semantics.
@@ -65,6 +94,9 @@ class DenseTensor : public TensorBase,
   /// \brief DenseTensor shallow copy constructor.
   DenseTensor(const DenseTensor& other);

+  /// \brief DenseTensor shallow copy assignment.
+  DenseTensor& operator=(const DenseTensor& other);
+
   /// \brief Destroy the tensor object and release exclusive resources.
   virtual ~DenseTensor() = default;

@@ -95,7 +127,7 @@ class DenseTensor : public TensorBase,
   /// \brief Returns the data place of the tensor.
   /// \return The data place of the tensor.
-  const Place& place() const override { return storage_->place(); }
+  const Place& place() const override;

   /// \brief Returns the meta information of the tensor.
   /// \return The meta information of the tensor.
@@ -124,7 +156,8 @@ class DenseTensor : public TensorBase,
   /// larger than the original value, the storage area will be reallocated.
   /// \param dims The new dims of the dense tensor.
   /// \param lod The new lod of the dense tensor.
-  void Resize(const DDim& dims);
+  // void Resize(const DDim& dims);
+  DenseTensor& Resize(const DDim& dims);

   /// \brief Change the lod information in the metadata.
   /// \param lod The new lod of the dense tensor.
@@ -174,6 +207,178 @@ class DenseTensor : public TensorBase,
  private:
   DenseTensorMeta meta_;
   intrusive_ptr<Storage> storage_;
+
+  /* --------------------------- */
+  /*   From framework::Tensor    */
+  /* --------------------------- */
+  /* The following members & interfaces were copied from framework::Tensor,
+     so as to facilitate the unification of different Tensors
+
+     Will be adjusted/removed/moved in the near future
+   */
+ public:
+  /* @jim19930609: The way the default constructor handles the allocator
+                   might change, according to the final design of
+                   Allocation - Allocator.
+   */
+  DenseTensor();
+
+  /* @jim19930609: Remove dependency on protobuf after Tensor Unification.
+   */
+  explicit DenseTensor(const paddle::framework::proto::VarType::Type& dtype);
+
+  inline bool IsInitialized() const {
+    return storage_ != nullptr && storage_->data() != nullptr;
+  }
+
+  template <typename T>
+  T* data();
+
+  void* data();
+
+  template <typename T>
+  T* mutable_data(const paddle::platform::Place& place,
+                  size_t requested_size = 0);
+
+  template <typename T>
+  T* mutable_data(const DDim& dims,
+                  const paddle::platform::Place& place,
+                  size_t requested_size = 0);
+
+  void* mutable_data(const paddle::platform::Place& place,
+                     paddle::framework::proto::VarType::Type type,
+                     size_t requested_size = 0);
+
+  void* mutable_data(const paddle::platform::Place& place,
+                     size_t requested_size = 0);
+
+  void* mutable_data(const paddle::platform::Place& place,
+                     paddle::framework::proto::VarType::Type type,
+                     const paddle::platform::Stream& stream);
+
+  /*! The internal of two tensors share the same memory block. */
+  DenseTensor& ShareDataWith(const DenseTensor& src);
+
+  /*! The internal of two tensors share the same inplace version counter. */
+  DenseTensor& ShareInplaceVersionCounterWith(const DenseTensor& src);
+
+  DenseTensor Slice(int64_t begin_idx, int64_t end_idx) const;
+
+  std::vector<DenseTensor> Split(int64_t split_size, int64_t axis) const;
+
+  std::vector<DenseTensor> Chunk(int64_t chunks, int64_t axis) const;
+
+  /* @jim19930609: Remove dependency on protobuf after Tensor Unification.
+   */
+  paddle::framework::proto::VarType::Type type() const;
+
+  /* @jim19930609: Remove dependency on protobuf after Tensor Unification.
+   */
+  paddle::framework::proto::VarType::Type saved_type() const;
+
+  // memory_size returns the held memory size in bytes.
+  size_t memory_size() const;
+
+  void check_memory_size() const;
+
+  void set_layout(const paddle::framework::DataLayout layout);
+
+  void clear() {
+    storage_.reset();
+    meta_.offset = 0;
+  }
+
+  void ShareBufferWith(const DenseTensor& tensor) {
+    storage_ = std::move(copy_intrusive(tensor.storage_));
+    meta_.offset = tensor.meta().offset;
+  }
+
+  void ShareDataTypeWith(const DenseTensor& tensor) {
+    meta_.dtype = tensor.meta().dtype;
+  }
+
+  bool IsSharedBufferWith(const DenseTensor& src) const {
+    return IsSharedWith(src);
+  }
+
+  const std::shared_ptr<paddle::memory::Allocation> Holder() const {
+    return storage_ == nullptr ? nullptr : std::move(storage_->data_shared());
+  }
+
+  void set_offset(size_t offset) { meta_.offset = offset; }
+  size_t offset() const { return meta_.offset; }
+
+  std::shared_ptr<paddle::memory::Allocation> MoveMemoryHolder() {
+    return storage_ == nullptr ? nullptr
+                               : std::move(storage_->move_data_shared());
+  }
+
+  void ResetHolder(const std::shared_ptr<paddle::memory::Allocation>& holder);
+
+  void ResetHolderWithType(
+      const std::shared_ptr<paddle::memory::Allocation>& holder,
+      const paddle::framework::proto::VarType::Type& type);
+
+  void set_type(const paddle::framework::proto::VarType::Type& type);
+
+  TensorInplaceVersion& InplaceVersionCounter() {
+    return *inplace_version_counter_;
+  }
+
+ private:
+  std::shared_ptr<TensorInplaceVersion> inplace_version_counter_;
+
+/* @jim19930609: This is a hack.
+   In general, it is badly designed to fuse MKLDNN-specific objects into a
+   generic Tensor.
+   We temporarily leave them here to unblock Tensor Unification progress.
+   In the final state, we should come up with an MKLDNN_Tensor and move the
+   following code there.
+ */
+#ifdef PADDLE_WITH_MKLDNN
+
+ public:
+  inline dnnl::memory::format_tag format() const { return format_; }
+
+  inline void set_format(const dnnl::memory::format_tag format) {
+    format_ = format;
+  }
+
+ protected:
+  /**
+   * @brief the detailed format of the memory block which has layout kMKLDNN
+   *
+   * @note MKLDNN lib supports various memory formats like nchw, nhwc, nChw8C,
+   *       nChw16c, etc. For an MKLDNN memory block, layout will be set as
+   *       DataLayout::kMKLDNN, meanwhile the detailed memory format will be
+   *       kept in this field.
+   */
+
+  dnnl::memory::format_tag format_ = dnnl::memory::format_tag::undef;
+#endif
+
+  /* ------------------------------ */
+  /*   From framework::LoDTensor    */
+  /* ------------------------------ */
+  /* The following members & interfaces were copied from framework::LoDTensor,
+     so as to facilitate the unification of different Tensors
+
+     Will be adjusted/removed/moved in the near future
+   */
+  explicit DenseTensor(const LoD& lod);
+
+  void set_lod(const LoD& lod);
+
+  LoD* mutable_lod();
+
+  /*
+   * Get the start offset and end offset of an element from LoD.
+   */
+  std::pair<size_t, size_t> lod_element(size_t level, size_t elem) const;
+
+  size_t NumLevels() const;
+
+  size_t NumElements(size_t level = 0) const;
 };

 }  // namespace pten
diff --git a/paddle/pten/core/storage.h b/paddle/pten/core/storage.h
index 7d4b6a28be22d..74c303697755a 100644
--- a/paddle/pten/core/storage.h
+++ b/paddle/pten/core/storage.h
@@ -36,7 +36,12 @@ class Storage : public intrusive_ref_counter<Storage> {
   Storage() = default;
   Storage(const Storage&) = delete;

-  /* --------- shared_ptr<Allocation> -------- */
+  /* @jim19930609: The following interfaces will be modified/replaced/removed
+                   as soon as the new Allocation - Allocator design is
+                   finalized.
+   */
+
+  /* --------- shared_ptr<Allocation> -------- */
   // Initialize a Storage with unique Allocation
   explicit Storage(std::shared_ptr<paddle::memory::Allocation>&& data)
       : data_(std::move(data)) {}
@@ -55,6 +60,15 @@ class Storage : public intrusive_ref_counter<Storage> {
     return data_;
   }

+  void set_data_shared(
+      const std::shared_ptr<paddle::memory::Allocation>& holder) {
+    data_ = holder;
+  }
+
+  std::shared_ptr<paddle::memory::Allocation> move_data_shared() {
+    return std::move(data_);
+  }
+
   virtual void ReallocShared(size_t n) {
     PADDLE_THROW(paddle::platform::errors::Unimplemented(
         "ReallocShared has not been overrided by the current Storage"));
diff --git a/paddle/pten/core/utils/intrusive_ptr.h b/paddle/pten/core/utils/intrusive_ptr.h
index f0e94fadac973..51546bbc5056f 100644
--- a/paddle/pten/core/utils/intrusive_ptr.h
+++ b/paddle/pten/core/utils/intrusive_ptr.h
@@ -40,6 +40,11 @@ class intrusive_ptr {
     rhs.reset();
   }

+  intrusive_ptr& operator=(intrusive_ptr&& rhs) {
+    px = std::move(rhs.px);
+    return *this;
+  }
+
   void reset() { this_type().swap(*this); }

   void reset(T* rhs) { this_type(rhs).swap(*this); }
diff --git a/paddle/pten/tests/core/test_dense_tensor.cc b/paddle/pten/tests/core/test_dense_tensor.cc
index 07ad582725d50..814f85fde3e40 100644
--- a/paddle/pten/tests/core/test_dense_tensor.cc
+++ b/paddle/pten/tests/core/test_dense_tensor.cc
@@ -65,7 +65,7 @@ TEST(dense_tensor, meta) {

 TEST(dense_tensor, def_ctor) {
   DenseTensor tensor_0;
-  CHECK(!tensor_0.valid());
+  CHECK(tensor_0.valid());
 }

 TEST(dense_tensor, ctor) {
@@ -97,7 +97,6 @@ TEST(dense_tensor, ctor) {
   check_dense_tensor(tensor_0, meta);

   DenseTensor tensor_2(make_intrusive<TensorStorage>(alloc), meta);
-  CHECK(tensor_2.data<float>() == nullptr);
   CHECK_NOTNULL(tensor_2.mutable_data<float>());
   check_dense_tensor(tensor_2, meta);
 }
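
Usage sketch (editor's note, not part of the patch): the snippet below shows how
the interfaces ported in this commit fit together, in particular the legacy
fluid-style construction path described in the Resize() comment. It is a
minimal sketch, assuming a CPU build with the pten headers on the include path;
make_ddim is the existing fluid helper, and the function and variable names are
illustrative only.

  #include "paddle/fluid/framework/ddim.h"
  #include "paddle/fluid/platform/place.h"
  #include "paddle/pten/core/dense_tensor.h"

  // Hypothetical example, not part of the Paddle source tree.
  void DenseTensorUsageSketch() {
    paddle::platform::CPUPlace place;

    // Legacy (fluid) path: the default constructor leaves storage_
    // uninitialized; Resize() only records the dims, and memory is first
    // allocated by the mutable_data(place) call.
    pten::DenseTensor t;
    t.Resize(paddle::framework::make_ddim({4, 3}));
    auto* data = t.mutable_data<float>(place);  // allocates 4 * 3 floats
    for (int64_t i = 0; i < t.numel(); ++i) {
      data[i] = static_cast<float>(i);
    }

    // Slice() shares the underlying storage and only adjusts the dims and
    // the storage offset, mirroring framework::Tensor::Slice.
    pten::DenseTensor rows = t.Slice(1, 3);  // rows 1..2, shape [2, 3]
    (void)rows;
  }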