From dfdc9960e8e32878974824e2e4b0b097f062ad59 Mon Sep 17 00:00:00 2001
From: Zhanlue Yang
Date: Tue, 4 Jan 2022 12:06:28 +0800
Subject: [PATCH] [Unify Tensors PR #3]Port framework::Tensor members &
 interfaces to pten::DenseTensor, test=allcases (#38473)

* Added shared_ptr member & corresponding interfaces to Storage

* Removed original pten::Allocation from Storage and adjusted the
  interfaces accordingly

* Fixed issues with storage offset

* Used place to malloc allocation for TensorStorage

* [Unify Tensors PR #3]Ported framework::Tensor interfaces to
  pten::DenseTensor

* Fixed issues with place

* Added comments

* Moved mutable_data with stream argument to DenseTensor

* Added set_offset interface

* Fixed CI issues,test=allcases

* [Unify Tensors PR #4] Port LoDTensor interfaces to DenseTensor

* Reverted changes to pten_layout() interface

* Removed friend classes
---
 paddle/fluid/framework/CMakeLists.txt       |   8 +-
 paddle/fluid/framework/tensor.h             |   2 +
 paddle/fluid/framework/tensor_impl.h        |   6 +
 paddle/fluid/framework/tensor_util.cc       |  40 +-
 paddle/fluid/framework/tensor_util.h        |   6 +
 paddle/pten/core/CMakeLists.txt             |   7 +-
 paddle/pten/core/dense_tensor.cc            | 442 +++++++++++++++++++-
 paddle/pten/core/dense_tensor.h             | 211 +++++++++-
 paddle/pten/core/storage.h                  |  16 +-
 paddle/pten/core/utils/intrusive_ptr.h      |   5 +
 paddle/pten/tests/core/test_dense_tensor.cc |   3 +-
 11 files changed, 721 insertions(+), 25 deletions(-)

diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index df7e543d99c54..bd096f41ccc49 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -68,15 +68,15 @@ cc_test(data_type_test SRCS data_type_test.cc DEPS data_type place tensor)
 if(WITH_GPU)
   if (WIN32)
     windows_symbolic(tensor_util SRCS tensor_util.cu)
-    nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context)
+    nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context dense_tensor)
     add_dependencies(tensor tensor_util)
   else()
-    nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler)
+    nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler dense_tensor)
   endif(WIN32)
 elseif(WITH_ROCM)
-  hip_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler)
+  hip_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler dense_tensor)
 else()
-  cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context profiler)
+  cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context profiler dense_tensor)
 endif()

 cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index b7cc57d5e042e..7eebd97d06523 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -120,6 +120,8 @@ class Tensor {
   explicit Tensor(const proto::VarType::Type&);

   /*! Return a pointer to mutable memory block.
   */
+  const void* data() const;
+
   template <typename T>
   T* data();

diff --git a/paddle/fluid/framework/tensor_impl.h b/paddle/fluid/framework/tensor_impl.h
index 986551b935e88..a83b5d0662bb9 100644
--- a/paddle/fluid/framework/tensor_impl.h
+++ b/paddle/fluid/framework/tensor_impl.h
@@ -54,6 +54,12 @@ inline T* Tensor::data() {
                               offset_);
 }

+inline const void* Tensor::data() const {
+  check_memory_size();
+  return reinterpret_cast<const void*>(
+      reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
+}
+
 template <typename T>
 inline T* Tensor::mutable_data(const DDim& dims, const platform::Place& place,
                                size_t requested_size) {
diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc
index f0e5a447fd2da..6394e84c81a2b 100644
--- a/paddle/fluid/framework/tensor_util.cc
+++ b/paddle/fluid/framework/tensor_util.cc
@@ -23,6 +23,9 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/platform/complex.h"
 #include "paddle/fluid/platform/profiler.h"
+
+#include "paddle/pten/core/dense_tensor.h"
+
 #ifdef PADDLE_WITH_MKLDNN
 #include "dnnl_debug.h"  // NOLINT
 #endif
@@ -30,11 +33,12 @@ limitations under the License. */
 namespace paddle {
 namespace framework {

-void TensorCopy(const Tensor& src, const platform::Place& dst_place,
-                const platform::DeviceContext& ctx, Tensor* dst) {
+template <typename TENSOR>
+void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
+                    const platform::DeviceContext& ctx, TENSOR* dst) {
   if (&src == dst) {
     auto src_copy = src;
-    TensorCopy(src_copy, dst_place, ctx, dst);
+    TensorCopyImpl(src_copy, dst_place, ctx, dst);
     return;
   }

@@ -45,7 +49,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
   dst->Resize(src.dims());
   dst->set_layout(src.layout());
   auto src_place = src.place();
-  auto src_ptr = src.data<void>();
+  auto src_ptr = src.data();
 #ifdef PADDLE_WITH_MKLDNN
   dst->set_format(src.format());
   // oneDNN tensors due to padding may be of bigger size
@@ -389,8 +393,9 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
 #endif
 }

-void TensorCopy(const Tensor& src, const platform::Place& dst_place,
-                Tensor* dst) {
+template <typename TENSOR>
+void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
+                    TENSOR* dst) {
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
   const platform::DeviceContext* dev_ctx;
   if (platform::is_gpu_place(dst_place) || platform::is_npu_place(dst_place)) {
@@ -398,7 +403,24 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
   } else {
     dev_ctx = pool.Get(src.place());
   }
-  TensorCopy(src, dst_place, *dev_ctx, dst);
+  TensorCopyImpl(src, dst_place, *dev_ctx, dst);
+}
+
+void TensorCopy(const Tensor& src, const platform::Place& dst_place,
+                Tensor* dst) {
+  TensorCopyImpl(src, dst_place, dst);
+}
+void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
+                pten::DenseTensor* dst) {
+  TensorCopyImpl(src, dst_place, dst);
+}
+void TensorCopy(const Tensor& src, const platform::Place& dst_place,
+                const platform::DeviceContext& ctx, Tensor* dst) {
+  TensorCopyImpl(src, dst_place, ctx, dst);
+}
+void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
+                const platform::DeviceContext& ctx, pten::DenseTensor* dst) {
+  TensorCopyImpl(src, dst_place, ctx, dst);
 }

 void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
@@ -418,7 +440,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
   dst->set_format(src.format());
 #endif
   auto src_place = src.place();
-  auto src_ptr = src.data<void>();
+  auto src_ptr = src.data();
   auto dst_ptr = dst->mutable_data(dst_place, src.type());

   if (src_ptr == dst_ptr && src_place == dst_place) {
@@ -971,7 +993,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor,
   {  // the 3rd field, tensor data
     uint64_t size = tensor.numel() * framework::SizeOfType(tensor.type());

-    auto* data_ptr = tensor.data<void>();
+    auto* data_ptr = tensor.data();
     PADDLE_ENFORCE_LT(size, (std::numeric_limits<std::streamsize>::max)(),
                       platform::errors::ResourceExhausted(
                           "tensor size %d overflow when writing tensor", size));
diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h
index 575e2171652a2..46eba6a1e41bb 100644
--- a/paddle/fluid/framework/tensor_util.h
+++ b/paddle/fluid/framework/tensor_util.h
@@ -34,6 +34,8 @@ limitations under the License. */
 #include "paddle/fluid/platform/device/mlu/device_context.h"
 #endif

+#include "paddle/pten/core/dense_tensor.h"
+
 namespace paddle {
 namespace framework {

@@ -75,6 +77,8 @@ class Tensor;

 void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                 const platform::DeviceContext& ctx, Tensor* dst);
+void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
+                const platform::DeviceContext& ctx, pten::DenseTensor* dst);

 // NOTE(zcd): If the src.place() and dst_place are two different GPU,
 // the copy operation is carried out on the dst_place's stream. This is
@@ -85,6 +89,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
 // not completed.
 void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                 Tensor* dst);
+void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
+                pten::DenseTensor* dst);

 void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
                     Tensor* dst);
diff --git a/paddle/pten/core/CMakeLists.txt b/paddle/pten/core/CMakeLists.txt
index 28c522d7ea640..87c3612e35424 100644
--- a/paddle/pten/core/CMakeLists.txt
+++ b/paddle/pten/core/CMakeLists.txt
@@ -9,6 +9,11 @@ endif()
 cc_library(kernel_factory SRCS kernel_factory.cc DEPS enforce convert_utils)
 cc_library(kernel_context SRCS kernel_context.cc DEPS enforce pten_context)

 cc_library(tensor_base SRCS tensor_base.cc allocator.cc storage.cc DEPS enforce)
 cc_library(tensor_meta SRCS tensor_meta.cc DEPS enforce mixed_vector)
-cc_library(dense_tensor SRCS dense_tensor.cc DEPS tensor_meta tensor_base)
+cc_library(dense_tensor SRCS dense_tensor.cc DEPS convert_utils tensor_meta tensor_base)
+
+# Will be removed once we implement MKLDNN_Tensor
+if(WITH_MKLDNN)
+  add_dependencies(dense_tensor mkldnn)
+endif()
diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc
index 6fe85610612ac..d8d83c575c4cf 100644
--- a/paddle/pten/core/dense_tensor.cc
+++ b/paddle/pten/core/dense_tensor.cc
@@ -19,6 +19,9 @@ limitations under the License.
 */

 #include "paddle/fluid/platform/complex.h"
 #include "paddle/fluid/platform/float16.h"

+#include "paddle/pten/api/lib/utils/storage.h"
+#include "paddle/pten/core/convert_utils.h"
+
 namespace pten {

 DenseTensor::DenseTensor(const std::shared_ptr<Allocator>& a,
@@ -41,6 +44,12 @@ DenseTensor::DenseTensor(intrusive_ptr<Storage> storage, DenseTensorMeta&& meta)
 DenseTensor::DenseTensor(const DenseTensor& other)
     : meta_(other.meta()), storage_(copy_intrusive(other.storage_)) {}

+DenseTensor& DenseTensor::operator=(const DenseTensor& other) {
+  meta_ = other.meta();
+  storage_ = std::move(copy_intrusive(other.storage_));
+  return *this;
+}
+
 int64_t DenseTensor::numel() const {
   if (meta_.is_scalar) {
     return 1;
@@ -105,6 +114,7 @@ T* DenseTensor::mutable_data() {

 template <typename T>
 const T* DenseTensor::data() const {
+  check_memory_size();
   PADDLE_ENFORCE(
       (dtype() == paddle::experimental::CppTypeToDataType<T>::Type()),
       paddle::platform::errors::InvalidArgument(
@@ -113,7 +123,31 @@ const T* DenseTensor::data() const {
   return static_cast<const T*>(data());
 }

+template <typename T>
+T* DenseTensor::data() {
+  check_memory_size();
+  PADDLE_ENFORCE(
+      (dtype() == paddle::experimental::CppTypeToDataType<T>::Type()),
+      paddle::platform::errors::InvalidArgument(
+          "The type of data we are trying to retrieve does not match the "
+          "type of data currently contained in the container."));
+  PADDLE_ENFORCE_NOT_NULL(
+      storage_,
+      paddle::platform::errors::PreconditionNotMet(
+          "The storage must be valid when calling the data function."));
+  return reinterpret_cast<T*>(data());
+}
+
 const void* DenseTensor::data() const {
+  PADDLE_ENFORCE_NOT_NULL(
+      storage_,
+      paddle::platform::errors::PreconditionNotMet(
+          "The storage must be valid when calling the data function."));
+  return reinterpret_cast<const void*>(
+      reinterpret_cast<uintptr_t>(storage_->data()) + meta_.offset);
+}
+
+void* DenseTensor::data() {
   PADDLE_ENFORCE_NOT_NULL(
       storage_,
       paddle::platform::errors::PreconditionNotMet(
@@ -130,16 +164,30 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) {
   meta_ = std::move(meta);
 }

-void DenseTensor::Resize(const DDim& dims) {
+/* @jim19930609: This interface will be further modified until we finalize the
+   design for Allocator - Allocation.
+   For now, we have to temporarily accommodate two independent use cases:
+   1. Designed behaviour: DenseTensor constructed with its underlying storage_
+      initialized
+   2. Legacy behaviour (fluid): DenseTensor constructed using the default
+      constructor, where storage_ won't be initialized until the first
+      call to mutable_data(place)
+ */
+DenseTensor& DenseTensor::Resize(const DDim& dims) {
   meta_.dims = dims;
-  mutable_data();
+  if (storage_ != nullptr) {
+    mutable_data();
+  }
+  return *this;
 }

 void DenseTensor::ResetLoD(const LoD& lod) { meta_.lod = lod; }

-#define DATA_MEMBER_FUNC_INSTANTIATION(dtype) \
-  template dtype* DenseTensor::mutable_data(); \
-  template const dtype* DenseTensor::data() const;
+#define DATA_MEMBER_FUNC_INSTANTIATION(dtype)      \
+  template dtype* DenseTensor::mutable_data();     \
+  template const dtype* DenseTensor::data() const; \
+  template dtype* DenseTensor::data();

 DATA_MEMBER_FUNC_INSTANTIATION(bool);
 DATA_MEMBER_FUNC_INSTANTIATION(int8_t);
@@ -159,4 +207,388 @@ DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex128);

 #undef DATA_MEMBER_FUNC_INSTANTIATION

+/* --------------------------- */
+/*   From framework::Tensor    */
+/* --------------------------- */
+DenseTensor::DenseTensor() {
+  inplace_version_counter_ = std::make_shared<TensorInplaceVersion>(0);
+  meta_ = DenseTensorMeta();
+  meta_.dtype = paddle::experimental::DataType::FLOAT32;
+  meta_.offset = 0;
+}
+
+DenseTensor::DenseTensor(const paddle::framework::proto::VarType::Type& dtype) {
+  inplace_version_counter_ = std::make_shared<TensorInplaceVersion>(0);
+  meta_ = DenseTensorMeta();
+  meta_.dtype = TransToPtenDataType(dtype);
+  meta_.offset = 0;
+}
+
+DenseTensor& DenseTensor::ShareDataWith(const DenseTensor& src) {
+  src.check_memory_size();
+  *this = src;
+  return *this;
+}
+
+DenseTensor& DenseTensor::ShareInplaceVersionCounterWith(
+    const DenseTensor& src) {
+  PADDLE_ENFORCE_NOT_NULL(
+      inplace_version_counter_,
+      paddle::platform::errors::PreconditionNotMet(
+          "Tensor does not hold inplace_version_counter_."));
+
+  inplace_version_counter_ = src.inplace_version_counter_;
+  return *this;
+}
+
+size_t DenseTensor::memory_size() const {
+  if (storage_ == nullptr || storage_->data_shared() == nullptr) {
+    return 0UL;
+  }
+
+  return storage_->data_shared()->size() - meta_.offset;
+}
+
+void DenseTensor::check_memory_size() const {
+  PADDLE_ENFORCE_NOT_NULL(storage_,
+                          paddle::platform::errors::PreconditionNotMet(
+                              "Tensor holds no memory. "
+                              "Call Tensor::mutable_data firstly."));
+  PADDLE_ENFORCE_NOT_NULL(storage_->data_shared(),
+                          paddle::platform::errors::PreconditionNotMet(
+                              "Tensor holds no memory. "
+                              "Call Tensor::mutable_data firstly."));
+  size_t size = numel() * SizeOf(dtype());
+
+  PADDLE_ENFORCE_LE(
+      size,
+      memory_size(),
+      paddle::platform::errors::PreconditionNotMet(
+          "Tensor's dimension is out of bounds. "
+          "Tensor's dimension must be less than or equal to the size of its "
+          "memory. "
+          "But received Tensor's dimension is %d, memory's size is %d.",
+          size,
+          memory_size()));
+}
+
+const paddle::platform::Place& DenseTensor::place() const {
+  PADDLE_ENFORCE_NOT_NULL(
+      storage_,
+      paddle::platform::errors::PreconditionNotMet(
+          "Tensor not initialized yet when Tensor::place() is called."));
+  return storage_->place();
+}
+
+paddle::framework::proto::VarType::Type DenseTensor::type() const {
+  PADDLE_ENFORCE_NOT_NULL(
+      storage_,
+      paddle::platform::errors::PreconditionNotMet(
+          "Tensor not initialized yet when Tensor::type() is called."));
+  return TransToProtoVarType(meta_.dtype);
+}
+
+paddle::framework::proto::VarType::Type DenseTensor::saved_type() const {
+  return TransToProtoVarType(meta_.dtype);
+}
+
+void DenseTensor::set_layout(const paddle::framework::DataLayout layout) {
+  meta_.layout = layout;
+}
+
+void DenseTensor::ResetHolder(
+    const std::shared_ptr<paddle::memory::Allocation>& holder) {
+  PADDLE_ENFORCE_EQ(
+      meta_.offset,
+      0,
+      paddle::platform::errors::Fatal(
+          "Only a zero offset is supported when the holder is reset."));
+
+  if (storage_ == nullptr) {
+    PADDLE_THROW(
+        paddle::platform::errors::Fatal("storage_ has to be initialized before "
+                                        "calling the ResetHolder() interface."));
+  }
+
+  if (storage_->data_shared()) {
+    PADDLE_ENFORCE_LE(
+        numel() * SizeOf(dtype()) + meta_.offset,
+        storage_->data_shared()->size(),
+        paddle::platform::errors::InvalidArgument(
+            "The size of Holder is not enough to store the Tensor."));
+  }
+
+  storage_->set_data_shared(holder);
+}
+
+void DenseTensor::ResetHolderWithType(
+    const std::shared_ptr<paddle::memory::Allocation>& holder,
+    const paddle::framework::proto::VarType::Type& type) {
+  set_type(type);
+  ResetHolder(holder);
+}
+
+void DenseTensor::set_type(
+    const paddle::framework::proto::VarType::Type& type) {
+  meta_.dtype = TransToPtenDataType(type);
+}
+
+DenseTensor DenseTensor::Slice(int64_t begin_idx, int64_t end_idx) const {
+  check_memory_size();
+  PADDLE_ENFORCE_GE(begin_idx,
+                    0,
+                    paddle::platform::errors::OutOfRange(
+                        "The start row index must be greater than or equal "
+                        "to 0. But received the start index is %d.",
+                        begin_idx));
+  PADDLE_ENFORCE_LE(end_idx,
+                    meta_.dims[0],
+                    paddle::platform::errors::OutOfRange(
+                        "The end row index is out of bounds."));
+  PADDLE_ENFORCE_LT(
+      begin_idx,
+      end_idx,
+      paddle::platform::errors::InvalidArgument(
+          "The start row index must be less than the end row index. "
+ "But received the start index = %d, the end index = %d.", + begin_idx, + end_idx)); + + if (meta_.dims[0] == 1) { + return *this; + } else { + size_t base = numel() / meta_.dims[0]; + DenseTensor dst; + dst.storage_ = std::move(copy_intrusive(storage_)); + dst.meta_.layout = meta_.layout; + dst.meta_.dtype = meta_.dtype; + DDim dst_dims = meta_.dims; + dst_dims[0] = end_idx - begin_idx; + dst.Resize(dst_dims); + dst.meta_.offset = meta_.offset + begin_idx * base * SizeOf(dtype()); + return dst; + } +} + +std::vector DenseTensor::Split(int64_t split_size, + int64_t axis) const { + check_memory_size(); + + PADDLE_ENFORCE_GE(meta_.dims.size(), + 0, + paddle::platform::errors::OutOfRange( + "split expects at least a 1-dimensional tensor")); + + PADDLE_ENFORCE_GE( + split_size, + 0, + paddle::platform::errors::OutOfRange( + "split expects split_size be non-negative, but got split_size is %d", + split_size)); + + int64_t numel_size = meta_.dims[axis]; + + int64_t num_splits = 1; + if (split_size != 0) { + num_splits = + std::max((numel_size + split_size - 1) / split_size, 1); + } + + std::vector splits(num_splits); + int64_t last_split_size = split_size - (split_size * num_splits - numel_size); + + for (int64_t i = 0; i < num_splits; ++i) { + int64_t length = i < num_splits - 1 ? split_size : last_split_size; + splits[i] = Slice(i * split_size, i * split_size + length); + } + return splits; +} + +std::vector DenseTensor::Chunk(int64_t chunks, + int64_t axis) const { + check_memory_size(); + PADDLE_ENFORCE_GE(meta_.dims.size(), + 0, + paddle::platform::errors::OutOfRange( + "split expects at least a 1-dimensional tensor")); + PADDLE_ENFORCE_GE( + chunks, + 0, + paddle::platform::errors::OutOfRange( + "chunks expects to be greater than 0, but got chunks is %d", chunks)); + + int64_t numel_size = meta_.dims[axis]; + int64_t split_size = (numel_size + chunks - 1) / chunks; + return Split(split_size, axis); +} + +void* DenseTensor::mutable_data(const paddle::platform::Place& place, + paddle::framework::proto::VarType::Type type, + size_t requested_size) { + set_type(type); + PADDLE_ENFORCE_GE( + numel(), + 0, + paddle::platform::errors::PreconditionNotMet( + "The Tensor's element number must be equal or greater than zero. 
" + "The Tensor's shape is [", + dims(), + "] now")); + size_t size = numel() * SizeOf(dtype()); + if (requested_size && (requested_size > size)) { + size = requested_size; + } + + if (storage_ == nullptr) { + storage_ = make_intrusive(place); + } + + /* some versions of boost::variant don't have operator!= */ + if (storage_->data_shared() == nullptr || + !(storage_->data_shared()->place() == place) || + storage_->data_shared()->size() < size + meta_.offset) { + // Reset holder first before re-allocate to save memory + storage_->Clear(); + storage_->set_data_shared(paddle::memory::AllocShared(place, size)); + meta_.offset = 0; + } + return reinterpret_cast( + reinterpret_cast(storage_->data_shared()->ptr()) + + meta_.offset); +} + +void* DenseTensor::mutable_data(const paddle::platform::Place& place, + size_t requested_size) { + if (storage_ == nullptr) { + PADDLE_THROW(paddle::platform::errors::PreconditionNotMet( + "The tensor is not initialized.")); + } + + return mutable_data(place, type(), requested_size); +} + +void* DenseTensor::mutable_data(const paddle::platform::Place& place, + paddle::framework::proto::VarType::Type type, + const paddle::platform::Stream& stream) { + set_type(type); + PADDLE_ENFORCE_GE( + numel(), + 0, + paddle::platform::errors::PreconditionNotMet( + "The Tensor's element number must be equal or greater than zero. " + "The Tensor's shape is [", + dims(), + "] now")); + size_t size = numel() * SizeOf(dtype()); + + /* some versions of boost::variant don't have operator!= */ + if (storage_ == nullptr || storage_->data_shared() == nullptr || + !(storage_->data_shared()->place() == place) || + storage_->data_shared()->size() < size + meta_.offset || + !(paddle::platform::is_gpu_place(place) && + paddle::memory::InSameStream(storage_->data_shared(), stream))) { + storage_->Clear(); + storage_->set_data_shared(paddle::memory::AllocShared(place, size, stream)); + meta_.offset = 0; + } + return reinterpret_cast( + reinterpret_cast(storage_->data_shared()->ptr()) + + meta_.offset); +} + +/* @jim19930609: The following "mutable_data" only supports specific dtypes + defined in OpProto. This part need another clean up once the data type across + Fluid + and Pten get unified. 
+ */
+template <typename T>
+inline T* DenseTensor::mutable_data(const DDim& dims,
+                                    const paddle::platform::Place& place,
+                                    size_t requested_size) {
+  static_assert(std::is_pod<T>::value, "T must be POD");
+  Resize(dims);
+  return mutable_data<T>(place, requested_size);
+}
+
+template <typename T>
+inline T* DenseTensor::mutable_data(const paddle::platform::Place& place,
+                                    size_t requested_size) {
+  static_assert(std::is_pod<T>::value, "T must be POD");
+  return reinterpret_cast<T*>(mutable_data(
+      place, paddle::framework::DataTypeTrait<T>::DataType(), requested_size));
+}
+
+#define LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(dtype)    \
+  template dtype* DenseTensor::mutable_data(            \
+      const DDim& dims,                                 \
+      const paddle::platform::Place& place,             \
+      size_t requested_size);                           \
+  template dtype* DenseTensor::mutable_data(            \
+      const paddle::platform::Place& place, size_t requested_size);

+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(bool)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int8_t)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(uint8_t)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int16_t)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int64_t)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(float)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(double)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(::paddle::platform::bfloat16)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(::paddle::platform::float16)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex64)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex128)
+
+#undef LEGACY_DATA_MEMBER_FUNC_INSTANTIATION
+
+/* ------------------------------ */
+/*   From framework::LoDTensor    */
+/* ------------------------------ */
+
+DenseTensor::DenseTensor(const LoD& lod) : DenseTensor() { meta_.lod = lod; }
+
+void DenseTensor::set_lod(const LoD& lod) { meta_.lod = lod; }
+
+LoD* DenseTensor::mutable_lod() { return &meta_.lod; }
+
+std::pair<size_t, size_t> DenseTensor::lod_element(size_t level,
+                                                   size_t elem) const {
+  PADDLE_ENFORCE_LT(
+      level,
+      NumLevels(),
+      paddle::platform::errors::InvalidArgument(
+          "The input level of LoD is invalid, it should be less than LoD "
+          "size. The input level is %zu, the LoD size is %zu.",
+          level,
+          NumLevels()));
+
+  PADDLE_ENFORCE_LT(elem,
+                    NumElements(level),
+                    paddle::platform::errors::InvalidArgument(
+                        "The input element of LoD is invalid, it should be "
+                        "less than the number of elements in its level. "
+                        "The input element is %zu, the number of elements in "
+                        "its level is %zu.",
+                        elem,
+                        NumElements(level)));
+
+  return std::make_pair((meta_.lod)[level][elem], (meta_.lod)[level][elem + 1]);
+}
+
+size_t DenseTensor::NumLevels() const { return meta_.lod.size(); }
+
+size_t DenseTensor::NumElements(size_t level) const {
+  PADDLE_ENFORCE_LT(
+      level,
+      NumLevels(),
+      paddle::platform::errors::InvalidArgument(
+          "The input level of LoD is invalid, it should be less than LoD "
+          "size. The input level is %zu, the LoD size is %zu.",
+          level,
+          NumLevels()));
+
+  // the last offset is the end of the last element
+  return (meta_.lod)[level].size() - 1;
+}
+
 }  // namespace pten
diff --git a/paddle/pten/core/dense_tensor.h b/paddle/pten/core/dense_tensor.h
index ccbcf02ffe70a..eb149220f942d 100644
--- a/paddle/pten/core/dense_tensor.h
+++ b/paddle/pten/core/dense_tensor.h
@@ -14,15 +14,44 @@ limitations under the License.
 */

 #pragma once

+#include "paddle/fluid/framework/data_layout.h"
+#include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/platform/stream/stream.h"
+
 #include "paddle/pten/core/allocator.h"
 #include "paddle/pten/core/storage.h"
 #include "paddle/pten/core/tensor_base.h"
 #include "paddle/pten/core/tensor_meta.h"

+/* @jim19930609: Move to MKLDNN_Tensor in the future
+ */
+#ifdef PADDLE_WITH_MKLDNN
+#include "dnnl.hpp"
+#endif
+
 namespace pten {

 class CompatibleDenseTensorUtils;

+/* --------------------------- */
+/*   From framework::Tensor    */
+/* --------------------------- */
+/* Temporarily put TensorInplaceVersion inside DenseTensor.
+   Will move to AutogradMeta as soon as we switch to Eager Dygraph.
+ */
+class TensorInplaceVersion {
+ public:
+  explicit TensorInplaceVersion(uint32_t inplace_version = 0)
+      : inplace_version_(inplace_version) {}
+  bool IsUnique() const { return inplace_version_ == 0; }
+  void Bump() { ++inplace_version_; }
+  uint32_t CurrentVersion() const { return inplace_version_; }
+  void SetInplaceVersionToZero() { inplace_version_ = 0; }
+
+ private:
+  uint32_t inplace_version_;
+};
+
 /// \brief The Dense tensor store values in a contiguous sequential block
 /// of memory where all values are represented. Tensors or multi-dimensional
 /// arrays are used in math operators.
@@ -56,7 +85,7 @@ class DenseTensor : public TensorBase,
   /// \brief Because dense tensor is a kind of container, we give a default
   /// constructor to use for stl container. But the dense tensor created with
   /// the default constructor is not practical.
-  DenseTensor() = default;
+  // DenseTensor() = default;

   /// \brief Because dense tensor is a resource handle, we provide a default
   /// move constructor to support move semantics.
@@ -65,6 +94,9 @@ class DenseTensor : public TensorBase,
   /// \brief DenseTensor shallow copy constructor.
   DenseTensor(const DenseTensor& other);

+  /// \brief DenseTensor shallow copy assignment.
+  DenseTensor& operator=(const DenseTensor& other);
+
   /// \brief Destroy the tensor object and release exclusive resources.
   virtual ~DenseTensor() = default;

@@ -95,7 +127,7 @@ class DenseTensor : public TensorBase,
   /// \brief Returns the data place of the tensor.
   /// \return The data place of the tensor.
-  const Place& place() const override { return storage_->place(); }
+  const Place& place() const override;

   /// \brief Returns the meta information of the tensor.
   /// \return The meta information of the tensor.
@@ -124,7 +156,8 @@ class DenseTensor : public TensorBase,
   /// larger than the original value, the storage area will be reallocated.
   /// \param dims The new dims of the dense tensor.
   /// \param lod The new lod of the dense tensor.
-  void Resize(const DDim& dims);
+  // void Resize(const DDim& dims);
+  DenseTensor& Resize(const DDim& dims);

   /// \brief Change the lod information in the metadata.
   /// \param lod The new lod of the dense tensor.
@@ -174,6 +207,178 @@ class DenseTensor : public TensorBase,
  private:
   DenseTensorMeta meta_;
   intrusive_ptr<Storage> storage_;
+
+  /* --------------------------- */
+  /*   From framework::Tensor    */
+  /* --------------------------- */
+  /* The following members & interfaces were copied from framework::Tensor,
+     so as to facilitate the unification of different Tensors
+
+     Will be adjusted/removed/moved in the near future
+   */
+ public:
+  /* @jim19930609: The way the default constructor handles the allocator
+                   might change, according to the final design of
+                   Allocation - Allocator.
+   */
+  DenseTensor();
+
+  /* @jim19930609: Remove dependency on protobuf after Tensor Unification.
+   */
+  explicit DenseTensor(const paddle::framework::proto::VarType::Type& dtype);
+
+  inline bool IsInitialized() const {
+    return storage_ != nullptr && storage_->data() != nullptr;
+  }
+
+  template <typename T>
+  T* data();
+
+  void* data();
+
+  template <typename T>
+  T* mutable_data(const paddle::platform::Place& place,
+                  size_t requested_size = 0);
+
+  template <typename T>
+  T* mutable_data(const DDim& dims,
+                  const paddle::platform::Place& place,
+                  size_t requested_size = 0);
+
+  void* mutable_data(const paddle::platform::Place& place,
+                     paddle::framework::proto::VarType::Type type,
+                     size_t requested_size = 0);
+
+  void* mutable_data(const paddle::platform::Place& place,
+                     size_t requested_size = 0);
+
+  void* mutable_data(const paddle::platform::Place& place,
+                     paddle::framework::proto::VarType::Type type,
+                     const paddle::platform::Stream& stream);
+
+  /*! The internal of two tensors share the same memory block. */
+  DenseTensor& ShareDataWith(const DenseTensor& src);
+
+  /*! The internal of two tensors share the same inplace version counter. */
+  DenseTensor& ShareInplaceVersionCounterWith(const DenseTensor& src);
+
+  DenseTensor Slice(int64_t begin_idx, int64_t end_idx) const;
+
+  std::vector<DenseTensor> Split(int64_t split_size, int64_t axis) const;
+
+  std::vector<DenseTensor> Chunk(int64_t chunks, int64_t axis) const;
+
+  /* @jim19930609: Remove dependency on protobuf after Tensor Unification.
+   */
+  paddle::framework::proto::VarType::Type type() const;
+
+  /* @jim19930609: Remove dependency on protobuf after Tensor Unification.
+   */
+  paddle::framework::proto::VarType::Type saved_type() const;
+
+  // memory_size returns the held memory size in bytes.
+  size_t memory_size() const;
+
+  void check_memory_size() const;
+
+  void set_layout(const paddle::framework::DataLayout layout);
+
+  void clear() {
+    storage_.reset();
+    meta_.offset = 0;
+  }
+
+  void ShareBufferWith(const DenseTensor& tensor) {
+    storage_ = std::move(copy_intrusive(tensor.storage_));
+    meta_.offset = tensor.meta().offset;
+  }
+
+  void ShareDataTypeWith(const DenseTensor& tensor) {
+    meta_.dtype = tensor.meta().dtype;
+  }
+
+  bool IsSharedBufferWith(const DenseTensor& src) const {
+    return IsSharedWith(src);
+  }
+
+  const std::shared_ptr<paddle::memory::Allocation> Holder() const {
+    return storage_ == nullptr ? nullptr : std::move(storage_->data_shared());
+  }
+
+  void set_offset(size_t offset) { meta_.offset = offset; }
+  size_t offset() const { return meta_.offset; }
+
+  std::shared_ptr<paddle::memory::Allocation> MoveMemoryHolder() {
+    return storage_ == nullptr ? nullptr
+                               : std::move(storage_->move_data_shared());
+  }
+
+  void ResetHolder(const std::shared_ptr<paddle::memory::Allocation>& holder);
+
+  void ResetHolderWithType(
+      const std::shared_ptr<paddle::memory::Allocation>& holder,
+      const paddle::framework::proto::VarType::Type& type);
+
+  void set_type(const paddle::framework::proto::VarType::Type& type);
+
+  TensorInplaceVersion& InplaceVersionCounter() {
+    return *inplace_version_counter_;
+  }
+
+ private:
+  std::shared_ptr<TensorInplaceVersion> inplace_version_counter_;
+
+/* @jim19930609: This is a hack.
+   In general, it is badly designed to fuse MKLDNN-specific objects into a
+   generic Tensor.
+   We temporarily leave them here to unblock Tensor Unification progress.
+   In the final state, we should come up with an MKLDNN_Tensor and move the
+   following code there.
+ */
+#ifdef PADDLE_WITH_MKLDNN
+
+ public:
+  inline dnnl::memory::format_tag format() const { return format_; }
+
+  inline void set_format(const dnnl::memory::format_tag format) {
+    format_ = format;
+  }
+
+ protected:
+  /**
+   * @brief the detailed format of the memory block which has layout kMKLDNN
+   *
+   * @note MKLDNN lib supports various memory formats like nchw, nhwc, nChw8C,
+   *       nChw16c, etc. For an MKLDNN memory block, layout will be set as
+   *       DataLayout::kMKLDNN, meanwhile the detailed memory format will be
+   *       kept in this field.
+   */
+
+  dnnl::memory::format_tag format_ = dnnl::memory::format_tag::undef;
+#endif
+
+  /* ------------------------------ */
+  /*   From framework::LoDTensor    */
+  /* ------------------------------ */
+  /* The following members & interfaces were copied from framework::LoDTensor,
+     so as to facilitate the unification of different Tensors
+
+     Will be adjusted/removed/moved in the near future
+   */
+  explicit DenseTensor(const LoD& lod);
+
+  void set_lod(const LoD& lod);
+
+  LoD* mutable_lod();
+
+  /*
+   * Get the start offset and end offset of an element from LoD.
+   */
+  std::pair<size_t, size_t> lod_element(size_t level, size_t elem) const;
+
+  size_t NumLevels() const;
+
+  size_t NumElements(size_t level = 0) const;
 };

 }  // namespace pten
diff --git a/paddle/pten/core/storage.h b/paddle/pten/core/storage.h
index 7d4b6a28be22d..74c303697755a 100644
--- a/paddle/pten/core/storage.h
+++ b/paddle/pten/core/storage.h
@@ -36,7 +36,12 @@ class Storage : public intrusive_ref_counter<Storage> {
   Storage() = default;
   Storage(const Storage&) = delete;

-  /* --------- shared_ptr<Allocation> -------- */
+  /* @jim19930609: The following interfaces will be modified/replaced/removed
+                   as soon as the new Allocation - Allocator design is
+                   finalized.
+   */
+
+  /* --------- shared_ptr<Allocation> -------- */
   // Initialize a Storage with unique Allocation
   explicit Storage(std::shared_ptr<paddle::memory::Allocation>&& data)
       : data_(std::move(data)) {}
@@ -55,6 +60,15 @@ class Storage : public intrusive_ref_counter<Storage> {
     return data_;
   }

+  void set_data_shared(
+      const std::shared_ptr<paddle::memory::Allocation>& holder) {
+    data_ = holder;
+  }
+
+  std::shared_ptr<paddle::memory::Allocation> move_data_shared() {
+    return std::move(data_);
+  }
+
   virtual void ReallocShared(size_t n) {
     PADDLE_THROW(paddle::platform::errors::Unimplemented(
         "ReallocShared has not been overrided by the current Storage"));
diff --git a/paddle/pten/core/utils/intrusive_ptr.h b/paddle/pten/core/utils/intrusive_ptr.h
index f0e94fadac973..51546bbc5056f 100644
--- a/paddle/pten/core/utils/intrusive_ptr.h
+++ b/paddle/pten/core/utils/intrusive_ptr.h
@@ -40,6 +40,11 @@ class intrusive_ptr {
     rhs.reset();
   }

+  intrusive_ptr& operator=(intrusive_ptr&& rhs) {
+    px = std::move(rhs.px);
+    return *this;
+  }
+
   void reset() { this_type().swap(*this); }

   void reset(T* rhs) { this_type(rhs).swap(*this); }
diff --git a/paddle/pten/tests/core/test_dense_tensor.cc b/paddle/pten/tests/core/test_dense_tensor.cc
index 07ad582725d50..814f85fde3e40 100644
--- a/paddle/pten/tests/core/test_dense_tensor.cc
+++ b/paddle/pten/tests/core/test_dense_tensor.cc
@@ -65,7 +65,7 @@ TEST(dense_tensor, meta) {

 TEST(dense_tensor, def_ctor) {
   DenseTensor tensor_0;
-  CHECK(!tensor_0.valid());
+  CHECK(tensor_0.valid());
 }

 TEST(dense_tensor, ctor) {
@@ -97,7 +97,6 @@ TEST(dense_tensor, ctor) {
   check_dense_tensor(tensor_0, meta);

   DenseTensor tensor_2(make_intrusive<TensorStorage>(alloc), meta);
-  CHECK(tensor_2.data<float>() == nullptr);
   CHECK_NOTNULL(tensor_2.mutable_data<float>());
   check_dense_tensor(tensor_2, meta);
 }
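
Usage sketch (editor's note, not part of the patch): the snippet below shows how
the interfaces ported in this commit fit together, in particular the legacy
fluid-style construction path described in the Resize() comment. It is a
minimal sketch, assuming a CPU build with the pten headers on the include path;
make_ddim is the existing fluid helper, and the function and variable names are
illustrative only.

  #include "paddle/fluid/framework/ddim.h"
  #include "paddle/fluid/platform/place.h"
  #include "paddle/pten/core/dense_tensor.h"

  // Hypothetical example, not part of the Paddle source tree.
  void DenseTensorUsageSketch() {
    paddle::platform::CPUPlace place;

    // Legacy (fluid) path: the default constructor leaves storage_
    // uninitialized; Resize() only records the dims, and memory is first
    // allocated by the mutable_data(place) call.
    pten::DenseTensor t;
    t.Resize(paddle::framework::make_ddim({4, 3}));
    auto* data = t.mutable_data<float>(place);  // allocates 4 * 3 floats
    for (int64_t i = 0; i < t.numel(); ++i) {
      data[i] = static_cast<float>(i);
    }

    // Slice() shares the underlying storage and only adjusts the dims and
    // the storage offset, mirroring framework::Tensor::Slice.
    pten::DenseTensor rows = t.Slice(1, 3);  // rows 1..2, shape [2, 3]
    (void)rows;
  }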