[Unify Tensors PR #5] framework::Tensor inherits from DenseTensor, test=allcases (PaddlePaddle#38632)

* Added shared_ptr<Allocation> member & corresponding interfaces to Storage

* Removed original pten::Allocation from Storage and adjusted the interfaces accordingly

* Fixed issues with storage offset

* Used place to malloc allocation for TensorStorage

* [Unify Tensors PR #3] Ported framework::Tensor interfaces to pten::DenseTensor

* Fixed issues with place

* Added comments

* Moved mutable_data with stream argument to DenseTensor

* Added set_offset interface

* Fixed CI issues, test=allcases

* [Unify Tensors PR #4] Port LoDTensor interfaces to DenseTensor

* Removed friend class EigenTensor/EigenMatrix/EigenVector from Tensor

* Modified framework::Tensor to inherit from DenseTensor

* Reverted changes to pten_layout() interface

* Removed friend classes

* Rearranged function calls from tensor.data<void>() to tensor.data()

* Fixed CI issues

* Fixed lite issues

* Fixed data() interface issues, test=allcases

* Resolved IsInitialized() issues

* Fixed ResetHolder() issues

* Fixed MKLDNN & Storage issues

* Resolved ShareBufferWith() issues

* Fixed LoD issues
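For context, a minimal sketch of the structural change described in the commit message above, assuming simplified stand-in classes (this is not the actual Paddle code; the real pten::DenseTensor carries Storage/Allocation, dims, LoD, layout and dtype): framework::Tensor becomes a thin subclass of pten::DenseTensor, and the untyped data() accessor replaces data<void>() at call sites.

// Simplified sketch only -- member names and bodies are illustrative stand-ins.
#include <cstdint>
#include <vector>

namespace pten {
class DenseTensor {
 public:
  // Untyped accessors: return the raw buffer without a template argument.
  void* data() { return buffer_.data(); }
  const void* data() const { return buffer_.data(); }

  // Typed accessor kept for call sites that know the element type.
  template <typename T>
  T* data() {
    return reinterpret_cast<T*>(buffer_.data());
  }

 protected:
  std::vector<char> buffer_;  // stand-in for pten::Storage + Allocation
};
}  // namespace pten

namespace paddle {
namespace framework {
// After this PR series, framework::Tensor is a thin subclass of
// pten::DenseTensor, so fluid code and pten kernels share one representation.
class Tensor : public pten::DenseTensor {};
}  // namespace framework
}  // namespace paddle

int main() {
  paddle::framework::Tensor t;
  const void* raw = t.data();  // untyped form; replaces the old t.data<void>()
  (void)raw;
  return 0;
}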
jim19930609 authored Jan 10, 2022
1 parent 046553c commit 5c73a6e
Showing 61 changed files with 251 additions and 731 deletions.
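Nearly every hunk below applies the same mechanical substitution: tensor->data<void>() becomes the untyped tensor->data(), which returns void* / const void* directly. A minimal sketch of that call-site pattern, assuming stand-in types (FakeTensor and std::string here are illustrative substitutes for framework::LoDTensor and the brpc IOBuf, not the real classes):

#include <cstdint>
#include <string>
#include <vector>

// Stand-in for framework::LoDTensor, exposing the new untyped data() accessor.
struct FakeTensor {
  std::vector<float> values;
  const void* data() const { return values.data(); }  // was data<void>() before this PR
  int64_t numel() const { return static_cast<int64_t>(values.size()); }
};

// Mirrors the SerializeLodTensor pattern in the first diff below, with
// std::string standing in for the IOBuf: write an 8-byte length, then the
// raw tensor bytes.
void AppendTensorBytes(const FakeTensor& tensor, std::string* iobuf) {
  uint64_t data_len = tensor.numel() * sizeof(float);
  iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
  iobuf->append(reinterpret_cast<const char*>(tensor.data()), data_len);
}

int main() {
  FakeTensor t{{1.0f, 2.0f, 3.0f}};
  std::string buf;
  AppendTensorBytes(t, &buf);
  return buf.size() == 8 + 3 * sizeof(float) ? 0 : 1;
}

The reinterpret_cast<const char*> stays because the accessor is untyped; only the <void> template argument disappears at each call site.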
24 changes: 10 additions & 14 deletions paddle/fluid/distributed/service/brpc_utils.cc
@@ -103,19 +103,17 @@ void SerializeLodTensor(framework::Variable* var,
   if (platform::is_cpu_place(tensor->place())) {
     auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
     iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
-    iobuf->append(reinterpret_cast<const char*>(tensor->data<void>()),
-                  data_len);
+    iobuf->append(reinterpret_cast<const char*>(tensor->data()), data_len);
   } else {
 #ifdef PADDLE_WITH_CUDA
     char* temp_ptr =
         new char[tensor->numel() * framework::SizeOfType(tensor->type())];
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
-    memory::Copy(platform::CPUPlace(), temp_ptr,
-                 BOOST_GET_CONST(platform::CUDAPlace, tensor->place()),
-                 tensor->data<void>(),
-                 tensor->numel() * framework::SizeOfType(tensor->type()),
-                 stream);
+    memory::Copy(
+        platform::CPUPlace(), temp_ptr,
+        BOOST_GET_CONST(platform::CUDAPlace, tensor->place()), tensor->data(),
+        tensor->numel() * framework::SizeOfType(tensor->type()), stream);
     auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
     iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
     iobuf->append(reinterpret_cast<const char*>(temp_ptr), data_len);
@@ -147,19 +145,17 @@ void SerializeSelectedRows(framework::Variable* var,
   if (platform::is_cpu_place(tensor->place())) {
     auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
     iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
-    iobuf->append(reinterpret_cast<const char*>(tensor->data<void>()),
-                  data_len);
+    iobuf->append(reinterpret_cast<const char*>(tensor->data()), data_len);
   } else {
 #ifdef PADDLE_WITH_CUDA
     char* temp_ptr =
         new char[tensor->numel() * framework::SizeOfType(tensor->type())];
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
-    memory::Copy(platform::CPUPlace(), temp_ptr,
-                 BOOST_GET_CONST(platform::CUDAPlace, tensor->place()),
-                 tensor->data<void>(),
-                 tensor->numel() * framework::SizeOfType(tensor->type()),
-                 stream);
+    memory::Copy(
+        platform::CPUPlace(), temp_ptr,
+        BOOST_GET_CONST(platform::CUDAPlace, tensor->place()), tensor->data(),
+        tensor->numel() * framework::SizeOfType(tensor->type()), stream);
     auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
     iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
     iobuf->append(reinterpret_cast<const char*>(temp_ptr), data_len);
11 changes: 5 additions & 6 deletions paddle/fluid/distributed/service/heter_client.cc
@@ -34,7 +34,7 @@ int GetMicroId(const platform::DeviceContext& ctx,
   auto micro_id = -1;
   auto* tensor = var->GetMutable<framework::LoDTensor>();
   if (platform::is_cpu_place(tensor->place())) {
-    auto data = reinterpret_cast<const float*>(tensor->data<void>());
+    auto data = reinterpret_cast<const float*>(tensor->data());
     micro_id = static_cast<int>(data[0]);
   } else {
 #ifdef PADDLE_WITH_CUDA
@@ -43,11 +43,10 @@ int GetMicroId(const platform::DeviceContext& ctx,
     char* temp_ptr = temp.data();
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
-    memory::Copy(platform::CPUPlace(), temp_ptr,
-                 BOOST_GET_CONST(platform::CUDAPlace, tensor->place()),
-                 tensor->data<void>(),
-                 tensor->numel() * framework::SizeOfType(tensor->type()),
-                 stream);
+    memory::Copy(
+        platform::CPUPlace(), temp_ptr,
+        BOOST_GET_CONST(platform::CUDAPlace, tensor->place()), tensor->data(),
+        tensor->numel() * framework::SizeOfType(tensor->type()), stream);
     float* temp_ptr_float = reinterpret_cast<float*>(temp_ptr);
     micro_id = static_cast<int>(temp_ptr_float[0]);
 #endif
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/service/heter_server.h
@@ -240,7 +240,7 @@ class RequestSendAndRecvHandler final : public HeterRequestHandler {
                           platform::errors::InvalidArgument(
                               "Not find variable microbatch_id in scope."));
     auto* tensor = var->GetMutable<framework::LoDTensor>();
-    auto data = reinterpret_cast<const float*>(tensor->data<void>());
+    auto data = reinterpret_cast<const float*>(tensor->data());
     auto micro_id = static_cast<int>(data[0]);
 
     int minibatch_index = micro_id / 10;
2 changes: 1 addition & 1 deletion paddle/fluid/framework/CMakeLists.txt
@@ -91,7 +91,7 @@ endif()
 cc_test(copy_same_tensor_test SRCS copy_same_tensor_test.cc DEPS tensor)
 
 cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
-cc_library(mixed_vector SRCS mixed_vector.cc DEPS device_context)
+cc_library(mixed_vector SRCS mixed_vector.cc DEPS device_context place memory)
 
 if(WITH_GPU)
   nv_test(mixed_vector_test SRCS mixed_vector_test.cc mixed_vector_test.cu DEPS mixed_vector place memory device_context tensor)
4 changes: 2 additions & 2 deletions paddle/fluid/framework/copy_same_tensor_test.cc
@@ -77,8 +77,8 @@ static bool CopySameTensorTestMain(const DDim &dims,
     TensorCopySync(src_tensor, platform::CPUPlace(), &dst_cpu_tensor);
   }
 
-  const void *ground_truth_ptr = src_cpu_tensor.data<void>();
-  const void *result_ptr = dst_cpu_tensor.data<void>();
+  const void *ground_truth_ptr = src_cpu_tensor.data();
+  const void *result_ptr = dst_cpu_tensor.data();
   size_t byte_num = product(dims) * sizeof(T);
   return std::memcmp(ground_truth_ptr, result_ptr, byte_num) == 0;
 }
1 change: 0 additions & 1 deletion paddle/fluid/framework/data_transform.cc
@@ -45,7 +45,6 @@ void TransformData(const OpKernelType &expected_kernel_type,
   Tensor out;
   const DataLayout lin = kernel_type_for_var.data_layout_;
   const DataLayout lout = expected_kernel_type.data_layout_;
-
   // do layout transform
   if (NeedTransformLayout(lout, lin)) {
 #ifdef PADDLE_WITH_MKLDNN
8 changes: 4 additions & 4 deletions paddle/fluid/framework/details/all_reduce_op_handle.cc
@@ -153,7 +153,7 @@ void AllReduceOpHandle::AllReduceImpl(
                           "The place type of tensors of the same variable "
                           "in different local scopes should be equal."));
 
-    lod_tensor_data.emplace_back(lod_tensor.data<void>());
+    lod_tensor_data.emplace_back(lod_tensor.data());
     places.emplace_back(lod_tensor.place());
 
     VLOG(10) << "place:" << i << ", input_name:" << in_var_handles[i]->name()
@@ -225,7 +225,7 @@ void AllReduceOpHandle::AllReduceFunc(
                   ->GetMutable<LoDTensor>();
 
   // Reduce All Tensor to trg in CPU
-  ReduceBufferData func(lod_tensor_data, trg.data<void>(), numel);
+  ReduceBufferData func(lod_tensor_data, trg.data(), numel);
   VisitDataType(trg.type(), func);
 
   for (size_t i = 1; i < local_exec_scopes_.size(); ++i) {
@@ -235,9 +235,9 @@
 
     size_t size = numel * SizeOfType(trg.type());
     RunAndRecordEvent(p, [&trg, var, p, size] {
-      auto dst_ptr = var->GetMutable<framework::LoDTensor>()->data<void>();
+      auto dst_ptr = var->GetMutable<framework::LoDTensor>()->data();
       platform::CPUPlace cpu_place;
-      memory::Copy(cpu_place, dst_ptr, cpu_place, trg.data<void>(), size);
+      memory::Copy(cpu_place, dst_ptr, cpu_place, trg.data(), size);
     });
   }
 }
4 changes: 2 additions & 2 deletions paddle/fluid/framework/details/broadcast_op_handle.cc
@@ -101,7 +101,7 @@ void BroadcastOpHandle::BroadcastOneVar(
 
       void *send_recv_buffer = nullptr;
       if (root_id == dst_id) {
-        send_recv_buffer = const_cast<void *>(in_tensor.data<void>());
+        send_recv_buffer = const_cast<void *>(in_tensor.data());
         out_handle = out_var_handle;
       } else {
         send_recv_buffer = VariableVisitor::GetMutableTensor(out_var)
@@ -162,7 +162,7 @@ void BroadcastOpHandle::BroadcastOneVar(
 
      void *send_recv_buffer = nullptr;
      if (root_id == dst_id) {
-       send_recv_buffer = const_cast<void *>(in_tensor.data<void>());
+       send_recv_buffer = const_cast<void *>(in_tensor.data());
        out_handle = out_var_handle;
      } else {
        send_recv_buffer = VariableVisitor::GetMutableTensor(out_var)
8 changes: 4 additions & 4 deletions paddle/fluid/framework/details/fused_all_reduce_op_handle.cc
@@ -220,17 +220,17 @@ void FusedAllReduceOpHandle::FusedAllReduceFunc(
       g_tensor.begin(), g_tensor.end(),
       [](const std::pair<std::string, const LoDTensor *> &grad1,
          const std::pair<std::string, const LoDTensor *> &grad2) -> bool {
-        return grad1.second->data<void>() < grad2.second->data<void>();
+        return grad1.second->data() < grad2.second->data();
       });
 
   size_t size_of_dtype = framework::SizeOfType(dtype);
   for (size_t k = 1; k < g_tensor.size(); ++k) {
-    const void *cur_address = g_tensor.at(k - 1).second->data<void>();
+    const void *cur_address = g_tensor.at(k - 1).second->data();
     int64_t len = g_tensor.at(k - 1).second->numel();
     auto offset = platform::Alignment(len * size_of_dtype, places_[0]);
     void *infer_next_address = reinterpret_cast<void *>(
        reinterpret_cast<uintptr_t>(cur_address) + offset);
-    const void *next_address = g_tensor.at(k).second->data<void>();
+    const void *next_address = g_tensor.at(k).second->data();
 
     VLOG(10) << string::Sprintf(
        "Input[%d](%s) address: 0X%02x, Input[%d](%s) address: 0X%02x, Infer "
@@ -267,7 +267,7 @@ void FusedAllReduceOpHandle::FusedAllReduceFunc(
   std::vector<const void *> lod_tensor_data;
   lod_tensor_data.reserve(place_num);
   for (size_t scope_idx = 0; scope_idx < place_num; ++scope_idx) {
-    auto data = grads_tensor.at(scope_idx).at(0).second->data<void>();
+    auto data = grads_tensor.at(scope_idx).at(0).second->data();
     lod_tensor_data.emplace_back(data);
   }
   std::vector<std::string> grad_var_names;
6 changes: 3 additions & 3 deletions paddle/fluid/framework/details/reduce_op_handle.cc
@@ -159,7 +159,7 @@ void ReduceOpHandle::RunImpl() {
     VisitDataType(lod_tensors[0]->type(), func);
 
     auto trg = out_var->GetMutable<framework::LoDTensor>();
-    if (reduce_sum_trg.data<void>() != trg->data<void>()) {
+    if (reduce_sum_trg.data() != trg->data()) {
       TensorCopy(reduce_sum_trg, platform::CPUPlace(), trg);
     }
   }
@@ -181,7 +181,7 @@ void ReduceOpHandle::RunImpl() {
       int dev_id = BOOST_GET_CONST(platform::CUDAPlace, p).device;
       auto &nccl_ctx = nccl_ctxs_->at(dev_id);
 
-      void *buffer = const_cast<void *>(lod_tensor.data<void>());
+      void *buffer = const_cast<void *>(lod_tensor.data());
      void *recvbuffer = nullptr;
      if (root_id == dev_id) {
        recvbuffer =
@@ -227,7 +227,7 @@ void ReduceOpHandle::RunImpl() {
      int dev_id = BOOST_GET_CONST(platform::XPUPlace, p).device;
      auto &bkcl_ctx = bkcl_ctxs_->at(dev_id);
 
-      void *buffer = const_cast<void *>(lod_tensor.data<void>());
+      void *buffer = const_cast<void *>(lod_tensor.data());
      void *recvbuffer = nullptr;
      if (root_id == dev_id) {
        recvbuffer =
4 changes: 2 additions & 2 deletions paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc
@@ -146,7 +146,7 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
   for (size_t i = 0; i < local_scopes_.size(); ++i) {
     auto &place = places_[i];
     auto &in = *ins[i];
-    void *in_tensor_buf = const_cast<void *>(in.data<void>());
+    void *in_tensor_buf = const_cast<void *>(in.data());
 
     auto &out = *outs[i];
     float *out_tensor_buf = out.data<float>();
@@ -175,7 +175,7 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
     // dgc use ncclAllGather to get all the encoded data
     // so the buffer need nranks.
     int buf_size = nranks_ * encode_size;
-    void *gather_buff = gathers[i]->data<void>();
+    void *gather_buff = gathers[i]->data();
 
     VLOG(10) << "in_numel:" << in_numel << ", out_numel:" << out_numel
              << ", nranks:" << nranks_ << ", gather_buf size:" << buf_size
2 changes: 1 addition & 1 deletion paddle/fluid/framework/dlpack_tensor.cc
@@ -134,7 +134,7 @@ struct DLDeviceVisitor : public boost::static_visitor<::DLDevice> {
 
 DLPackTensor::DLPackTensor(const Tensor &tensor, LaneType lanes) {
   // init data, data buffer
-  t_.data = const_cast<void *>(tensor.data<void>());
+  t_.data = const_cast<void *>(tensor.data());
 
   // init device, DLDevice type with device_type and device_id
   auto place = tensor.place();
3 changes: 1 addition & 2 deletions paddle/fluid/framework/fleet/ascend_wrapper.h
@@ -150,8 +150,7 @@ class AscendInstance {
                              VarTypeToGeType(tensor->type()));
     tensor_desc.SetRealDimCnt(vec_dim.size());
 
-    const uint8_t *data =
-        reinterpret_cast<const uint8_t *>(tensor->data<void>());
+    const uint8_t *data = reinterpret_cast<const uint8_t *>(tensor->data());
     std::vector<uint8_t> dst(numel * GeTypeSize(tensor->type()));
     memcpy(dst.data(), data, GeTypeSize(tensor->type()) * numel);
     ge::Tensor ge_tensor(tensor_desc, dst);
9 changes: 4 additions & 5 deletions paddle/fluid/framework/fleet/heter_wrapper.cc
@@ -112,20 +112,19 @@ void HeterWrapper::SerializeToReq(const std::string& varname, Scope* scope,
   char* data_ptr = const_cast<char*>(req_data->data());
 
   if (platform::is_cpu_place(tensor->place())) {
-    memcpy(data_ptr, tensor->data<void>(),
+    memcpy(data_ptr, tensor->data(),
            tensor->numel() * SizeOfType(tensor->type()));
   } else {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
     memory::Copy(platform::CPUPlace(), data_ptr,
                  BOOST_GET_CONST(platform::CUDAPlace, tensor->place()),
-                 tensor->data<void>(),
-                 tensor->numel() * SizeOfType(tensor->type()), nullptr);
+                 tensor->data(), tensor->numel() * SizeOfType(tensor->type()),
+                 nullptr);
 #endif
 #ifdef PADDLE_WITH_XPU
     memory::Copy(platform::CPUPlace(), data_ptr,
                  BOOST_GET_CONST(platform::XPUPlace, tensor->place()),
-                 tensor->data<void>(),
-                 tensor->numel() * SizeOfType(tensor->type()));
+                 tensor->data(), tensor->numel() * SizeOfType(tensor->type()));
 #endif
   }
 }
12 changes: 6 additions & 6 deletions paddle/fluid/framework/heterxpu_trainer.cc
@@ -339,7 +339,7 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
         auto dev_id =
             BOOST_GET_CONST(platform::CUDAPlace, thread_tensor->place()).device;
         platform::CUDADeviceGuard guard(dev_id);
-        cudaMemset(thread_tensor->data<void>(), 0,
+        cudaMemset(thread_tensor->data(), 0,
                    thread_tensor->numel() * SizeOfType(thread_tensor->type()));
 #endif
 #ifdef PADDLE_WITH_XPU
@@ -351,11 +351,11 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
         platform::DeviceContext* dev_ctx = pool.Get(place);
         const platform::XPUDeviceContext* xpu_ctx =
             reinterpret_cast<const platform::XPUDeviceContext*>(dev_ctx);
-        xpu::memset(xpu_ctx->x_context(), thread_tensor->data<void>(), 0,
+        xpu::memset(xpu_ctx->x_context(), thread_tensor->data(), 0,
                     thread_tensor->numel() * SizeOfType(thread_tensor->type()));
 #endif
       } else {
-        memset(thread_tensor->data<void>(), 0,
+        memset(thread_tensor->data(), 0,
               thread_tensor->numel() * SizeOfType(thread_tensor->type()));
      }
    }
@@ -367,7 +367,7 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
      auto dev_id =
          BOOST_GET_CONST(platform::CUDAPlace, root_tensor->place()).device;
      platform::CUDADeviceGuard guard(dev_id);
-      cudaMemset(root_tensor->data<void>(), 0,
+      cudaMemset(root_tensor->data(), 0,
                 root_tensor->numel() * SizeOfType(root_tensor->type()));
 #endif
 #ifdef PADDLE_WITH_XPU
@@ -379,11 +379,11 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
      platform::DeviceContext* dev_ctx = pool.Get(place);
      const platform::XPUDeviceContext* xpu_ctx =
          reinterpret_cast<const platform::XPUDeviceContext*>(dev_ctx);
-      xpu::memset(xpu_ctx->x_context(), root_tensor->data<void>(), 0,
+      xpu::memset(xpu_ctx->x_context(), root_tensor->data(), 0,
                  root_tensor->numel() * SizeOfType(root_tensor->type()));
 #endif
    } else {
-      memset(root_tensor->data<void>(), 0,
+      memset(root_tensor->data(), 0,
            root_tensor->numel() * SizeOfType(root_tensor->type()));
    }
  }
4 changes: 2 additions & 2 deletions paddle/fluid/framework/lod_tensor.h
@@ -144,8 +144,8 @@ class LoDTensor : public Tensor {
    */
   size_t NumLevels() const { return lod_.size(); }
   /*
-  * Number of elements in a level.
-  */
+   * Number of elements in a level.
+   */
   size_t NumElements(size_t level = 0) const {
     PADDLE_ENFORCE_LT(
         level, NumLevels(),
@@ -71,7 +71,6 @@ ProgramDesc load_from_file(const std::string& file_name) {
   fin.seekg(0, std::ios::beg);
   fin.read(&buffer[0], buffer.size());
   fin.close();
-
   ProgramDesc program_desc(buffer);
   return program_desc;
 }
4 changes: 2 additions & 2 deletions paddle/fluid/framework/parallel_executor.cc
@@ -788,7 +788,7 @@ void ParallelExecutor::BCastParamsToDevices(
       void *buffer;
 
       if (i == 0 && trainer_id == 0) {
-        buffer = const_cast<void *>(main_tensor.data<void>());
+        buffer = const_cast<void *>(main_tensor.data());
       } else {
         auto local_scope = member_->local_scopes_[i];
         auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
@@ -831,7 +831,7 @@ void ParallelExecutor::BCastParamsToDevices(
      void *buffer;
 
      if (i == 0 && trainer_id == 0) {
-       buffer = const_cast<void *>(main_tensor.data<void>());
+       buffer = const_cast<void *>(main_tensor.data());
      } else {
        auto local_scope = member_->local_scopes_[i];
        auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
5 changes: 5 additions & 0 deletions paddle/fluid/framework/program_desc.cc
@@ -101,20 +101,25 @@ ProgramDesc::ProgramDesc(const std::string &binary_str) {
   PADDLE_ENFORCE_EQ(desc_.ParseFromString(binary_str), true,
                     platform::errors::InvalidArgument(
                         "Failed to parse program_desc from binary string."));
+  VLOG(1) << 3333;
   InitFromProto();
 }
 
 void ProgramDesc::InitFromProto() {
+  VLOG(1) << 4444;
   for (auto &block_desc : *desc_.mutable_blocks()) {
     blocks_.emplace_back(new BlockDesc(this, &block_desc));
   }
+  VLOG(1) << 5555;
   for (auto &block : blocks_) {
     for (auto *op : block->AllOps()) {
       for (const auto &attr : op->Proto()->attrs()) {
         if (attr.type() == proto::AttrType::BLOCK) {
+          VLOG(1) << 6666;
           size_t blk_idx = attr.block_idx();
           op->SetBlockAttr(attr.name(), this->MutableBlock(blk_idx));
         } else if (attr.type() == proto::AttrType::BLOCKS) {
+          VLOG(1) << 7777;
           auto blks_idx = attr.blocks_idx();
           std::vector<BlockDesc *> block_descs;
           for (int blk_idx : blks_idx) {