[PTen] Migrate proto::VarType outside of Pten (PaddlePaddle#39411)
* #1 migrate dist-related type() -> dtype()

* move datatype function from pten -> fluid/framework

* change type() in imperative into convert(dtype())

* modify xx_tensor->type into xx_tensor->dtype

* change the set_type interface and the caller

* modify xx_tensor.type into xx_tensor.dtype

* fix mutable_data(place, dtype())

* change caller of mutable_data in pten and distributed

* change the caller of mutable_data in fluid/framework

* change the caller of mutable_data in imperative directory

* mutable_data: inference

* update the call of mutable_data

* transfer MakePtenScalarArray, MakePtenScalar, ResetHolderWithType

* pass compilation; the next step is to remove VarType from Pten

* fix all and remove VarType from pten; builds on Linux. Next task is the other platforms

* fix conflict with develop

* fix compile error

* Fix reset conversion

* fix conflict

* fix compilation problem

* fix typo

* Fix << in tensor_utils.cc

* fix type->dtype

* fix unittest

* fix tensor init constructor

* fix DataTypeSize for BFloat16

* fix code style

* fix npu compile error

* fix npu

* compile npu successfully

* fix conflict

* fix conflict

Co-authored-by: xiongkun <xiongkun03@baidu.com>
Aurelius84 and 2742195759 authored Feb 15, 2022
1 parent 9c2cee1 commit 7e7e940
Showing 352 changed files with 2,175 additions and 1,445 deletions.
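
Most of the 352 files follow the same mechanical pattern, visible in the excerpts below: Tensor::type(), which returned the fluid proto::VarType::Type enum, becomes Tensor::dtype(), which returns pten's DataType; framework::TransToProtoVarType converts back wherever the proto enum is still required, and framework::DataTypeSize replaces framework::SizeOfType in byte-length math. A minimal before/after sketch of a typical call site (the Migrated wrapper is hypothetical; the helper names are the ones this commit introduces):

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/lod_tensor.h"

namespace fw = paddle::framework;

// Before this commit a call site read:
//   auto type = tensor.type();                                   // proto::VarType::Type
//   auto len  = tensor.numel() * fw::SizeOfType(tensor.type());  // byte length
void Migrated(const fw::LoDTensor& tensor) {
  // After: dtype() returns pten's DataType; convert back only where the
  // proto enum is still needed (protobuf messages, fetch metadata, ...).
  auto proto_type = fw::TransToProtoVarType(tensor.dtype());
  // Byte lengths now come straight from the new DataTypeSize helper.
  auto data_len = tensor.numel() * fw::DataTypeSize(tensor.dtype());
  (void)proto_type;
  (void)data_len;
}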
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/fleet_executor/dist_model.cc
@@ -562,7 +562,7 @@ bool DistModel::FetchResults(std::vector<DistModelTensor> *output_data,
     framework::FetchType &fetch_var =
         framework::GetFetchVariable(*scope, "fetch", idx);
     auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var);
-    auto type = fetch.type();
+    auto type = framework::TransToProtoVarType(fetch.dtype());
     auto output = &(output_data->at(i));
     output->name = idx_to_fetches_[idx];
     bool rst = false;
67 changes: 41 additions & 26 deletions paddle/fluid/distributed/ps/service/brpc_utils.cc
@@ -13,8 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/distributed/ps/service/brpc_utils.h"
+
 #include <arpa/inet.h>
 #include <netdb.h>
+
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/platform/enforce.h"
 
 namespace paddle {
@@ -98,25 +101,29 @@ void SerializeLodTensor(framework::Variable* var,
       }
     }
   }
-  var_msg->set_data_type(static_cast<VarMsg::Type>(tensor->type()));
+  var_msg->set_data_type(static_cast<VarMsg::Type>(
+      framework::TransToProtoVarType(tensor->dtype())));
   for (auto& dim : framework::vectorize(tensor->dims())) {
     var_msg->add_dims(dim);
   }
   // IO Buffer
   if (platform::is_cpu_place(tensor->place())) {
-    auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
+    auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
     iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
     iobuf->append(reinterpret_cast<const char*>(tensor->data()), data_len);
   } else {
 #ifdef PADDLE_WITH_CUDA
-    char* temp_ptr = new char[tensor->numel() *
-                              framework::SizeOfType(tensor->type())];  // NOLINT
+    char* temp_ptr =
+        new char[tensor->numel() *
+                 framework::DataTypeSize(tensor->dtype())];  // NOLINT
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     memory::Copy(
         platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(),
-        tensor->numel() * framework::SizeOfType(tensor->type()), stream);
-    auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
+        tensor->numel() * framework::SizeOfType(
+                              framework::TransToProtoVarType(tensor->dtype())),
+        stream);
+    auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
     iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
     iobuf->append(reinterpret_cast<const char*>(temp_ptr), data_len);
     delete[] temp_ptr;
@@ -139,25 +146,29 @@ void SerializeSelectedRows(framework::Variable* var,
   var_data->resize(rows->size() * sizeof(int64_t));
   char* data_ptr = const_cast<char*>(var_data->data());
   memcpy(data_ptr, &((*rows)[0]), rows->size() * sizeof(int64_t));
-  var_msg->set_data_type(static_cast<VarMsg::Type>(tensor->type()));
+  var_msg->set_data_type(static_cast<VarMsg::Type>(
+      framework::TransToProtoVarType(tensor->dtype())));
   for (auto& dim : framework::vectorize(tensor->dims())) {
     var_msg->add_dims(dim);
   }
   // IO Buffer
   if (platform::is_cpu_place(tensor->place())) {
-    auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
+    auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
     iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
     iobuf->append(reinterpret_cast<const char*>(tensor->data()), data_len);
   } else {
 #ifdef PADDLE_WITH_CUDA
-    char* temp_ptr = new char[tensor->numel() *
-                              framework::SizeOfType(tensor->type())];  // NOLINT
+    char* temp_ptr =
+        new char[tensor->numel() *
+                 framework::DataTypeSize(tensor->dtype())];  // NOLINT
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     memory::Copy(
         platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(),
-        tensor->numel() * framework::SizeOfType(tensor->type()), stream);
-    auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
+        tensor->numel() * framework::SizeOfType(
+                              framework::TransToProtoVarType(tensor->dtype())),
+        stream);
+    auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
     iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
     iobuf->append(reinterpret_cast<const char*>(temp_ptr), data_len);
     delete[] temp_ptr;
@@ -225,8 +236,9 @@ void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg,
   }
   tensor->set_lod(lod);
 
-  void* tensor_data =
-      tensor->mutable_data(place, VarMessageToVarType(msg.data_type()));
+  void* tensor_data = tensor->mutable_data(
+      place,
+      framework::TransToPtenDataType(VarMessageToVarType(msg.data_type())));
 
   // IO Buffer
   if (platform::is_cpu_place(place)) {
@@ -236,15 +248,16 @@ void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg,
   } else if (platform::is_gpu_place(place)) {
 #ifdef PADDLE_WITH_CUDA
     unsigned long data_len;  // NOLINT
-    char* temp_ptr = new char[tensor->numel() *
-                              framework::SizeOfType(tensor->type())];  // NOLINT
-    io_buffer_itr.copy_and_forward((void*)(&data_len), 8);             // NOLINT
-    io_buffer_itr.copy_and_forward((void*)temp_ptr, data_len);         // NOLINT
+    char* temp_ptr =
+        new char[tensor->numel() *
+                 framework::DataTypeSize(tensor->dtype())];  // NOLINT
+    io_buffer_itr.copy_and_forward((void*)(&data_len), 8);   // NOLINT
+    io_buffer_itr.copy_and_forward((void*)temp_ptr, data_len);  // NOLINT
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     memory::Copy(
         place, tensor_data, platform::CPUPlace(), (void*)temp_ptr,  // NOLINT
-        tensor->numel() * framework::SizeOfType(tensor->type()), stream);
+        tensor->numel() * framework::DataTypeSize(tensor->dtype()), stream);
     delete[] temp_ptr;
 #endif
   }
@@ -266,24 +279,26 @@ void DeserializeSelectedRows(
     vec_dim.push_back(x);
   }
   tensor->Resize(framework::make_ddim(vec_dim));
-  void* tensor_data =
-      tensor->mutable_data(place, VarMessageToVarType(msg.data_type()));
+  void* tensor_data = tensor->mutable_data(
+      place,
+      framework::TransToPtenDataType(VarMessageToVarType(msg.data_type())));
   // IO Buffer
   if (platform::is_cpu_place(place)) {
     unsigned long data_len;  // NOLINT
     io_buffer_itr.copy_and_forward((void*)(&data_len), 8);  // NOLINT
     io_buffer_itr.copy_and_forward(tensor_data, data_len);
   } else if (platform::is_gpu_place(place)) {
 #ifdef PADDLE_WITH_CUDA
-    char* temp_ptr = new char[tensor->numel() *
-                              framework::SizeOfType(tensor->type())];  // NOLINT
-    unsigned long data_len;                                 // NOLINT
-    io_buffer_itr.copy_and_forward((void*)(&data_len), 8);  // NOLINT
+    char* temp_ptr =
+        new char[tensor->numel() *
+                 framework::DataTypeSize(tensor->dtype())];  // NOLINT
+    unsigned long data_len;                                  // NOLINT
+    io_buffer_itr.copy_and_forward((void*)(&data_len), 8);   // NOLINT
     io_buffer_itr.copy_and_forward(temp_ptr, data_len);
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     memory::Copy(place, tensor_data, platform::CPUPlace(), temp_ptr,
-                 tensor->numel() * framework::SizeOfType(tensor->type()),
+                 tensor->numel() * framework::DataTypeSize(tensor->dtype()),
                  stream);
     delete[] temp_ptr;
 #endif
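Both serialize paths above use the same wire framing: an 8-byte length prefix followed by the raw tensor bytes, with the length now computed as numel() times DataTypeSize(dtype()). A self-contained sketch of that framing convention, with std::string standing in for brpc's IOBuf (an assumption made purely for illustration):

#include <cstdint>
#include <string>
#include <vector>

// Append a length-prefixed payload: 8 bytes of length, then the raw data.
// Mirrors the iobuf->append(&data_len, 8) / iobuf->append(ptr, data_len)
// pair in SerializeLodTensor and SerializeSelectedRows above.
void AppendFramed(std::string* buf, const void* data, std::uint64_t data_len) {
  buf->append(reinterpret_cast<const char*>(&data_len), sizeof(data_len));
  buf->append(reinterpret_cast<const char*>(data), data_len);
}

int main() {
  std::vector<float> payload = {1.f, 2.f, 3.f};
  // data_len = numel * element size, exactly what
  // framework::DataTypeSize(DataType::FLOAT32) yields per element.
  std::uint64_t data_len = payload.size() * sizeof(float);
  std::string buf;
  AppendFramed(&buf, payload.data(), data_len);
  return buf.size() == 8 + 12 ? 0 : 1;  // length prefix + three floats
}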
6 changes: 4 additions & 2 deletions paddle/fluid/distributed/ps/service/heter_client.cc
@@ -13,6 +13,8 @@
 // limitations under the License.
 
 #include "paddle/fluid/distributed/ps/service/heter_client.h"
+
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/platform/profiler.h"
 #include "paddle/fluid/string/split.h"
 
@@ -39,13 +41,13 @@ int GetMicroId(const platform::DeviceContext& ctx,
   } else {
 #ifdef PADDLE_WITH_CUDA
     std::vector<char> temp;
-    temp.resize(tensor->numel() * framework::SizeOfType(tensor->type()));
+    temp.resize(tensor->numel() * framework::DataTypeSize(tensor->dtype()));
     char* temp_ptr = temp.data();
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     memory::Copy(
         platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(),
-        tensor->numel() * framework::SizeOfType(tensor->type()), stream);
+        tensor->numel() * framework::DataTypeSize(tensor->dtype()), stream);
     float* temp_ptr_float = reinterpret_cast<float*>(temp_ptr);
     micro_id = static_cast<int>(temp_ptr_float[0]);
 #endif
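On the GPU branch, GetMicroId stages the whole tensor into a host buffer sized with DataTypeSize, then reads element zero as a float. A host-only sketch of that stage-and-read pattern, with std::memcpy standing in for the device-to-host memory::Copy (an illustrative assumption):

#include <cstddef>
#include <cstring>
#include <vector>

// Stage `numel` floats into a char buffer, then read element 0, the way
// GetMicroId does after its device-to-host copy above (numel >= 1 assumed).
int ReadFirstElementAsInt(const float* src, std::size_t numel) {
  std::vector<char> temp(numel * sizeof(float));  // numel * DataTypeSize
  std::memcpy(temp.data(), src, temp.size());
  float first;
  std::memcpy(&first, temp.data(), sizeof(first));  // element [0]
  return static_cast<int>(first);
}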
1 change: 1 addition & 0 deletions paddle/fluid/eager/grad_tensor_holder.cc
@@ -15,6 +15,7 @@
 #include "paddle/fluid/eager/grad_tensor_holder.h"
 #include "paddle/fluid/imperative/gradient_accumulator.h"
 
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/var_type.h"
 #include "paddle/pten/kernels/funcs/math_function.h"
 
6 changes: 6 additions & 0 deletions paddle/fluid/framework/CMakeLists.txt
@@ -452,4 +452,10 @@ endif()
 
 cc_test(scope_guard_test SRCS scope_guard_test.cc)
 cc_test(pten_utils_test SRCS pten_utils_test.cc DEPS pten_utils)
+
+if(WITH_GPU OR WITH_ROCM)
+  cc_library(fluid_convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info)
+else()
+  cc_library(fluid_convert_utils SRCS convert_utils.cc DEPS data_type place)
+endif()
 cc_test(custom_kernel_test SRCS custom_kernel_test.cc DEPS custom_kernel pten_tensor)
185 changes: 185 additions & 0 deletions paddle/fluid/framework/convert_utils.cc
@@ -0,0 +1,185 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/convert_utils.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device/gpu/gpu_info.h"

namespace paddle {
namespace framework {

paddle::experimental::DataType TransToPtenDataType(
    const paddle::framework::proto::VarType::Type& dtype) {
  // Set the order of case branches according to the frequency with
  // which each data type is used.
  switch (dtype) {
    case paddle::framework::proto::VarType::FP32:
      return DataType::FLOAT32;
    case paddle::framework::proto::VarType::FP64:
      return DataType::FLOAT64;
    case paddle::framework::proto::VarType::INT64:
      return DataType::INT64;
    case paddle::framework::proto::VarType::INT32:
      return DataType::INT32;
    case paddle::framework::proto::VarType::INT8:
      return DataType::INT8;
    case paddle::framework::proto::VarType::UINT8:
      return DataType::UINT8;
    case paddle::framework::proto::VarType::INT16:
      return DataType::INT16;
    case paddle::framework::proto::VarType::COMPLEX64:
      return DataType::COMPLEX64;
    case paddle::framework::proto::VarType::COMPLEX128:
      return DataType::COMPLEX128;
    case paddle::framework::proto::VarType::FP16:
      return DataType::FLOAT16;
    case paddle::framework::proto::VarType::BF16:
      return DataType::BFLOAT16;
    case paddle::framework::proto::VarType::BOOL:
      return DataType::BOOL;
    default:
      return DataType::UNDEFINED;
  }
}

paddle::framework::proto::VarType::Type TransToProtoVarType(
    const paddle::experimental::DataType& dtype) {
  // Set the order of case branches according to the frequency with
  // which each data type is used.
  switch (dtype) {
    case DataType::FLOAT32:
      return paddle::framework::proto::VarType::FP32;
    case DataType::FLOAT64:
      return paddle::framework::proto::VarType::FP64;
    case DataType::INT64:
      return paddle::framework::proto::VarType::INT64;
    case DataType::INT32:
      return paddle::framework::proto::VarType::INT32;
    case DataType::INT8:
      return paddle::framework::proto::VarType::INT8;
    case DataType::UINT8:
      return paddle::framework::proto::VarType::UINT8;
    case DataType::INT16:
      return paddle::framework::proto::VarType::INT16;
    case DataType::COMPLEX64:
      return paddle::framework::proto::VarType::COMPLEX64;
    case DataType::COMPLEX128:
      return paddle::framework::proto::VarType::COMPLEX128;
    case DataType::FLOAT16:
      return paddle::framework::proto::VarType::FP16;
    case DataType::BFLOAT16:
      return paddle::framework::proto::VarType::BF16;
    case DataType::BOOL:
      return paddle::framework::proto::VarType::BOOL;
    default:
      PADDLE_THROW(paddle::platform::errors::Unimplemented(
          "Unsupported data type `%s` when casting it into "
          "paddle data type.",
          dtype));
  }
}

size_t DataTypeSize(DataType dtype) {
  switch (dtype) {
    case DataType::UNDEFINED:
      return 0;
    case DataType::BOOL:
      return sizeof(bool);
    case DataType::INT8:
      return sizeof(int8_t);
    case DataType::UINT8:
      return sizeof(uint8_t);
    case DataType::INT16:
      return sizeof(int16_t);
    case DataType::INT32:
      return sizeof(int);
    case DataType::INT64:
      return sizeof(int64_t);
    case DataType::BFLOAT16:
      return sizeof(paddle::platform::bfloat16);
    case DataType::FLOAT16:
      return sizeof(paddle::platform::float16);
    case DataType::FLOAT32:
      return sizeof(float);
    case DataType::FLOAT64:
      return sizeof(double);
    case DataType::COMPLEX64:
      return sizeof(paddle::platform::complex<float>);
    case DataType::COMPLEX128:
      return sizeof(paddle::platform::complex<double>);
    default:
      return 0;
  }
}

DataType String2DataType(const std::string& str) {
  if (str == "bool") {
    return DataType::BOOL;
  } else if (str == "float16") {
    return DataType::FLOAT16;
  } else if (str == "float32") {
    return DataType::FLOAT32;
  } else if (str == "float64") {
    return DataType::FLOAT64;
  } else if (str == "int8") {
    return DataType::INT8;
  } else if (str == "int16") {
    return DataType::INT16;
  } else if (str == "int32") {
    return DataType::INT32;
  } else if (str == "int64") {
    return DataType::INT64;
  } else if (str == "uint8") {
    return DataType::UINT8;
  } else if (str == "complex64") {
    return DataType::COMPLEX64;
  } else if (str == "complex128") {
    return DataType::COMPLEX128;
  } else {
    return DataType::UNDEFINED;
  }
}

std::string DataType2String(DataType dtype) {
  switch (dtype) {
    case DataType::BOOL:
      return "bool";
    case DataType::INT8:
      return "int8";
    case DataType::UINT8:
      return "uint8";
    case DataType::INT16:
      return "int16";
    case DataType::INT32:
      return "int32";
    case DataType::INT64:
      return "int64";
    case DataType::FLOAT16:
      return "float16";
    case DataType::FLOAT32:
      return "float32";
    case DataType::FLOAT64:
      return "float64";
    case DataType::COMPLEX64:
      return "complex64";
    case DataType::COMPLEX128:
      return "complex128";
    default:
      PADDLE_THROW(paddle::platform::errors::InvalidArgument(
          "Unknown pten::DataType, the int value = %d.",
          static_cast<int>(dtype)));
      return "";
  }
}
} // namespace framework
} // namespace paddle
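
A brief usage sketch of the new helpers (the RoundTrip function is hypothetical; the expected values follow directly from the switch tables above):

#include "paddle/fluid/framework/convert_utils.h"

namespace fw = paddle::framework;
using paddle::experimental::DataType;

void RoundTrip() {
  // proto enum -> pten DataType and back again.
  DataType dt = fw::TransToPtenDataType(fw::proto::VarType::FP32);
  // dt == DataType::FLOAT32 here.
  auto proto = fw::TransToProtoVarType(dt);
  // proto == fw::proto::VarType::FP32 again.
  // Element sizes and string names follow the same tables:
  //   fw::DataTypeSize(DataType::BFLOAT16) == sizeof(platform::bfloat16)
  //   fw::DataType2String(DataType::FLOAT32) == "float32"
  //   fw::String2DataType("float32") == DataType::FLOAT32
  (void)dt;
  (void)proto;
}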
[The remaining 346 changed files are not shown.]
