Dev/op2func refactor 3 (PaddlePaddle#30)
* add a candidate dense tensor class, test=develop

* remove TensorBase::backend(), test=develop

* remove some ops, test=develop

* cherry-pick the pr of tensor meta, test=develop

* moves the dense tensor and some ops, test=develop

* update the linalg operator, test=develop

* update other operators, test=develop

* fix errors, test=develop

* fix bugs, test=develop

* try to resolve the problem of windows ci, test=develop

* updates codes, test=develop

* fix the tensor_utils.cc, test=develop

* modify the dense tensor, test=develop

* fix the data type, test=develop

Co-authored-by: shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
chenwhql and Shixiaowei02 authored Oct 26, 2021
1 parent e3ed2c6 commit 5240ac0
Showing 51 changed files with 632 additions and 1,106 deletions.
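The thread running through all of these files: the fluid-to-pten tensor conversion helpers leave paddle/fluid/framework/pten_utils.* and are replaced by shared factories from paddle/pten/hapi/lib/utils/tensor_utils.h. A condensed before/after sketch of a typical call site, taken from the dot_op.h hunk below:

    // Before this commit: framework-local helper, caller restates place and dtype.
    auto pt_x =
        framework::MakeTensorImpl<pten::DenseTensor>(*x, x->place(), x->type());

    // After this commit: shared utility from paddle/pten/hapi/lib/utils/tensor_utils.h,
    // which derives the tensor meta from the fluid tensor itself.
    auto pt_x = paddle::experimental::MakePtenDenseTensor(*x);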
8 changes: 5 additions & 3 deletions paddle/fluid/framework/CMakeLists.txt
@@ -195,10 +195,12 @@ cc_library(unused_var_check SRCS unused_var_check.cc DEPS glog no_need_buffer_va

 IF(WITH_XPU)
     cc_library(operator SRCS operator.cc DEPS xpu_op_list op_info device_context tensor scope glog trainer_desc_proto data_feed_proto
-        shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils pten pten_utils)
+        shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils
+        pten pten_utils kernel_factory)
 ELSE()
     cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog trainer_desc_proto data_feed_proto
-        shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils pten pten_utils)
+        shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils
+        pten pten_utils kernel_factory)
 ENDIF()
 
 cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context)
@@ -392,7 +394,7 @@ cc_library(save_load_util SRCS save_load_util.cc DEPS tensor scope layer)
 cc_test(save_load_util_test SRCS save_load_util_test.cc DEPS save_load_util tensor scope layer)
 cc_library(generator SRCS generator.cc DEPS enforce place)
 
-cc_library(pten_utils SRCS pten_utils.cc DEPS lod_tensor selected_rows place pten var_type_traits)
+cc_library(pten_utils SRCS pten_utils.cc DEPS lod_tensor selected_rows place pten var_type_traits pten_hapi_utils)
 
 # Get the current working branch
 execute_process(
12 changes: 6 additions & 6 deletions paddle/fluid/framework/operator.cc
@@ -1819,10 +1819,10 @@ pten::KernelContext OperatorWithKernel::BuildPtenKernelContext(

     paddle::SmallVector<std::shared_ptr<pten::TensorBase>> tmp_inputs;
     for (auto var : ins_vector) {
-      auto pt_in = framework::InputVariableToPtenTensor(*var, in_def);
-      tmp_inputs.emplace_back(pt_in);
+      tmp_inputs.emplace_back(
+          experimental::MakePtenTensorBaseFromVar(*var, in_def));
     }
-    op_kernel_ctx.EmplaceBackInputs(tmp_inputs);
+    op_kernel_ctx.EmplaceBackInputs(std::move(tmp_inputs));
   }
 
   for (size_t i = 0; i < output_names.size(); ++i) {
@@ -1831,10 +1831,10 @@ pten::KernelContext OperatorWithKernel::BuildPtenKernelContext(

     paddle::SmallVector<std::shared_ptr<pten::TensorBase>> tmp_outputs;
     for (auto var : outs_vector) {
-      auto pt_out = framework::OutputVariableToPtenTensor(var, out_def);
-      tmp_outputs.emplace_back(pt_out);
+      tmp_outputs.emplace_back(
+          experimental::MakePtenTensorBaseFromVar(var, out_def));
     }
-    op_kernel_ctx.EmplaceBackOutputs(tmp_outputs);
+    op_kernel_ctx.EmplaceBackOutputs(std::move(tmp_outputs));
   }
 
   for (size_t i = 0; i < attr_names.size(); ++i) {
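Both hunks above also change EmplaceBackInputs/EmplaceBackOutputs to receive the temporary vector via std::move. A minimal, self-contained C++ sketch of the idiom; std::vector stands in for paddle::SmallVector and the element type is simplified, so this illustrates the move, not Paddle's actual KernelContext API:

    #include <memory>
    #include <utility>
    #include <vector>

    struct KernelContext {
      // Taking the argument by value and moving it into place means a caller
      // that passes an rvalue hands over the whole buffer: no shared_ptr
      // copies, and therefore no atomic refcount bump per element.
      void EmplaceBackInputs(std::vector<std::shared_ptr<int>> ins) {
        inputs_ = std::move(ins);
      }
      std::vector<std::shared_ptr<int>> inputs_;
    };

    int main() {
      std::vector<std::shared_ptr<int>> tmp_inputs;
      tmp_inputs.emplace_back(std::make_shared<int>(42));
      KernelContext ctx;
      ctx.EmplaceBackInputs(std::move(tmp_inputs));  // mirrors the hunks above
      return ctx.inputs_.size() == 1 ? 0 : 1;
    }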
142 changes: 0 additions & 142 deletions paddle/fluid/framework/pten_utils.cc
@@ -24,148 +24,6 @@ limitations under the License. */
namespace paddle {
namespace framework {

// TODO(chenweihang, shixiaowei): adapt SelectedRows
template <>
std::shared_ptr<pten::DenseTensor> MakeTensorImpl<pten::DenseTensor, LoDTensor>(
const LoDTensor& tensor, pten::Backend backend,
paddle::experimental::DataType dtype,
paddle::experimental::DataLayout layout) {
auto holder = tensor.Holder();
auto tensor_impl = std::make_shared<pten::DenseTensor>(
pten::TensorMeta(tensor.dims(), backend, dtype, layout, tensor.offset()),
pten::TensorStatus());

if (holder != nullptr) {
tensor_impl->ShareAllocation(tensor.Holder());
}
return tensor_impl;
}

template <>
std::shared_ptr<pten::DenseTensor> MakeTensorImpl<pten::DenseTensor, Tensor>(
const Tensor& tensor, pten::Backend backend,
paddle::experimental::DataType dtype,
paddle::experimental::DataLayout layout) {
auto holder = tensor.Holder();
auto tensor_impl = std::make_shared<pten::DenseTensor>(
pten::TensorMeta(tensor.dims(), backend, dtype, layout, tensor.offset()),
pten::TensorStatus());

if (holder != nullptr) {
tensor_impl->ShareAllocation(tensor.Holder());
}
return tensor_impl;
}

template <>
std::shared_ptr<pten::DenseTensor> MakeTensorImpl<pten::DenseTensor>(
const LoDTensor& tensor, const platform::Place& place,
proto::VarType::Type type) {
return MakeTensorImpl<pten::DenseTensor, LoDTensor>(
tensor, pten::TransToPtenBackend(place), pten::TransToPtenDataType(type),
pten::TransToPtenDataLayout(tensor.layout()));
}

template <>
std::shared_ptr<pten::DenseTensor> MakeTensorImpl<pten::DenseTensor>(
const Tensor& tensor, const platform::Place& place,
proto::VarType::Type type) {
return MakeTensorImpl<pten::DenseTensor, Tensor>(
tensor, pten::TransToPtenBackend(place), pten::TransToPtenDataType(type),
pten::TransToPtenDataLayout(tensor.layout()));
}

template <>
void ShareTensorImpl<pten::DenseTensor>(pten::DenseTensor* tensor_impl,
LoDTensor* out) {
out->ResetHolderWithType(tensor_impl->allocation(),
pten::TransToProtoVarType(tensor_impl->data_type()));
}

template <>
void ShareTensorImpl<pten::DenseTensor>(pten::DenseTensor* tensor_impl,
Tensor* out) {
out->ResetHolderWithType(tensor_impl->allocation(),
pten::TransToProtoVarType(tensor_impl->data_type()));
}

std::shared_ptr<pten::TensorBase> InputVariableToPtenTensor(
const framework::Variable& variable, const pten::TensorArgDef& arg_def) {
auto expected_place = pten::TransToFluidPlace(arg_def.backend);

if (variable.template IsType<framework::LoDTensor>()) {
const auto& tensor = variable.template Get<framework::LoDTensor>();
if (!platform::is_same_place(tensor.place(), expected_place)) {
framework::LoDTensor tmp_tensor;
framework::TensorCopySync(tensor, expected_place, &tmp_tensor);
auto pt_in =
framework::MakeTensorImpl<pten::DenseTensor, framework::LoDTensor>(
tmp_tensor, arg_def.backend, arg_def.dtype, arg_def.layout);
return pt_in;
} else {
auto pt_in =
framework::MakeTensorImpl<pten::DenseTensor, framework::LoDTensor>(
tensor, arg_def.backend, arg_def.dtype, arg_def.layout);
return pt_in;
}
} else if (variable.template IsType<framework::SelectedRows>()) {
// TODO(chenweihang): now we don't deal with row and height
// by xiaowei's advice
const auto& tensor = variable.template Get<framework::SelectedRows>();
if (!platform::is_same_place(tensor.value().place(), expected_place)) {
framework::Tensor tmp_tensor;
TensorCopySync(tensor.value(), expected_place, &tmp_tensor);
// TODO(chenweihang): adapt SelectedRows by xiaowei's design
auto pt_in =
framework::MakeTensorImpl<pten::DenseTensor, framework::Tensor>(
tmp_tensor, arg_def.backend, arg_def.dtype, arg_def.layout);
return pt_in;
} else {
auto pt_in =
framework::MakeTensorImpl<pten::DenseTensor, framework::Tensor>(
tensor.value(), arg_def.backend, arg_def.dtype, arg_def.layout);
return pt_in;
}
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported shared input `%s` type now when call pt kernel.",
framework::ToTypeName(variable.Type())));
}
return nullptr;
}

std::shared_ptr<pten::TensorBase> OutputVariableToPtenTensor(
framework::Variable* variable, const pten::TensorArgDef& arg_def) {
// mutable_data before run kernel, to avoid share output form
// KernelContext to original tensor
if (variable->template IsType<framework::LoDTensor>()) {
auto* tensor = variable->template GetMutable<framework::LoDTensor>();
tensor->mutable_data(pten::TransToFluidPlace(arg_def.backend),
pten::TransToProtoVarType(arg_def.dtype));
auto pt_out =
framework::MakeTensorImpl<pten::DenseTensor, framework::LoDTensor>(
*tensor, arg_def.backend, arg_def.dtype, arg_def.layout);
return pt_out;
} else if (variable->template IsType<framework::SelectedRows>()) {
auto* tensor = variable->template GetMutable<framework::SelectedRows>();
tensor->mutable_value()->mutable_data(
pten::TransToFluidPlace(arg_def.backend),
pten::TransToProtoVarType(arg_def.dtype));
// TODO(chenweihang): adapt SelectedRows by xiaowei's design,
// here the row and height will lost in output!
auto pt_out =
framework::MakeTensorImpl<pten::DenseTensor, framework::Tensor>(
tensor->value(), arg_def.backend, arg_def.dtype, arg_def.layout);
return pt_out;
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported shared output `%s` type now when call pt kernel.",
framework::ToTypeName(variable->Type())));
}

return nullptr;
}

OpKernelType TransPtenKernelKeyToOpKernelType(
const pten::KernelKey& kernel_key) {
proto::VarType::Type data_type =
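These helpers are not simply dropped. Together with the pten_hapi_utils dependency added in the CMake hunk above, the deletion indicates the conversion logic now lives in paddle/pten/hapi/lib/utils/tensor_utils.h as MakePtenTensorBaseFromVar and MakePtenDenseTensor. A hedged sketch of the input path the replacement presumably keeps; the shape is inferred from the deleted InputVariableToPtenTensor above, not from the new file, and the SelectedRows branch is elided:

    std::shared_ptr<pten::TensorBase> MakePtenTensorBaseFromVar(
        const paddle::framework::Variable& variable,
        const pten::TensorArgDef& arg_def) {
      auto expected_place = pten::TransToFluidPlace(arg_def.backend);
      if (variable.IsType<paddle::framework::LoDTensor>()) {
        const auto& tensor = variable.Get<paddle::framework::LoDTensor>();
        if (!paddle::platform::is_same_place(tensor.place(), expected_place)) {
          // Tensor lives on the wrong device: copy synchronously, then wrap.
          paddle::framework::LoDTensor tmp_tensor;
          paddle::framework::TensorCopySync(tensor, expected_place, &tmp_tensor);
          return paddle::experimental::MakePtenDenseTensor(tmp_tensor);
        }
        return paddle::experimental::MakePtenDenseTensor(tensor);  // zero-copy wrap
      }
      // SelectedRows and the unsupported-type error path are omitted here.
      return nullptr;
    }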
30 changes: 1 addition & 29 deletions paddle/fluid/framework/pten_utils.h
@@ -25,41 +25,13 @@ limitations under the License. */
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/pten/api/include/core.h"
#include "paddle/pten/hapi/lib/utils/tensor_utils.h"
#include "paddle/utils/flat_hash_map.h"
#include "paddle/utils/small_vector.h"

namespace paddle {
namespace framework {

/* tensor translate */

template <typename PtenTensorImplT, typename VariableT>
std::shared_ptr<PtenTensorImplT> MakeTensorImpl(
const VariableT& tensor, pten::Backend backend,
paddle::experimental::DataType dtype,
paddle::experimental::DataLayout layout);

template <typename PtenTensorImplT>
std::shared_ptr<PtenTensorImplT> MakeTensorImpl(const LoDTensor& tensor,
const platform::Place& place,
proto::VarType::Type type);

template <typename PtenTensorImplT>
std::shared_ptr<PtenTensorImplT> MakeTensorImpl(const Tensor& tensor,
const platform::Place& place,
proto::VarType::Type type);

template <typename PtenTensorImplT>
void ShareTensorImpl(PtenTensorImplT* tensor_impl, LoDTensor* out);

template <typename PtenTensorImplT>
void ShareTensorImpl(PtenTensorImplT* tensor_impl, Tensor* out);

std::shared_ptr<pten::TensorBase> InputVariableToPtenTensor(
const framework::Variable& variable, const pten::TensorArgDef& arg_def);
std::shared_ptr<pten::TensorBase> OutputVariableToPtenTensor(
framework::Variable* variable, const pten::TensorArgDef& arg_def);

/* Kernel Key translate */

OpKernelType TransPtenKernelKeyToOpKernelType(
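After this pruning, pten_utils.h keeps only the kernel-key translation utilities. A minimal usage sketch, mirroring the surviving test in pten_utils_test.cc below:

    pten::KernelKey kernel_key(pten::Backend::CPU, pten::DataLayout::NCHW,
                               pten::DataType::FLOAT32);
    // Presumably maps to an OpKernelType carrying proto::VarType::FP32,
    // CPUPlace and NCHW; an assumption from the names, since the full field
    // mapping is not shown in this diff.
    auto op_kernel_type =
        paddle::framework::TransPtenKernelKeyToOpKernelType(kernel_key);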
60 changes: 0 additions & 60 deletions paddle/fluid/framework/pten_utils_test.cc
@@ -18,66 +18,6 @@ limitations under the License. */
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/variable.h"

TEST(PtenUtils, FluidTensorToPtenTensor) {
// 1. create tensor
paddle::framework::LoDTensor x;
paddle::framework::Tensor x2;
x.Resize({2});
x.mutable_data<float>(paddle::platform::CPUPlace());
x.data<float>()[0] = 0.2;
x.data<float>()[1] = 0.5;

// 2. test API
auto dense_x = paddle::framework::MakeTensorImpl<pten::DenseTensor>(
x, x.place(), x.type());

// 3. check result
std::vector<float> expect_value = {0.2, 0.5};
ASSERT_EQ(dense_x->data<float>()[0], expect_value[0]);
ASSERT_EQ(dense_x->data<float>()[1], expect_value[1]);
ASSERT_EQ(dense_x->backend(), pten::Backend::CPU);
ASSERT_EQ(dense_x->data_type(), pten::DataType::FLOAT32);
}

TEST(PtenUtils, VarToPtenTensor) {
// 1. create Variable
paddle::framework::Variable v;
auto selected_rows = v.GetMutable<paddle::framework::SelectedRows>();
paddle::framework::Tensor* value = selected_rows->mutable_value();
auto* data = value->mutable_data<int>(paddle::framework::make_ddim({1, 1}),
paddle::platform::CPUPlace());
data[0] = 123;
pten::Backend expect_backend = pten::Backend::CPU;

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
expect_backend = pten::Backend::CUDA;
#endif
auto tensor_def = pten::TensorArgDef(expect_backend, pten::DataLayout::NCHW,
pten::DataType::INT32);
// 2. test API
auto tensor_x = paddle::framework::InputVariableToPtenTensor(v, tensor_def);
// 3. check result
ASSERT_EQ(tensor_x->backend(), expect_backend);
ASSERT_EQ(tensor_x->data_type(), pten::DataType::INT32);
}

TEST(PtenUtils, PtenTensorToFluidTensor) {
pten::DenseTensor dense_tensor(
pten::TensorMeta(paddle::framework::make_ddim({1, 1}), pten::Backend::CPU,
pten::DataType::FLOAT32, pten::DataLayout::ANY),
pten::TensorStatus());
auto* data_ptr = dense_tensor.mutable_data<float>();
data_ptr[0] = 0.5;
// share allocation into fluid Tensor
paddle::framework::Tensor tensor;
paddle::framework::LoDTensor lod_tensor;
paddle::framework::ShareTensorImpl(&dense_tensor, &tensor);
paddle::framework::ShareTensorImpl(&dense_tensor, &lod_tensor);
// compare
ASSERT_EQ(tensor.data<float>()[0], 0.5);
ASSERT_EQ(lod_tensor.data<float>()[0], 0.5);
}

TEST(PtenUtils, TransPtenKernelKeyToOpKernelType) {
pten::KernelKey kernel_key(pten::Backend::CPU, pten::DataLayout::NCHW,
pten::DataType::FLOAT32);
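The removed tests exercised helpers that moved with the refactor; only the kernel-key test remains in this file. A hedged sketch of how the relocated factory could be tested instead, assuming MakePtenDenseTensor shares the fluid tensor's allocation the way the deleted MakeTensorImpl did:

    paddle::framework::LoDTensor x;
    x.Resize({2});
    x.mutable_data<float>(paddle::platform::CPUPlace());
    x.data<float>()[0] = 0.2f;
    x.data<float>()[1] = 0.5f;

    auto dense_x = paddle::experimental::MakePtenDenseTensor(x);
    // If the allocation is shared rather than copied, the values line up:
    ASSERT_EQ(dense_x->data<float>()[0], 0.2f);
    ASSERT_EQ(dense_x->data<float>()[1], 0.5f);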
14 changes: 6 additions & 8 deletions paddle/fluid/imperative/prepared_operator.cc
@@ -293,11 +293,10 @@ static pten::KernelContext BuildDygraphPtenKernelContext(
     paddle::SmallVector<std::shared_ptr<pten::TensorBase>> tmp_inputs;
     for (auto var : ins_vector) {
       const auto& variable = var->Var();
-
-      auto pt_in = framework::InputVariableToPtenTensor(variable, in_def);
-      tmp_inputs.emplace_back(pt_in);
+      tmp_inputs.emplace_back(
+          experimental::MakePtenTensorBaseFromVar(variable, in_def));
     }
-    op_kernel_ctx.EmplaceBackInputs(tmp_inputs);
+    op_kernel_ctx.EmplaceBackInputs(std::move(tmp_inputs));
   }
 
   for (size_t i = 0; i < output_names.size(); ++i) {
@@ -307,11 +306,10 @@ static pten::KernelContext BuildDygraphPtenKernelContext(
     paddle::SmallVector<std::shared_ptr<pten::TensorBase>> tmp_outputs;
     for (auto var : outs_vector) {
       auto* variable = var->MutableVar();
-
-      auto pt_out = framework::OutputVariableToPtenTensor(variable, out_def);
-      tmp_outputs.emplace_back(pt_out);
+      tmp_outputs.emplace_back(
+          experimental::MakePtenTensorBaseFromVar(variable, out_def));
     }
-    op_kernel_ctx.EmplaceBackOutputs(tmp_outputs);
+    op_kernel_ctx.EmplaceBackOutputs(std::move(tmp_outputs));
   }
 
   for (size_t i = 0; i < attr_names.size(); ++i) {
5 changes: 3 additions & 2 deletions paddle/fluid/operators/CMakeLists.txt
@@ -80,8 +80,9 @@ if(WITH_UNITY_BUILD)
 endif()
 
 set(OP_HEADER_DEPS ${OP_HEADER_DEPS} pten)
-set(OP_HEADER_DEPS ${OP_HEADER_DEPS} pten_utils)
-register_operators(EXCLUDES py_layer_op py_func_op warpctc_op dgc_op load_combine_op lstm_op run_program_op eye_op
+#set(OP_HEADER_DEPS ${OP_HEADER_DEPS} pten_utils)
+register_operators(EXCLUDES
+    py_layer_op py_func_op warpctc_op dgc_op load_combine_op lstm_op run_program_op eye_op
     recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op spectral_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS})
 
 op_library(run_program_op SRCS run_program_op.cc run_program_op.cu.cc DEPS executor_cache ${OP_HEADER_DEPS})
11 changes: 4 additions & 7 deletions paddle/fluid/operators/dot_op.h
@@ -16,13 +16,13 @@

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/pten_utils.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/platform/for_range.h"

// only can include the headers in paddle/pten/api dirs
#include "paddle/pten/api/include/core.h"
#include "paddle/pten/api/include/linalg.h"
#include "paddle/pten/hapi/lib/utils/tensor_utils.h"

namespace paddle {
namespace operators {
@@ -244,12 +244,9 @@ class DotKernel : public framework::OpKernel<T> {
     auto& dev_ctx = ctx.device_context<DeviceContext>();
     out->mutable_data<T>(x->place());
 
-    auto pt_x =
-        framework::MakeTensorImpl<pten::DenseTensor>(*x, x->place(), x->type());
-    auto pt_y =
-        framework::MakeTensorImpl<pten::DenseTensor>(*y, y->place(), y->type());
-    auto pt_out = framework::MakeTensorImpl<pten::DenseTensor>(*out, x->place(),
-                                                               x->type());
+    auto pt_x = paddle::experimental::MakePtenDenseTensor(*x);
+    auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
+    auto pt_out = paddle::experimental::MakePtenDenseTensor(*out);
 
     // call new kernel
     pten::Dot<T>(dev_ctx, *pt_x.get(), *pt_y.get(), pt_out.get());
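Note that the one-argument factory takes no place or dtype parameters, since everything can be read off the fluid tensor itself. A hedged sketch of what MakePtenDenseTensor plausibly does, pieced together from the MakeTensorImpl body deleted from pten_utils.cc above; this is an assumption about the new tensor_utils.h, not its actual contents:

    std::shared_ptr<pten::DenseTensor> MakePtenDenseTensor(
        const paddle::framework::Tensor& tensor) {
      // Derive the pten meta from the fluid tensor's own dims/place/dtype/layout.
      pten::TensorMeta meta(tensor.dims(),
                            pten::TransToPtenBackend(tensor.place()),
                            pten::TransToPtenDataType(tensor.type()),
                            pten::TransToPtenDataLayout(tensor.layout()),
                            tensor.offset());
      auto out = std::make_shared<pten::DenseTensor>(std::move(meta),
                                                     pten::TensorStatus());
      if (tensor.Holder() != nullptr) {
        out->ShareAllocation(tensor.Holder());  // share, don't copy, the buffer
      }
      return out;
    }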
6 changes: 2 additions & 4 deletions paddle/fluid/operators/fill_any_like_op.h
@@ -62,10 +62,8 @@ class FillAnyLikeKernel : public framework::OpKernel<T> {
         std::isnan(value), false,
         platform::errors::InvalidArgument("The filled value is NaN."));
 
-    auto pt_x = framework::MakeTensorImpl<pten::DenseTensor>(*in, in->place(),
-                                                             in->type());
-    auto pt_out = framework::MakeTensorImpl<pten::DenseTensor>(
-        *out, out->place(), out->type());
+    auto pt_x = paddle::experimental::MakePtenDenseTensor(*in);
+    auto pt_out = paddle::experimental::MakePtenDenseTensor(*out);
 
     const auto& dev_ctx = context.template device_context<DeviceContext>();
     // call new kernel
[The remaining 42 changed files are not shown here.]
